From 258b2d3baf0f1e449f63ef66731d6cbabd4d411f Mon Sep 17 00:00:00 2001
From: Jeffrey Paul
Date: Mon, 11 Mar 2013 23:54:52 +0100
Subject: [PATCH] branchin' out

---
 scrapers/amexscraper.py   | 53 ++++++++-------------------------------
 scrapers/etradescraper.py | 30 ++++++++++++++++++++++
 scrapers/scraper.py       | 39 ++++++++++++++++++++++++++++
 3 files changed, 80 insertions(+), 42 deletions(-)
 create mode 100755 scrapers/etradescraper.py
 create mode 100755 scrapers/scraper.py

diff --git a/scrapers/amexscraper.py b/scrapers/amexscraper.py
index d2bbf43..463ecdb 100755
--- a/scrapers/amexscraper.py
+++ b/scrapers/amexscraper.py
@@ -8,51 +8,20 @@ from pprint import pformat
 import os
 import re
 import json
-import logging
-logging.basicConfig(level=logging.ERROR)
-log = logging.getLogger()
-from ofxclient.request import Builder
+from ofxclient.request import Builder as OFXClientBuilder
+from scraper import FinancialScraper, MockInstitution
 
-url = 'https://online.americanexpress.com/myca/ofxdl/desktop/' + \
-    'desktopDownload.do?request_type=nl_ofxdownload'
-
-# this exists because ofxclient is tightly coupled with their "Institution"
-# class which shits all over my home directory with caching and
-# credential storage that I don't want
-class MockAmexInstitution(object):
-    def __init__(self,user=None,password=None):
-        self.username = user
-        self.password = password
-        self.dsn = {
-            'url': url,
-            'org': 'AMEX',
-            'fid': '3101',
-        }
-
-class AmexScraper(object):
-    def __init__(self,*args,**kwargs):
-        self.user = kwargs.pop('user')
-        self.password = kwargs.pop('password')
-
-    def scrape(self):
-        i = MockAmexInstitution(
+class AmexScraper(FinancialScraper):
+    def getInstitution(self):
+        return MockInstitution(
             user=self.user,
-            password=self.password
+            password=self.password,
+            url='https://online.americanexpress.com/myca/ofxdl/desktop/' +
+                'desktopDownload.do?request_type=nl_ofxdownload',
+            org='AMEX',
+            fid='3101'
         )
-        b = Builder(i)
-        r = b.doQuery(b.acctQuery())
-
-        # i could parse the sgml. or i could do this.
-        c = re.compile(r'(\d+)', re.MULTILINE)
-        out = {}
-        for acctnum in re.findall(c,r):
-            out[acctnum] = {}
-        c = re.compile(r'([\d\.\-]+)', re.MULTILINE)
-        for acctnum in out.keys():
-            r = b.doQuery(b.ccQuery(acctnum,'19700101000000'))
-            out[acctnum]['balance'] = re.findall(c,r)[0]
-        return out
-
+
 def main():
     s = AmexScraper(
         user=os.environ['AMEXUSERNAME'],
diff --git a/scrapers/etradescraper.py b/scrapers/etradescraper.py
new file mode 100755
index 0000000..3d410e8
--- /dev/null
+++ b/scrapers/etradescraper.py
@@ -0,0 +1,30 @@
+#!/Users/sneak/dev/venv-2.7/bin/python
+# shouts to @AskAmex for being a replicant and
+# David Bartle for making a very difficult-to-use ofxclient library
+# 2013 jeffrey paul
+# 5539 AD00 DE4C 42F3 AFE1 1575 0524 43F4 DF2A 55C2
+from pprint import pformat
+import os
+import re
+import json
+from scraper import FinancialScraper, MockInstitution
+
+class EtradeScraper(FinancialScraper):
+    def getInstitution(self):
+        return MockInstitution(
+            user=self.user,
+            password=self.password,
+            url='https://ofx.etrade.com/cgi-ofx/etradeofx',
+            org='ETRADE BANK',
+            fid='9989'
+        )
+
+def main():
+    s = EtradeScraper(
+        user=os.environ['ETRADEUSERNAME'],
+        password=os.environ['ETRADEPASSWORD']
+    )
+    print json.dumps(s.scrape())
+
+if __name__=="__main__":
+    main()
diff --git a/scrapers/scraper.py b/scrapers/scraper.py
new file mode 100755
index 0000000..6eb4301
--- /dev/null
+++ b/scrapers/scraper.py
@@ -0,0 +1,39 @@
+#!/Users/sneak/dev/venv-2.7/bin/python
+# shouts to @AskAmex for being a replicant and
+# David Bartle for making a very difficult-to-use ofxclient library
+# 2013 jeffrey paul
+# 5539 AD00 DE4C 42F3 AFE1 1575 0524 43F4 DF2A 55C2
+
+import os
+import re
+from ofxclient.request import Builder as OFXClientBuilder
+
+class MockInstitution(object):
+    def __init__(self,user=None,password=None,url=None,org=None,fid=None):
+        self.username = user
+        self.password = password
+        self.dsn = {
+            'url': url,
+            'org': org,
+            'fid': fid,
+        }
+
+class FinancialScraper(object):
+    def __init__(self,*args,**kwargs):
+        self.user = kwargs.pop('user')
+        self.password = kwargs.pop('password')
+
+    def scrape(self):
+        b = OFXClientBuilder(self.getInstitution())
+        r = b.doQuery(b.acctQuery())
+
+        # i could parse the sgml. or i could do this.
+        c = re.compile(r'(\d+)', re.MULTILINE)
+        out = {}
+        for acctnum in re.findall(c,r):
+            out[acctnum] = {}
+        c = re.compile(r'([\d\.\-]+)', re.MULTILINE)
+        for acctnum in out.keys():
+            r = b.doQuery(b.ccQuery(acctnum,'19700101000000'))
+            out[acctnum]['balance'] = re.findall(c,r)[0]
+        return out