hacks/scrapers/scraper.py

45 lines
1.5 KiB
Python
Raw Normal View History

2013-03-11 22:54:52 +00:00
#!/Users/sneak/dev/venv-2.7/bin/python
# shouts to @AskAmex for being a replicant and
# David Bartle for making a very difficult-to-use ofxclient library
# 2013 jeffrey paul <sneak@datavibe.net>
# 5539 AD00 DE4C 42F3 AFE1 1575 0524 43F4 DF2A 55C2
2013-03-27 07:32:34 +00:00
from pprint import pformat
2013-03-11 22:54:52 +00:00
import os
import re
from ofxclient.request import Builder as OFXClientBuilder
class MockInstitution(object):
    """Plain holder for OFX login credentials and endpoint details.

    NOTE(review): presumably handed to the OFX request builder in place of a
    real ofxclient institution object -- confirm against the code that
    implements ``getInstitution()``.
    """

    def __init__(self, user=None, password=None, url=None, org=None, fid=None):
        """Record the credentials and the endpoint description.

        Every argument is optional and defaults to ``None``.
        """
        # Endpoint details are grouped under ``dsn``; the credentials sit
        # directly on the instance.
        self.dsn = {'url': url, 'org': org, 'fid': fid}
        self.password = password
        self.username = user
class FinancialScraper(object):
    """Base class that fetches account numbers and balances over OFX.

    ``scrape()`` calls ``self.getInstitution()``, ``self.isCC()`` and
    ``self.isBank()``, none of which is defined in this class; a concrete
    scraper has to provide them.
    """

    def __init__(self, *args, **kwargs):
        """Store the login credentials.

        Positional arguments are accepted but ignored.

        Raises:
            KeyError: if ``user`` or ``password`` is not passed as a keyword.
        """
        self.user = kwargs.pop('user')
        self.password = kwargs.pop('password')

    def scrape(self):
        """Query the institution and return the balance of every account.

        The discovered account table is pretty-printed to stdout before the
        balances are fetched.

        Returns:
            dict mapping each account id (the digits captured after
            ``<ACCTID>``) to ``{'balance': <text captured after <BALAMT>>}``.

        Raises:
            IndexError: if no statement could be requested for an account
                (neither ``isCC()`` nor ``isBank()`` is true) or the
                statement carries no ``<BALAMT>``. The type matches what the
                earlier ``findall(...)[0]`` lookup raised.
        """
        builder = OFXClientBuilder(self.getInstitution())
        listing = builder.doQuery(builder.acctQuery())

        # i could parse the sgml. or i could do this.
        acct_re = re.compile(r'<ACCTID>(\d+)', re.MULTILINE)
        out = {acctnum: {} for acctnum in acct_re.findall(listing)}

        print(pformat(out))

        balance_re = re.compile(r'<BALAMT>([\d\.\-]+)', re.MULTILINE)
        for acctnum in out:
            # Each account gets its own response variable so a balance is
            # never read out of the account listing or out of another
            # account's statement.
            statement = None
            # NOTE(review): '19700101000000' is presumably the statement
            # start date (1970-01-01 in OFX datetime form) -- confirm against
            # the ofxclient Builder signatures.
            if self.isCC():
                statement = builder.doQuery(
                    builder.ccQuery(acctnum, '19700101000000'))
            # Deliberately not ``elif``: when both checks are true the bank
            # statement is the one that is used, as before.
            if self.isBank():
                statement = builder.doQuery(
                    builder.baQuery(acctnum, '19700101000000', '', ''))

            if statement is None:
                raise IndexError(
                    'no statement requested for account %s: scraper is '
                    'neither a credit card nor a bank scraper' % acctnum)

            found = balance_re.search(statement)
            if found is None:
                raise IndexError(
                    'no <BALAMT> in statement for account %s' % acctnum)
            # First occurrence wins, same as ``findall(...)[0]``.
            out[acctnum]['balance'] = found.group(1)
        return out