2013-03-03 22:28:58 +00:00
|
|
|
#!/usr/bin/env python3
|
2013-02-28 16:38:06 +00:00
|
|
|
|
|
|
|
import sys
|
|
|
|
from pprint import pformat
|
|
|
|
import requests
|
2013-03-03 22:28:58 +00:00
|
|
|
from pyquery import PyQuery as pq
|
2013-02-28 16:38:06 +00:00
|
|
|
import ssl
|
|
|
|
import OpenSSL
|
2013-03-03 22:28:58 +00:00
|
|
|
from urllib.parse import urlparse, urljoin
|
2013-02-28 16:38:06 +00:00
|
|
|
from datetime import datetime, timedelta
|
|
|
|
from pytz import UTC
|
|
|
|
import logging
|
2013-03-03 22:28:58 +00:00
|
|
|
# Root logger setup: only warnings and above are emitted. Switch the level to
# logging.DEBUG while troubleshooting.
logging.basicConfig(level=logging.WARNING)
log = logging.getLogger()
|
2013-02-28 16:38:06 +00:00
|
|
|
|
2013-03-03 22:28:58 +00:00
|
|
|
# FIXME: relative URLs are resolved against the URL that was passed in, so
# resolution will be wrong if that URL redirects somewhere else.
|
2013-02-28 16:38:06 +00:00
|
|
|
|
|
|
|
class CertificateProblem(Exception):
    """Signals that a site's TLS certificate is outside its validity window
    or is about to expire."""
|
|
|
|
|
|
|
|
class ReachabilityProblem(Exception):
    """Signals that a URL could not be fetched with an HTTP 200 response."""
|
|
|
|
|
|
|
|
class SSLCert(object):
    """Thin wrapper around a certificate object as returned by
    ``OpenSSL.crypto.load_certificate``.

    The validity window is exposed as naive ``datetime`` values (interpreted
    as UTC) together with boolean helpers for the usual "is this certificate
    still good?" questions.
    """

    # Timestamp layout produced by get_notBefore()/get_notAfter().
    _OSSL_TIME_FORMAT = '%Y%m%d%H%M%SZ'

    def __init__(self, c):
        """Store the wrapped certificate object ``c``."""
        self.c = c

    @staticmethod
    def _utcnow():
        """Return the current UTC time as a naive datetime.

        Equivalent to the deprecated ``datetime.utcnow()``; kept naive so it
        can be compared with the naive values from ``decode_ossl_time``.
        """
        from datetime import timezone
        return datetime.now(timezone.utc).replace(tzinfo=None)

    def decode_ossl_time(self, t):
        """Parse an OpenSSL ``YYYYMMDDhhmmssZ`` timestamp into a naive datetime.

        ``t`` may be ``bytes`` (decoded as UTF-8) or ``str``.
        """
        if isinstance(t, bytes):
            t = t.decode('utf-8')
        return datetime.strptime(t, self._OSSL_TIME_FORMAT)

    def notBefore(self):
        """Return the start of the certificate's validity window."""
        return self.decode_ossl_time(self.c.get_notBefore())

    def notAfter(self):
        """Return the end of the certificate's validity window."""
        return self.decode_ossl_time(self.c.get_notAfter())

    def commonName(self):
        """Return the subject's CN as ``str``, or ``None`` if there is none.

        The subject components may arrive as ``bytes`` pairs, so both the
        key and the value are decoded before use; comparing a bytes key
        against the str "CN" would never match on Python 3.
        """
        for key, value in self.c.get_subject().get_components():
            if isinstance(key, bytes):
                key = key.decode('utf-8')
            if key == "CN":
                if isinstance(value, bytes):
                    value = value.decode('utf-8')
                return value
        return None

    def expired(self):
        """Return True if the current UTC time is past ``notAfter``."""
        return self._utcnow() > self.notAfter()

    def tooEarly(self):
        """Return True if the current UTC time is before ``notBefore``."""
        return self._utcnow() < self.notBefore()

    def validTime(self):
        """Return True if the current time lies inside the validity window."""
        return not self.expired() and not self.tooEarly()

    def expiresSoon(self, days=7):
        """Return True if the certificate expires within ``days`` days.

        An already-expired certificate also counts as expiring soon.
        """
        deadline = self._utcnow() + timedelta(days=days)
        return deadline > self.notAfter()
|
|
|
|
|
2013-03-03 22:28:58 +00:00
|
|
|
class Website(object):
    """A single URL that is fetched once and can then be checked.

    The constructor performs the HTTP GET immediately. ``check()`` verifies
    the response status and, for https URLs, the certificate validity
    window. ``resources()`` lists the ``<link>`` and ``<script>`` URLs
    referenced by an HTML page.
    """

    def __init__(self, url, timeout=30):
        """Parse ``url`` and fetch it.

        A URL without a scheme is treated as ``http://``. ``timeout`` (in
        seconds) is passed to ``requests.get`` so an unresponsive server
        cannot block the run indefinitely.
        """
        self.url = urlparse(url)
        if not self.url.scheme:
            self.url = urlparse('http://' + url)
        self.cert = None
        self.res = {}
        self.r = requests.get(self.urlstring(), verify=True, timeout=timeout)

    def contentType(self):
        """Return the response's media type without parameters.

        The value is trimmed and lower-cased; an absent Content-Type header
        yields an empty string instead of raising ``KeyError``.
        """
        header = self.r.headers.get('content-type', '')
        return header.split(';')[0].strip().lower()

    def resources(self):
        """Return the de-duplicated absolute URLs of linked resources.

        Collects ``href`` from ``<link>`` elements (skipping those whose
        ``rel`` mentions ``openid``) and ``src`` from ``<script>`` elements.
        Returns an empty list for non-HTML responses.
        """
        if self.contentType() != 'text/html':
            return []
        d = pq(self.r.text)
        found = []
        for e in d('link'):
            # rel is optional; default to '' so the membership test is safe.
            if 'openid' in e.attrib.get('rel', ''):
                continue
            found.append(e.attrib.get('href'))
        for e in d('script'):
            found.append(e.attrib.get('src'))

        base = self.urlstring()
        absolute = []
        for ref in found:
            # Inline scripts and links without a target have nothing to fetch;
            # without this guard they would resolve to the page URL itself.
            if not ref:
                continue
            if not urlparse(ref).netloc:
                ref = urljoin(base, ref)
            # Protocol-relative references ("//host/path") inherit our scheme.
            if not urlparse(ref).scheme:
                ref = self.url.scheme + ':' + ref
            absolute.append(ref)

        # dict keys de-duplicate while keeping first-seen order.
        self.res = dict.fromkeys(absolute).keys()
        return self.res

    def resources_by_host(self):
        """Group the URLs stored by the last ``resources()`` call by netloc."""
        out = {}
        for r in self.res:
            out.setdefault(urlparse(r).netloc, []).append(r)
        return out

    def is_tls(self):
        """Return True if the URL uses the https scheme."""
        return self.url.scheme == 'https'

    def urlstring(self):
        """Return the parsed URL as a string."""
        return self.url.geturl()

    def check(self):
        """Validate the fetched response and, for https, the certificate.

        Raises:
            ReachabilityProblem: the response status is not 200.
            CertificateProblem: the certificate is outside its validity
                window or expires soon.
        """
        if self.r.status_code != 200:
            raise ReachabilityProblem("can't access: '%s'" % self.urlstring())
        if self.is_tls():
            self._get_cert()
            if self.cert.expiresSoon() or not self.cert.validTime():
                raise CertificateProblem(
                    "cert for %s is invalid or expires soon: %s" % (
                        self.urlstring(),
                        self.cert.notAfter()
                    )
                )

    def _get_cert(self):
        """Fetch the server certificate and store it in ``self.cert``.

        Port 443 is used when the URL does not name one. The protocol
        version is left to the ssl module's default rather than pinned to
        TLSv1, which is deprecated.
        """
        port = self.url.port or 443
        pem = ssl.get_server_certificate((self.url.hostname, port))
        self.cert = SSLCert(
            OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM, pem)
        )
|
2013-03-03 22:28:58 +00:00
|
|
|
|
2013-02-28 16:38:06 +00:00
|
|
|
def main():
    """Check every URL named on the command line plus its linked resources.

    Prints a usage line and exits with status 1 when no URL is given.
    Exceptions raised by the checks are left to propagate.
    """
    targets = sys.argv[1:]
    if not targets:
        print("usage: %s <url> [url2] [url3] [...]" % sys.argv[0])
        sys.exit(1)

    for target in targets:
        page = Website(target)
        page.check()
        for resource_url in page.resources():
            Website(resource_url).check()
|
2013-02-28 16:38:06 +00:00
|
|
|
|
|
|
|
# Run the checker only when executed as a script, not when imported.
if __name__ == "__main__":
    main()
|