hacks/checkcert/checkcert

149 lines
4.4 KiB
Plaintext
Raw Normal View History

#!/usr/bin/env python3
2013-02-28 16:38:06 +00:00
import sys
from pprint import pformat
import requests
from pyquery import PyQuery as pq
2013-02-28 16:38:06 +00:00
import ssl
import OpenSSL
from urllib.parse import urlparse, urljoin
2013-02-28 16:38:06 +00:00
from datetime import datetime, timedelta
from pytz import UTC
import logging
# Only warnings and above are emitted by default; pass level=logging.DEBUG
# to basicConfig() instead when troubleshooting.
logging.basicConfig(level=logging.WARNING)
# getLogger() without a name returns the root logger.
log = logging.getLogger()
2013-02-28 16:38:06 +00:00
# FIXME: relative resource URLs are resolved against the URL that was passed
# in, so they come out wrong if that URL redirects somewhere else.
2013-02-28 16:38:06 +00:00
class CertificateProblem(Exception):
    """A site's TLS certificate is outside its validity period or expires soon."""
class ReachabilityProblem(Exception):
    """A site could not be fetched successfully (non-200 HTTP response)."""
class SSLCert(object):
    """Convenience wrapper around a pyOpenSSL certificate object.

    The validity timestamps are exposed as naive ``datetime`` objects
    expressed in UTC, and all "is it valid now" checks compare against the
    current UTC time.
    """

    def __init__(self, c):
        """Wrap *c*, an object offering ``get_notBefore()``,
        ``get_notAfter()`` and ``get_subject()`` (a pyOpenSSL X509)."""
        self.c = c

    @staticmethod
    def _utcnow():
        """Return the current UTC time as a naive datetime.

        Naive on purpose so it can be compared with decode_ossl_time()
        results; avoids datetime.utcnow(), deprecated since Python 3.12.
        """
        # Local import: the module itself only imports datetime/timedelta.
        from datetime import timezone
        return datetime.now(timezone.utc).replace(tzinfo=None)

    def decode_ossl_time(self, t):
        """Parse an OpenSSL timestamp (bytes, ``YYYYMMDDhhmmssZ``) into a
        naive datetime."""
        f = '%Y%m%d%H%M%SZ'
        return datetime.strptime(t.decode('utf-8'), f)

    def notBefore(self):
        """Return the start of the certificate's validity period."""
        return self.decode_ossl_time(self.c.get_notBefore())

    def notAfter(self):
        """Return the end of the certificate's validity period."""
        return self.decode_ossl_time(self.c.get_notAfter())

    def commonName(self):
        """Return the subject's CN as text, or None if the subject has none."""
        for key, value in self.c.get_subject().get_components():
            # pyOpenSSL hands back (bytes, bytes) pairs on Python 3, so a
            # comparison against the str "CN" alone would never match.
            if key in (b"CN", "CN"):
                if isinstance(value, bytes):
                    return value.decode('utf-8')
                return value
        return None

    def expired(self):
        """Return True if the validity period has already ended."""
        return self._utcnow() > self.notAfter()

    def tooEarly(self):
        """Return True if the validity period has not started yet."""
        return self._utcnow() < self.notBefore()

    def validTime(self):
        """Return True if the current time lies inside the validity period."""
        return not (self.expired() or self.tooEarly())

    def expiresSoon(self, days=7):
        """Return True if the certificate ends within *days* days from now.

        An already expired certificate also counts as expiring soon.
        """
        return self._utcnow() + timedelta(days=days) > self.notAfter()
class Website(object):
    """A fetched URL plus checks on its reachability and TLS certificate.

    Constructing an instance performs the HTTP GET immediately; the response
    is kept in ``self.r`` and inspected by the other methods.
    """

    def __init__(self, url, timeout=30):
        """Fetch *url*.

        url: address to check; ``http://`` is assumed if it has no scheme.
        timeout: seconds requests waits for the server. Without a timeout a
            stalled server would block the whole run indefinitely.
        """
        self.url = urlparse(url)
        if not self.url.scheme:
            self.url = urlparse('http://' + url)
        self.cert = None
        self.res = []
        self.r = requests.get(self.urlstring(), verify=True, timeout=timeout)

    def contentType(self):
        """Return the response's media type, lower-cased and without
        parameters such as ``charset``.

        Returns an empty string when no Content-Type header was sent.
        """
        header = self.r.headers.get('content-type', '')
        return header.split(';')[0].strip().lower()

    def resources(self):
        """Return the absolute URLs referenced by the page's ``<link>`` and
        ``<script>`` elements.

        Non-HTML responses yield an empty list. For HTML responses the
        result is also stored in ``self.res``.
        """
        if self.contentType() != 'text/html':
            return []
        d = pq(self.r.text)
        refs = []
        for e in d('link'):
            # rel may be missing altogether; OpenID links are skipped.
            if 'openid' in (e.attrib.get('rel') or ''):
                continue
            refs.append(e.attrib.get('href'))
        for e in d('script'):
            # Inline scripts have no src; _absolute_urls drops those.
            refs.append(e.attrib.get('src'))
        self.res = self._absolute_urls(refs)
        return self.res

    def _absolute_urls(self, refs):
        """Resolve *refs* against this site's URL, dropping empty entries and
        duplicates while keeping first-seen order."""
        base = self.urlstring()
        # urljoin copes with absolute, protocol-relative ("//host/x") and
        # relative references alike.
        return list(dict.fromkeys(urljoin(base, ref) for ref in refs if ref))

    def resources_by_host(self):
        """Group the URLs in ``self.res`` by network location.

        Returns a dict mapping each netloc to the list of URLs on that host.
        It is empty until resources() has populated ``self.res``.
        """
        out = {}
        for r in self.res:
            out.setdefault(urlparse(r).netloc, []).append(r)
        return out

    def is_tls(self):
        """Return True if the URL uses the https scheme."""
        return self.url.scheme == 'https'

    def urlstring(self):
        """Return the (possibly scheme-completed) URL as a string."""
        return self.url.geturl()

    def check(self):
        """Verify that the site answered and, for https, that its
        certificate is in date.

        Raises:
            ReachabilityProblem: the response status is not 200.
            CertificateProblem: the certificate is not valid at the current
                time, or it expires soon (see SSLCert.expiresSoon).
        """
        # Compare by value: "is not" tests object identity, not equality.
        if self.r.status_code != 200:
            raise ReachabilityProblem("can't access: '%s'" % self.urlstring())
        if not self.is_tls():
            return
        self._get_cert()
        if not self.cert.validTime():
            raise CertificateProblem(
                "cert for %s is invalid: %s to %s" % (
                    self.urlstring(),
                    self.cert.notBefore(),
                    self.cert.notAfter()
                )
            )
        if self.cert.expiresSoon():
            raise CertificateProblem(
                "cert for %s expires soon: %s" % (
                    self.urlstring(),
                    self.cert.notAfter()
                )
            )

    def _get_cert(self):
        """Download the server's certificate and store it in ``self.cert``."""
        port = self.url.port or 443
        # Let the ssl module negotiate the protocol version: pinning TLS 1.0
        # fails against servers that have disabled that old protocol.
        pem = ssl.get_server_certificate((self.url.hostname, port))
        self.cert = SSLCert(
            OpenSSL.crypto.load_certificate(OpenSSL.crypto.FILETYPE_PEM, pem)
        )
2013-02-28 16:38:06 +00:00
def main():
    """Check every URL given on the command line and the resources it loads.

    Each site is fetched and checked, then every resource returned by its
    resources() is checked too. A problem with one URL is reported on stderr
    and does not stop the remaining URLs from being checked.

    Exits with status 1 if no URL was given or if any check failed;
    otherwise returns normally.
    """
    if len(sys.argv) < 2:
        print("usage: %s <url> [url2] [url3] [...]" % sys.argv[0],
              file=sys.stderr)
        sys.exit(1)

    # OSError also covers requests' exceptions (they derive from IOError)
    # as well as socket and ssl errors raised while fetching a certificate.
    problems = (CertificateProblem, ReachabilityProblem, OSError)
    failed = False
    for site in sys.argv[1:]:
        try:
            s = Website(site)
            s.check()
            resources = s.resources()
        except problems as e:
            print("%s: %s" % (site, e), file=sys.stderr)
            failed = True
            continue
        for u in resources:
            try:
                Website(u).check()
            except problems as e:
                print("%s (loaded by %s): %s" % (u, site, e), file=sys.stderr)
                failed = True
    if failed:
        sys.exit(1)
2013-02-28 16:38:06 +00:00
# Run the checks only when executed as a script, not when imported.
if __name__ == '__main__':
    main()