#!/usr/bin/env python3
"""Crawl fediverse instances by following their public peer lists.

Starting from SEEDS (plus any queue saved by a previous run), each host is
asked for ``/api/v1/instance/peers``.  Hosts that answer with a JSON list are
recorded as good, hosts whose request fails are recorded as bad, and every
previously unseen peer name is queued for a later fetch.  State is persisted
to ``goodhosts.json``, ``badhosts.json`` and ``fetchqueue.json`` in the
current working directory after each completed API fetch attempt.
"""

import requests
import time
import json
import pprint
import socket
import sys
import re
import random
import os

# shouts to @mewmew@blob.cat:
# https://blob.cat/notice/9u8rxBOMWtDFFQG81Y
SEEDS = [
    "blob.cat",
    "s.sneak.berlin"
]

# Passed as the ``timeout`` argument of every requests.get() call.
TIMEOUT_SECS = 5

# Peer names matching this pattern are never queued (spam filter).
fakehostre = re.compile(r'gab\.best$')

# Only names made of letters, digits, '-' and '.' are queued.
simplehostre = re.compile(r'^[A-Za-z0-9\-\.]+$')


def _load_hosts(path):
    """Return the JSON document stored at *path*, or None if it is absent."""
    if not os.path.exists(path):
        return None
    with open(path) as infile:
        return json.load(infile)


def _write_sorted(name, hosts):
    """Write *hosts* as a sorted JSON list to the file *name*.

    The data goes to ``.<name>.tmp`` first and is then moved over *name*, so
    a reader never observes a half-written file.  *name* must be a bare file
    name; the temporary file is created next to it in the working directory.
    """
    tmpname = '.' + name + '.tmp'
    with open(tmpname, 'w') as outfile:
        json.dump(sorted(hosts), outfile)
    # os.replace overwrites an existing destination on every platform,
    # whereas os.rename raises on Windows when the target already exists.
    os.replace(tmpname, name)


def _fetch_peers(hostname):
    """Return the peer list published by *hostname*, or None on failure.

    A response that is not a JSON list counts as a failure: an error object
    such as ``{"error": "..."}`` would otherwise be iterated key by key and
    its keys queued as if they were hostnames.
    """
    url = "https://" + hostname + "/api/v1/instance/peers"
    print(f"GET {url}")
    try:
        # Parse the body once; the result is used for both count and names.
        peers = requests.get(url, timeout=TIMEOUT_SECS).json()
    except Exception:
        # Best-effort crawl: any request or decoding problem marks the host
        # as failed.  Unlike a bare ``except:``, this lets KeyboardInterrupt
        # and SystemExit propagate so the crawler can be stopped cleanly.
        return None
    if not isinstance(peers, list):
        return None
    return peers


def main():
    """Run the crawl until the fetch queue is empty."""
    # NOTE: this only influences child processes; the running interpreter
    # picked its stdout buffering mode at startup.  Kept for compatibility.
    os.environ['PYTHONUNBUFFERED'] = '1'

    goodhosts = set()
    badhosts = set()
    fetchqueue = set(SEEDS)

    # Resume from whatever a previous run left behind.
    for path, bucket, label in (
        ('fetchqueue.json', fetchqueue, 'fetchqueue items'),
        ('goodhosts.json', goodhosts, 'goodhosts'),
        ('badhosts.json', badhosts, 'badhosts'),
    ):
        loaded = _load_hosts(path)
        if loaded is not None:
            bucket.update(loaded)
            print(f"loaded {len(loaded)} {label}")

    def writefiles():
        """Persist the three host collections as sorted JSON lists."""
        print(f"writing files. fetchqueue={len(fetchqueue)} goodhosts={len(goodhosts)} badhosts={len(badhosts)}")
        _write_sorted('goodhosts.json', goodhosts)
        _write_sorted('badhosts.json', badhosts)
        _write_sorted('fetchqueue.json', fetchqueue)

    def fetchnext(hostname):
        """Take *hostname* off the queue, fetch its peers and record the result."""
        fetchqueue.discard(hostname)
        hostname = str(hostname)

        try:
            socket.gethostbyname(hostname)
        except Exception:
            # Unresolvable names are dropped without being marked bad, and
            # nothing is written to disk on this path.
            print(f"skipping fetch from {hostname}: dns lookup failed")
            return

        peers = _fetch_peers(hostname)
        if peers is None:
            print(f"{hostname}: api fetch failed")
            badhosts.add(hostname)
        else:
            print(f"{hostname}: api fetch got {len(peers)}")
            goodhosts.add(hostname)
            for newhn in peers:
                if newhn is None:
                    continue
                newhn = str(newhn)
                # Already queued or already classified: nothing to do.
                if newhn in fetchqueue or newhn in goodhosts or newhn in badhosts:
                    continue

                if fakehostre.search(newhn):
                    print(f"skipping add of {newhn} due to re spam match")
                    continue

                if simplehostre.search(newhn):
                    print(f"got new hostname '{newhn}' from api on {hostname}")
                    fetchqueue.add(newhn)
                else:
                    print(f"got weird hostname '{newhn}' from api on {hostname}, skipping")

        writefiles()

    while fetchqueue:
        # Work on a shuffled snapshot: fetchnext() mutates the queue while
        # this loop is running.
        batch = list(fetchqueue)
        random.shuffle(batch)
        for h in batch:
            print(f"{len(fetchqueue)} hosts in fetchqueue")
            fetchnext(h)


if __name__ == "__main__":
    main()