#!/usr/bin/env python3
"""Crawl hosts by following each one's public peers list.

Starting from ``SEEDS`` (plus any queue saved by a previous run), every
queued host is asked for ``/api/v1/instance/peers``.  Hosts that answer
with a JSON list are recorded as good, hosts whose fetch fails are
recorded as bad, and previously unseen peer hostnames are queued.

State is kept in ``fetchqueue.json``, ``goodhosts.json`` and
``badhosts.json`` in the current working directory, so an interrupted
crawl can be resumed by running the script again.
"""
import json
import os
import pprint
import random
import re
import socket
import sys
import time

import requests

# shouts to @mewmew@blob.cat:
# https://blob.cat/notice/9u8rxBOMWtDFFQG81Y

SEEDS = [
    "blob.cat",
    "s.sneak.berlin",
]

# Per-request timeout, in seconds, passed to requests.get().
TIMEOUT_SECS = 5

# Hostnames ending in gab.best are skipped as spam.
fakehostre = re.compile(r'gab\.best$')
# Only letters, digits, '-' and '.' are accepted in a hostname.
simplehostre = re.compile(r'^[A-Za-z0-9\-\.]+$')


def _load_hosts(path):
    """Return the JSON content of *path*, or None if the file is absent."""
    if not os.path.exists(path):
        return None
    with open(path) as infile:
        return json.load(infile)


def _write_json_atomic(path, items):
    """Write *items* as JSON to *path* via a temp file and a rename."""
    directory, filename = os.path.split(path)
    tmp_path = os.path.join(directory, f".{filename}.tmp")
    with open(tmp_path, 'w') as outfile:
        json.dump(items, outfile)
    # os.replace overwrites an existing destination on every platform,
    # whereas os.rename raises on Windows if the destination exists.
    os.replace(tmp_path, path)


def _fetch_peers(hostname):
    """Return the peers list served by *hostname*, or None on failure.

    Failure covers network/HTTP errors, a body that is not valid JSON,
    and a JSON body that is not a list (for example an error object).
    """
    url = "https://" + hostname + "/api/v1/instance/peers"
    print(f"GET {url}")
    try:
        response = requests.get(url, timeout=TIMEOUT_SECS)
        peers = response.json()
    except (requests.RequestException, OSError, ValueError):
        # Deliberately not a bare except: Ctrl-C must stop the crawl
        # instead of being recorded as a failed host.
        return None
    if not isinstance(peers, list):
        return None
    return peers


def main():
    """Run the crawl until the fetch queue is empty."""
    # NOTE: PYTHONUNBUFFERED is read at interpreter startup, so setting it
    # here only affects child processes, not this process's own stdout.
    os.environ['PYTHONUNBUFFERED'] = '1'

    goodhosts = set()
    badhosts = set()
    fetchqueue = set(SEEDS)

    # Resume from whatever state a previous run left on disk.
    saved_state = (
        ('fetchqueue.json', fetchqueue, 'fetchqueue items'),
        ('goodhosts.json', goodhosts, 'goodhosts'),
        ('badhosts.json', badhosts, 'badhosts'),
    )
    for path, bucket, label in saved_state:
        loaded = _load_hosts(path)
        if loaded is None:
            continue
        bucket.update(loaded)
        print(f"loaded {len(loaded)} {label}")

    def writefiles():
        """Persist the three host sets as sorted JSON lists."""
        print(
            f"writing files. fetchqueue={len(fetchqueue)} "
            f"goodhosts={len(goodhosts)} badhosts={len(badhosts)}"
        )
        _write_json_atomic('goodhosts.json', sorted(goodhosts))
        _write_json_atomic('badhosts.json', sorted(badhosts))
        _write_json_atomic('fetchqueue.json', sorted(fetchqueue))

    def fetchnext(hostname):
        """Fetch peers from *hostname* and queue any new hostnames."""
        hostname = str(hostname)
        fetchqueue.discard(hostname)

        try:
            socket.gethostbyname(hostname)
        except (OSError, ValueError):
            # OSError covers resolver failures; ValueError covers names
            # that cannot be encoded for lookup at all.
            # NOTE: such hosts are not added to badhosts, so they can be
            # queued again if another host lists them as a peer.
            print(f"skipping fetch from {hostname}: dns lookup failed")
            return

        peers = _fetch_peers(hostname)
        if peers is None:
            print(f"{hostname}: api fetch failed")
            badhosts.add(hostname)
        else:
            print(f"{hostname}: api fetch got {len(peers)}")
            goodhosts.add(hostname)
            for newhn in peers:
                if newhn is None:
                    continue
                newhn = str(newhn)
                if newhn in fetchqueue or newhn in goodhosts or newhn in badhosts:
                    continue
                if fakehostre.search(newhn):
                    print(f"skipping add of {newhn} due to re spam match")
                    continue
                # fullmatch (not search) so a trailing newline, which '$'
                # alone would tolerate, is rejected.
                if simplehostre.fullmatch(newhn):
                    print(f"got new hostname '{newhn}' from api on {hostname}")
                    fetchqueue.add(newhn)
                else:
                    print(f"got weird hostname '{newhn}' from api on {hostname}, skipping")

        writefiles()

    while fetchqueue:
        # Work on a shuffled snapshot: fetchnext() mutates fetchqueue.
        batch = list(fetchqueue)
        random.shuffle(batch)
        for host in batch:
            print(f"{len(fetchqueue)} hosts in fetchqueue")
            fetchnext(host)


if __name__ == "__main__":
    main()