hacks/findmastos/findmastos.py
2020-04-20 08:31:05 -07:00

132 lines
3.7 KiB
Python
Executable File

#!/usr/bin/env python3
import requests
import time
import json
import pprint
import socket
import sys
import re
import random
import os
# shouts to @mewmew@blob.cat:
# https://blob.cat/notice/9u8rxBOMWtDFFQG81Y
# Instances the crawl starts from; every run re-queues these.
SEEDS = [
    "blob.cat",
    "s.sneak.berlin",
]

# Timeout, in seconds, handed to requests.get() for each peers-API call.
TIMEOUT_SECS = 5

# Peer names ending in "gab.best" are skipped as spam by the crawler.
# Raw string: "\." is a regex escape, not a (deprecated, invalid) str escape.
fakehostre = re.compile(r'gab\.best$')

# Only letters, digits, "-" and "." are accepted in a discovered hostname.
# \Z (not $) is used so a name with a trailing newline is rejected: "$" also
# matches just before a final "\n".
simplehostre = re.compile(r'^[A-Za-z0-9\-\.]+\Z')
def main():
    """Crawl fediverse instances through their public peers API.

    The work queue starts as SEEDS plus whatever an earlier run saved in
    ``fetchqueue.json``.  Each queued host is asked for
    ``/api/v1/instance/peers``:

    * a host that answers with a JSON list is recorded in ``goodhosts.json``
      and its previously unseen, plausible-looking peers are queued;
    * a host whose request fails (or answers with something other than a
      JSON list) is recorded in ``badhosts.json``;
    * a host whose DNS lookup fails is dropped from the queue without being
      recorded, so it may be queued again if another instance lists it.

    State files are rewritten after every API attempt so an interrupted run
    can be resumed.  Returns when the queue is empty.
    """
    # Kept for any child process that inherits the environment.  It does NOT
    # change buffering of the already-running interpreter, so line buffering
    # is requested explicitly where the stream supports it.
    os.environ['PYTHONUNBUFFERED'] = '1'
    reconfigure = getattr(sys.stdout, 'reconfigure', None)
    if reconfigure is not None:
        reconfigure(line_buffering=True)

    goodhosts = set()
    badhosts = set()
    fetchqueue = set(SEEDS)

    def loadstate(path, into, label):
        """Merge the JSON list saved at *path*, if present, into set *into*."""
        try:
            with open(path) as f:
                loaded = json.load(f)
        except FileNotFoundError:
            # First run (or state file removed): nothing to resume from.
            return
        into.update(loaded)
        print(f"loaded {len(loaded)} {label}")

    loadstate('fetchqueue.json', fetchqueue, 'fetchqueue items')
    loadstate('goodhosts.json', goodhosts, 'goodhosts')
    loadstate('badhosts.json', badhosts, 'badhosts')

    def writefiles():
        """Save the three host sets as sorted JSON lists."""
        print(f"writing files. fetchqueue={len(fetchqueue)} goodhosts={len(goodhosts)} badhosts={len(badhosts)}")
        for name, hosts in (
            ('goodhosts', goodhosts),
            ('badhosts', badhosts),
            ('fetchqueue', fetchqueue),
        ):
            # Write to a temp file first so a crash mid-write cannot leave a
            # truncated state file; os.replace also overwrites an existing
            # destination on Windows, where os.rename would raise.
            tmpname = f".{name}.json.tmp"
            with open(tmpname, 'w') as outfile:
                json.dump(sorted(hosts), outfile)
            os.replace(tmpname, f"{name}.json")

    def fetchnext(hostname):
        """Query *hostname* for its peers and queue the ones not yet seen."""
        hostname = str(hostname)
        fetchqueue.discard(hostname)

        # Cheap reachability pre-check; only success/failure matters here.
        try:
            socket.gethostbyname(hostname)
        except (OSError, ValueError):
            # OSError covers resolver errors (socket.gaierror); ValueError
            # covers names the IDNA codec rejects, e.g. empty labels "a..b".
            print(f"skipping fetch from {hostname}: dns lookup failed")
            return

        url = "https://" + hostname + "/api/v1/instance/peers"
        print(f"GET {url}")
        try:
            peers = requests.get(url, timeout=TIMEOUT_SECS).json()
        except Exception:
            # Remote hosts are untrusted and can fail in arbitrary ways
            # (TLS, timeouts, non-JSON bodies); any of them marks the host
            # bad.  Unlike a bare except, Ctrl-C / SystemExit still propagate.
            peers = None

        if not isinstance(peers, list):
            # Error responses are often JSON objects; treating their keys as
            # peer hostnames would pollute the queue.
            print(f"{hostname}: api fetch failed")
            badhosts.add(hostname)
        else:
            print(f"{hostname}: api fetch got {len(peers)}")
            goodhosts.add(hostname)
            for newhn in peers:
                if newhn is None:
                    continue
                newhn = str(newhn)
                if newhn in fetchqueue or newhn in goodhosts or newhn in badhosts:
                    continue
                if fakehostre.search(newhn):
                    print(f"skipping add of {newhn} due to re spam match")
                    continue
                if simplehostre.search(newhn):
                    print(f"got new hostname '{newhn}' from api on {hostname}")
                    fetchqueue.add(newhn)
                else:
                    print(f"got weird hostname '{newhn}' from api on {hostname}, skipping")
        writefiles()

    while fetchqueue:
        # Work on a shuffled snapshot: fetchnext() mutates the queue, and the
        # random order spreads requests across unrelated hosts.
        batch = list(fetchqueue)
        random.shuffle(batch)
        for h in batch:
            print(f"{len(fetchqueue)} hosts in fetchqueue")
            fetchnext(h)
# Start the crawl only when run as a script, not when imported as a module.
if __name__ == "__main__":
    main()