132 lines
3.7 KiB
Python
Executable File
132 lines
3.7 KiB
Python
Executable File
#!/usr/bin/env python3
|
|
|
|
import requests
|
|
import time
|
|
import json
|
|
import pprint
|
|
import socket
|
|
import sys
|
|
import re
|
|
import random
|
|
import os
|
|
|
|
# shouts to @mewmew@blob.cat:
|
|
# https://blob.cat/notice/9u8rxBOMWtDFFQG81Y
|
|
# Hosts the crawl starts from; each one is placed in the fetch queue on
# every run.
SEEDS = [
    "blob.cat",
    "s.sneak.berlin",
]

# Timeout, in seconds, passed to each HTTP request for a peer list.
TIMEOUT_SECS = 5

# Hostnames ending in "gab.best" are treated as spam and never queued.
# Raw strings keep the backslash escapes valid Python (non-raw "\." is an
# invalid escape sequence and triggers a warning on recent interpreters).
fakehostre = re.compile(r'gab\.best$')

# A hostname is only queued when it consists solely of ASCII letters,
# digits, hyphens and dots. ``\Z`` is used instead of ``$`` because ``$``
# also matches just before a trailing newline, which would let "host\n"
# through.
simplehostre = re.compile(r'^[A-Za-z0-9\-\.]+\Z')
|
|
|
|
def _load_hostlist(path, label):
    """Return the hostnames stored in the JSON file at *path*.

    A missing file yields an empty list. Every entry is coerced to ``str``
    so that it matches the string hostnames the crawler works with.
    *label* is only used in the progress message.
    """
    if not os.path.exists(path):
        return []
    with open(path) as infile:
        entries = json.load(infile)
    print(f"loaded {len(entries)} {label}")
    return [str(entry) for entry in entries]


def _write_json_atomic(path, hostnames):
    """Write *hostnames*, sorted, as a JSON list to *path*.

    The data is written to a hidden temporary file first and then moved
    over *path*, so an interrupted run does not leave a half-written file.
    ``os.replace`` is used because, unlike ``os.rename``, it overwrites an
    existing destination on every platform.
    """
    tmp_path = f".{path}.tmp"
    with open(tmp_path, 'w') as outfile:
        json.dump(sorted(hostnames), outfile)
    os.replace(tmp_path, path)


def main():
    """Crawl instance peer lists starting from SEEDS until the queue is empty.

    State is kept in three sets of hostnames: ``fetchqueue`` (still to be
    fetched), ``goodhosts`` (peer list fetched successfully) and
    ``badhosts`` (fetch failed). All three are reloaded from
    ``fetchqueue.json``, ``goodhosts.json`` and ``badhosts.json`` in the
    current directory when present, and rewritten after each fetch attempt
    that gets past the DNS check.
    """
    # Setting the variable here does not change buffering of the already
    # running interpreter (it is read at startup); it is kept because any
    # child process would inherit it.
    os.environ['PYTHONUNBUFFERED'] = '1'
    # Make progress output appear promptly even when stdout is piped.
    reconfigure = getattr(sys.stdout, 'reconfigure', None)
    if reconfigure is not None:
        reconfigure(line_buffering=True)

    # Seeds are always (re)queued, even if a previous run already
    # classified them.
    fetchqueue = set(SEEDS)
    fetchqueue.update(_load_hostlist('fetchqueue.json', 'fetchqueue items'))
    goodhosts = set(_load_hostlist('goodhosts.json', 'goodhosts'))
    badhosts = set(_load_hostlist('badhosts.json', 'badhosts'))

    def fetchnext(hostname):
        """Fetch the peer list of *hostname* and queue every unseen peer.

        The host is removed from the queue first. If it does not resolve
        in DNS it is dropped without being recorded as good or bad, and
        no files are written.
        """
        hostname = str(hostname)
        fetchqueue.discard(hostname)

        # Best-effort check: any lookup failure just skips the host.
        # ``Exception`` (not a bare ``except``) so Ctrl-C still stops the
        # crawler.
        try:
            socket.gethostbyname(hostname)
        except Exception:
            print(f"skipping fetch from {hostname}: dns lookup failed")
            return

        url = "https://" + hostname + "/api/v1/instance/peers"
        print(f"GET {url}")
        # Remote hosts can fail in arbitrary ways (connection errors,
        # timeouts, non-JSON bodies); all of them count as a failed fetch.
        try:
            peers = requests.get(url, timeout=TIMEOUT_SECS).json()
        except Exception:
            peers = None

        # Only a JSON list is a valid peer list. Anything else, such as an
        # error object, must not be iterated as if it held hostnames.
        if not isinstance(peers, list):
            print(f"{hostname}: api fetch failed")
            badhosts.add(hostname)
        else:
            print(f"{hostname}: api fetch got {len(peers)}")
            goodhosts.add(hostname)
            for newhn in peers:
                if newhn is None:
                    continue
                newhn = str(newhn)
                already_known = (
                    newhn in fetchqueue
                    or newhn in goodhosts
                    or newhn in badhosts
                )
                if already_known:
                    continue

                if fakehostre.search(newhn):
                    print(f"skipping add of {newhn} due to re spam match")
                    continue

                if simplehostre.search(newhn):
                    print(f"got new hostname '{newhn}' from api on {hostname}")
                    fetchqueue.add(newhn)
                else:
                    print(f"got weird hostname '{newhn}' from api on {hostname}, skipping")

        writefiles()

    def writefiles():
        """Persist the three host sets as sorted JSON lists."""
        print(f"writing files. fetchqueue={len(fetchqueue)} goodhosts={len(goodhosts)} badhosts={len(badhosts)}")
        _write_json_atomic('goodhosts.json', goodhosts)
        _write_json_atomic('badhosts.json', badhosts)
        _write_json_atomic('fetchqueue.json', fetchqueue)

    while fetchqueue:
        # Work on a shuffled snapshot: fetchnext() mutates the queue, and
        # hosts added during this pass are picked up by the next pass.
        batch = list(fetchqueue)
        random.shuffle(batch)
        for h in batch:
            print(f"{len(fetchqueue)} hosts in fetchqueue")
            fetchnext(h)
|
|
|
|
# Start the crawl only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == "__main__":
    main()
|