parent
2601053909
commit
639e51a0b4
@ -0,0 +1,131 @@ |
||||
#!/usr/bin/env python3 |
||||
|
||||
import requests |
||||
import time |
||||
import json |
||||
import pprint |
||||
import socket |
||||
import sys |
||||
import re |
||||
import random |
||||
import os |
||||
|
||||
# shouts to @mewmew@blob.cat:
# https://blob.cat/notice/9u8rxBOMWtDFFQG81Y

# Hostnames the crawl starts from; main() puts each one on the fetch queue.
SEEDS = ["blob.cat", "s.sneak.berlin"]

# Timeout, in seconds, passed to requests.get() for each peers-list fetch.
TIMEOUT_SECS = 5
||||
|
||||
# Hostnames ending in "gab.best" are treated as spam and never queued.
# Raw string: "\." in a normal string literal is an invalid escape sequence.
fakehostre = re.compile(r'gab\.best$')

# A hostname is only queued if it consists solely of ASCII letters, digits,
# hyphens and dots.
simplehostre = re.compile(r'^[A-Za-z0-9\-\.]+$')
||||
|
||||
def _load_hosts(path, into):
    """Merge the JSON array stored at *path* into the dict *into*.

    Every element of the array becomes a key of *into* with value True.
    Returns the number of elements read, or None if *path* does not exist.
    """
    if not os.path.exists(path):
        return None
    with open(path) as f:
        loaded = json.load(f)
    for name in loaded:
        into[name] = True
    return len(loaded)


def _write_sorted_json(hosts, path):
    """Write the sorted keys of *hosts* to *path* as a JSON array.

    *path* is a bare filename in the current directory. The data is written
    to a dot-prefixed temporary file first and then moved into place, so a
    crash mid-write never leaves a truncated state file behind.
    """
    tmp = f".{path}.tmp"
    with open(tmp, 'w') as outfile:
        json.dump(sorted(hosts), outfile)
    # os.replace overwrites an existing destination on every platform;
    # os.rename raises on Windows once the destination file exists.
    os.replace(tmp, path)


def main():
    """Crawl instance peer lists, starting from SEEDS and any saved state.

    State is kept in three dicts used as sets (hostname -> True):
    ``fetchqueue`` (still to fetch), ``goodhosts`` (peers list fetched) and
    ``badhosts`` (fetch failed). Each is loaded from ``<name>.json`` in the
    current directory if that file exists, and all three are rewritten after
    every fetch attempt that got past DNS resolution. The function returns
    when the fetch queue is empty.
    """
    os.environ['PYTHONUNBUFFERED'] = '1'
    # The variable above is only read at interpreter start-up, so on its own
    # it affects child processes only. Switch this process's stdout to line
    # buffering so progress lines appear promptly when output is piped.
    reconfigure = getattr(sys.stdout, 'reconfigure', None)
    if reconfigure is not None:
        reconfigure(line_buffering=True)

    goodhosts = {}
    badhosts = {}
    fetchqueue = {}

    for seed in SEEDS:
        fetchqueue[seed] = True

    loaded = _load_hosts('fetchqueue.json', fetchqueue)
    if loaded is not None:
        print(f"loaded {loaded} fetchqueue items")

    loaded = _load_hosts('goodhosts.json', goodhosts)
    if loaded is not None:
        print(f"loaded {loaded} goodhosts")

    loaded = _load_hosts('badhosts.json', badhosts)
    if loaded is not None:
        print(f"loaded {loaded} badhosts")

    def writefiles():
        """Persist the three host sets to their JSON files."""
        print(f"writing files. fetchqueue={len(fetchqueue)} goodhosts={len(goodhosts)} badhosts={len(badhosts)}")
        _write_sorted_json(goodhosts, 'goodhosts.json')
        _write_sorted_json(badhosts, 'badhosts.json')
        _write_sorted_json(fetchqueue, 'fetchqueue.json')

    def fetchnext(hostname):
        """Fetch one host's peers list and queue any new hostnames found.

        The host is removed from the fetch queue first. If it does not
        resolve it is dropped without being recorded as good or bad and
        nothing is written to disk.
        """
        hostname = str(hostname)
        del fetchqueue[hostname]

        # Best-effort boundary: any lookup error just skips the host.
        # "except Exception" (not a bare except) lets Ctrl-C through.
        try:
            socket.gethostbyname(hostname)
        except Exception:
            print(f"skipping fetch from {hostname}: dns lookup failed")
            return

        try:
            url = "https://" + hostname + "/api/v1/instance/peers"
            print(f"GET {url}")
            peers = requests.get(url, timeout=TIMEOUT_SECS).json()
        except Exception:
            # Connection errors, timeouts and non-JSON bodies all count as
            # a failed fetch.
            peers = None

        # The endpoint is expected to return a JSON array of hostnames.
        # Anything else (e.g. an error object) is a failure; otherwise its
        # keys would be queued as if they were hostnames.
        if not isinstance(peers, list):
            print(f"{hostname}: api fetch failed")
            badhosts[hostname] = True
        else:
            print(f"{hostname}: api fetch got {len(peers)}")
            goodhosts[hostname] = True
            for newhn in peers:
                if newhn is None:
                    continue
                newhn = str(newhn)
                # Skip hosts that are already queued or already classified.
                if newhn in fetchqueue or newhn in goodhosts or newhn in badhosts:
                    continue

                if fakehostre.search(newhn):
                    print(f"skipping add of {newhn} due to re spam match")
                    continue

                if simplehostre.search(newhn):
                    print(f"got new hostname '{newhn}' from api on {hostname}")
                    fetchqueue[newhn] = True
                else:
                    print(f"got weird hostname '{newhn}' from api on {hostname}, skipping")

        writefiles()

    while fetchqueue:
        # Work through a shuffled snapshot; fetchnext() mutates the queue,
        # and hosts it adds are picked up on the next pass of the while loop.
        batch = list(fetchqueue)
        random.shuffle(batch)
        for h in batch:
            print(f"{len(fetchqueue)} hosts in fetchqueue")
            fetchnext(h)
||||
|
||||
# Start the crawl only when this file is executed as a script.
if __name__ == "__main__":
    main()
Loading…
Reference in new issue