Improve the sort order of domains: siblings of the primary hostname were not sorted correctly

This commit is contained in:
Joshua Tauberer 2015-07-21 11:25:11 +00:00
parent d0ccde7b48
commit 1900e512f2
2 changed files with 53 additions and 23 deletions

View File

@ -19,6 +19,7 @@ System:
* ownCloud updated to version 8.1.0. * ownCloud updated to version 8.1.0.
* When upgrading, network checks like blocked port 25 are now skipped. * When upgrading, network checks like blocked port 25 are now skipped.
* Tweaks to the intrusion detection rules for IMAP. * Tweaks to the intrusion detection rules for IMAP.
* Improve the sort order of the domains in the status checks.
v0.12c (July 19, 2015) v0.12c (July 19, 2015)
---------------------- ----------------------

View File

@ -24,32 +24,52 @@ def safe_domain_name(name):
return urllib.parse.quote(name, safe='') return urllib.parse.quote(name, safe='')
def sort_domains(domain_names, env): def sort_domains(domain_names, env):
# Put domain names in a nice sorted order. For web_update, PRIMARY_HOSTNAME # Put domain names in a nice sorted order.
# must appear first so it becomes the nginx default server.
# The nice order will group domain names by DNS zone, i.e. the top-most
# First group PRIMARY_HOSTNAME and its subdomains, then parent domains of PRIMARY_HOSTNAME, then other domains. # domain name that we serve that encompasses a set of subdomains. Map
groups = ( [], [], [] ) # each of the domain names to the zone that contains them. Walk the domains
for d in domain_names: # from shortest to longest since zones are always shorter than their
if d == env['PRIMARY_HOSTNAME'] or d.endswith("." + env['PRIMARY_HOSTNAME']): # subdomains.
groups[0].append(d) zones = { }
elif env['PRIMARY_HOSTNAME'].endswith("." + d): for domain in sorted(domain_names, key=lambda d : len(d)):
groups[1].append(d) for z in zones.values():
if domain.endswith("." + z):
# We found a parent domain already in the list.
zones[domain] = z
break
else: else:
groups[2].append(d) # 'break' did not occur: there is no parent domain, so it is its
# own zone.
zones[domain] = domain
# Within each group, sort parent domains before subdomains and after that sort lexicographically. # Sort the zones.
def sort_group(group): zone_domains = sorted(zones.values(),
# Find the top-most domains. key = lambda d : (
top_domains = sorted(d for d in group if len([s for s in group if d.endswith("." + s)]) == 0) # PRIMARY_HOSTNAME or the zone that contains it is always first.
ret = [] not (d == env['PRIMARY_HOSTNAME'] or env['PRIMARY_HOSTNAME'].endswith("." + d)),
for d in top_domains:
ret.append(d)
ret.extend( sort_group([s for s in group if s.endswith("." + d)]) )
return ret
groups = [sort_group(g) for g in groups]
return groups[0] + groups[1] + groups[2] # Then just dumb lexicographically.
d,
))
# Now sort the domain names that fall within each zone.
domain_names = sorted(domain_names,
key = lambda d : (
# First by zone.
zone_domains.index(zones[d]),
# PRIMARY_HOSTNAME is always first within the zone that contains it.
d != env['PRIMARY_HOSTNAME'],
# Followed by any of its subdomains.
not d.endswith("." + env['PRIMARY_HOSTNAME']),
# Then in right-to-left lexicographic order of the .-separated parts of the name.
list(reversed(d.split("."))),
))
return domain_names
def sort_email_addresses(email_addresses, env): def sort_email_addresses(email_addresses, env):
email_addresses = set(email_addresses) email_addresses = set(email_addresses)
@ -200,3 +220,12 @@ def wait_for_service(port, public, env, timeout):
if time.perf_counter() > start+timeout: if time.perf_counter() > start+timeout:
return False return False
time.sleep(min(timeout/4, 1)) time.sleep(min(timeout/4, 1))
# Script entry point: when this module is executed directly, gather the
# domain names reported by the DNS and web configuration helpers, order them
# with sort_domains, and print them one per line.
if __name__ == "__main__":
# These helpers are imported only when the module is run as a script.
from dns_update import get_dns_domains
from web_update import get_web_domains, get_default_www_redirects
env = load_environment()
# Union the DNS domains with the web domains and default www redirects.
# NOTE(review): the `|` and `+` operators imply get_dns_domains returns a set
# and the two web_update helpers return lists -- confirm against those modules.
domains = get_dns_domains(env) | set(get_web_domains(env) + get_default_www_redirects(env))
domains = sort_domains(domains, env)
# Emit the sorted result, one domain name per line.
for domain in domains:
print(domain)