From 640751b6067aa5d4fd505df3395fc3da292214fb Mon Sep 17 00:00:00 2001 From: KiekerJan Date: Sun, 20 Mar 2022 20:57:19 +0100 Subject: [PATCH 01/12] initial changes to use unbound as local dns resolver instead of bind --- conf/NetworkManager.conf | 2 ++ conf/unbound.conf | 56 +++++++++++++++++++++++++++++++++++++ management/dns_update.py | 4 +-- management/status_checks.py | 15 +++++----- setup/dns.sh | 4 +-- setup/system.sh | 53 ++++++++++++----------------------- 6 files changed, 87 insertions(+), 47 deletions(-) create mode 100644 conf/NetworkManager.conf create mode 100644 conf/unbound.conf diff --git a/conf/NetworkManager.conf b/conf/NetworkManager.conf new file mode 100644 index 00000000..72c85bcf --- /dev/null +++ b/conf/NetworkManager.conf @@ -0,0 +1,2 @@ +[main] +dns=unbound diff --git a/conf/unbound.conf b/conf/unbound.conf new file mode 100644 index 00000000..ae6d53bb --- /dev/null +++ b/conf/unbound.conf @@ -0,0 +1,56 @@ +server: + # the working directory. + directory: "/etc/unbound" + + # run as the unbound user + username: unbound + + verbosity: 0 # uncomment and increase to get more logging. + # logfile: "/var/log/unbound.log" # won't work due to apparmor + # use-syslog: no + + # By default listen only to localhost + #interface: ::1 + #interface: 127.0.0.1 + port: 53 + + # Only allow localhost to use this Unbound instance. 
+ access-control: 127.0.0.1/8 allow + access-control: ::1/128 allow + + # Functionality + do-ip4: yes + do-ip6: yes + do-udp: yes + do-tcp: yes + + # Performance + num-threads: 2 + cache-min-ttl: 300 + cache-max-ttl: 86400 + serve-expired: yes + neg-cache-size: 4M + msg-cache-size: 50m + rrset-cache-size: 100m + + so-reuseport: yes + so-rcvbuf: 4m + so-sndbuf: 4m + + # Privacy / hardening + # hide server info from clients + hide-identity: yes + hide-version: yes + harden-glue: yes + harden-dnssec-stripped: yes + harden-algo-downgrade: yes + harden-large-queries: yes + harden-short-bufsize: yes + + rrset-roundrobin: yes + minimal-responses: yes + identity: "Server" # + +remote-control: + control-enable: yes + control-port: 953 diff --git a/management/dns_update.py b/management/dns_update.py index 62a053f5..79b9ad8a 100755 --- a/management/dns_update.py +++ b/management/dns_update.py @@ -115,9 +115,9 @@ def do_dns_update(env, force=False): # If this is the only thing that changed? updated_domains.append("DKIM configuration") - # Clear bind9's DNS cache so our own DNS resolver is up to date. + # Clear unbound's DNS cache so our own DNS resolver is up to date. 
# (ignore errors with trap=True) - shell('check_call', ["/usr/sbin/rndc", "flush"], trap=True) + shell('check_call', ["/usr/sbin/unbound-control", "reload"], trap=True) if len(updated_domains) == 0: # if nothing was updated (except maybe DKIM's files), don't show any output diff --git a/management/status_checks.py b/management/status_checks.py index 5554cf0f..040695d8 100755 --- a/management/status_checks.py +++ b/management/status_checks.py @@ -22,9 +22,8 @@ from utils import shell, sort_domains, load_env_vars_from_file, load_settings def get_services(): return [ - { "name": "Local DNS (bind9)", "port": 53, "public": False, }, - #{ "name": "NSD Control", "port": 8952, "public": False, }, - { "name": "Local DNS Control (bind9/rndc)", "port": 953, "public": False, }, + { "name": "Local DNS (unbound)", "port": 53, "public": False, }, + { "name": "Local DNS Control (unbound)", "port": 953, "public": False, }, { "name": "Dovecot LMTP LDA", "port": 10026, "public": False, }, { "name": "Postgrey", "port": 10023, "public": False, }, { "name": "Spamassassin", "port": 10025, "public": False, }, @@ -49,15 +48,15 @@ def run_checks(rounded_values, env, output, pool, domains_to_check=None): # check that services are running if not run_services_checks(env, output, pool): - # If critical services are not running, stop. If bind9 isn't running, + # If critical services are not running, stop. If unbound isn't running, # all later DNS checks will timeout and that will take forever to # go through, and if running over the web will cause a fastcgi timeout. return - # clear bind9's DNS cache so our DNS checks are up to date - # (ignore errors; if bind9/rndc isn't running we'd already report + # clear unbound's DNS cache so our DNS checks are up to date + # (ignore errors; if unbound isn't running we'd already report # that in run_services checks.) 
- shell('check_call', ["/usr/sbin/rndc", "flush"], trap=True) + shell('check_call', ["/usr/sbin/unbound-control", "reload"], trap=True) run_system_checks(rounded_values, env, output) @@ -793,7 +792,7 @@ def query_dns(qname, rtype, nxdomain='[Not Set]', at=None, as_list=False): qname += "." # Use the default nameservers (as defined by the system, which is our locally - # running bind server), or if the 'at' argument is specified, use that host + # running unbound server), or if the 'at' argument is specified, use that host # as the nameserver. resolver = dns.resolver.get_default_resolver() if at: diff --git a/setup/dns.sh b/setup/dns.sh index fd0dec90..f24eb56b 100755 --- a/setup/dns.sh +++ b/setup/dns.sh @@ -14,7 +14,7 @@ echo "Installing nsd (DNS server)..." # Prepare nsd's configuration. # We configure nsd before installation as we only want it to bind to some addresses -# and it otherwise will have port / bind conflicts with bind9 used as the local resolver +# and it otherwise will have port / bind conflicts with unbound used as the local resolver mkdir -p /var/run/nsd mkdir -p /etc/nsd mkdir -p /etc/nsd/zones @@ -40,7 +40,7 @@ server: EOF -# Since we have bind9 listening on localhost for locally-generated +# Since we have unbound listening on localhost for locally-generated # DNS queries that require a recursive nameserver, and the system # might have other network interfaces for e.g. tunnelling, we have # to be specific about the network interfaces that nsd binds to. diff --git a/setup/system.sh b/setup/system.sh index 4d5ae33b..0d433463 100755 --- a/setup/system.sh +++ b/setup/system.sh @@ -310,50 +310,33 @@ fi #NODOC # DNS server, which won't work for RBLs. So we really need a local recursive # nameserver. # -# We'll install `bind9`, which as packaged for Ubuntu, has DNSSEC enabled by default via "dnssec-validation auto". +# We'll install unbound, which as packaged for Ubuntu, has DNSSEC enabled by default. 
# We'll have it be bound to 127.0.0.1 so that it does not interfere with # the public, recursive nameserver `nsd` bound to the public ethernet interfaces. -# -# About the settings: -# -# * The listen-on directive in named.conf.options restricts `bind9` to -# binding to the loopback interface instead of all interfaces. -# * The max-recursion-queries directive increases the maximum number of iterative queries. -# If more queries than specified are sent, bind9 returns SERVFAIL. After flushing the cache during system checks, -# we ran into the limit thus we are increasing it from 75 (default value) to 100. -apt_install bind9 -if ! grep -q "listen-on " /etc/bind/named.conf.options; then - # Add a listen-on directive if it doesn't exist inside the options block. - sed -i "s/^}/\n\tlisten-on { 127.0.0.1; };\n}/" /etc/bind/named.conf.options -fi -if ! grep -q "listen-on-v6 " /etc/bind/named.conf.options; then - # Add a listen-on-v6 directive if it doesn't exist inside the options block. - sed -i "s/^}/\n\tlisten-on-v6 { ::1; };\n}/" /etc/bind/named.conf.options -else - # Modify the listen-on-v6 directive if it does exist - sed -i "s/listen-on-v6 { any; }/listen-on-v6 { ::1; }/" /etc/bind/named.conf.options -fi +# remove bind9 in case it is still there +apt-get purge -qq -y bind9 -if ! grep -q "max-recursion-queries " /etc/bind/named.conf.options; then - # Add a max-recursion-queries directive if it doesn't exist inside the options block. - sed -i "s/^}/\n\tmax-recursion-queries 100;\n}/" /etc/bind/named.conf.options -fi +# Install unbound and dns utils (e.g. dig) +apt_install unbound python3-unbound bind9-dnsutils -# First we'll disable systemd-resolved's management of resolv.conf and its stub server. -# Breaking the symlink to /run/systemd/resolve/stub-resolv.conf means -# systemd-resolved will read it for DNS servers to use. Put in 127.0.0.1, -# which is where bind9 will be running. 
Obviously don't do this before -# installing bind9 or else apt won't be able to resolve a server to -# download bind9 from. -rm -f /etc/resolv.conf -tools/editconf.py /etc/systemd/resolved.conf DNSStubListener=no -echo "nameserver 127.0.0.1" > /etc/resolv.conf +# Configure unbound +cp -f conf/unbound.conf /etc/unbound/unbound.conf.d/miabunbound.conf + +# Configure network manager +mkdir /etc/NetworkManager/conf.d +cp -f conf/NetworkManager.conf /etc/NetworkManager/conf.d/unbound.conf + +# Modify systemd settings +tools/editconf.py /etc/systemd/resolv.conf \ + DNS=127.0.0.1 \ + DNSSEC=yes \ + DNSStubListener=no # Restart the DNS services. -restart_service bind9 systemctl restart systemd-resolved +systemctl restart unbound # ### Fail2Ban Service From 0f80d071db7f4727cec7d9652422101c70693f05 Mon Sep 17 00:00:00 2001 From: KiekerJan Date: Sun, 20 Mar 2022 22:12:02 +0100 Subject: [PATCH 02/12] remove elaborate dns config, simply delete resolv.conf symlink --- conf/NetworkManager.conf | 2 -- setup/system.sh | 8 +++----- 2 files changed, 3 insertions(+), 7 deletions(-) delete mode 100644 conf/NetworkManager.conf diff --git a/conf/NetworkManager.conf b/conf/NetworkManager.conf deleted file mode 100644 index 72c85bcf..00000000 --- a/conf/NetworkManager.conf +++ /dev/null @@ -1,2 +0,0 @@ -[main] -dns=unbound diff --git a/setup/system.sh b/setup/system.sh index 0d433463..2470c19e 100755 --- a/setup/system.sh +++ b/setup/system.sh @@ -323,15 +323,13 @@ apt_install unbound python3-unbound bind9-dnsutils # Configure unbound cp -f conf/unbound.conf /etc/unbound/unbound.conf.d/miabunbound.conf -# Configure network manager -mkdir /etc/NetworkManager/conf.d -cp -f conf/NetworkManager.conf /etc/NetworkManager/conf.d/unbound.conf - # Modify systemd settings -tools/editconf.py /etc/systemd/resolv.conf \ +rm -f /etc/resolv.conf +tools/editconf.py /etc/systemd/resolved.conf \ DNS=127.0.0.1 \ DNSSEC=yes \ DNSStubListener=no +echo "nameserver 127.0.0.1" > /etc/resolv.conf # Restart the DNS 
services. From 600c07fb478059bbd335a31ef6d8336c81e54ba8 Mon Sep 17 00:00:00 2001 From: KiekerJan Date: Sun, 20 Mar 2022 22:26:50 +0100 Subject: [PATCH 03/12] document bind9 replacement --- CHANGELOG.md | 1 + README.md | 1 + setup/system.sh | 2 +- 3 files changed, 3 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 70ac8cc6..6a57eac5 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -19,6 +19,7 @@ No features of Mail-in-a-Box have changed in this release, but with the newer ve * certbot is upgraded to 1.21 (via the Ubuntu repository instead of a PPA). * fail2ban is upgraded to 0.11.2. * nginx is upgraded to 1.18. +* bind9 is replaced with unbound. In Development -------------- diff --git a/README.md b/README.md index 72159006..61909b95 100644 --- a/README.md +++ b/README.md @@ -36,6 +36,7 @@ Functionality changes and additions Removed older cryptos following internet.nl recommendations * Replace opendkim with dkimpy (https://launchpad.net/dkimpy-milter) Added support for Ed25519 signing +* Replace bind9 with unbound DNS resolver Bug fixes * Munin error report fixed [see github issue](https://github.com/mail-in-a-box/mailinabox/issues/1555) diff --git a/setup/system.sh b/setup/system.sh index 2470c19e..04b3dc0a 100755 --- a/setup/system.sh +++ b/setup/system.sh @@ -315,7 +315,7 @@ fi #NODOC # the public, recursive nameserver `nsd` bound to the public ethernet interfaces. # remove bind9 in case it is still there -apt-get purge -qq -y bind9 +apt-get purge -qq -y bind9 bind9-utils # Install unbound and dns utils (e.g. 
dig) apt_install unbound python3-unbound bind9-dnsutils From a4b6b15c14c8b8c071a16e9a569ced21a42e19a7 Mon Sep 17 00:00:00 2001 From: KiekerJan Date: Tue, 22 Mar 2022 13:05:25 +0100 Subject: [PATCH 04/12] add possibility for unbound blocklist --- conf/unbound.conf | 14 +++++++++++++- setup/system.sh | 4 ++++ tools/create_dns_blocklist.sh | 33 +++++++++++++++++++++++++++++++++ 3 files changed, 50 insertions(+), 1 deletion(-) create mode 100755 tools/create_dns_blocklist.sh diff --git a/conf/unbound.conf b/conf/unbound.conf index ae6d53bb..30880afe 100644 --- a/conf/unbound.conf +++ b/conf/unbound.conf @@ -18,6 +18,14 @@ server: access-control: 127.0.0.1/8 allow access-control: ::1/128 allow + # Private IP ranges, which shall never be returned or forwarded as public DNS response. + private-address: 10.0.0.0/8 + private-address: 172.16.0.0/12 + private-address: 192.168.0.0/16 + private-address: 169.254.0.0/16 + private-address: fd00::/8 + private-address: fe80::/10 + # Functionality do-ip4: yes do-ip6: yes @@ -49,8 +57,12 @@ server: rrset-roundrobin: yes minimal-responses: yes - identity: "Server" # + identity: "Server" + + # Include possible white/blacklists + include: /etc/unbound/lists.d/*.conf remote-control: control-enable: yes control-port: 953 + diff --git a/setup/system.sh b/setup/system.sh index 04b3dc0a..be605475 100755 --- a/setup/system.sh +++ b/setup/system.sh @@ -323,6 +323,10 @@ apt_install unbound python3-unbound bind9-dnsutils # Configure unbound cp -f conf/unbound.conf /etc/unbound/unbound.conf.d/miabunbound.conf +if [ -d /etc/unbound/lists.d ]; then + mkdir /etc/unbound/lists.d +fi + # Modify systemd settings rm -f /etc/resolv.conf tools/editconf.py /etc/systemd/resolved.conf \ diff --git a/tools/create_dns_blocklist.sh b/tools/create_dns_blocklist.sh new file mode 100755 index 00000000..5b8bab86 --- /dev/null +++ b/tools/create_dns_blocklist.sh @@ -0,0 +1,33 @@ +#!/bin/bash +set -euo pipefail + +# Download select set of malware blocklists from The 
Firebog's "The Big Blocklist +# Collection" [0] and block access to them with Unbound by returning NXDOMAIN. +# +# [0]: https://firebog.net +( + # Malicious Lists + curl -sSf "https://raw.githubusercontent.com/DandelionSprout/adfilt/master/Alternate%20versions%20Anti-Malware%20List/AntiMalwareHosts.txt" ; + curl -sSf "https://osint.digitalside.it/Threat-Intel/lists/latestdomains.txt" ; + curl -sSf "https://s3.amazonaws.com/lists.disconnect.me/simple_malvertising.txt" ; + curl -sSf "https://v.firebog.net/hosts/Prigent-Crypto.txt" ; + curl -sSf "https://bitbucket.org/ethanr/dns-blacklists/raw/8575c9f96e5b4a1308f2f12394abd86d0927a4a0/bad_lists/Mandiant_APT1_Report_Appendix_D.txt" ; + curl -sSf "https://phishing.army/download/phishing_army_blocklist_extended.txt" ; + curl -sSf "https://gitlab.com/quidsup/notrack-blocklists/raw/master/notrack-malware.txt" ; + curl -sSf "https://raw.githubusercontent.com/Spam404/lists/master/main-blacklist.txt" ; + curl -sSf "https://raw.githubusercontent.com/FadeMind/hosts.extras/master/add.Risk/hosts" ; + curl -sSf "https://urlhaus.abuse.ch/downloads/hostfile/" ; +# curl -sSf "https://v.firebog.net/hosts/Prigent-Malware.txt" ; +# curl -sSf "https://v.firebog.net/hosts/Shalla-mal.txt" ; + +) | + cat | # Combine all lists into one + grep -v '#' | # Remove comments lines + grep -v '::' | # Remove universal ipv6 address + tr -d '\r' | # Normalize line endings by removing Windows carriage returns + sed -e 's/0\.0\.0\.0\s\{0,\}//g' | # Remove ip address from start of line + sed -e 's/127\.0\.0\.1\s\{0,\}//g' | + sed -e '/^$/d' | # Remove empty line + sort -u | # Sort and remove duplicates + awk '{print "local-zone: " ""$1"" " always_nxdomain"}' # Convert to Unbound configuration + \ No newline at end of file From 7ac4b412b08b4d0b6026cd30f22af9f8ee4e13fc Mon Sep 17 00:00:00 2001 From: KiekerJan Date: Sun, 3 Apr 2022 16:37:51 +0200 Subject: [PATCH 05/12] attempts to reduce unnecessary dns update messages --- management/daemon.py | 8 ++++++++ 
management/dns_update.py | 20 ++++++++++++++++++-- management/status_checks.py | 2 +- management/utils.py | 4 +++- 4 files changed, 30 insertions(+), 4 deletions(-) diff --git a/management/daemon.py b/management/daemon.py index 0bbb1ad5..8ba11e7e 100755 --- a/management/daemon.py +++ b/management/daemon.py @@ -12,6 +12,7 @@ import os, os.path, re, json, time import multiprocessing.pool, subprocess +import logging from functools import wraps @@ -762,14 +763,21 @@ def log_failed_login(request): # APP if __name__ == '__main__': + logging_level = logging.INFO + if "DEBUG" in os.environ: # Turn on Flask debugging. app.debug = True + logging_level = logging.DEBUG if not app.debug: app.logger.addHandler(utils.create_syslog_handler()) #app.logger.info('API key: ' + auth_service.key) + logging.basicConfig(level=logging_level, format='%(levelname)s:%(module)s.%(funcName)s %(message)s') + logging.info('Logging level set to %s', logging.getLevelName(logging_level)) + # Start the application server. Listens on 127.0.0.1 (IPv4 only). app.run(port=10222) + diff --git a/management/dns_update.py b/management/dns_update.py index 79b9ad8a..e1583528 100755 --- a/management/dns_update.py +++ b/management/dns_update.py @@ -8,6 +8,7 @@ import sys, os, os.path, urllib.parse, datetime, re, hashlib, base64 import ipaddress import rtyaml import dns.resolver +import logging from utils import shell, load_env_vars_from_file, safe_domain_name, sort_domains from ssl_certificates import get_ssl_certificates, check_certificate @@ -117,7 +118,7 @@ def do_dns_update(env, force=False): # Clear unbound's DNS cache so our own DNS resolver is up to date. 
# (ignore errors with trap=True) - shell('check_call', ["/usr/sbin/unbound-control", "reload"], trap=True) + shell('check_call', ["/usr/sbin/unbound-control", "reload"], trap=True, capture_stdout=False) if len(updated_domains) == 0: # if nothing was updated (except maybe DKIM's files), don't show any output @@ -1101,17 +1102,32 @@ def set_secondary_dns(hostnames, env): if len(hostnames) > 0: # Validate that all hostnames are valid and that all zone-xfer IP addresses are valid. resolver = dns.resolver.get_default_resolver() - resolver.timeout = 5 + resolver.timeout = 6 for item in hostnames: if not item.startswith("xfr:"): # Resolve hostname. try: response = resolver.resolve(item, "A") except (dns.resolver.NoNameservers, dns.resolver.NXDOMAIN, dns.resolver.NoAnswer): + logging.debug('Error on resolving ipv4 address, trying ipv6') try: response = resolver.resolve(item, "AAAA") except (dns.resolver.NoNameservers, dns.resolver.NXDOMAIN, dns.resolver.NoAnswer): raise ValueError("Could not resolve the IP address of %s." % item) + except (dns.resolver.Timeout): + resolver.timeout = 7 + logging.warning('Timeout on resolving ipv4 address re-trying') + try: + response = resolver.resolve(item, "A") + except (dns.resolver.NoNameservers, dns.resolver.NXDOMAIN, dns.resolver.NoAnswer): + logging.debug('Error on resolving ipv4 address, trying ipv6 (2)') + try: + response = resolver.resolve(item, "AAAA") + except (dns.resolver.NoNameservers, dns.resolver.NXDOMAIN, dns.resolver.NoAnswer): + raise ValueError("Could not resolve the IP address of %s." % item) + except (dns.resolver.Timeout): + raise ValueError("Could not resolve the IP address of %s due to timeout." % item) + resolver.timeout = 6 else: # Validate IP address. 
try: diff --git a/management/status_checks.py b/management/status_checks.py index 040695d8..2fdba742 100755 --- a/management/status_checks.py +++ b/management/status_checks.py @@ -56,7 +56,7 @@ def run_checks(rounded_values, env, output, pool, domains_to_check=None): # clear unbound's DNS cache so our DNS checks are up to date # (ignore errors; if unbound isn't running we'd already report # that in run_services checks.) - shell('check_call', ["/usr/sbin/unbound-control", "reload"], trap=True) + shell('check_call', ["/usr/sbin/unbound-control", "reload"], trap=True, capture_stdout=False) run_system_checks(rounded_values, env, output) diff --git a/management/utils.py b/management/utils.py index bc357040..de23361c 100644 --- a/management/utils.py +++ b/management/utils.py @@ -106,7 +106,7 @@ def sort_email_addresses(email_addresses, env): ret.extend(sorted(email_addresses)) # whatever is left return ret -def shell(method, cmd_args, env={}, capture_stderr=False, return_bytes=False, trap=False, input=None): +def shell(method, cmd_args, env={}, capture_stdout=True, capture_stderr=False, return_bytes=False, trap=False, input=None): # A safe way to execute processes. # Some processes like apt-get require being given a sane PATH. 
import subprocess @@ -116,6 +116,8 @@ def shell(method, cmd_args, env={}, capture_stderr=False, return_bytes=False, tr 'env': env, 'stderr': None if not capture_stderr else subprocess.STDOUT, } + if not capture_stdout: + kwargs['stdout'] = subprocess.DEVNULL if method == "check_output" and input is not None: kwargs['input'] = input From 9b252e02090433b6c96b76ef3cbbbd5edb9713d4 Mon Sep 17 00:00:00 2001 From: KiekerJan Date: Mon, 4 Apr 2022 22:31:54 +0200 Subject: [PATCH 06/12] retrying dns timeouts --- management/dns_update.py | 6 +++--- management/status_checks.py | 17 +++++++++++++++-- 2 files changed, 18 insertions(+), 5 deletions(-) diff --git a/management/dns_update.py b/management/dns_update.py index e1583528..9191a307 100755 --- a/management/dns_update.py +++ b/management/dns_update.py @@ -1102,7 +1102,7 @@ def set_secondary_dns(hostnames, env): if len(hostnames) > 0: # Validate that all hostnames are valid and that all zone-xfer IP addresses are valid. resolver = dns.resolver.get_default_resolver() - resolver.timeout = 6 + resolver.timeout = 3 for item in hostnames: if not item.startswith("xfr:"): # Resolve hostname. @@ -1115,7 +1115,7 @@ def set_secondary_dns(hostnames, env): except (dns.resolver.NoNameservers, dns.resolver.NXDOMAIN, dns.resolver.NoAnswer): raise ValueError("Could not resolve the IP address of %s." % item) except (dns.resolver.Timeout): - resolver.timeout = 7 + resolver.timeout = 5 logging.warning('Timeout on resolving ipv4 address re-trying') try: response = resolver.resolve(item, "A") @@ -1127,7 +1127,7 @@ def set_secondary_dns(hostnames, env): raise ValueError("Could not resolve the IP address of %s." % item) except (dns.resolver.Timeout): raise ValueError("Could not resolve the IP address of %s due to timeout." % item) - resolver.timeout = 6 + resolver.timeout = 3 else: # Validate IP address. 
try: diff --git a/management/status_checks.py b/management/status_checks.py index 2fdba742..93e0320f 100755 --- a/management/status_checks.py +++ b/management/status_checks.py @@ -12,6 +12,7 @@ import dateutil.parser, dateutil.tz import idna import psutil import postfix_mta_sts_resolver.resolver +import logging from dns_update import get_dns_zones, build_tlsa_record, get_custom_dns_config, get_secondary_dns, get_custom_dns_records from web_update import get_web_domains, get_domains_with_a_records @@ -800,7 +801,7 @@ def query_dns(qname, rtype, nxdomain='[Not Set]', at=None, as_list=False): resolver.nameservers = [at] # Set a timeout so that a non-responsive server doesn't hold us back. - resolver.timeout = 5 + resolver.timeout = 3 # Do the query. try: @@ -808,9 +809,21 @@ def query_dns(qname, rtype, nxdomain='[Not Set]', at=None, as_list=False): except (dns.resolver.NoNameservers, dns.resolver.NXDOMAIN, dns.resolver.NoAnswer): # Host did not have an answer for this query; not sure what the # difference is between the two exceptions. + logging.info("No result for dns lookup %s, %s", qname, rtype) return nxdomain except dns.exception.Timeout: - return "[timeout]" + logging.info("Timeout on dns lookup %s, %s. Retrying", qname, rtype) + resolver.timeout = 5 + try: + response = resolver.resolve(qname, rtype, search=True) + except (dns.resolver.NoNameservers, dns.resolver.NXDOMAIN, dns.resolver.NoAnswer): + # Host did not have an answer for this query; not sure what the + # difference is between the two exceptions. + logging.info("No result for dns lookup %s, %s (2)", qname, rtype) + return nxdomain + except dns.exception.Timeout: + logging.info("Timeout on dns lookup %s, %s.", qname, rtype) + return "[timeout]" # Normalize IP addresses. IP address --- especially IPv6 addresses --- can # be expressed in equivalent string forms. 
Canonicalize the form before From d35b068a73d281de8ea82c7b012586c20b770f28 Mon Sep 17 00:00:00 2001 From: KiekerJan Date: Sun, 17 Apr 2022 22:56:30 +0200 Subject: [PATCH 07/12] add dns exception handling --- management/daemon.py | 3 ++- management/dns_update.py | 21 ++++++++++++++++----- management/status_checks.py | 10 ++++++---- 3 files changed, 24 insertions(+), 10 deletions(-) diff --git a/management/daemon.py b/management/daemon.py index 8ba11e7e..ae7fb351 100755 --- a/management/daemon.py +++ b/management/daemon.py @@ -274,6 +274,7 @@ def dns_update(): try: return do_dns_update(env, force=request.form.get('force', '') == '1') except Exception as e: + logging.exception('dns update exc') return (str(e), 500) @app.route('/dns/secondary-nameserver') @@ -763,7 +764,7 @@ def log_failed_login(request): # APP if __name__ == '__main__': - logging_level = logging.INFO + logging_level = logging.DEBUG if "DEBUG" in os.environ: # Turn on Flask debugging. diff --git a/management/dns_update.py b/management/dns_update.py index 9191a307..ad55868b 100755 --- a/management/dns_update.py +++ b/management/dns_update.py @@ -1065,6 +1065,7 @@ def set_custom_dns_record(qname, rtype, value, action, env): def get_secondary_dns(custom_dns, mode=None): resolver = dns.resolver.get_default_resolver() resolver.timeout = 10 + resolver.lifetime = 10 values = [] for qname, rtype, value in custom_dns: @@ -1082,10 +1083,17 @@ def get_secondary_dns(custom_dns, mode=None): # doesn't. 
if not hostname.startswith("xfr:"): if mode == "xfr": - response = dns.resolver.resolve(hostname+'.', "A", raise_on_no_answer=False) - values.extend(map(str, response)) - response = dns.resolver.resolve(hostname+'.', "AAAA", raise_on_no_answer=False) - values.extend(map(str, response)) + try: + response = resolver.resolve(hostname+'.', "A", raise_on_no_answer=False) + values.extend(map(str, response)) + except dns.exception.DNSException: + logging.debug("Secondary dns Alookup exception %s", hostname) + + try: + response = resolver.resolve(hostname+'.', "AAAA", raise_on_no_answer=False) + values.extend(map(str, response)) + except dns.exception.DNSException: + logging.debug("Secondary dns AAAA lookup exception %s", hostname) continue values.append(hostname) @@ -1103,6 +1111,7 @@ def set_secondary_dns(hostnames, env): # Validate that all hostnames are valid and that all zone-xfer IP addresses are valid. resolver = dns.resolver.get_default_resolver() resolver.timeout = 3 + resolver.lifetime = 3 for item in hostnames: if not item.startswith("xfr:"): # Resolve hostname. @@ -1116,7 +1125,8 @@ def set_secondary_dns(hostnames, env): raise ValueError("Could not resolve the IP address of %s." % item) except (dns.resolver.Timeout): resolver.timeout = 5 - logging.warning('Timeout on resolving ipv4 address re-trying') + resolver.lifetime = 5 + logging.debug('Timeout on resolving ipv4 address re-trying') try: response = resolver.resolve(item, "A") except (dns.resolver.NoNameservers, dns.resolver.NXDOMAIN, dns.resolver.NoAnswer): @@ -1128,6 +1138,7 @@ def set_secondary_dns(hostnames, env): except (dns.resolver.Timeout): raise ValueError("Could not resolve the IP address of %s due to timeout." % item) resolver.timeout = 3 + resolver.lifetime = 3 else: # Validate IP address. 
try: diff --git a/management/status_checks.py b/management/status_checks.py index 93e0320f..2bdaf538 100755 --- a/management/status_checks.py +++ b/management/status_checks.py @@ -802,6 +802,7 @@ def query_dns(qname, rtype, nxdomain='[Not Set]', at=None, as_list=False): # Set a timeout so that a non-responsive server doesn't hold us back. resolver.timeout = 3 + reaolver.lifetime = 3 # Do the query. try: @@ -809,20 +810,21 @@ def query_dns(qname, rtype, nxdomain='[Not Set]', at=None, as_list=False): except (dns.resolver.NoNameservers, dns.resolver.NXDOMAIN, dns.resolver.NoAnswer): # Host did not have an answer for this query; not sure what the # difference is between the two exceptions. - logging.info("No result for dns lookup %s, %s", qname, rtype) + logging.debug("No result for dns lookup %s, %s", qname, rtype) return nxdomain except dns.exception.Timeout: - logging.info("Timeout on dns lookup %s, %s. Retrying", qname, rtype) + logging.debug("Timeout on dns lookup %s, %s. Retrying", qname, rtype) resolver.timeout = 5 + resolver.lifetime = 5 try: response = resolver.resolve(qname, rtype, search=True) except (dns.resolver.NoNameservers, dns.resolver.NXDOMAIN, dns.resolver.NoAnswer): # Host did not have an answer for this query; not sure what the # difference is between the two exceptions. - logging.info("No result for dns lookup %s, %s (2)", qname, rtype) + logging.debug("No result for dns lookup %s, %s (2)", qname, rtype) return nxdomain except dns.exception.Timeout: - logging.info("Timeout on dns lookup %s, %s.", qname, rtype) + logging.debug("Timeout on dns lookup %s, %s.", qname, rtype) return "[timeout]" # Normalize IP addresses. 
IP address --- especially IPv6 addresses --- can From 87c9e2381ee3ab7f01444d40d3c9d30b3a8c4719 Mon Sep 17 00:00:00 2001 From: KiekerJan Date: Sun, 17 Apr 2022 23:08:12 +0200 Subject: [PATCH 08/12] check unbound is up before changing local dns server --- setup/system.sh | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/setup/system.sh b/setup/system.sh index be605475..f50a353f 100755 --- a/setup/system.sh +++ b/setup/system.sh @@ -327,6 +327,17 @@ if [ -d /etc/unbound/lists.d ]; then mkdir /etc/unbound/lists.d fi +systemctl restart unbound + +unbound-control -q status + +# Only reset the local dns settings if unbound server is running, otherwise we'll +# end up with a system with an unusable internet connection +if [ $? -ne 0 ]; then + echo "Recursive DNS server not active" + exit 1 +fi + # Modify systemd settings rm -f /etc/resolv.conf tools/editconf.py /etc/systemd/resolved.conf \ @@ -338,7 +349,6 @@ echo "nameserver 127.0.0.1" > /etc/resolv.conf # Restart the DNS services. systemctl restart systemd-resolved -systemctl restart unbound # ### Fail2Ban Service From 1b0f7991db639c5a1a7ea2fc18880231a6c0a5de Mon Sep 17 00:00:00 2001 From: KiekerJan Date: Mon, 18 Apr 2022 08:30:22 +0200 Subject: [PATCH 09/12] fix spelling error --- management/status_checks.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/management/status_checks.py b/management/status_checks.py index 2bdaf538..bd03890e 100755 --- a/management/status_checks.py +++ b/management/status_checks.py @@ -802,7 +802,7 @@ def query_dns(qname, rtype, nxdomain='[Not Set]', at=None, as_list=False): # Set a timeout so that a non-responsive server doesn't hold us back. resolver.timeout = 3 - reaolver.lifetime = 3 + resolver.lifetime = 3 # Do the query. 
try: From aaa7702d9d19e431071a53e1177eddf9d6a964f4 Mon Sep 17 00:00:00 2001 From: "github@kiekerjan.isdronken.nl" Date: Mon, 18 Apr 2022 21:40:20 +0200 Subject: [PATCH 10/12] make dns resolver retrying explicit --- management/dns_update.py | 36 +++++++++++++++++------------------- management/status_checks.py | 29 ++++++++++++----------------- 2 files changed, 29 insertions(+), 36 deletions(-) diff --git a/management/dns_update.py b/management/dns_update.py index ad55868b..d934f50b 100755 --- a/management/dns_update.py +++ b/management/dns_update.py @@ -1087,7 +1087,7 @@ def get_secondary_dns(custom_dns, mode=None): response = resolver.resolve(hostname+'.', "A", raise_on_no_answer=False) values.extend(map(str, response)) except dns.exception.DNSException: - logging.debug("Secondary dns Alookup exception %s", hostname) + logging.debug("Secondary dns A lookup exception %s", hostname) try: response = resolver.resolve(hostname+'.', "AAAA", raise_on_no_answer=False) @@ -1110,35 +1110,33 @@ def set_secondary_dns(hostnames, env): if len(hostnames) > 0: # Validate that all hostnames are valid and that all zone-xfer IP addresses are valid. resolver = dns.resolver.get_default_resolver() - resolver.timeout = 3 - resolver.lifetime = 3 + resolver.timeout = 5 + resolver.lifetime = 5 for item in hostnames: if not item.startswith("xfr:"): # Resolve hostname. - try: - response = resolver.resolve(item, "A") - except (dns.resolver.NoNameservers, dns.resolver.NXDOMAIN, dns.resolver.NoAnswer): - logging.debug('Error on resolving ipv4 address, trying ipv6') - try: - response = resolver.resolve(item, "AAAA") - except (dns.resolver.NoNameservers, dns.resolver.NXDOMAIN, dns.resolver.NoAnswer): - raise ValueError("Could not resolve the IP address of %s." 
% item) - except (dns.resolver.Timeout): - resolver.timeout = 5 - resolver.lifetime = 5 - logging.debug('Timeout on resolving ipv4 address re-trying') + tries = 2 + + while tries > 0: + tries = tries - 1 try: response = resolver.resolve(item, "A") + tries = 0 except (dns.resolver.NoNameservers, dns.resolver.NXDOMAIN, dns.resolver.NoAnswer): - logging.debug('Error on resolving ipv4 address, trying ipv6 (2)') + logging.debug('Error on resolving ipv4 address, trying ipv6') try: response = resolver.resolve(item, "AAAA") + tries = 0 except (dns.resolver.NoNameservers, dns.resolver.NXDOMAIN, dns.resolver.NoAnswer): raise ValueError("Could not resolve the IP address of %s." % item) + except (dns.resolver.Timeout): + logging.debug('Timeout on resolving ipv6 address') + if tries < 1: + raise ValueError("Could not resolve the IP address of %s due to timeout." % item) except (dns.resolver.Timeout): - raise ValueError("Could not resolve the IP address of %s due to timeout." % item) - resolver.timeout = 3 - resolver.lifetime = 3 + logging.debug('Timeout on resolving ipv4 address') + if tries < 1: + raise ValueError("Could not resolve the IP address of %s due to timeout." % item) else: # Validate IP address. try: diff --git a/management/status_checks.py b/management/status_checks.py index bd03890e..94b2ca4a 100755 --- a/management/status_checks.py +++ b/management/status_checks.py @@ -801,31 +801,26 @@ def query_dns(qname, rtype, nxdomain='[Not Set]', at=None, as_list=False): resolver.nameservers = [at] # Set a timeout so that a non-responsive server doesn't hold us back. - resolver.timeout = 3 - resolver.lifetime = 3 + resolver.timeout = 5 + resolver.lifetime = 5 + tries = 2 # Do the query. - try: - response = resolver.resolve(qname, rtype, search=True) - except (dns.resolver.NoNameservers, dns.resolver.NXDOMAIN, dns.resolver.NoAnswer): - # Host did not have an answer for this query; not sure what the - # difference is between the two exceptions. 
- logging.debug("No result for dns lookup %s, %s", qname, rtype) - return nxdomain - except dns.exception.Timeout: - logging.debug("Timeout on dns lookup %s, %s. Retrying", qname, rtype) - resolver.timeout = 5 - resolver.lifetime = 5 + while tries > 0: + tries = tries - 1 try: response = resolver.resolve(qname, rtype, search=True) + tries = 0 except (dns.resolver.NoNameservers, dns.resolver.NXDOMAIN, dns.resolver.NoAnswer): # Host did not have an answer for this query; not sure what the # difference is between the two exceptions. - logging.debug("No result for dns lookup %s, %s (2)", qname, rtype) - return nxdomain + logging.debug("No result for dns lookup %s, %s (%d)", qname, rtype, tries) + if tries < 1: + return nxdomain except dns.exception.Timeout: - logging.debug("Timeout on dns lookup %s, %s.", qname, rtype) - return "[timeout]" + logging.debug("Timeout on dns lookup %s, %s (%d)", qname, rtype, tries) + if tries < 1: + return "[timeout]" # Normalize IP addresses. IP address --- especially IPv6 addresses --- can # be expressed in equivalent string forms. Canonicalize the form before From a1851a413bc14316068bf059e66635008ce5083b Mon Sep 17 00:00:00 2001 From: KiekerJan Date: Mon, 18 Apr 2022 21:52:33 +0200 Subject: [PATCH 11/12] use actual unbound command to flush cache --- management/dns_update.py | 2 +- management/status_checks.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/management/dns_update.py b/management/dns_update.py index d934f50b..ef20d767 100755 --- a/management/dns_update.py +++ b/management/dns_update.py @@ -118,7 +118,7 @@ def do_dns_update(env, force=False): # Clear unbound's DNS cache so our own DNS resolver is up to date. 
# (ignore errors with trap=True) - shell('check_call', ["/usr/sbin/unbound-control", "reload"], trap=True, capture_stdout=False) + shell('check_call', ["/usr/sbin/unbound-control", "flush_zone", "."], trap=True, capture_stdout=False) if len(updated_domains) == 0: # if nothing was updated (except maybe DKIM's files), don't show any output diff --git a/management/status_checks.py b/management/status_checks.py index 94b2ca4a..3a20c05c 100755 --- a/management/status_checks.py +++ b/management/status_checks.py @@ -57,7 +57,7 @@ def run_checks(rounded_values, env, output, pool, domains_to_check=None): # clear unbound's DNS cache so our DNS checks are up to date # (ignore errors; if unbound isn't running we'd already report # that in run_services checks.) - shell('check_call', ["/usr/sbin/unbound-control", "reload"], trap=True, capture_stdout=False) + shell('check_call', ["/usr/sbin/unbound-control", "flush_zone", "."], trap=True, capture_stdout=False) run_system_checks(rounded_values, env, output) From 6b30ee8665db7173345bd09f27558de11def1e5d Mon Sep 17 00:00:00 2001 From: "github@kiekerjan.isdronken.nl" Date: Wed, 20 Apr 2022 23:42:34 +0200 Subject: [PATCH 12/12] skip retry on spamhaus dns lookups --- management/status_checks.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/management/status_checks.py b/management/status_checks.py index 3a20c05c..21ad8475 100755 --- a/management/status_checks.py +++ b/management/status_checks.py @@ -296,7 +296,7 @@ def run_network_checks(env, output): # by a spammer, or the user may be deploying on a residential network. We # will not be able to reliably send mail in these cases. 
rev_ip4 = ".".join(reversed(env['PUBLIC_IP'].split('.'))) - zen = query_dns(rev_ip4+'.zen.spamhaus.org', 'A', nxdomain=None) + zen = query_dns(rev_ip4+'.zen.spamhaus.org', 'A', nxdomain=None, retry = False) if zen is None: output.print_ok("IP address is not blacklisted by zen.spamhaus.org.") elif zen == "[timeout]": @@ -747,7 +747,7 @@ def check_mail_domain(domain, env, output): # Stop if the domain is listed in the Spamhaus Domain Block List. # The user might have chosen a domain that was previously in use by a spammer # and will not be able to reliably send mail. - dbl = query_dns(domain+'.dbl.spamhaus.org', "A", nxdomain=None) + dbl = query_dns(domain+'.dbl.spamhaus.org', "A", nxdomain=None, retry=False) if dbl is None: output.print_ok("Domain is not blacklisted by dbl.spamhaus.org.") elif dbl == "[timeout]": @@ -783,7 +783,7 @@ def check_web_domain(domain, rounded_time, ssl_certificates, env, output): # website for also needs a signed certificate. check_ssl_cert(domain, rounded_time, ssl_certificates, env, output) -def query_dns(qname, rtype, nxdomain='[Not Set]', at=None, as_list=False): +def query_dns(qname, rtype, nxdomain='[Not Set]', at=None, as_list=False, retry=True): # Make the qname absolute by appending a period. Without this, dns.resolver.query # will fall back a failed lookup to a second query with this machine's hostname # appended. This has been causing some false-positive Spamhaus reports. The @@ -804,7 +804,11 @@ def query_dns(qname, rtype, nxdomain='[Not Set]', at=None, as_list=False): resolver.timeout = 5 resolver.lifetime = 5 - tries = 2 + if retry: + tries = 2 + else: + tries = 1 + # Do the query. while tries > 0: tries = tries - 1