diff --git a/CHANGELOG.md b/CHANGELOG.md
index 70ac8cc6..6a57eac5 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -19,6 +19,7 @@ No features of Mail-in-a-Box have changed in this release, but with the newer ve
 * certbot is upgraded to 1.21 (via the Ubuntu repository instead of a PPA).
 * fail2ban is upgraded to 0.11.2.
 * nginx is upgraded to 1.18.
+* bind9 is replaced with unbound.
 
 In Development
 --------------
diff --git a/README.md b/README.md
index 72159006..61909b95 100644
--- a/README.md
+++ b/README.md
@@ -36,6 +36,7 @@ Functionality changes and additions
   Removed older cryptos following internet.nl recommendations
 * Replace opendkim with dkimpy (https://launchpad.net/dkimpy-milter)
   Added support for Ed25519 signing
+* Replace bind9 with unbound DNS resolver
 
 Bug fixes
 * Munin error report fixed [see github issue](https://github.com/mail-in-a-box/mailinabox/issues/1555)
diff --git a/conf/unbound.conf b/conf/unbound.conf
new file mode 100644
index 00000000..30880afe
--- /dev/null
+++ b/conf/unbound.conf
@@ -0,0 +1,68 @@
+server:
+    # the working directory.
+    directory: "/etc/unbound"
+
+    # run as the unbound user
+    username: unbound
+
+    verbosity: 0 # increase this value to get more logging.
+    # logfile: "/var/log/unbound.log" # won't work due to apparmor
+    # use-syslog: no
+
+    # By default listen only to localhost
+    #interface: ::1
+    #interface: 127.0.0.1
+    port: 53
+
+    # Only allow localhost to use this Unbound instance.
+    access-control: 127.0.0.0/8 allow
+    access-control: ::1/128 allow
+
+    # Private IP ranges, which shall never be returned or forwarded as public DNS response.
+    private-address: 10.0.0.0/8
+    private-address: 172.16.0.0/12
+    private-address: 192.168.0.0/16
+    private-address: 169.254.0.0/16
+    private-address: fd00::/8
+    private-address: fe80::/10
+
+    # Functionality
+    do-ip4: yes
+    do-ip6: yes
+    do-udp: yes
+    do-tcp: yes
+
+    # Performance
+    num-threads: 2
+    cache-min-ttl: 300
+    cache-max-ttl: 86400
+    serve-expired: yes
+    neg-cache-size: 4m
+    msg-cache-size: 50m
+    rrset-cache-size: 100m
+
+    so-reuseport: yes
+    so-rcvbuf: 4m
+    so-sndbuf: 4m
+
+    # Privacy / hardening
+    # hide server info from clients
+    hide-identity: yes
+    hide-version: yes
+    harden-glue: yes
+    harden-dnssec-stripped: yes
+    harden-algo-downgrade: yes
+    harden-large-queries: yes
+    harden-short-bufsize: yes
+
+    rrset-roundrobin: yes
+    minimal-responses: yes
+    identity: "Server"
+
+    # Include possible white/blacklists
+    include: /etc/unbound/lists.d/*.conf
+
+remote-control:
+    control-enable: yes
+    control-port: 953
+
diff --git a/management/daemon.py b/management/daemon.py
index 0bbb1ad5..ae7fb351 100755
--- a/management/daemon.py
+++ b/management/daemon.py
@@ -12,6 +12,7 @@
 
 import os, os.path, re, json, time
 import multiprocessing.pool, subprocess
+import logging
 
 from functools import wraps
 
@@ -273,6 +274,7 @@ def dns_update():
 	try:
 		return do_dns_update(env, force=request.form.get('force', '') == '1')
 	except Exception as e:
+		logging.exception('dns update exc')
 		return (str(e), 500)
 
 @app.route('/dns/secondary-nameserver')
@@ -762,14 +764,21 @@ def log_failed_login(request):
 # APP
 
 if __name__ == '__main__':
+	logging_level = logging.INFO
+
 	if "DEBUG" in os.environ:
 		# Turn on Flask debugging.
 		app.debug = True
+		logging_level = logging.DEBUG
 
 	if not app.debug:
 		app.logger.addHandler(utils.create_syslog_handler())
 
 	#app.logger.info('API key: ' + auth_service.key)
 
+	logging.basicConfig(level=logging_level, format='%(levelname)s:%(module)s.%(funcName)s %(message)s')
+	logging.info('Logging level set to %s', logging.getLevelName(logging_level))
+
 	# Start the application server. Listens on 127.0.0.1 (IPv4 only).
 	app.run(port=10222)
+
diff --git a/management/dns_update.py b/management/dns_update.py
index 62a053f5..ef20d767 100755
--- a/management/dns_update.py
+++ b/management/dns_update.py
@@ -8,6 +8,7 @@ import sys, os, os.path, urllib.parse, datetime, re, hashlib, base64
 import ipaddress
 import rtyaml
 import dns.resolver
+import logging
 
 from utils import shell, load_env_vars_from_file, safe_domain_name, sort_domains
 from ssl_certificates import get_ssl_certificates, check_certificate
@@ -115,9 +116,9 @@ def do_dns_update(env, force=False):
 		# If this is the only thing that changed?
 		updated_domains.append("DKIM configuration")
 
-	# Clear bind9's DNS cache so our own DNS resolver is up to date.
+	# Clear unbound's DNS cache so our own DNS resolver is up to date.
 	# (ignore errors with trap=True)
-	shell('check_call', ["/usr/sbin/rndc", "flush"], trap=True)
+	shell('check_call', ["/usr/sbin/unbound-control", "flush_zone", "."], trap=True, capture_stdout=False)
 
 	if len(updated_domains) == 0:
 		# if nothing was updated (except maybe DKIM's files), don't show any output
@@ -1064,6 +1065,7 @@ def set_custom_dns_record(qname, rtype, value, action, env):
 def get_secondary_dns(custom_dns, mode=None):
 	resolver = dns.resolver.get_default_resolver()
 	resolver.timeout = 10
+	resolver.lifetime = 10
 
 	values = []
 	for qname, rtype, value in custom_dns:
@@ -1081,10 +1083,17 @@ def get_secondary_dns(custom_dns, mode=None):
 			# doesn't.
 			if not hostname.startswith("xfr:"):
 				if mode == "xfr":
-					response = dns.resolver.resolve(hostname+'.', "A", raise_on_no_answer=False)
-					values.extend(map(str, response))
-					response = dns.resolver.resolve(hostname+'.', "AAAA", raise_on_no_answer=False)
-					values.extend(map(str, response))
+					try:
+						response = resolver.resolve(hostname+'.', "A", raise_on_no_answer=False)
+						values.extend(map(str, response))
+					except dns.exception.DNSException:
+						logging.debug("Secondary dns A lookup exception %s", hostname)
+
+					try:
+						response = resolver.resolve(hostname+'.', "AAAA", raise_on_no_answer=False)
+						values.extend(map(str, response))
+					except dns.exception.DNSException:
+						logging.debug("Secondary dns AAAA lookup exception %s", hostname)
 					continue
 				values.append(hostname)
 
@@ -1102,16 +1111,32 @@ def set_secondary_dns(hostnames, env):
 		# Validate that all hostnames are valid and that all zone-xfer IP addresses are valid.
 		resolver = dns.resolver.get_default_resolver()
 		resolver.timeout = 5
+		resolver.lifetime = 5
 		for item in hostnames:
 			if not item.startswith("xfr:"):
 				# Resolve hostname.
-				try:
-					response = resolver.resolve(item, "A")
-				except (dns.resolver.NoNameservers, dns.resolver.NXDOMAIN, dns.resolver.NoAnswer):
+				tries = 2
+
+				while tries > 0:
+					tries = tries - 1
 					try:
-						response = resolver.resolve(item, "AAAA")
+						response = resolver.resolve(item, "A")
+						tries = 0
 					except (dns.resolver.NoNameservers, dns.resolver.NXDOMAIN, dns.resolver.NoAnswer):
-						raise ValueError("Could not resolve the IP address of %s." % item)
+						logging.debug('Error on resolving ipv4 address, trying ipv6')
+						try:
+							response = resolver.resolve(item, "AAAA")
+							tries = 0
+						except (dns.resolver.NoNameservers, dns.resolver.NXDOMAIN, dns.resolver.NoAnswer):
+							raise ValueError("Could not resolve the IP address of %s." % item)
+						except (dns.resolver.Timeout):
+							logging.debug('Timeout on resolving ipv6 address')
+							if tries < 1:
+								raise ValueError("Could not resolve the IP address of %s due to timeout." % item)
+					except (dns.resolver.Timeout):
+						logging.debug('Timeout on resolving ipv4 address')
+						if tries < 1:
+							raise ValueError("Could not resolve the IP address of %s due to timeout." % item)
 			else:
 				# Validate IP address.
 				try:
diff --git a/management/status_checks.py b/management/status_checks.py
index 5554cf0f..21ad8475 100755
--- a/management/status_checks.py
+++ b/management/status_checks.py
@@ -12,6 +12,7 @@ import dateutil.parser, dateutil.tz
 import idna
 import psutil
 import postfix_mta_sts_resolver.resolver
+import logging
 
 from dns_update import get_dns_zones, build_tlsa_record, get_custom_dns_config, get_secondary_dns, get_custom_dns_records
 from web_update import get_web_domains, get_domains_with_a_records
@@ -22,9 +23,8 @@ from utils import shell, sort_domains, load_env_vars_from_file, load_settings
 
 def get_services():
 	return [
-		{ "name": "Local DNS (bind9)", "port": 53, "public": False, },
-		#{ "name": "NSD Control", "port": 8952, "public": False, },
-		{ "name": "Local DNS Control (bind9/rndc)", "port": 953, "public": False, },
+		{ "name": "Local DNS (unbound)", "port": 53, "public": False, },
+		{ "name": "Local DNS Control (unbound)", "port": 953, "public": False, },
 		{ "name": "Dovecot LMTP LDA", "port": 10026, "public": False, },
 		{ "name": "Postgrey", "port": 10023, "public": False, },
 		{ "name": "Spamassassin", "port": 10025, "public": False, },
@@ -49,15 +49,15 @@ def run_checks(rounded_values, env, output, pool, domains_to_check=None):
 
 	# check that services are running
 	if not run_services_checks(env, output, pool):
-		# If critical services are not running, stop. If bind9 isn't running,
+		# If critical services are not running, stop. If unbound isn't running,
 		# all later DNS checks will timeout and that will take forever to
 		# go through, and if running over the web will cause a fastcgi timeout.
 		return
 
-	# clear bind9's DNS cache so our DNS checks are up to date
-	# (ignore errors; if bind9/rndc isn't running we'd already report
+	# clear unbound's DNS cache so our DNS checks are up to date
+	# (ignore errors; if unbound isn't running we'd already report
 	# that in run_services checks.)
-	shell('check_call', ["/usr/sbin/rndc", "flush"], trap=True)
+	shell('check_call', ["/usr/sbin/unbound-control", "flush_zone", "."], trap=True, capture_stdout=False)
 
 	run_system_checks(rounded_values, env, output)
 
@@ -296,7 +296,7 @@ def run_network_checks(env, output):
 	# by a spammer, or the user may be deploying on a residential network. We
 	# will not be able to reliably send mail in these cases.
 	rev_ip4 = ".".join(reversed(env['PUBLIC_IP'].split('.')))
-	zen = query_dns(rev_ip4+'.zen.spamhaus.org', 'A', nxdomain=None)
+	zen = query_dns(rev_ip4+'.zen.spamhaus.org', 'A', nxdomain=None, retry=False)
 	if zen is None:
 		output.print_ok("IP address is not blacklisted by zen.spamhaus.org.")
 	elif zen == "[timeout]":
@@ -747,7 +747,7 @@ def check_mail_domain(domain, env, output):
 	# Stop if the domain is listed in the Spamhaus Domain Block List.
 	# The user might have chosen a domain that was previously in use by a spammer
 	# and will not be able to reliably send mail.
-	dbl = query_dns(domain+'.dbl.spamhaus.org', "A", nxdomain=None)
+	dbl = query_dns(domain+'.dbl.spamhaus.org', "A", nxdomain=None, retry=False)
 	if dbl is None:
 		output.print_ok("Domain is not blacklisted by dbl.spamhaus.org.")
 	elif dbl == "[timeout]":
@@ -783,7 +783,7 @@ def check_web_domain(domain, rounded_time, ssl_certificates, env, output):
 	# website for also needs a signed certificate.
 	check_ssl_cert(domain, rounded_time, ssl_certificates, env, output)
 
-def query_dns(qname, rtype, nxdomain='[Not Set]', at=None, as_list=False):
+def query_dns(qname, rtype, nxdomain='[Not Set]', at=None, as_list=False, retry=True):
 	# Make the qname absolute by appending a period. Without this, dns.resolver.query
 	# will fall back a failed lookup to a second query with this machine's hostname
 	# appended. This has been causing some false-positive Spamhaus reports. The
@@ -793,7 +793,7 @@ def query_dns(qname, rtype, nxdomain='[Not Set]', at=None, as_list=False):
 		qname += "."
 
 	# Use the default nameservers (as defined by the system, which is our locally
-	# running bind server), or if the 'at' argument is specified, use that host
+	# running unbound server), or if the 'at' argument is specified, use that host
 	# as the nameserver.
 	resolver = dns.resolver.get_default_resolver()
 	if at:
@@ -802,16 +802,29 @@ def query_dns(qname, rtype, nxdomain='[Not Set]', at=None, as_list=False):
 
 	# Set a timeout so that a non-responsive server doesn't hold us back.
 	resolver.timeout = 5
+	resolver.lifetime = 5
 
+	if retry:
+		tries = 2
+	else:
+		tries = 1
+
 	# Do the query.
-	try:
-		response = resolver.resolve(qname, rtype, search=True)
-	except (dns.resolver.NoNameservers, dns.resolver.NXDOMAIN, dns.resolver.NoAnswer):
-		# Host did not have an answer for this query; not sure what the
-		# difference is between the two exceptions.
-		return nxdomain
-	except dns.exception.Timeout:
-		return "[timeout]"
+	while tries > 0:
+		tries = tries - 1
+		try:
+			response = resolver.resolve(qname, rtype, search=True)
+			tries = 0
+		except (dns.resolver.NoNameservers, dns.resolver.NXDOMAIN, dns.resolver.NoAnswer):
+			# Host did not have an answer for this query; not sure what the
+			# difference is between the two exceptions.
+			logging.debug("No result for dns lookup %s, %s (%d)", qname, rtype, tries)
+			if tries < 1:
+				return nxdomain
+		except dns.exception.Timeout:
+			logging.debug("Timeout on dns lookup %s, %s (%d)", qname, rtype, tries)
+			if tries < 1:
+				return "[timeout]"
 
 	# Normalize IP addresses. IP address --- especially IPv6 addresses --- can
 	# be expressed in equivalent string forms. Canonicalize the form before
diff --git a/management/utils.py b/management/utils.py
index bc357040..de23361c 100644
--- a/management/utils.py
+++ b/management/utils.py
@@ -106,7 +106,7 @@ def sort_email_addresses(email_addresses, env):
 	ret.extend(sorted(email_addresses)) # whatever is left
 	return ret
 
-def shell(method, cmd_args, env={}, capture_stderr=False, return_bytes=False, trap=False, input=None):
+def shell(method, cmd_args, env={}, capture_stdout=True, capture_stderr=False, return_bytes=False, trap=False, input=None):
 	# A safe way to execute processes.
 	# Some processes like apt-get require being given a sane PATH.
 	import subprocess
@@ -116,6 +116,8 @@ def shell(method, cmd_args, env={}, capture_stderr=False, return_bytes=False, tr
 		'env': env,
 		'stderr': None if not capture_stderr else subprocess.STDOUT,
 	}
+	if not capture_stdout:
+		kwargs['stdout'] = subprocess.DEVNULL
 	if method == "check_output" and input is not None:
 		kwargs['input'] = input
 
diff --git a/setup/dns.sh b/setup/dns.sh
index fd0dec90..f24eb56b 100755
--- a/setup/dns.sh
+++ b/setup/dns.sh
@@ -14,7 +14,7 @@ echo "Installing nsd (DNS server)..."
 
 # Prepare nsd's configuration.
 # We configure nsd before installation as we only want it to bind to some addresses
-# and it otherwise will have port / bind conflicts with bind9 used as the local resolver
+# and it otherwise will have port / bind conflicts with unbound used as the local resolver
 mkdir -p /var/run/nsd
 mkdir -p /etc/nsd
 mkdir -p /etc/nsd/zones
@@ -40,7 +40,7 @@ server:
 
 EOF
 
-# Since we have bind9 listening on localhost for locally-generated
+# Since we have unbound listening on localhost for locally-generated
 # DNS queries that require a recursive nameserver, and the system
 # might have other network interfaces for e.g. tunnelling, we have
 # to be specific about the network interfaces that nsd binds to.
diff --git a/setup/system.sh b/setup/system.sh
index 4d5ae33b..f50a353f 100755
--- a/setup/system.sh
+++ b/setup/system.sh
@@ -310,49 +310,44 @@ fi #NODOC
 # DNS server, which won't work for RBLs. So we really need a local recursive
 # nameserver.
 #
-# We'll install `bind9`, which as packaged for Ubuntu, has DNSSEC enabled by default via "dnssec-validation auto".
+# We'll install unbound, which as packaged for Ubuntu, has DNSSEC enabled by default.
 # We'll have it be bound to 127.0.0.1 so that it does not interfere with
 # the public, recursive nameserver `nsd` bound to the public ethernet interfaces.
-#
-# About the settings:
-#
-# * The listen-on directive in named.conf.options restricts `bind9` to
-#   binding to the loopback interface instead of all interfaces.
-# * The max-recursion-queries directive increases the maximum number of iterative queries.
-#   If more queries than specified are sent, bind9 returns SERVFAIL. After flushing the cache during system checks,
-#   we ran into the limit thus we are increasing it from 75 (default value) to 100.
 
-apt_install bind9
-if ! grep -q "listen-on " /etc/bind/named.conf.options; then
-	# Add a listen-on directive if it doesn't exist inside the options block.
-	sed -i "s/^}/\n\tlisten-on { 127.0.0.1; };\n}/" /etc/bind/named.conf.options
-fi
-if ! grep -q "listen-on-v6 " /etc/bind/named.conf.options; then
-	# Add a listen-on-v6 directive if it doesn't exist inside the options block.
-	sed -i "s/^}/\n\tlisten-on-v6 { ::1; };\n}/" /etc/bind/named.conf.options
-else
-	# Modify the listen-on-v6 directive if it does exist
-	sed -i "s/listen-on-v6 { any; }/listen-on-v6 { ::1; }/" /etc/bind/named.conf.options
+# Remove bind9 in case it is still installed from an earlier setup run.
+apt-get purge -qq -y bind9 bind9-utils
+
+# Install unbound and dns utils (e.g. dig)
+apt_install unbound python3-unbound bind9-dnsutils
+
+# Configure unbound
+cp -f conf/unbound.conf /etc/unbound/unbound.conf.d/miabunbound.conf
+
+if [ ! -d /etc/unbound/lists.d ]; then
+	mkdir -p /etc/unbound/lists.d
 fi
 
-if ! grep -q "max-recursion-queries " /etc/bind/named.conf.options; then
-	# Add a max-recursion-queries directive if it doesn't exist inside the options block.
-	sed -i "s/^}/\n\tmax-recursion-queries 100;\n}/" /etc/bind/named.conf.options
+systemctl restart unbound
+
+unbound-control -q status
+
+# Only reset the local dns settings if unbound server is running, otherwise we'll
+# end up with a system with an unusable internet connection
+if [ $? -ne 0 ]; then
+	echo "Recursive DNS server not active"
+	exit 1
 fi
 
-# First we'll disable systemd-resolved's management of resolv.conf and its stub server.
-# Breaking the symlink to /run/systemd/resolve/stub-resolv.conf means
-# systemd-resolved will read it for DNS servers to use. Put in 127.0.0.1,
-# which is where bind9 will be running. Obviously don't do this before
-# installing bind9 or else apt won't be able to resolve a server to
-# download bind9 from.
+# Modify systemd settings
 rm -f /etc/resolv.conf
-tools/editconf.py /etc/systemd/resolved.conf DNSStubListener=no
+tools/editconf.py /etc/systemd/resolved.conf \
+	DNS=127.0.0.1 \
+	DNSSEC=yes \
+	DNSStubListener=no
 echo "nameserver 127.0.0.1" > /etc/resolv.conf
 
 # Restart the DNS services.
 
-restart_service bind9
 systemctl restart systemd-resolved
 
 # ### Fail2Ban Service
diff --git a/tools/create_dns_blocklist.sh b/tools/create_dns_blocklist.sh
new file mode 100755
index 00000000..5b8bab86
--- /dev/null
+++ b/tools/create_dns_blocklist.sh
@@ -0,0 +1,32 @@
+#!/bin/bash
+set -euo pipefail
+
+# Download select set of malware blocklists from The Firebog's "The Big Blocklist
+# Collection" [0] and block access to them with Unbound by returning NXDOMAIN.
+#
+# [0]: https://firebog.net
+(
+    # Malicious Lists
+    curl -sSf "https://raw.githubusercontent.com/DandelionSprout/adfilt/master/Alternate%20versions%20Anti-Malware%20List/AntiMalwareHosts.txt" ;
+    curl -sSf "https://osint.digitalside.it/Threat-Intel/lists/latestdomains.txt" ;
+    curl -sSf "https://s3.amazonaws.com/lists.disconnect.me/simple_malvertising.txt" ;
+    curl -sSf "https://v.firebog.net/hosts/Prigent-Crypto.txt" ;
+    curl -sSf "https://bitbucket.org/ethanr/dns-blacklists/raw/8575c9f96e5b4a1308f2f12394abd86d0927a4a0/bad_lists/Mandiant_APT1_Report_Appendix_D.txt" ;
+    curl -sSf "https://phishing.army/download/phishing_army_blocklist_extended.txt" ;
+    curl -sSf "https://gitlab.com/quidsup/notrack-blocklists/raw/master/notrack-malware.txt" ;
+    curl -sSf "https://raw.githubusercontent.com/Spam404/lists/master/main-blacklist.txt" ;
+    curl -sSf "https://raw.githubusercontent.com/FadeMind/hosts.extras/master/add.Risk/hosts" ;
+    curl -sSf "https://urlhaus.abuse.ch/downloads/hostfile/" ;
+#    curl -sSf "https://v.firebog.net/hosts/Prigent-Malware.txt" ;
+#    curl -sSf "https://v.firebog.net/hosts/Shalla-mal.txt" ;
+
+) |
+    cat |                                        # Combine all lists into one
+    grep -v '#' |                                # Remove comments lines
+    grep -v '::' |                               # Remove universal ipv6 address
+    tr -d '\r' |                                 # Normalize line endings by removing Windows carriage returns
+    sed -e 's/0\.0\.0\.0\s\{0,\}//g' |           # Remove ip address from start of line
+    sed -e 's/127\.0\.0\.1\s\{0,\}//g' |
+    sed -e '/^$/d' |                             # Remove empty line
+    sort -u |                                    # Sort and remove duplicates
+    awk '{print "local-zone: " ""$1"" " always_nxdomain"}'  # Convert to Unbound configuration