From 322a5779f111e8aee8e432748dd948cfb109c0b8 Mon Sep 17 00:00:00 2001 From: Joshua Tauberer Date: Sun, 29 Mar 2015 09:33:31 -0400 Subject: [PATCH] store IDNs (internationalized domain names) in IDNA (ASCII) in our database, not in Unicode I changed my mind. In 1bf8f1991f6f08e0fb1e3d2572d280d894a5e431 I allowed Unicode domain names to go into the database. I thought that was nice because it's what the user *means*. But it's not how the web works. Web and DNS were working, but mail wasn't. Postfix (as shipped with Ubuntu 14.04 without support for SMTPUTF8) exists in an ASCII-only world. When it goes to the users/aliases table, it queries in ASCII (IDNA) only and had no hope of delivering mail if the domain was in full Unicode in the database. I was thinking ahead to SMTPUTF8, where we *could* put Unicode in the database (though that would prevent IDNA-encoded addressing from being deliverable) not realizing it isn't well supported yet anyway. It's IDNA that goes on the wire in most places anyway (SMTP without SMTPUTF8 (and therefore how Postfix queries our users/aliases tables), DNS zone files, nginx config, CSR 'CN' field, X509 Common Name and Subject Alternative Names fields), so we should really be talking in terms of IDNA (i.e. ASCII). This partially reverts commit 1bf8f1991f6f08e0fb1e3d2572d280d894a5e431, where I added a lot of Unicode=>IDNA conversions when writing configuration files. Instead I'm doing Unicode=>IDNA before email addresses get into the users/aliases table. Now we assume the database uses IDNA-encoded ASCII domain names. When adding/removing aliases, addresses are converted to ASCII (w/ IDNA). User accounts must be ASCII-only anyway because of Dovecot's auth limitations, so we don't do any IDNA conversion (don't want to change the user's login info behind their back!). The aliases control panel page converts domains back to Unicode for display to be nice. The status checks convert the domains to Unicode just for the output headings. 
A migration is added to convert existing aliases with Unicode domains into IDNA. Any custom DNS or web settings with Unicode may need to be changed. Future support for SMTPUTF8 will probably need to add columns in the users/aliases table so that it lists both IDNA and Unicode forms. --- CHANGELOG.md | 5 ++ management/dns_update.py | 18 +---- management/mailconfig.py | 120 ++++++++++++++++++------------ management/status_checks.py | 4 +- management/templates/aliases.html | 6 +- management/web_update.py | 4 +- setup/migrate.py | 29 ++++++++ 7 files changed, 117 insertions(+), 69 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index cd6eae8f..0f81b9a9 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,6 +8,11 @@ Mail: * POP3S is now enabled (port 995). +System: + +* Internationalized Domain Names (IDNs) should now work in email. If you had custom DNS or custom web settings for internationalized domains, check that they are still working. + + v0.08 (April 1, 2015) --------------------- diff --git a/management/dns_update.py b/management/dns_update.py index 88080017..9043224e 100755 --- a/management/dns_update.py +++ b/management/dns_update.py @@ -397,26 +397,17 @@ $TTL 1800 ; default time to live """ # Replace replacement strings. - zone = zone.format(domain=domain.encode("idna").decode("ascii"), primary_domain=env["PRIMARY_HOSTNAME"].encode("idna").decode("ascii")) + zone = zone.format(domain=domain, primary_domain=env["PRIMARY_HOSTNAME"]) # Add records. for subdomain, querytype, value, explanation in records: if subdomain: - zone += subdomain.encode("idna").decode("ascii") + zone += subdomain zone += "\tIN\t" + querytype + "\t" if querytype == "TXT": - # Quote and escape. value = value.replace('\\', '\\\\') # escape backslashes value = value.replace('"', '\\"') # escape quotes value = '"' + value + '"' # wrap in quotes - elif querytype in ("NS", "CNAME"): - # These records must be IDNA-encoded. 
- value = value.encode("idna").decode("ascii") - elif querytype == "MX": - # Also IDNA-encoded, but must parse first. - priority, host = value.split(" ", 1) - host = host.encode("idna").decode("ascii") - value = priority + " " + host zone += value + "\n" # DNSSEC requires re-signing a zone periodically. That requires @@ -510,7 +501,7 @@ server: zone: name: %s zonefile: %s -""" % (domain.encode("idna").decode("ascii"), zonefile) +""" % (domain, zonefile) # If a custom secondary nameserver has been set, allow zone transfers # and notifies to that nameserver. @@ -555,9 +546,6 @@ def sign_zone(domain, zonefile, env): algo = dnssec_choose_algo(domain, env) dnssec_keys = load_env_vars_from_file(os.path.join(env['STORAGE_ROOT'], 'dns/dnssec/%s.conf' % algo)) - # From here, use the IDNA encoding of the domain name. - domain = domain.encode("idna").decode("ascii") - # In order to use the same keys for all domains, we have to generate # a new .key file with a DNSSEC record for the specific domain. We # can reuse the same key, but it won't validate without a DNSSEC diff --git a/management/mailconfig.py b/management/mailconfig.py index 4a9a7d9a..a2f67fbd 100755 --- a/management/mailconfig.py +++ b/management/mailconfig.py @@ -4,22 +4,32 @@ import subprocess, shutil, os, sqlite3, re import utils def validate_email(email, mode=None): - # There are a lot of characters permitted in email addresses, but - # Dovecot's sqlite driver seems to get confused if there are any - # unusual characters in the address. Bah. Also note that since - # the mailbox path name is based on the email address, the address - # shouldn't be absurdly long and must not have a forward slash. + # Checks that an email address is syntactically valid. Returns True/False. + # Until Postfix supports SMTPUTF8, an email address may contain ASCII + # characters only; IDNs must be IDNA-encoded. + # + # When mode=="user", we're checking that this can be a user account name. 
+ # Dovecot has tighter restrictions - letters, numbers, underscore, and + # dash only! + # + # When mode=="alias", we're allowing anything that can be in a Postfix + # alias table, i.e. omitting the local part ("@domain.tld") is OK. + # Check that the address isn't absurdly long. if len(email) > 255: return False if mode == 'user': - # For Dovecot's benefit, only allow basic characters. + # There are a lot of characters permitted in email addresses, but + # Dovecot's sqlite driver seems to get confused if there are any + # unusual characters in the address. Bah. Also note that since + # the mailbox path name is based on the email address, the address + # shouldn't be absurdly long and must not have a forward slash. ATEXT = r'[a-zA-Z0-9_\-]' elif mode in (None, 'alias'): # For aliases, we can allow any valid email address. # Based on RFC 2822 and https://github.com/SyrusAkbary/validate_email/blob/master/validate_email.py, # these characters are permitted in email addresses. - ATEXT = r'[\w!#$%&\'\*\+\-/=\?\^`\{\|\}~]' # see 3.2.4 + ATEXT = r'[a-zA-Z0-9_!#$%&\'\*\+\-/=\?\^`\{\|\}~]' # see 3.2.4 else: raise ValueError(mode) @@ -31,8 +41,8 @@ def validate_email(email, mode=None): # on the destination side. Make the local part optional. DOT_ATOM_TEXT_LOCAL = '(?:' + DOT_ATOM_TEXT_LOCAL + ')?' - # as above, but we can require that the host part have at least - # one period in it, so use a "+" rather than a "*" at the end + # We can require that the host part have at least one period in it, + # so use a "+" rather than a "*" at the end. DOT_ATOM_TEXT_HOST = ATEXT + r'+(?:\.' + ATEXT + r'+)+' # per RFC 2822 3.4.1 @@ -42,27 +52,44 @@ def validate_email(email, mode=None): m = re.match(ADDR_SPEC, email) if not m: return False - # Check that the domain part is IDNA-encodable. + # Check that the domain part is valid IDNA. localpart, domainpart = m.groups() try: - domainpart.encode("idna") + domainpart.encode('ascii').decode("idna") except: + # Domain is not valid IDNA. 
return False + # Everything looks good. return True def sanitize_idn_email_address(email): - # Convert an IDNA-encoded email address (domain part) into Unicode - # before storing in our database. Chrome may IDNA-ize - # values before POSTing, so we want to normalize before putting - # values into the database. + # The user may enter Unicode in an email address. Convert the domain part + # to IDNA before going into our database. Leave the local part alone --- + # although validate_email will reject non-ASCII characters. + # + # The domain name system only exists in ASCII, so it doesn't make sense + # to store domain names in Unicode. We want to store what is meaningful + # to the underlying protocols. try: localpart, domainpart = email.split("@") - domainpart = domainpart.encode("ascii").decode("idna") + domainpart = domainpart.encode("idna").decode('ascii') return localpart + "@" + domainpart except: - # Domain part is already Unicode or not IDNA-valid, so - # leave unchanged. + # Domain part is not IDNA-valid, so leave unchanged. If there + # are non-ASCII characters it will be filtered out by + # validate_email. + return email + +def prettify_idn_email_address(email): + # This is the opposite of sanitize_idn_email_address. We store domain + # names in IDNA in the database, but we want to show Unicode to the user. + try: + localpart, domainpart = email.split("@") + domainpart = domainpart.encode("ascii").decode('idna') + return localpart + "@" + domainpart + except: + # Failed to decode IDNA. Should never happen. return email def open_database(env, with_connection=False): @@ -90,7 +117,7 @@ def get_mail_users_ex(env, with_archived=False, with_slow_info=False): # { # email: "name@domain.tld", # privileges: [ "priv1", "priv2", ... ], - # status: "active", + # status: "active" | "inactive", # }, # ... 
# ] @@ -182,7 +209,8 @@ def get_mail_aliases_ex(env): # domain: "domain.tld", # alias: [ # { - # source: "name@domain.tld", + # source: "name@domain.tld", # IDNA-encoded + # source_display: "name@domain.tld", # full Unicode # destination: ["target1@domain.com", "target2@domain.com", ...], # required: True|False # }, @@ -207,7 +235,8 @@ def get_mail_aliases_ex(env): } domains[domain]["aliases"].append({ "source": source, - "destination": [d.strip() for d in destination.split(",")], + "source_display": prettify_idn_email_address(source), + "destination": [prettify_idn_email_address(d.strip()) for d in destination.split(",")], "required": required, }) @@ -219,19 +248,22 @@ def get_mail_aliases_ex(env): domain["aliases"].sort(key = lambda alias : (alias["required"], alias["source"])) return domains -def get_domain(emailaddr): - return emailaddr.split('@', 1)[1] +def get_domain(emailaddr, as_unicode=True): + # Gets the domain part of an email address. Turns IDNA + # back to Unicode for display. + ret = emailaddr.split('@', 1)[1] + if as_unicode: ret = ret.encode('ascii').decode('idna') + return ret def get_mail_domains(env, filter_aliases=lambda alias : True): + # Returns the domain names (IDNA-encoded) of all of the email addresses + # configured on the system. 
return set( - [get_domain(addr) for addr in get_mail_users(env)] - + [get_domain(source) for source, target in get_mail_aliases(env) if filter_aliases((source, target)) ] + [get_domain(addr, as_unicode=False) for addr in get_mail_users(env)] + + [get_domain(source, as_unicode=False) for source, target in get_mail_aliases(env) if filter_aliases((source, target)) ] ) def add_mail_user(email, pw, privs, env): - # accept IDNA domain names but normalize to Unicode before going into database - email = sanitize_idn_email_address(email) - # validate email if email.strip() == "": return ("No email address provided.", 400) @@ -240,6 +272,7 @@ def add_mail_user(email, pw, privs, env): elif not validate_email(email, mode='user'): return ("User account email addresses may only use the ASCII letters A-Z, the digits 0-9, underscore (_), hyphen (-), and period (.).", 400) + # validate password validate_password(pw) # validate privileges @@ -290,9 +323,6 @@ def add_mail_user(email, pw, privs, env): return kick(env, "mail user added") def set_mail_password(email, pw, env): - # accept IDNA domain names but normalize to Unicode before going into database - email = sanitize_idn_email_address(email) - # validate that password is acceptable validate_password(pw) @@ -326,9 +356,6 @@ def get_mail_password(email, env): return rows[0][0] def remove_mail_user(email, env): - # accept IDNA domain names but normalize to Unicode before going into database - email = sanitize_idn_email_address(email) - # remove conn, c = open_database(env, with_connection=True) c.execute("DELETE FROM users WHERE email=?", (email,)) @@ -343,9 +370,6 @@ def parse_privs(value): return [p for p in value.split("\n") if p.strip() != ""] def get_mail_user_privileges(email, env): - # accept IDNA domain names but normalize to Unicode before going into database - email = sanitize_idn_email_address(email) - # get privs c = open_database(env) c.execute('SELECT privileges FROM users WHERE email=?', (email,)) @@ -360,9 +384,6 @@ 
def validate_privilege(priv): return None def add_remove_mail_user_privilege(email, priv, action, env): - # accept IDNA domain names but normalize to Unicode before going into database - email = sanitize_idn_email_address(email) - # validate validation = validate_privilege(priv) if validation: return validation @@ -390,7 +411,7 @@ def add_remove_mail_user_privilege(email, priv, action, env): return "OK" def add_mail_alias(source, destination, env, update_if_exists=False, do_kick=True): - # accept IDNA domain names but normalize to Unicode before going into database + # convert Unicode domain to IDNA source = sanitize_idn_email_address(source) # validate source @@ -402,18 +423,23 @@ def add_mail_alias(source, destination, env, update_if_exists=False, do_kick=Tru # validate destination dests = [] destination = destination.strip() - if validate_email(destination, mode='alias'): - # Oostfix allows a single @domain.tld as the destination, which means - # the local part on the address is preserved in the rewrite. - dests.append(sanitize_idn_email_address(destination)) + + # Postfix allows a single @domain.tld as the destination, which means + # the local part on the address is preserved in the rewrite. We must + # try to convert Unicode to IDNA first before validating that it's a + # legitimate alias address. + d1 = sanitize_idn_email_address(destination) + if validate_email(d1, mode='alias'): + dests.append(d1) + else: # Parse comma and \n-separated destination emails & validate. In this # case, the recipients must be complete email addresses. for line in destination.split("\n"): for email in line.split(","): email = email.strip() - email = sanitize_idn_email_address(email) # Unicode => IDNA if email == "": continue + email = sanitize_idn_email_address(email) # Unicode => IDNA if not validate_email(email): return ("Invalid destination email address (%s)." 
% email, 400) dests.append(email) @@ -440,7 +466,7 @@ def add_mail_alias(source, destination, env, update_if_exists=False, do_kick=Tru return kick(env, return_status) def remove_mail_alias(source, env, do_kick=True): - # accept IDNA domain names but normalize to Unicode before going into database + # convert Unicode domain to IDNA source = sanitize_idn_email_address(source) # remove diff --git a/management/status_checks.py b/management/status_checks.py index bf16df47..fec8ed8e 100755 --- a/management/status_checks.py +++ b/management/status_checks.py @@ -246,7 +246,8 @@ def run_domain_checks(rounded_time, env, output, pool): def run_domain_checks_on_domain(domain, rounded_time, env, dns_domains, dns_zonefiles, mail_domains, web_domains): output = BufferedOutput() - output.add_heading(domain) + # The domain is IDNA-encoded, but for display use Unicode. + output.add_heading(domain.encode('ascii').decode('idna')) if domain == env["PRIMARY_HOSTNAME"]: check_primary_hostname_dns(domain, env, output, dns_domains, dns_zonefiles) @@ -639,7 +640,6 @@ def check_certificate(domain, ssl_certificate, ssl_private_key, rounded_time=Fal if m: cert_expiration_date = dateutil.parser.parse(m.group(1)) - domain = domain.encode("idna").decode("ascii") wildcard_domain = re.sub("^[^\.]+", "*", domain) if domain is not None and domain not in certificate_names and wildcard_domain not in certificate_names: return ("The certificate is for the wrong domain name. It is for %s." diff --git a/management/templates/aliases.html b/management/templates/aliases.html index 5ec85c5b..653cd5e9 100644 --- a/management/templates/aliases.html +++ b/management/templates/aliases.html @@ -27,7 +27,7 @@
-
You may use international (non-ASCII) characters, but this has not yet been well tested.
+
You may use international (non-ASCII) characters for the domain part of the email address only.
@@ -98,8 +98,8 @@ function show_aliases() { n.attr('id', ''); if (alias.required) n.addClass('alias-required'); - n.attr('data-email', alias.source); - n.find('td.email').text(alias.source) + n.attr('data-email', alias.source_display); // this is decoded from IDNA, but will get re-coded to IDNA on the backend + n.find('td.email').text(alias.source_display) for (var j = 0; j < alias.destination.length; j++) n.find('td.target').append($("
").text(alias.destination[j])) $('#alias_table tbody').append(n); diff --git a/management/web_update.py b/management/web_update.py index b088c55f..aecbcf67 100644 --- a/management/web_update.py +++ b/management/web_update.py @@ -89,7 +89,7 @@ def make_domain_config(domain, template, template_for_primaryhost, env): # Replace substitution strings in the template & return. nginx_conf = nginx_conf.replace("$STORAGE_ROOT", env['STORAGE_ROOT']) - nginx_conf = nginx_conf.replace("$HOSTNAME", domain.encode("idna").decode("ascii")) + nginx_conf = nginx_conf.replace("$HOSTNAME", domain) nginx_conf = nginx_conf.replace("$ROOT", root) nginx_conf = nginx_conf.replace("$SSL_KEY", ssl_key) nginx_conf = nginx_conf.replace("$SSL_CERTIFICATE", ssl_certificate) @@ -213,7 +213,7 @@ def create_csr(domain, ssl_key, env): "-key", ssl_key, "-out", "/dev/stdout", "-sha256", - "-subj", "/C=%s/ST=/L=/O=/CN=%s" % (env["CSR_COUNTRY"], domain.encode("idna").decode("ascii"))]) + "-subj", "/C=%s/ST=/L=/O=/CN=%s" % (env["CSR_COUNTRY"], domain)]) def install_cert(domain, ssl_cert, ssl_chain, env): if domain not in get_web_domains(env): diff --git a/setup/migrate.py b/setup/migrate.py index b7e78756..00fe42e8 100755 --- a/setup/migrate.py +++ b/setup/migrate.py @@ -67,6 +67,35 @@ def migration_6(env): basepath = os.path.join(env["STORAGE_ROOT"], 'dns/dnssec') shutil.move(os.path.join(basepath, 'keys.conf'), os.path.join(basepath, 'RSASHA1-NSEC3-SHA1.conf')) +def migration_7(env): + # I previously wanted domain names to be stored in Unicode in the database. Now I want them + # to be in IDNA. Affects aliases only. + import sqlite3 + conn = sqlite3.connect(os.path.join(env["STORAGE_ROOT"], "mail/users.sqlite")) + + # Get existing alias source addresses. + c = conn.cursor() + c.execute('SELECT source FROM aliases') + aliases = [ row[0] for row in c.fetchall() ] + + # Update to IDNA-encoded domains. 
+ for email in aliases: + try: + localpart, domainpart = email.split("@") + domainpart = domainpart.encode("idna").decode("ascii") + newemail = localpart + "@" + domainpart + if newemail != email: + c = conn.cursor() + c.execute("UPDATE aliases SET source=? WHERE source=?", (newemail, email)) + if c.rowcount != 1: raise ValueError("Alias not found.") + print("Updated alias", email, "to", newemail) + except Exception as e: + print("Error updating IDNA alias", email, e) + + # Save. + conn.commit() + + def get_current_migration(): ver = 0 while True: