From 2726b157640295913f704e4db7f5fd889f830260 Mon Sep 17 00:00:00 2001 From: David Piggott Date: Thu, 26 Mar 2015 12:27:26 +0000 Subject: [PATCH] Use built in duplicity encryption (GPG) for backups, closes #362 --- management/backup.py | 124 ++++++++++++++---------- management/templates/system-backup.html | 5 +- 2 files changed, 74 insertions(+), 55 deletions(-) diff --git a/management/backup.py b/management/backup.py index fdcc5248..cc41ad2d 100755 --- a/management/backup.py +++ b/management/backup.py @@ -36,10 +36,10 @@ def backup_status(env): return "%d hours, %d minutes" % (rd.hours, rd.minutes) backups = { } - basedir = os.path.join(env['STORAGE_ROOT'], 'backup/duplicity/') - encdir = os.path.join(env['STORAGE_ROOT'], 'backup/encrypted/') - os.makedirs(basedir, exist_ok=True) # os.listdir fails if directory does not exist - for fn in os.listdir(basedir): + backup_dir = os.path.join(env["STORAGE_ROOT"], 'backup') + backup_encrypted_dir = os.path.join(backup_dir, 'encrypted') + os.makedirs(backup_encrypted_dir, exist_ok=True) # os.listdir fails if directory does not exist + for fn in os.listdir(backup_encrypted_dir): m = re.match(r"duplicity-(full|full-signatures|(inc|new-signatures)\.(?P\d+T\d+Z)\.to)\.(?P\d+T\d+Z)\.", fn) if not m: raise ValueError(fn) @@ -53,15 +53,9 @@ def backup_status(env): "full": m.group("incbase") is None, "previous": m.group("incbase"), "size": 0, - "encsize": 0, } - backups[key]["size"] += os.path.getsize(os.path.join(basedir, fn)) - - # Also check encrypted size. - encfn = os.path.join(encdir, fn + ".enc") - if os.path.exists(encfn): - backups[key]["encsize"] += os.path.getsize(encfn) + backups[key]["size"] += os.path.getsize(os.path.join(backup_encrypted_dir, fn)) # Ensure the rows are sorted reverse chronologically. # This is relied on by should_force_full() and the next step. 
@@ -78,7 +72,7 @@ def backup_status(env): break incremental_count += 1 incremental_size += bak["size"] - + # Predict how many more increments until the next full backup, # and add to that the time we hold onto backups, to predict # how long the most recent full backup+increments will be held @@ -106,9 +100,8 @@ def backup_status(env): bak["deleted_in"] = deleted_in return { - "directory": basedir, - "encpwfile": os.path.join(env['STORAGE_ROOT'], 'backup/secret_key.txt'), - "encdirectory": encdir, + "directory": backup_encrypted_dir, + "encpwfile": os.path.join(backup_dir, 'secret_key.txt'), "tz": now.tzname(), "backups": backups, } @@ -137,10 +130,61 @@ def perform_backup(full_backup): exclusive_process("backup") - # Ensure the backup directory exists. backup_dir = os.path.join(env["STORAGE_ROOT"], 'backup') + backup_encrypted_dir = os.path.join(backup_dir, 'encrypted') + + # In an older version of this script, duplicity was called + # such that it did not encrypt the backups it created (in + # backup/duplicity), and instead openssl was called separately + # after each backup run, creating AES256 encrypted copies of + # each file created by duplicity in backup/encrypted. + # + # We detect the transition by the presence of backup/duplicity + # and handle it by 'dupliception': we move all the old *un*encrypted + # duplicity files up out of the backup/duplicity directory (as + # backup/ is excluded from duplicity runs) in order that it is + # included in the next run, and we delete backup/encrypted (which + # duplicity will output files directly to, post-transition). + # + # This achieves two things: + # 1. It preserves the pre-transition unencrypted backup files + # within the encrypted backups we will immediately create, so + # that they are kept until the next full backup is triggered. + # (it is because those backups will be encrypted that we take + # the old *un*encrypted backups, not the duplicates encrypted + # with openssl). + # 2. 
It results in backup_status() being called on a non-existent + # backup/encrypted directory, which will trigger a full backup + # (though duplicity ought to do one anyway as it ought not + # recognise the old openssl encrypted .enc files, if we *had* + # left them there). More to the point it clears out those .enc + # files which are now redundant, thereby gaining disk space and + # preventing backup_status() getting terribly confused by their + # presence. + # + # A note about disk use: + # At no point in the transition will we use more disk space than + # was used pre-transition, because by deleting the openssl + # encrypted duplicates we decrease by more* than half the disk + # space used, while the addition by 'dupliception' of the old + # *un*encrypted backups takes less space than we gained from + # dropping the openssl encrypted duplicates. + # + # A note about the status page post-upgrade but pre-transition: + # Between the point that the new code is deployed and when the first + # daily backup is run, there will be a subtle change in the + # behaviour of the web control panel's backup status page, in that + # it will only sum the size of the encrypted backups when reporting + # the total size on disk i.e. it will not consider the unencrypted + # originals. + # + # *the openssl encrypted duplicates were base64 encoded, hence + # accounting for more than half of the space used. backup_duplicity_dir = os.path.join(backup_dir, 'duplicity') - os.makedirs(backup_duplicity_dir, exist_ok=True) + migrated_unencrypted_backup_dir = os.path.join(env["STORAGE_ROOT"], "migrated_unencrypted_backup") + if os.path.isdir(backup_duplicity_dir): + shutil.move(backup_duplicity_dir, migrated_unencrypted_backup_dir) + shutil.rmtree(backup_encrypted_dir) # On the first run, always do a full backup. Incremental # will fail. 
Otherwise do a full backup when the size of @@ -158,65 +202,41 @@ def perform_backup(full_backup): shell('check_call', [ "/usr/bin/duplicity", "full" if full_backup else "incr", - "--no-encryption", "--archive-dir", "/tmp/duplicity-archive-dir", - "--name", "mailinabox", "--exclude", backup_dir, "--volsize", "100", "--verbosity", "warning", env["STORAGE_ROOT"], - "file://" + backup_duplicity_dir - ]) + "file://" + backup_encrypted_dir + ], + env={ "PASSPHRASE" : open( + os.path.join(backup_dir, 'secret_key.txt') + ).read() } + ) finally: # Start services again. shell('check_call', ["/usr/sbin/service", "dovecot", "start"]) shell('check_call', ["/usr/sbin/service", "postfix", "start"]) + if os.path.isdir(migrated_unencrypted_backup_dir): + shutil.rmtree(migrated_unencrypted_backup_dir) + # Remove old backups. This deletes all backup data no longer needed - # from more than 31 days ago. Must do this before destroying the + # from more than 3 days ago. Must do this before destroying the # cache directory or else this command will re-create it. shell('check_call', [ "/usr/bin/duplicity", "remove-older-than", "%dD" % keep_backups_for_days, "--archive-dir", "/tmp/duplicity-archive-dir", - "--name", "mailinabox", "--force", "--verbosity", "warning", - "file://" + backup_duplicity_dir + "file://" + backup_encrypted_dir ]) # Remove duplicity's cache directory because it's redundant with our backup directory. shutil.rmtree("/tmp/duplicity-archive-dir") - # Encrypt all of the new files. - backup_encrypted_dir = os.path.join(backup_dir, 'encrypted') - os.makedirs(backup_encrypted_dir, exist_ok=True) - for fn in os.listdir(backup_duplicity_dir): - fn2 = os.path.join(backup_encrypted_dir, fn) + ".enc" - if os.path.exists(fn2): continue - - # Encrypt the backup using the backup private key. 
- shell('check_call', [ - "/usr/bin/openssl", - "enc", - "-aes-256-cbc", - "-a", - "-salt", - "-in", os.path.join(backup_duplicity_dir, fn), - "-out", fn2, - "-pass", "file:%s" % os.path.join(backup_dir, "secret_key.txt"), - ]) - - # The backup can be decrypted with: - # openssl enc -d -aes-256-cbc -a -in latest.tgz.enc -out /dev/stdout -pass file:secret_key.txt | tar -z - - # Remove encrypted backups that are no longer needed. - for fn in os.listdir(backup_encrypted_dir): - fn2 = os.path.join(backup_duplicity_dir, fn.replace(".enc", "")) - if os.path.exists(fn2): continue - os.unlink(os.path.join(backup_encrypted_dir, fn)) - # Execute a post-backup script that does the copying to a remote server. # Run as the STORAGE_USER user, not as root. Pass our settings in # environment variables so the script has access to STORAGE_ROOT. diff --git a/management/templates/system-backup.html b/management/templates/system-backup.html index 7ff1a868..fffe8680 100644 --- a/management/templates/system-backup.html +++ b/management/templates/system-backup.html @@ -54,7 +54,7 @@ function show_system_backup() { "GET", { }, function(r) { - $('#backup-location').text(r.encdirectory); + $('#backup-location').text(r.directory); $('#backup-encpassword-file').text(r.encpwfile); $('#backup-status tbody').html(""); @@ -72,7 +72,7 @@ function show_system_backup() { tr.append( $('').text(b.date_str + " " + r.tz) ); tr.append( $('').text(b.date_delta + " ago") ); tr.append( $('').text(b.full ? "full" : "increment") ); - tr.append( $('').text( nice_size(b.encsize)) ); + tr.append( $('').text( nice_size(b.size)) ); if (b.deleted_in) tr.append( $('').text(b.deleted_in) ); else @@ -80,7 +80,6 @@ function show_system_backup() { $('#backup-status tbody').append(tr); total_disk_size += b.size; - total_disk_size += b.encsize; } $('#backup-total-size').text(nice_size(total_disk_size));