1
0
mirror of https://github.com/mail-in-a-box/mailinabox.git synced 2025-01-22 12:27:05 +00:00

merge - duplicity configuration in the control panel and disabling backups

Merges branch 'ponychicken-backup'.
This commit is contained in:
Joshua Tauberer 2015-08-09 20:16:50 +00:00
commit c2e5f5cb2b
4 changed files with 369 additions and 72 deletions

View File

@ -10,20 +10,27 @@
import os, os.path, shutil, glob, re, datetime
import dateutil.parser, dateutil.relativedelta, dateutil.tz
import rtyaml
from utils import exclusive_process, load_environment, shell, wait_for_service
# Destroy backups when the most recent increment in the chain
# that depends on it is this many days old.
keep_backups_for_days = 3
def backup_status(env):
# What is the current status of backups?
# Loop through all of the files in STORAGE_ROOT/backup/encrypted to
# get a list of all of the backups taken and sum up file sizes to
# see how large the storage is.
# Root folder
backup_root = os.path.join(env["STORAGE_ROOT"], 'backup')
# What is the current status of backups?
# Query duplicity to get a list of all backups.
# Use the number of volumes to estimate the size.
config = get_backup_config(env)
now = datetime.datetime.now(dateutil.tz.tzlocal())
# Are backups disabled?
if config["target"] == "off":
return { }
backups = { }
backup_cache_dir = os.path.join(backup_root, 'cache')
def reldate(date, ref, clip):
if ref < date: return clip
rd = dateutil.relativedelta.relativedelta(ref, date)
@ -34,27 +41,38 @@ def backup_status(env):
if rd.days == 1: return "%d day, %d hours" % (rd.days, rd.hours)
return "%d hours, %d minutes" % (rd.hours, rd.minutes)
backups = { }
backup_root = os.path.join(env["STORAGE_ROOT"], 'backup')
backup_dir = os.path.join(backup_root, 'encrypted')
os.makedirs(backup_dir, exist_ok=True) # os.listdir fails if directory does not exist
for fn in os.listdir(backup_dir):
# Get duplicity collection status and parse for a list of backups.
def parse_line(line):
keys = line.strip().split()
date = dateutil.parser.parse(keys[1])
return {
"date": keys[1],
"date_str": date.strftime("%x %X"),
"date_delta": reldate(date, now, "the future?"),
"full": keys[0] == "full",
"size": 0, # collection-status doesn't give us the size
"volumes": keys[2], # number of archive volumes for this backup (not really helpful)
}
collection_status = shell('check_output', [
"/usr/bin/duplicity",
"collection-status",
"--archive-dir", backup_cache_dir,
"--gpg-options", "--cipher-algo=AES256",
"--log-fd", "1",
config["target"],
],
get_env(env))
for line in collection_status.split('\n'):
if line.startswith(" full") or line.startswith(" inc"):
backup = parse_line(line)
backups[backup["date"]] = backup
# Look at the target to get the sizes of each of the backups. There is more than one file per backup.
for fn, size in list_target_files(config):
m = re.match(r"duplicity-(full|full-signatures|(inc|new-signatures)\.(?P<incbase>\d+T\d+Z)\.to)\.(?P<date>\d+T\d+Z)\.", fn)
if not m: raise ValueError(fn)
if not m: continue # not a part of a current backup chain
key = m.group("date")
if key not in backups:
date = dateutil.parser.parse(m.group("date"))
backups[key] = {
"date": m.group("date"),
"date_str": date.strftime("%x %X"),
"date_delta": reldate(date, now, "the future?"),
"full": m.group("incbase") is None,
"previous": m.group("incbase"),
"size": 0,
}
backups[key]["size"] += os.path.getsize(os.path.join(backup_dir, fn))
backups[key]["size"] += size
# Ensure the rows are sorted reverse chronologically.
# This is relied on by should_force_full() and the next step.
@ -79,11 +97,11 @@ def backup_status(env):
# when the threshold is met.
deleted_in = None
if incremental_count > 0 and first_full_size is not None:
deleted_in = "approx. %d days" % round(keep_backups_for_days + (.5 * first_full_size - incremental_size) / (incremental_size/incremental_count) + .5)
deleted_in = "approx. %d days" % round(config["min_age_in_days"] + (.5 * first_full_size - incremental_size) / (incremental_size/incremental_count) + .5)
# When will a backup be deleted?
saw_full = False
days_ago = now - datetime.timedelta(days=keep_backups_for_days)
days_ago = now - datetime.timedelta(days=config["min_age_in_days"])
for bak in backups:
if deleted_in:
# Subsequent backups are deleted when the most recent increment
@ -99,8 +117,6 @@ def backup_status(env):
bak["deleted_in"] = deleted_in
return {
"directory": backup_dir,
"encpwfile": os.path.join(backup_root, 'secret_key.txt'),
"tz": now.tzname(),
"backups": backups,
}
@ -124,15 +140,48 @@ def should_force_full(env):
# (I love for/else blocks. Here it's just to show off.)
return True
def get_passphrase(env):
    """Return the passphrase used to encrypt the backups.

    The passphrase is the first line of STORAGE_ROOT/backup/secret_key.txt
    with surrounding whitespace removed. An Exception is raised when that
    line is shorter than 43 characters.
    """
    # secret_key.txt is 2048 random bits, base64-encoded, with a line break
    # every 65 characters. gpg only takes the first line of text, so that
    # line alone has to be a reasonable passphrase: 43 base64 characters
    # are enough to match AES256's key length of 32 bytes.
    key_path = os.path.join(env["STORAGE_ROOT"], 'backup', 'secret_key.txt')
    with open(key_path) as key_file:
        first_line = key_file.readline()

    passphrase = first_line.strip()
    if len(passphrase) < 43:
        raise Exception("secret_key.txt's first line is too short!")
    return passphrase
def get_env(env):
    """Build the environment variables passed to the duplicity commands.

    The result always carries PASSPHRASE. When the configured backup
    target is an s3 URL, the AWS access key id and secret access key
    from the backup configuration are added as well.
    """
    backup_config = get_backup_config(env)
    duplicity_env = {"PASSPHRASE": get_passphrase(env)}

    # Only S3 targets need credentials in the environment.
    if get_target_type(backup_config) != 's3':
        return duplicity_env

    duplicity_env["AWS_ACCESS_KEY_ID"] = backup_config["target_user"]
    duplicity_env["AWS_SECRET_ACCESS_KEY"] = backup_config["target_pass"]
    return duplicity_env
def get_target_type(config):
    """Return the part of config["target"] that precedes the first colon.

    For a URL target this is its scheme (e.g. "file" or "s3"); a target
    without a colon, such as "off", is returned unchanged.
    """
    scheme, _colon, _remainder = config["target"].partition(":")
    return scheme
def perform_backup(full_backup):
env = load_environment()
exclusive_process("backup")
config = get_backup_config(env)
backup_root = os.path.join(env["STORAGE_ROOT"], 'backup')
backup_cache_dir = os.path.join(backup_root, 'cache')
backup_dir = os.path.join(backup_root, 'encrypted')
# Are backups disabled?
if config["target"] == "off":
return
# In an older version of this script, duplicity was called
# such that it did not encrypt the backups it created (in
# backup/duplicity), and instead openssl was called separately
@ -169,17 +218,6 @@ def perform_backup(full_backup):
shell('check_call', ["/usr/sbin/service", "dovecot", "stop"])
shell('check_call', ["/usr/sbin/service", "postfix", "stop"])
# Get the encryption passphrase. secret_key.txt is 2048 random
# bits base64-encoded and with line breaks every 65 characters.
# gpg will only take the first line of text, so sanity check that
# that line is long enough to be a reasonable passphrase. It
# only needs to be 43 base64-characters to match AES256's key
# length of 32 bytes.
with open(os.path.join(backup_root, 'secret_key.txt')) as f:
passphrase = f.readline().strip()
if len(passphrase) < 43: raise Exception("secret_key.txt's first line is too short!")
env_with_passphrase = { "PASSPHRASE" : passphrase }
# Run a backup of STORAGE_ROOT (but excluding the backups themselves!).
# --allow-source-mismatch is needed in case the box's hostname is changed
# after the first backup. See #396.
@ -192,10 +230,10 @@ def perform_backup(full_backup):
"--volsize", "250",
"--gpg-options", "--cipher-algo=AES256",
env["STORAGE_ROOT"],
"file://" + backup_dir,
"--allow-source-mismatch"
config["target"],
"--allow-source-mismatch"
],
env_with_passphrase)
get_env(env))
finally:
# Start services again.
shell('check_call', ["/usr/sbin/service", "dovecot", "start"])
@ -210,12 +248,12 @@ def perform_backup(full_backup):
shell('check_call', [
"/usr/bin/duplicity",
"remove-older-than",
"%dD" % keep_backups_for_days,
"%dD" % config["min_age_in_days"],
"--archive-dir", backup_cache_dir,
"--force",
"file://" + backup_dir
config["target"]
],
env_with_passphrase)
get_env(env))
# From duplicity's manual:
# "This should only be necessary after a duplicity session fails or is
@ -227,13 +265,14 @@ def perform_backup(full_backup):
"cleanup",
"--archive-dir", backup_cache_dir,
"--force",
"file://" + backup_dir
config["target"]
],
env_with_passphrase)
get_env(env))
# Change ownership of backups to the user-data user, so that the after-backup
# script can access them.
shell('check_call', ["/bin/chown", "-R", env["STORAGE_USER"], backup_dir])
if get_target_type(config) == 'file':
shell('check_call', ["/bin/chown", "-R", env["STORAGE_USER"], backup_dir])
# Execute a post-backup script that does the copying to a remote server.
# Run as the STORAGE_USER user, not as root. Pass our settings in
@ -241,7 +280,7 @@ def perform_backup(full_backup):
post_script = os.path.join(backup_root, 'after-backup')
if os.path.exists(post_script):
shell('check_call',
['su', env['STORAGE_USER'], '-c', post_script],
['su', env['STORAGE_USER'], '-c', post_script, config["target"]],
env=env)
# Our nightly cron job executes system status checks immediately after this
@ -254,9 +293,9 @@ def perform_backup(full_backup):
def run_duplicity_verification():
env = load_environment()
backup_root = os.path.join(env["STORAGE_ROOT"], 'backup')
config = get_backup_config(env)
backup_cache_dir = os.path.join(backup_root, 'cache')
backup_dir = os.path.join(backup_root, 'encrypted')
env_with_passphrase = { "PASSPHRASE" : open(os.path.join(backup_root, 'secret_key.txt')).read() }
shell('check_call', [
"/usr/bin/duplicity",
"--verbosity", "info",
@ -264,9 +303,113 @@ def run_duplicity_verification():
"--compare-data",
"--archive-dir", backup_cache_dir,
"--exclude", backup_root,
"file://" + backup_dir,
config["target"],
env["STORAGE_ROOT"],
], env_with_passphrase)
], get_env(env))
def list_target_files(config):
    """List the files stored at the configured backup target.

    Args:
        config: Backup configuration dict. config["target"] must be a
            full "file://" or "s3://" URL. For S3 targets,
            config["target_user"] and config["target_pass"] are used as
            the AWS access key id and secret access key.

    Returns:
        A list of (filename, size) tuples, one per file at the target.
        A "file://" target whose directory does not exist yet yields
        an empty list.

    Raises:
        ValueError: The target URL cannot be parsed, uses an unsupported
            scheme, or (for S3) names an unknown region/host, an empty or
            invalid bucket, or is rejected with the given credentials.
            The message is suitable for showing to the user.
    """
    import urllib.parse
    try:
        p = urllib.parse.urlparse(config["target"])
    except ValueError:
        # Raise rather than return a string: callers iterate the result
        # as (filename, size) pairs, and backup_set_custom() only treats
        # ValueError as a validation failure.
        raise ValueError("invalid target") from None

    if p.scheme == "file":
        try:
            filenames = os.listdir(p.path)
        except FileNotFoundError:
            # os.listdir fails if the directory does not exist, which is
            # the case before the first backup has been made.
            return []
        return [(fn, os.path.getsize(os.path.join(p.path, fn))) for fn in filenames]

    elif p.scheme == "s3":
        # match to a Region
        import boto.s3
        from boto.exception import BotoServerError
        for region in boto.s3.regions():
            if region.endpoint == p.hostname:
                break
        else:
            raise ValueError("Invalid S3 region/host.")

        bucket = p.path[1:].split('/')[0]
        path = '/'.join(p.path[1:].split('/')[1:]) + '/'

        # With no sub-directory in the URL the prefix would be "/", which
        # matches no keys and would also chop the first character off of
        # each key name below. Use an empty prefix instead.
        if path == '/':
            path = ''

        if bucket == "":
            raise ValueError("Enter an S3 bucket name.")

        # connect to the region & bucket
        try:
            conn = region.connect(aws_access_key_id=config["target_user"], aws_secret_access_key=config["target_pass"])
            bucket = conn.get_bucket(bucket)
        except BotoServerError as e:
            if e.status == 403:
                raise ValueError("Invalid S3 access key or secret access key.")
            elif e.status == 404:
                raise ValueError("Invalid S3 bucket name.")
            elif e.status == 301:
                raise ValueError("Incorrect region for this bucket.")
            raise ValueError(e.reason)

        # Report key names relative to the configured path.
        return [(key.name[len(path):], key.size) for key in bucket.list(prefix=path)]

    else:
        raise ValueError(config["target"])
def backup_set_custom(env, target, target_user, target_pass, min_age):
    """Validate and save new backup settings to custom.yaml.

    Args:
        env: Environment dict (provides STORAGE_ROOT).
        target: "off", "local", or a full target URL.
        target_user: Access key for the target (used for S3).
        target_pass: Secret access key for the target (used for S3).
        min_age: Number of days to keep backups, as an int or a string
            holding an integer.

    Returns:
        A message string: "Updated backup config" on success, otherwise
        a description of the validation error. Nothing is written when
        validation fails.
    """
    config = get_backup_config(env, for_save=True)

    # min_age must be an int. It arrives as a string from the web form
    # (possibly empty), so report a bad value as a validation message
    # instead of letting int() raise.
    if isinstance(min_age, str):
        try:
            min_age = int(min_age)
        except ValueError:
            return "The number of days to keep backups must be a whole number."

    config["target"] = target
    config["target_user"] = target_user
    config["target_pass"] = target_pass
    config["min_age_in_days"] = min_age

    # Validate.
    try:
        if config["target"] not in ("off", "local"):
            # these aren't supported by the following function, which expects a full url in the target key,
            # which is what is there except when loading the config prior to saving
            list_target_files(config)
    except ValueError as e:
        return str(e)

    write_backup_config(env, config)

    return "Updated backup config"
def get_backup_config(env, for_save=False):
    """Load the backup configuration.

    Starts from built-in defaults and merges in whatever is stored in
    STORAGE_ROOT/backup/custom.yaml, if that file can be read and holds
    a mapping.

    Args:
        env: Environment dict (provides STORAGE_ROOT).
        for_save: When True, return only the defaults merged with the
            stored values, without the derived helper fields and without
            expanding the "local" target, so the result can be written
            back to custom.yaml.

    Returns:
        The configuration dict. Unless for_save is set it also contains
        "file_target_directory" and "enc_pw_file", and a "local" target
        is expanded to the corresponding file:// URL.
    """
    backup_root = os.path.join(env["STORAGE_ROOT"], 'backup')

    # Defaults.
    config = {
        "min_age_in_days": 3,
        "target": "local",
    }

    # Merge in anything written to custom.yaml. This is best-effort: a
    # missing, unreadable or malformed file leaves the defaults in place.
    # The file is closed promptly, and only ordinary errors are swallowed
    # (not KeyboardInterrupt/SystemExit).
    try:
        with open(os.path.join(backup_root, 'custom.yaml')) as f:
            custom_config = rtyaml.load(f)
    except Exception:
        custom_config = None
    if isinstance(custom_config, dict):
        config.update(custom_config)

    # When updating custom.yaml, don't do any further processing on what we find.
    if for_save:
        return config

    # helper fields for the admin
    config["file_target_directory"] = os.path.join(backup_root, 'encrypted')
    config["enc_pw_file"] = os.path.join(backup_root, 'secret_key.txt')
    if config["target"] == "local":
        # Expand to the full URL.
        config["target"] = "file://" + config["file_target_directory"]

    return config
def write_backup_config(env, newconfig):
    """Write newconfig as YAML to STORAGE_ROOT/backup/custom.yaml.

    Args:
        env: Environment dict (provides STORAGE_ROOT).
        newconfig: The configuration dict to store.
    """
    backup_root = os.path.join(env["STORAGE_ROOT"], 'backup')

    # Serialize before opening the file: opening with "w" truncates it,
    # so a failure while dumping would otherwise wipe the saved settings.
    serialized = rtyaml.dump(newconfig)

    # NOTE(review): newconfig can include the S3 secret access key
    # ("target_pass"); confirm the permissions on this file are adequate.
    with open(os.path.join(backup_root, 'custom.yaml'), "w") as f:
        f.write(serialized)
if __name__ == "__main__":
import sys
@ -274,6 +417,7 @@ if __name__ == "__main__":
# Run duplicity's verification command to check a) the backup files
# are readable, and b) report if they are up to date.
run_duplicity_verification()
else:
# Perform a backup. Add --full to force a full backup rather than
# possibly performing an incremental backup.

View File

@ -90,13 +90,19 @@ def json_response(data):
def index():
    """Render the control panel page."""
    # This route does not require user authentication so it must be safe!
    user_list = get_mail_users(env)
    admin_list = get_admins(env)

    # (name, endpoint) pairs of the S3 regions, for the backup settings form.
    import boto.s3
    s3_region_hosts = []
    for region in boto.s3.regions():
        s3_region_hosts.append((region.name, region.endpoint))

    template_args = {
        "hostname": env['PRIMARY_HOSTNAME'],
        "storage_root": env['STORAGE_ROOT'],
        "no_users_exist": len(user_list) == 0,
        "no_admins_exist": len(admin_list) == 0,
        "backup_s3_hosts": s3_region_hosts,
    }
    return render_template('index.html', **template_args)
@app.route('/me')
@ -402,6 +408,23 @@ def backup_status():
from backup import backup_status
return json_response(backup_status(env))
@app.route('/system/backup/config', methods=["GET"])
@authorized_personnel_only
def backup_get_custom():
    """Return the current backup configuration as a JSON response."""
    from backup import get_backup_config
    current_config = get_backup_config(env)
    return json_response(current_config)
@app.route('/system/backup/config', methods=["POST"])
@authorized_personnel_only
def backup_set_custom():
    """Save the backup settings posted by the control panel.

    Missing form fields are passed on as empty strings. The message
    returned by the backup module is sent back as a JSON response.
    """
    from backup import backup_set_custom
    form = request.form
    field_names = ('target', 'target_user', 'target_pass', 'min_age')
    submitted = [form.get(field, '') for field in field_names]
    result = backup_set_custom(env, *submitted)
    return json_response(result)
# MUNIN
@app.route('/munin/')
@ -432,4 +455,3 @@ if __name__ == '__main__':
# Start the application server. Listens on 127.0.0.1 (IPv4 only).
app.run(port=10222)

View File

@ -5,17 +5,77 @@
<h2>Backup Status</h2>
<h3>Copying Backup Files</h3>
<p>The box makes an incremental backup each night. By default the backup is stored on the machine itself, but you can also have it stored on Amazon S3.</p>
<p>The box makes an incremental backup each night. The backup is stored on the machine itself. You are responsible for copying the backup files off of the machine.</p>
<h3>Configuration</h3>
<p>Many cloud providers make this easy by allowing you to take snapshots of the machine's disk.</p>
<form class="form-horizontal" role="form" onsubmit="set_custom_backup(); return false;">
<div class="form-group">
<label for="backup-target-type" class="col-sm-2 control-label">Backup to:</label>
<div class="col-sm-2">
<select class="form-control" rows="1" id="backup-target-type" onchange="toggle_form()">
<option value="off">Nowhere (Disable Backups)</option>
<option value="local">{{hostname}}</option>
<option value="s3">Amazon S3</option>
</select>
</div>
</div>
<div class="form-group backup-target-local">
<div class="col-sm-10 col-sm-offset-2">
<div>Backups are stored on this machine&rsquo;s own hard disk. You are responsible for periodically using SFTP (FTP over SSH) to copy the backup files from <tt id="backup-location"></tt> to a safe location. These files are encrypted, so they are safe to store anywhere.</div>
</div>
</div>
<div class="form-group backup-target-s3">
<div class="col-sm-10 col-sm-offset-2">
<div>Backups are stored in an Amazon Web Services S3 bucket. You must have an AWS account already.</div>
</div>
</div>
<div class="form-group backup-target-local backup-target-s3">
<label for="min-age" class="col-sm-2 control-label">How many days should backups be kept?</label>
<div class="col-sm-8">
<input type="number" class="form-control" rows="1" id="min-age">
</div>
</div>
<div class="form-group backup-target-s3">
<label for="backup-target-s3-host" class="col-sm-2 control-label">S3 Region</label>
<div class="col-sm-8">
<select class="form-control" rows="1" id="backup-target-s3-host">
{% for name, host in backup_s3_hosts %}
<option value="{{host}}">{{name}}</option>
{% endfor %}
</select>
</div>
</div>
<div class="form-group backup-target-s3">
<label for="backup-target-s3-path" class="col-sm-2 control-label">S3 Path</label>
<div class="col-sm-8">
<input type="text" placeholder="your-bucket-name/backup-directory" class="form-control" rows="1" id="backup-target-s3-path">
</div>
</div>
<div class="form-group backup-target-s3">
<label for="backup-target-user" class="col-sm-2 control-label">S3 Access Key</label>
<div class="col-sm-8">
<input type="text" class="form-control" rows="1" id="backup-target-user">
</div>
</div>
<div class="form-group backup-target-s3">
<label for="backup-target-pass" class="col-sm-2 control-label">S3 Secret Access Key</label>
<div class="col-sm-8">
<input type="text" class="form-control" rows="1" id="backup-target-pass">
</div>
</div>
<div class="form-group">
<div class="col-sm-offset-2 col-sm-11">
<button id="set-s3-backup-button" type="submit" class="btn btn-primary">Save</button>
</div>
</div>
</form>
<p>You can also use SFTP (FTP over SSH) to copy files from <tt id="backup-location"></tt>. These files are encrypted, so they are safe to store anywhere. Copy the encryption password from <tt id="backup-encpassword-file"></tt> also but keep it in a safe location.</p>
<p>Copy the encryption password from <tt id="backup-encpassword-file"></tt> to a safe and secure location. You will need this file to decrypt backup files.</p>
<h3>Current Backups</h3>
<h3>Available Backups</h3>
<p>The backup directory currently contains the backups listed below. The total size on disk of the backups is currently <span id="backup-total-size"></span>.</p>
<p>The backup location currently contains the backups listed below. The total size of the backups is currently <span id="backup-total-size"></span>.</p>
<table id="backup-status" class="table" style="width: auto">
<thead>
@ -27,8 +87,14 @@
<tbody>
</tbody>
</table>
<script>
// Show only the form rows that belong to the backup target type
// currently selected in the "Backup to" drop-down.
function toggle_form() {
  var all_target_rows = $(".backup-target-local, .backup-target-s3");
  all_target_rows.hide();

  var selected_type = $("#backup-target-type").val();
  $(".backup-target-" + selected_type).show();
}
function nice_size(bytes) {
var powers = ['bytes', 'KB', 'MB', 'GB', 'TB'];
while (true) {
@ -46,19 +112,22 @@ function nice_size(bytes) {
}
function show_system_backup() {
show_custom_backup()
$('#backup-status tbody').html("<tr><td colspan='2' class='text-muted'>Loading...</td></tr>")
api(
"/system/backup/status",
"GET",
{ },
function(r) {
$('#backup-location').text(r.directory);
$('#backup-encpassword-file').text(r.encpwfile);
$('#backup-status tbody').html("");
var total_disk_size = 0;
if (r.backups.length == 0) {
if (typeof r.backups == "undefined") {
var tr = $('<tr><td colspan="3">Backups are turned off.</td></tr>');
$('#backup-status tbody').append(tr);
return;
} else if (r.backups.length == 0) {
var tr = $('<tr><td colspan="3">No backups have been made yet.</td></tr>');
$('#backup-status tbody').append(tr);
}
@ -83,4 +152,62 @@ function show_system_backup() {
$('#backup-total-size').text(nice_size(total_disk_size));
})
}
// Load the stored backup configuration from the server and fill in the
// configuration form with it.
function show_custom_backup() {
  // Hide the target-specific rows until the configuration has arrived.
  $(".backup-target-local, .backup-target-s3").hide();

  function fill_form(r) {
    var type_select = $("#backup-target-type");
    var local_url = "file://" + r.file_target_directory;

    if (r.target == local_url) {
      type_select.val("local");
    } else if (r.target == "off") {
      type_select.val("off");
    } else if (r.target.indexOf("s3://") === 0) {
      type_select.val("s3");
      // Everything after "s3://" is "<host>/<bucket and path>".
      var segments = r.target.slice(5).split('/');
      $("#backup-target-s3-host").val(segments[0]);
      $("#backup-target-s3-path").val(segments.slice(1).join('/'));
    }

    $("#backup-target-user").val(r.target_user);
    $("#backup-target-pass").val(r.target_pass);
    $("#min-age").val(r.min_age_in_days);
    $('#backup-location').text(r.file_target_directory);
    $('#backup-encpassword-file').text(r.enc_pw_file);
    toggle_form();
  }

  api("/system/backup/config", "GET", { }, fill_form);
}
// Submit the backup configuration form to the server and show the
// server's response in a modal dialog. Always returns false.
function set_custom_backup() {
  var target_type = $("#backup-target-type").val();

  // "local" and "off" are sent as-is; an S3 target is sent as a full URL
  // built from the selected region host and the entered path.
  var target;
  switch (target_type) {
    case "local":
    case "off":
      target = target_type;
      break;
    case "s3":
      target = "s3://" + $("#backup-target-s3-host").val() + "/" + $("#backup-target-s3-path").val();
      break;
  }

  var form_data = {
    target: target,
    target_user: $("#backup-target-user").val(),
    target_pass: $("#backup-target-pass").val(),
    min_age: $("#min-age").val()
  };

  function on_success(r) {
    // Responses are multiple lines of pre-formatted text.
    var body = $("<pre/>").text(r);
    // Refresh the page section once the modal is dismissed.
    show_modal_error("Backup configuration", body, function() { show_system_backup(); });
  }

  function on_error(r) {
    show_modal_error("Backup configuration (error)", r);
  }

  api("/system/backup/config", "POST", form_data, on_success, on_error);
  return false;
}
</script>

View File

@ -4,9 +4,13 @@ source setup/functions.sh
# build-essential libssl-dev libffi-dev python3-dev: Required to pip install cryptography.
apt_install python3-flask links duplicity libyaml-dev python3-dnspython python3-dateutil \
build-essential libssl-dev libffi-dev python3-dev
hide_output pip3 install --upgrade rtyaml email_validator idna cryptography
# email_validator is repeated in setup/questions.sh
build-essential libssl-dev libffi-dev python3-dev python-pip
hide_output pip3 install --upgrade rtyaml email_validator idna cryptography boto
# duplicity uses python 2 so we need to use the python 2 package of boto
hide_output pip install --upgrade boto
# email_validator is repeated in setup/questions.sh
# Create a backup directory and a random key for encrypting backups.
mkdir -p $STORAGE_ROOT/backup