#!/usr/bin/python3

# This script performs a backup of all user data:
# 1) System services are stopped while a copy of user data is made.
# 2) An incremental backup is made using duplicity into the
#    directory STORAGE_ROOT/backup/duplicity.
# 3) The stopped services are restarted.
# 4) The backup files are encrypted with a long password (stored in
#    backup/secret_key.txt) to STORAGE_ROOT/backup/encrypted.
# 5) STORAGE_ROOT/backup/after-backup is executd if it exists.

import os
import os.path
import shutil
import glob
import re
import datetime
import dateutil.parser
import dateutil.relativedelta
import dateutil.tz

from utils import exclusive_process, load_environment, shell

# destroy backups when the most recent increment in the chain
# that depends on it is this many days old.
keep_backups_for_days = 3


def backup_status(env):
    # What is the current status of backups?
    # Loop through all of the files in STORAGE_ROOT/backup/duplicity to
    # get a list of all of the backups taken and sum up file sizes to
    # see how large the storage is.

    now = datetime.datetime.now(dateutil.tz.tzlocal())

    def reldate(date, ref, clip):
        if ref < date:
            return clip
        rd = dateutil.relativedelta.relativedelta(ref, date)
        if rd.months > 1:
            return "%d months, %d days" % (rd.months, rd.days)
        if rd.months == 1:
            return "%d month, %d days" % (rd.months, rd.days)
        if rd.days >= 7:
            return "%d days" % rd.days
        if rd.days > 1:
            return "%d days, %d hours" % (rd.days, rd.hours)
        if rd.days == 1:
            return "%d day, %d hours" % (rd.days, rd.hours)
        return "%d hours, %d minutes" % (rd.hours, rd.minutes)

    backups = {}
    basedir = os.path.join(env['STORAGE_ROOT'], 'backup/duplicity/')
    encdir = os.path.join(env['STORAGE_ROOT'], 'backup/encrypted/')
    # os.listdir fails if directory does not exist
    os.makedirs(basedir, exist_ok=True)
    for fn in os.listdir(basedir):
        m = re.match(r"duplicity-(full|full-signatures|(inc|new-signatures)\.(?P<incbase>\d+T\d+Z)\.to)\.(?P<date>\d+T\d+Z)\.", fn)
        if not m:
            raise ValueError(fn)

        key = m.group("date")
        if key not in backups:
            date = dateutil.parser.parse(m.group("date"))
            backups[key] = {
                "date": m.group("date"),
                "date_str": date.strftime("%x %X"),
                "date_delta": reldate(date, now, "the future?"),
                "full": m.group("incbase") is None,
                "previous": m.group("incbase"),
                "size": 0,
                "encsize": 0,
            }

        backups[key]["size"] += os.path.getsize(os.path.join(basedir, fn))

        # Also check encrypted size.
        encfn = os.path.join(encdir, fn + ".enc")
        if os.path.exists(encfn):
            backups[key]["encsize"] += os.path.getsize(encfn)

    # Ensure the rows are sorted reverse chronologically.
    # This is relied on by should_force_full() and the next step.
    backups = sorted(backups.values(), key=lambda b: b["date"], reverse=True)

    # When will a backup be deleted?
    saw_full = False
    deleted_in = None
    days_ago = now - datetime.timedelta(days=keep_backups_for_days)
    for bak in backups:
        if deleted_in:
            # Subsequent backups are deleted when the most recent increment
            # in the chain would be deleted.
            bak["deleted_in"] = deleted_in
        if bak["full"]:
            # Reset when we get to a full backup. A new chain start next.
            saw_full = True
            deleted_in = None
        elif saw_full and not deleted_in:
            # Mark deleted_in only on the first increment after a full backup.
            deleted_in = reldate(days_ago, dateutil.parser.parse(bak["date"]), "on next daily backup")
            bak["deleted_in"] = deleted_in

    return {
        "directory": basedir,
        "encpwfile": os.path.join(env['STORAGE_ROOT'], 'backup/secret_key.txt'),
        "encdirectory": encdir,
        "tz": now.tzname(),
        "backups": backups,
    }


def should_force_full(env):
    # Force a full backup when the total size of the increments
    # since the last full backup is greater than half the size
    # of that full backup.
    inc_size = 0
    for bak in backup_status(env)["backups"]:
        if not bak["full"]:
            # Scan through the incremental backups cumulating
            # size...
            inc_size += bak["size"]
        else:
            # ...until we reach the most recent full backup.
            # Return if we should to a full backup.
            return inc_size > .5*bak["size"]
    else:
        # If we got here there are no (full) backups, so make one.
        # (I love for/else blocks. Here it's just to show off.)
        return True


def perform_backup(full_backup):
    env = load_environment()

    exclusive_process("backup")

    # Ensure the backup directory exists.
    backup_dir = os.path.join(env["STORAGE_ROOT"], 'backup')
    backup_duplicity_dir = os.path.join(backup_dir, 'duplicity')
    os.makedirs(backup_duplicity_dir, exist_ok=True)

    # On the first run, always do a full backup. Incremental
    # will fail. Otherwise do a full backup when the size of
    # the increments since the most recent full backup are
    # large.
    full_backup = full_backup or should_force_full(env)

    # Stop services.
    shell('check_call', ["/usr/sbin/service", "dovecot", "stop"])
    shell('check_call', ["/usr/sbin/service", "postfix", "stop"])

    # Update the backup mirror directory which mirrors the current
    # STORAGE_ROOT (but excluding the backups themselves!).
    try:
        shell('check_call', [
            "/usr/bin/duplicity",
            "full" if full_backup else "incr",
            "--no-encryption",
            "--archive-dir", "/tmp/duplicity-archive-dir",
            "--name", "mailinabox",
            "--exclude", backup_dir,
            "--volsize", "100",
            "--verbosity", "warning",
            env["STORAGE_ROOT"],
            "file://" + backup_duplicity_dir
            ])
    finally:
        # Start services again.
        shell('check_call', ["/usr/sbin/service", "dovecot", "start"])
        shell('check_call', ["/usr/sbin/service", "postfix", "start"])

    # Remove old backups. This deletes all backup data no longer needed
    # from more than 31 days ago. Must do this before destroying the
    # cache directory or else this command will re-create it.
    shell('check_call', [
        "/usr/bin/duplicity",
        "remove-older-than",
        "%dD" % keep_backups_for_days,
        "--archive-dir", "/tmp/duplicity-archive-dir",
        "--name", "mailinabox",
        "--force",
        "--verbosity", "warning",
        "file://" + backup_duplicity_dir
        ])

    # Remove duplicity's cache directory because it's redundant with our backup directory.
    shutil.rmtree("/tmp/duplicity-archive-dir")

    # Encrypt all of the new files.
    backup_encrypted_dir = os.path.join(backup_dir, 'encrypted')
    os.makedirs(backup_encrypted_dir, exist_ok=True)
    for fn in os.listdir(backup_duplicity_dir):
        fn2 = os.path.join(backup_encrypted_dir, fn) + ".enc"
        if os.path.exists(fn2):
            continue

        # Encrypt the backup using the backup private key.
        shell('check_call', [
            "/usr/bin/openssl",
            "enc",
            "-aes-256-cbc",
            "-a",
            "-salt",
            "-in", os.path.join(backup_duplicity_dir, fn),
            "-out", fn2,
            "-pass", "file:%s" % os.path.join(backup_dir, "secret_key.txt"),
            ])

        # The backup can be decrypted with:
        # openssl enc -d -aes-256-cbc -a -in latest.tgz.enc -out /dev/stdout -pass file:secret_key.txt | tar -z

    # Remove encrypted backups that are no longer needed.
    for fn in os.listdir(backup_encrypted_dir):
        fn2 = os.path.join(backup_duplicity_dir, fn.replace(".enc", ""))
        if os.path.exists(fn2):
            continue
        os.unlink(os.path.join(backup_encrypted_dir, fn))

    # Execute a post-backup script that does the copying to a remote server.
    # Run as the STORAGE_USER user, not as root. Pass our settings in
    # environment variables so the script has access to STORAGE_ROOT.
    post_script = os.path.join(backup_dir, 'after-backup')
    if os.path.exists(post_script):
        shell('check_call',
              ['su', env['STORAGE_USER'], '-c', post_script],
              env=env)

if __name__ == "__main__":
    import sys
    full_backup = "--full" in sys.argv
    perform_backup(full_backup)