2018-01-15 17:43:32 +00:00
|
|
|
#!/usr/local/lib/mailinabox/env/bin/python
|
2014-06-03 20:21:17 +00:00
|
|
|
|
|
|
|
# This script performs a backup of all user data:
|
2016-01-02 21:25:36 +00:00
|
|
|
# 1) System services are stopped.
|
2016-05-11 08:11:16 +00:00
|
|
|
# 2) STORAGE_ROOT/backup/before-backup is executed if it exists.
|
|
|
|
# 3) An incremental encrypted backup is made using duplicity.
|
|
|
|
# 4) The stopped services are restarted.
|
|
|
|
# 5) STORAGE_ROOT/backup/after-backup is executed if it exists.
|
2014-06-03 20:21:17 +00:00
|
|
|
|
2023-12-22 15:08:30 +00:00
|
|
|
import os, os.path, re, datetime, sys
|
2014-09-01 13:06:38 +00:00
|
|
|
import dateutil.parser, dateutil.relativedelta, dateutil.tz
|
2015-07-26 16:25:52 +00:00
|
|
|
import rtyaml
|
2017-01-01 22:11:31 +00:00
|
|
|
from exclusiveprocess import Lock
|
2014-06-03 20:21:17 +00:00
|
|
|
|
2022-01-09 00:09:30 +00:00
|
|
|
from utils import load_environment, shell, wait_for_service
|
2014-06-03 20:21:17 +00:00
|
|
|
|
2014-09-01 13:06:38 +00:00
|
|
|
def backup_status(env):
	"""Return a summary of the existing backups.

	Queries duplicity's collection-status for the list of full/incremental
	backups, sums per-backup file sizes by listing the target directly, and
	estimates when each backup will be deleted. Returns a dict with keys
	"backups" (list of per-backup dicts, newest first) and
	"unmatched_file_size" (bytes in target files not tied to a known backup),
	or an empty dict when backups are disabled.
	"""

	# If backups are disabled, return no status.
	config = get_backup_config(env)
	if config["target"] == "off":
		return { }

	# Query duplicity to get a list of all full and incremental
	# backups available.

	backups = { }
	now = datetime.datetime.now(dateutil.tz.tzlocal())
	backup_root = os.path.join(env["STORAGE_ROOT"], 'backup')
	backup_cache_dir = os.path.join(backup_root, 'cache')

	def reldate(date, ref, clip):
		# Human-friendly relative time from `date` to `ref`; `clip` is
		# returned when `date` is after `ref` (shouldn't normally happen).
		if ref < date: return clip
		rd = dateutil.relativedelta.relativedelta(ref, date)
		if rd.years > 1: return "%d years, %d months" % (rd.years, rd.months)
		if rd.years == 1: return "%d year, %d months" % (rd.years, rd.months)
		if rd.months > 1: return "%d months, %d days" % (rd.months, rd.days)
		if rd.months == 1: return "%d month, %d days" % (rd.months, rd.days)
		if rd.days >= 7: return "%d days" % rd.days
		if rd.days > 1: return "%d days, %d hours" % (rd.days, rd.hours)
		if rd.days == 1: return "%d day, %d hours" % (rd.days, rd.hours)
		return "%d hours, %d minutes" % (rd.hours, rd.minutes)

	# Get duplicity collection status and parse for a list of backups.
	def parse_line(line):
		# A status line looks like: "full 20230101T000000Z 3" (type, date, #volumes).
		keys = line.strip().split()
		date = dateutil.parser.parse(keys[1]).astimezone(dateutil.tz.tzlocal())
		return {
			"date": keys[1],
			"date_str": date.strftime("%Y-%m-%d %X") + " " + now.tzname(),
			"date_delta": reldate(date, now, "the future?"),
			"full": keys[0] == "full",
			"size": 0, # collection-status doesn't give us the size
			"volumes": int(keys[2]), # number of archive volumes for this backup (not really helpful)
		}

	code, collection_status = shell('check_output', [
		"/usr/bin/duplicity",
		"collection-status",
		"--archive-dir", backup_cache_dir,
		"--gpg-options", "'--cipher-algo=AES256'",
		"--log-fd", "1",
		] + get_duplicity_additional_args(env) + [
		get_duplicity_target_url(config)
		],
		get_duplicity_env_vars(env),
		trap=True)
	if code != 0:
		# Command failed. This is likely due to an improperly configured remote
		# destination for the backups or the last backup job terminated unexpectedly.
		raise Exception("Something is wrong with the backup: " + collection_status)
	for line in collection_status.split('\n'):
		# With --log-fd 1 the machine-readable backup rows are indented one space.
		if line.startswith((" full", " inc")):
			backup = parse_line(line)
			backups[backup["date"]] = backup

	# Look at the target directly to get the sizes of each of the backups. There is more than one file per backup.
	# Starting with duplicity in Ubuntu 18.04, "signatures" files have dates in their
	# filenames that are a few seconds off the backup date and so don't line up
	# with the list of backups we have. Track unmatched files so we know how much other
	# space is used for those.
	unmatched_file_size = 0
	for fn, size in list_target_files(config):
		m = re.match(r"duplicity-(full|full-signatures|(inc|new-signatures)\.(?P<incbase>\d+T\d+Z)\.to)\.(?P<date>\d+T\d+Z)\.", fn)
		if not m: continue # not a part of a current backup chain
		key = m.group("date")
		if key in backups:
			backups[key]["size"] += size
		else:
			unmatched_file_size += size

	# Ensure the rows are sorted reverse chronologically.
	# This is relied on by should_force_full() and the next step.
	backups = sorted(backups.values(), key = lambda b : b["date"], reverse=True)

	# Get the average size of incremental backups, the size of the
	# most recent full backup, and the date of the most recent
	# backup and the most recent full backup.
	incremental_count = 0
	incremental_size = 0
	first_date = None
	first_full_size = None
	first_full_date = None
	for bak in backups:
		if first_date is None:
			first_date = dateutil.parser.parse(bak["date"])
		if bak["full"]:
			first_full_size = bak["size"]
			first_full_date = dateutil.parser.parse(bak["date"])
			break
		incremental_count += 1
		incremental_size += bak["size"]

	# When will the most recent backup be deleted? It won't be deleted if the next
	# backup is incremental, because the increments rely on all past increments.
	# So first guess how many more incremental backups will occur until the next
	# full backup. That full backup frees up this one to be deleted. But, the backup
	# must also be at least min_age_in_days old too.
	deleted_in = None
	if incremental_count > 0 and incremental_size > 0 and first_full_size is not None:
		# How many days until the next incremental backup? First, the part of
		# the algorithm based on increment sizes:
		est_days_to_next_full = (.5 * first_full_size - incremental_size) / (incremental_size/incremental_count)
		est_time_of_next_full = first_date + datetime.timedelta(days=est_days_to_next_full)

		# ...And then the part of the algorithm based on full backup age:
		est_time_of_next_full = min(est_time_of_next_full, first_full_date + datetime.timedelta(days=config["min_age_in_days"]*10+1))

		# It still can't be deleted until it's old enough.
		est_deleted_on = max(est_time_of_next_full, first_date + datetime.timedelta(days=config["min_age_in_days"]))

		deleted_in = "approx. %d days" % round((est_deleted_on-now).total_seconds()/60/60/24 + .5)

	# When will a backup be deleted? Set the deleted_in field of each backup.
	saw_full = False
	for bak in backups:
		if deleted_in:
			# The most recent increment in a chain and all of the previous backups
			# it relies on are deleted at the same time.
			bak["deleted_in"] = deleted_in
		if bak["full"]:
			# Reset when we get to a full backup. A new chain starts *next*.
			saw_full = True
			deleted_in = None
		elif saw_full and not deleted_in:
			# We're now on backups prior to the most recent full backup. These are
			# free to be deleted as soon as they are min_age_in_days old.
			deleted_in = reldate(now, dateutil.parser.parse(bak["date"]) + datetime.timedelta(days=config["min_age_in_days"]), "on next daily backup")
			bak["deleted_in"] = deleted_in

	return {
		"backups": backups,
		"unmatched_file_size": unmatched_file_size,
	}
|
|
|
|
|
2016-02-05 16:08:33 +00:00
|
|
|
def should_force_full(config, env):
	"""Decide whether the next backup must be a full backup.

	Returns True when the increments since the most recent full backup are
	cumulatively larger than half the size of that full backup, when the most
	recent full backup is older than min_age_in_days*10+1 days, or when there
	are no (full) backups at all. Relies on backup_status() returning backups
	sorted newest first.
	"""
	# Force a full backup when the total size of the increments
	# since the last full backup is greater than half the size
	# of that full backup.
	inc_size = 0
	for bak in backup_status(env)["backups"]:
		if not bak["full"]:
			# Scan through the incremental backups cumulating
			# size...
			inc_size += bak["size"]
		else:
			# ...until we reach the most recent full backup.
			# Return if we should do a full backup, which is based
			# on the size of the increments relative to the full
			# backup, as well as the age of the full backup.
			if inc_size > .5*bak["size"]:
				return True
			if dateutil.parser.parse(bak["date"]) + datetime.timedelta(days=config["min_age_in_days"]*10+1) < datetime.datetime.now(dateutil.tz.tzlocal()):
				return True
			return False
	else:
		# If we got here there are no (full) backups, so make one.
		# (I love for/else blocks. Here it's just to show off.)
		return True
|
|
|
|
|
2015-08-09 16:56:33 +00:00
|
|
|
def get_passphrase(env):
	"""Return the backup encryption passphrase from secret_key.txt.

	secret_key.txt is 2048 random bits base64-encoded with line breaks every
	65 characters. gpg only reads the first line of text, so we take just
	that line and sanity-check that it is long enough to be a reasonable
	passphrase: 43 base64 characters covers AES256's 32-byte key length.
	Raises if the first line is shorter than that.
	"""
	key_file = os.path.join(env["STORAGE_ROOT"], 'backup', 'secret_key.txt')
	with open(key_file) as f:
		passphrase = f.readline().strip()
	if len(passphrase) < 43:
		raise Exception("secret_key.txt's first line is too short!")
	return passphrase
|
|
|
|
|
2022-06-11 13:24:45 +00:00
|
|
|
def get_duplicity_target_url(config):
	"""Return the backup target URL in the form duplicity expects.

	For most target types the configured URL is passed through unchanged.
	For S3 targets the URL is rewritten: duplicity no longer accepts the S3
	endpoint hostname in the target URL — the hostname component must be the
	bucket name instead (the endpoint is passed separately via
	get_duplicity_additional_args). So the first path segment (the bucket)
	is promoted to the hostname position, and the original hostname — which
	also carries an optional region name in its username portion — is
	dropped here.
	"""
	url = config["target"]

	# The target type is the URL scheme (everything before the first colon).
	if url.split(":")[0] == "s3":
		from urllib.parse import urlsplit, urlunsplit
		scheme, _old_host, path, query, fragment = urlsplit(url)
		# Promote the bucket (first path segment) to the netloc slot and
		# keep the remainder of the path as the object prefix.
		netloc, path = path.lstrip('/').split('/', 1)
		url = urlunsplit((scheme, netloc, path, query, fragment))

	return url
|
|
|
|
|
2022-06-11 13:23:58 +00:00
|
|
|
def get_duplicity_additional_args(env):
	"""Return extra duplicity command-line arguments for the configured target.

	rsync targets get explicit ssh/rsync transport options (identity file and
	port); s3 targets get the endpoint URL and optional region name that were
	stripped out of the target URL by get_duplicity_target_url. Other target
	types need no extra arguments.
	"""
	config = get_backup_config(env)

	if get_target_type(config) == 'rsync':
		# Extract a port number for the ssh transport. Duplicity accepts the
		# optional port number syntax in the target, but it doesn't appear to act
		# on it, so we set the ssh port explicitly via the duplicity options.
		from urllib.parse import urlsplit
		try:
			port = urlsplit(config["target"]).port
		except ValueError:
			# .port raises on a malformed (non-numeric) port component.
			port = 22
		if port is None:
			# No port in the URL at all; default to the standard ssh port.
			port = 22

		return [
			f"--ssh-options='-i /root/.ssh/id_rsa_miab -p {port}'",
			f"--rsync-options='-e \"/usr/bin/ssh -oStrictHostKeyChecking=no -oBatchMode=yes -p {port} -i /root/.ssh/id_rsa_miab\"'",
		]
	elif get_target_type(config) == 's3':
		# See note about hostname in get_duplicity_target_url.
		# The region name, which is required by some non-AWS endpoints,
		# is saved inside the username portion of the URL.
		from urllib.parse import urlsplit, urlunsplit
		target = urlsplit(config["target"])
		endpoint_url = urlunsplit(("https", target.hostname, '', '', ''))
		args = ["--s3-endpoint-url", endpoint_url]
		if target.username: # region name is stuffed here
			args += ["--s3-region-name", target.username]
		return args

	return []
|
|
|
|
|
|
|
|
def get_duplicity_env_vars(env):
	"""Build the environment variables to pass to every duplicity invocation.

	Always includes PASSPHRASE (duplicity reads the encryption passphrase
	from it). For s3 targets, also includes the standard AWS credential
	variables taken from the backup configuration.
	"""
	config = get_backup_config(env)

	duplicity_env = { "PASSPHRASE" : get_passphrase(env) }

	if get_target_type(config) == 's3':
		duplicity_env.update({
			"AWS_ACCESS_KEY_ID": config["target_user"],
			"AWS_SECRET_ACCESS_KEY": config["target_pass"],
		})

	return duplicity_env
|
2016-11-12 14:28:55 +00:00
|
|
|
|
2015-07-27 20:09:58 +00:00
|
|
|
def get_target_type(config):
	"""Return the backup target's type: the URL scheme (e.g. "file", "rsync",
	"s3", "b2"), i.e. everything before the first colon. For the special
	values "off" and "local", which contain no colon, the whole string is
	returned unchanged."""
	return config["target"].partition(":")[0]
|
2016-11-12 14:28:55 +00:00
|
|
|
|
2014-09-01 13:06:38 +00:00
|
|
|
def perform_backup(full_backup):
	"""Run the nightly backup.

	Steps: take an exclusive lock, decide full vs. incremental, stop mail
	services so files are quiescent, run an optional before-backup hook, run
	duplicity, restart services (even on failure), prune old backups, run
	duplicity cleanup, fix local file ownership, run an optional after-backup
	hook, and finally wait for mail services to come back up. `full_backup`
	forces a full (non-incremental) backup.
	"""
	env = load_environment()

	# Create an global exclusive lock so that the backup script
	# cannot be run more than once concurrently.
	Lock(die=True).forever()

	config = get_backup_config(env)
	backup_root = os.path.join(env["STORAGE_ROOT"], 'backup')
	backup_cache_dir = os.path.join(backup_root, 'cache')
	backup_dir = os.path.join(backup_root, 'encrypted')

	# Are backups disabled?
	if config["target"] == "off":
		return

	# On the first run, always do a full backup. Incremental
	# will fail. Otherwise do a full backup when the size of
	# the increments since the most recent full backup are
	# large.
	try:
		full_backup = full_backup or should_force_full(config, env)
	except Exception as e:
		# This was the first call to duplicity, and there might
		# be an error already.
		print(e)
		sys.exit(1)

	# Stop services.
	def service_command(service, command, quit=None):
		# Execute silently, but if there is an error then display the output & exit.
		code, ret = shell('check_output', ["/usr/sbin/service", service, command], capture_stderr=True, trap=True)
		if code != 0:
			print(ret)
			if quit:
				sys.exit(code)

	service_command("php8.0-fpm", "stop", quit=True)
	service_command("postfix", "stop", quit=True)
	service_command("dovecot", "stop", quit=True)
	service_command("postgrey", "stop", quit=True)

	# Execute a pre-backup script that copies files outside the homedir.
	# Run as the STORAGE_USER user, not as root. Pass our settings in
	# environment variables so the script has access to STORAGE_ROOT.
	pre_script = os.path.join(backup_root, 'before-backup')
	if os.path.exists(pre_script):
		shell('check_call',
			['su', env['STORAGE_USER'], '-c', pre_script, config["target"]],
			env=env)

	# Run a backup of STORAGE_ROOT (but excluding the backups themselves!).
	# --allow-source-mismatch is needed in case the box's hostname is changed
	# after the first backup. See #396.
	try:
		shell('check_call', [
			"/usr/bin/duplicity",
			"full" if full_backup else "incr",
			"--verbosity", "warning", "--no-print-statistics",
			"--archive-dir", backup_cache_dir,
			"--exclude", backup_root,
			"--volsize", "250",
			"--gpg-options", "'--cipher-algo=AES256'",
			"--allow-source-mismatch"
			] + get_duplicity_additional_args(env) + [
			env["STORAGE_ROOT"],
			get_duplicity_target_url(config),
			],
			get_duplicity_env_vars(env))
	finally:
		# Start services again — in reverse order of how they were stopped,
		# and even if the backup itself failed.
		service_command("postgrey", "start", quit=False)
		service_command("dovecot", "start", quit=False)
		service_command("postfix", "start", quit=False)
		service_command("php8.0-fpm", "start", quit=False)

	# Remove old backups. This deletes all backup data no longer needed
	# from more than 3 days ago.
	shell('check_call', [
		"/usr/bin/duplicity",
		"remove-older-than",
		"%dD" % config["min_age_in_days"],
		"--verbosity", "error",
		"--archive-dir", backup_cache_dir,
		"--force",
		] + get_duplicity_additional_args(env) + [
		get_duplicity_target_url(config)
		],
		get_duplicity_env_vars(env))

	# From duplicity's manual:
	# "This should only be necessary after a duplicity session fails or is
	# aborted prematurely."
	# That may be unlikely here but we may as well ensure we tidy up if
	# that does happen - it might just have been a poorly timed reboot.
	shell('check_call', [
		"/usr/bin/duplicity",
		"cleanup",
		"--verbosity", "error",
		"--archive-dir", backup_cache_dir,
		"--force",
		] + get_duplicity_additional_args(env) + [
		get_duplicity_target_url(config)
		],
		get_duplicity_env_vars(env))

	# Change ownership of backups to the user-data user, so that the after-backup
	# script can access them.
	if get_target_type(config) == 'file':
		shell('check_call', ["/bin/chown", "-R", env["STORAGE_USER"], backup_dir])

	# Execute a post-backup script that does the copying to a remote server.
	# Run as the STORAGE_USER user, not as root. Pass our settings in
	# environment variables so the script has access to STORAGE_ROOT.
	post_script = os.path.join(backup_root, 'after-backup')
	if os.path.exists(post_script):
		shell('check_call',
			['su', env['STORAGE_USER'], '-c', post_script, config["target"]],
			env=env)

	# Our nightly cron job executes system status checks immediately after this
	# backup. Since it checks that dovecot and postfix are running, block for a
	# bit (maximum of 10 seconds each) to give each a chance to finish restarting
	# before the status checks might catch them down. See #381.
	wait_for_service(25, True, env, 10)
	wait_for_service(993, True, env, 10)
|
|
|
|
|
2015-04-11 18:43:46 +00:00
|
|
|
def run_duplicity_verification():
	"""Run duplicity's `verify` command against the configured target.

	Compares the data in the backup with the current STORAGE_ROOT contents
	(--compare-data) to check that a) the backup files are readable and
	b) report whether they are up to date. Output goes to the console;
	raises via shell() if duplicity exits non-zero.
	"""
	env = load_environment()
	backup_root = os.path.join(env["STORAGE_ROOT"], 'backup')
	config = get_backup_config(env)
	backup_cache_dir = os.path.join(backup_root, 'cache')

	shell('check_call', [
		"/usr/bin/duplicity",
		"--verbosity", "info",
		"verify",
		"--compare-data",
		"--archive-dir", backup_cache_dir,
		"--exclude", backup_root,
		] + get_duplicity_additional_args(env) + [
		get_duplicity_target_url(config),
		env["STORAGE_ROOT"],
	], get_duplicity_env_vars(env))
|
2015-07-26 16:25:52 +00:00
|
|
|
|
2015-12-09 13:29:58 +00:00
|
|
|
def run_duplicity_restore(args):
	"""Run duplicity's `restore` command against the configured target.

	`args` is the remainder of the command line and is appended verbatim to
	the duplicity invocation (it should include the restore destination
	path). Raises via shell() if duplicity exits non-zero.
	"""
	env = load_environment()
	config = get_backup_config(env)
	backup_cache_dir = os.path.join(env["STORAGE_ROOT"], 'backup', 'cache')
	shell('check_call', [
		"/usr/bin/duplicity",
		"restore",
		"--archive-dir", backup_cache_dir,
		] + get_duplicity_additional_args(env) + [
		get_duplicity_target_url(config)
		] + args,
		get_duplicity_env_vars(env))
|
2015-12-09 13:29:58 +00:00
|
|
|
|
2023-09-02 11:49:41 +00:00
|
|
|
def print_duplicity_command():
	"""Print shell commands the admin can paste to run duplicity by hand.

	Emits `export` lines for the environment variables duplicity needs,
	followed by a template duplicity command line (with a literal
	"{command}" placeholder where the subcommand goes), all shell-quoted.
	"""
	import shlex
	env = load_environment()
	config = get_backup_config(env)
	backup_cache_dir = os.path.join(env["STORAGE_ROOT"], 'backup', 'cache')
	for k, v in get_duplicity_env_vars(env).items():
		print(f"export {k}={shlex.quote(v)}")
	print("duplicity", "{command}", shlex.join([
		"--archive-dir", backup_cache_dir,
		] + get_duplicity_additional_args(env) + [
		get_duplicity_target_url(config)
		]))
|
2015-12-09 13:29:58 +00:00
|
|
|
|
2015-08-09 19:59:38 +00:00
|
|
|
def list_target_files(config):
	"""List the files at the backup target.

	Returns a list of (filename, size_in_bytes) tuples for the configured
	target, dispatching on the target URL's scheme (file, rsync, s3, b2).
	Raises ValueError with a user-displayable message on any problem with
	the target (unparseable URL, unreachable host, missing bucket, etc.).
	"""
	import urllib.parse
	try:
		target = urllib.parse.urlparse(config["target"])
	except ValueError:
		# Fix: previously this returned the *string* "invalid target", which
		# callers expecting a list would silently iterate character-by-character.
		# Raise ValueError like every other error path in this function;
		# callers (e.g. backup_set_custom) already catch it.
		raise ValueError("invalid target")

	if target.scheme == "file":
		# Local directory: just stat every file in it.
		return [(fn, os.path.getsize(os.path.join(target.path, fn))) for fn in os.listdir(target.path)]

	elif target.scheme == "rsync":
		# Parse `rsync --list-only` lines: permissions, size, date, time, name.
		rsync_fn_size_re = re.compile(r'.* ([^ ]*) [^ ]* [^ ]* (.*)')
		rsync_target = '{host}:{path}'

		# Strip off any trailing port specifier because it's not valid in rsync's
		# DEST syntax. Explicitly set the port number for the ssh transport.
		user_host, *_ = target.netloc.rsplit(':', 1)
		try:
			port = target.port
		except ValueError:
			# Malformed (non-numeric) port component.
			port = 22
		if port is None:
			port = 22

		# rsync wants a trailing slash and a path relative to the remote home.
		target_path = target.path
		if not target_path.endswith('/'):
			target_path = target_path + '/'
		if target_path.startswith('/'):
			target_path = target_path[1:]

		rsync_command = [ 'rsync',
					'-e',
					f'/usr/bin/ssh -i /root/.ssh/id_rsa_miab -oStrictHostKeyChecking=no -oBatchMode=yes -p {port}',
					'--list-only',
					'-r',
					rsync_target.format(
						host=user_host,
						path=target_path)
				]

		code, listing = shell('check_output', rsync_command, trap=True, capture_stderr=True)
		if code == 0:
			ret = []
			for l in listing.split('\n'):
				match = rsync_fn_size_re.match(l)
				if match:
					# Group 0 is the size (possibly comma-grouped), group 1 the filename.
					ret.append( (match.groups()[1], int(match.groups()[0].replace(',',''))) )
			return ret
		else:
			# Translate common ssh/rsync failures into a helpful message.
			if 'Permission denied (publickey).' in listing:
				reason = "Invalid user or check you correctly copied the SSH key."
			elif 'No such file or directory' in listing:
				reason = "Provided path {} is invalid.".format(target_path)
			elif 'Network is unreachable' in listing:
				reason = "The IP address {} is unreachable.".format(target.hostname)
			elif 'Could not resolve hostname' in listing:
				reason = "The hostname {} cannot be resolved.".format(target.hostname)
			else:
				# Fix: the adjacent string literals previously concatenated
				# without separating spaces, yielding "error.Please ... --verify'from".
				reason = "Unknown error. " \
					"Please check running 'management/backup.py --verify' " \
					"from mailinabox sources to debug the issue."
			raise ValueError("Connection to rsync host failed: {}".format(reason))

	elif target.scheme == "s3":
		import boto3.s3
		from botocore.exceptions import ClientError

		# separate bucket from path in target
		bucket = target.path[1:].split('/')[0]
		path = '/'.join(target.path[1:].split('/')[1:]) + '/'

		# If no prefix is specified, set the path to '', otherwise boto won't list the files
		if path == '/':
			path = ''

		if bucket == "":
			msg = "Enter an S3 bucket name."
			raise ValueError(msg)

		# connect to the region & bucket
		try:
			s3 = boto3.client('s3', \
				endpoint_url=f'https://{target.hostname}', \
				aws_access_key_id=config['target_user'], \
				aws_secret_access_key=config['target_pass'])
			# Fix: list_objects_v2 omits 'Contents' entirely when the bucket/prefix
			# is empty; indexing it raised KeyError. Treat that as "no files".
			bucket_objects = s3.list_objects_v2(Bucket=bucket, Prefix=path).get('Contents', [])
			backup_list = [(key['Key'][len(path):], key['Size']) for key in bucket_objects]
		except ClientError as e:
			raise ValueError(e)
		return backup_list

	elif target.scheme == 'b2':
		from b2sdk.v1 import InMemoryAccountInfo, B2Api
		from b2sdk.v1.exception import NonExistentBucket
		info = InMemoryAccountInfo()
		b2_api = B2Api(info)

		# Extract information from target: b2://keyid:appkey@bucket
		b2_application_keyid = target.netloc[:target.netloc.index(':')]
		b2_application_key = urllib.parse.unquote(target.netloc[target.netloc.index(':')+1:target.netloc.index('@')])
		b2_bucket = target.netloc[target.netloc.index('@')+1:]

		try:
			b2_api.authorize_account("production", b2_application_keyid, b2_application_key)
			bucket = b2_api.get_bucket_by_name(b2_bucket)
		except NonExistentBucket as e:
			msg = "B2 Bucket does not exist. Please double check your information!"
			# Chain the original exception for debuggability (the old code bound
			# `e` but never used it).
			raise ValueError(msg) from e
		return [(key.file_name, key.size) for key, _ in bucket.ls()]

	else:
		raise ValueError(config["target"])
|
|
|
|
|
2015-07-26 16:25:52 +00:00
|
|
|
|
2015-08-09 16:56:33 +00:00
|
|
|
def backup_set_custom(env, target, target_user, target_pass, min_age):
	"""Save user-supplied backup settings, validating the target first.

	Returns "OK" on success, or a user-displayable error string if the
	target could not be validated.
	"""
	config = get_backup_config(env, for_save=True)

	# min_age must be an int (the UI may hand it to us as a string).
	if isinstance(min_age, str):
		min_age = int(min_age)

	config.update({
		"target": target,
		"target_user": target_user,
		"target_pass": target_pass,
		"min_age_in_days": min_age,
	})

	# Validate by listing the target's files.
	try:
		if config["target"] not in {"off", "local"}:
			# "off"/"local" aren't supported by the following function, which expects
			# a full url in the target key, which is what is there except when
			# loading the config prior to saving
			list_target_files(config)
	except ValueError as e:
		return str(e)

	write_backup_config(env, config)
	return "OK"
|
2016-11-12 14:28:55 +00:00
|
|
|
|
2015-08-28 11:37:04 +00:00
|
|
|
def get_backup_config(env, for_save=False, for_ui=False):
	"""Return the backup configuration as a dict.

	Starts from defaults ({"min_age_in_days": 3, "target": "local"}) and
	merges in STORAGE_ROOT/backup/custom.yaml if present and well-formed.

	for_save: return the raw merged config with no further processing
	  (suitable for writing back to custom.yaml).
	for_ui: strip authentication details (target_user/target_pass) so they
	  are not echoed back to the admin UI.

	Unless for_save, also adds helper fields (file_target_directory,
	enc_pw_file, ssh_pub_key when available) and expands the "local" target
	to its full file:// URL.
	"""
	backup_root = os.path.join(env["STORAGE_ROOT"], 'backup')

	# Defaults.
	config = {
		"min_age_in_days": 3,
		"target": "local",
	}

	# Merge in anything written to custom.yaml. A missing or malformed file
	# simply leaves the defaults in place. Fix: narrowed the bare `except:`
	# to `except Exception:` so SystemExit/KeyboardInterrupt are not swallowed.
	try:
		with open(os.path.join(backup_root, 'custom.yaml')) as f:
			custom_config = rtyaml.load(f)
		if not isinstance(custom_config, dict): raise ValueError() # caught below
		config.update(custom_config)
	except Exception:
		pass

	# When updating config.yaml, don't do any further processing on what we find.
	if for_save:
		return config

	# When passing this back to the admin to show the current settings, do not include
	# authentication details. The user will have to re-enter it.
	if for_ui:
		for field in ("target_user", "target_pass"):
			if field in config:
				del config[field]

	# helper fields for the admin
	config["file_target_directory"] = os.path.join(backup_root, 'encrypted')
	config["enc_pw_file"] = os.path.join(backup_root, 'secret_key.txt')
	if config["target"] == "local":
		# Expand to the full URL.
		config["target"] = "file://" + config["file_target_directory"]
	ssh_pub_key = os.path.join('/root', '.ssh', 'id_rsa_miab.pub')
	if os.path.exists(ssh_pub_key):
		with open(ssh_pub_key) as f:
			config["ssh_pub_key"] = f.read()

	return config
|
|
|
|
|
2015-08-09 16:56:33 +00:00
|
|
|
def write_backup_config(env, newconfig):
	"""Persist the backup settings to STORAGE_ROOT/backup/custom.yaml."""
	config_path = os.path.join(env["STORAGE_ROOT"], 'backup', 'custom.yaml')
	with open(config_path, "w") as f:
		f.write(rtyaml.dump(newconfig))
|
2015-04-11 18:43:46 +00:00
|
|
|
|
2014-09-01 13:06:38 +00:00
|
|
|
if __name__ == "__main__":
	# Command-line dispatch. With no recognized flag, a backup is performed.
	import sys
	if sys.argv[-1] == "--verify":
		# Run duplicity's verification command to check a) the backup files
		# are readable, and b) report if they are up to date.
		run_duplicity_verification()

	elif sys.argv[-1] == "--list":
		# List the saved backup files.
		for fn, size in list_target_files(get_backup_config(load_environment())):
			print("{}\t{}".format(fn, size))

	elif sys.argv[-1] == "--status":
		# Show backup status.
		ret = backup_status(load_environment())
		print(rtyaml.dump(ret["backups"]))
		print("Storage for unmatched files:", ret["unmatched_file_size"])

	elif len(sys.argv) >= 2 and sys.argv[1] == "--restore":
		# Run duplicity restore. Rest of command line passed as arguments
		# to duplicity. The restore path should be specified.
		run_duplicity_restore(sys.argv[2:])

	elif sys.argv[-1] == "--duplicity-command":
		# Print a template duplicity command line for manual use.
		print_duplicity_command()

	else:
		# Perform a backup. Add --full to force a full backup rather than
		# possibly performing an incremental backup.
		full_backup = "--full" in sys.argv
		perform_backup(full_backup)
|