Browse Source

Update backup to be compatible with duplicity 0.8.23

We were using duplicity 0.8.21-ppa202111091602~ubuntu1 from the duplicity PPA probably until June 5, which is when my box automatically updated to 0.8.23-ppa202205151528~ubuntu18.04.1. Starting with that version, two changes broke backups:

* The default s3 backend was changed to boto3. But boto3 depends on the AWS SDK which does not support Ubuntu 18.04, so we can't install it. Instead, we map s3: backup target URLs to the boto+s3 scheme which tells duplicity to use legacy boto. This should be reverted when we can switch to boto3.
* Contrary to the documentation, the s3 target no longer accepts a S3 hostname in the URL. It now reads the bucket from the hostname part of the URL. So we now drop the hostname from our target URL before passing it to duplicity and we pass the endpoint URL in a separate command-line argument. (The boto backend was dropped from duplicity's "uses_netloc" in 74d4cf44b1 (f5a07610d36bd242c3e5b98f8348879a468b866a_37_34,) but other changes may be related.)

The change of target URL (due to both changes) seems to also cause duplicity to store cached data in a different directory within $STORAGE_ROOT/backup/cache, so on the next backup it will re-download cached manifest/signature files. Since the cache directory will still hold the prior data which is no longer needed, it might be a good idea to clear out the cache directory to save space. A system status checks message is added about that.

Fixes #2123
pull/1882/merge
Joshua Tauberer 3 weeks ago
parent
commit
99474b348f
  1. 5
      CHANGELOG.md
  2. 44
      management/backup.py
  3. 12
      management/status_checks.py

5
CHANGELOG.md

@ -1,6 +1,11 @@
CHANGELOG
=========
In Development
--------------
* Fixed S3 backups which broke with duplicity 0.8.23.
Version 56 (January 19, 2022)
-----------------------------

44
management/backup.py

@ -59,7 +59,7 @@ def backup_status(env):
"--archive-dir", backup_cache_dir,
"--gpg-options", "--cipher-algo=AES256",
"--log-fd", "1",
config["target"],
get_duplicity_target_url(config),
] + get_duplicity_additional_args(env),
get_duplicity_env_vars(env),
trap=True)
@ -190,13 +190,45 @@ def get_passphrase(env):
return passphrase
def get_duplicity_target_url(config):
target = config["target"]
if get_target_type(config) == "s3":
from urllib.parse import urlsplit, urlunsplit
target = list(urlsplit(target))
# Duplicity now defaults to boto3 as the backend for S3, but we have
# legacy boto installed (boto3 doesn't support Ubuntu 18.04) so
# we retarget for classic boto.
target[0] = "boto+" + target[0]
# In addition, although we store the S3 hostname in the target URL,
# duplicity no longer accepts it in the target URL. The hostname in
# the target URL must be the bucket name. The hostname is passed
# via get_duplicity_additional_args. Move the first part of the
# path (the bucket name) into the hostname URL component, and leave
# the rest for the path.
target[1], target[2] = target[2].lstrip('/').split('/', 1)
target = urlunsplit(target)
return target
def get_duplicity_additional_args(env):
config = get_backup_config(env)
if get_target_type(config) == 'rsync':
return [
"--ssh-options= -i /root/.ssh/id_rsa_miab",
"--rsync-options= -e \"/usr/bin/ssh -oStrictHostKeyChecking=no -oBatchMode=yes -p 22 -i /root/.ssh/id_rsa_miab\"",
]
elif get_target_type(config) == 's3':
# See note about hostname in get_duplicity_target_url.
from urllib.parse import urlsplit, urlunsplit
target = urlsplit(config["target"])
endpoint_url = urlunsplit(("https", target.netloc, '', '', ''))
return ["--s3-endpoint-url", endpoint_url]
return []
def get_duplicity_env_vars(env):
@ -277,7 +309,7 @@ def perform_backup(full_backup):
"--volsize", "250",
"--gpg-options", "--cipher-algo=AES256",
env["STORAGE_ROOT"],
config["target"],
get_duplicity_target_url(config),
"--allow-source-mismatch"
] + get_duplicity_additional_args(env),
get_duplicity_env_vars(env))
@ -296,7 +328,7 @@ def perform_backup(full_backup):
"--verbosity", "error",
"--archive-dir", backup_cache_dir,
"--force",
config["target"]
get_duplicity_target_url(config)
] + get_duplicity_additional_args(env),
get_duplicity_env_vars(env))
@ -311,7 +343,7 @@ def perform_backup(full_backup):
"--verbosity", "error",
"--archive-dir", backup_cache_dir,
"--force",
config["target"]
get_duplicity_target_url(config)
] + get_duplicity_additional_args(env),
get_duplicity_env_vars(env))
@ -349,7 +381,7 @@ def run_duplicity_verification():
"--compare-data",
"--archive-dir", backup_cache_dir,
"--exclude", backup_root,
config["target"],
get_duplicity_target_url(config),
env["STORAGE_ROOT"],
] + get_duplicity_additional_args(env), get_duplicity_env_vars(env))
@ -361,7 +393,7 @@ def run_duplicity_restore(args):
"/usr/bin/duplicity",
"restore",
"--archive-dir", backup_cache_dir,
config["target"],
get_duplicity_target_url(config),
] + get_duplicity_additional_args(env) + args,
get_duplicity_env_vars(env))

12
management/status_checks.py

@ -253,6 +253,18 @@ def check_free_disk_space(rounded_values, env, output):
if rounded_values: disk_msg = "The disk has less than 15% free space."
output.print_error(disk_msg)
# Check that there's only one duplicity cache. If there's more than one,
# it's probably no longer in use, and we can recommend clearing the cache
# to save space. The cache directory may not exist yet, which is OK.
backup_cache_path = os.path.join(env['STORAGE_ROOT'], 'backup/cache')
try:
backup_cache_count = len(os.listdir(backup_cache_path))
except:
backup_cache_count = 0
if backup_cache_count > 1:
output.print_warning("The backup cache directory {} has more than one backup target cache. Consider clearing this directory to save disk space."
.format(backup_cache_path))
def check_free_memory(rounded_values, env, output):
# Check free memory.
percent_free = 100 - psutil.virtual_memory().percent

Loading…
Cancel
Save