take a full backup more often so we don't keep backups around for so long

This commit is contained in:
Joshua Tauberer 2016-02-05 11:08:33 -05:00
parent 178527dab1
commit 4ed23f44e6
2 changed files with 43 additions and 19 deletions

View File

@ -7,6 +7,7 @@ In Development
* Roundcube updated to version 1.1.4. * Roundcube updated to version 1.1.4.
* On multi-homed machines, Postfix now binds to the right network interface when sending outbound mail so that SPF checks on the receiving end will pass. * On multi-homed machines, Postfix now binds to the right network interface when sending outbound mail so that SPF checks on the receiving end will pass.
* Backup times were displayed with the wrong time zone. * Backup times were displayed with the wrong time zone.
* On low-usage systems, don't hold backups for quite so long by taking a full backup more often.
v0.16 (January 30, 2016) v0.16 (January 30, 2016)
------------------------ ------------------------

View File

@ -81,49 +81,66 @@ def backup_status(env):
# This is relied on by should_force_full() and the next step. # This is relied on by should_force_full() and the next step.
backups = sorted(backups.values(), key = lambda b : b["date"], reverse=True) backups = sorted(backups.values(), key = lambda b : b["date"], reverse=True)
# Get the average size of incremental backups and the size of the # Get the average size of incremental backups, the size of the
# most recent full backup. # most recent full backup, and the date of the most recent
# backup and the most recent full backup.
incremental_count = 0 incremental_count = 0
incremental_size = 0 incremental_size = 0
first_date = None
first_full_size = None first_full_size = None
first_full_date = None
for bak in backups: for bak in backups:
if first_date is None:
first_date = dateutil.parser.parse(bak["date"])
if bak["full"]: if bak["full"]:
first_full_size = bak["size"] first_full_size = bak["size"]
first_full_date = dateutil.parser.parse(bak["date"])
break break
incremental_count += 1 incremental_count += 1
incremental_size += bak["size"] incremental_size += bak["size"]
# Predict how many more increments until the next full backup, # When will the most recent backup be deleted? It won't be deleted if the next
# and add to that the time we hold onto backups, to predict # backup is incremental, because the increments rely on all past increments.
# how long the most recent full backup+increments will be held # So first guess how many more incremental backups will occur until the next
# onto. Round up since the backup occurs on the night following # full backup. That full backup frees up this one to be deleted. But, the backup
# when the threshold is met. # must also be at least min_age_in_days old too.
deleted_in = None deleted_in = None
if incremental_count > 0 and first_full_size is not None: if incremental_count > 0 and first_full_size is not None:
deleted_in = "approx. %d days" % round(config["min_age_in_days"] + (.5 * first_full_size - incremental_size) / (incremental_size/incremental_count) + .5) # How many days until the next incremental backup? First, the part of
# the algorithm based on increment sizes:
est_days_to_next_full = (.5 * first_full_size - incremental_size) / (incremental_size/incremental_count)
est_time_of_next_full = first_date + datetime.timedelta(days=est_days_to_next_full)
# When will a backup be deleted? # ...And then the part of the algorithm based on full backup age:
est_time_of_next_full = min(est_time_of_next_full, first_full_date + datetime.timedelta(days=config["min_age_in_days"]*10+1))
# It still can't be deleted until it's old enough.
est_deleted_on = max(est_time_of_next_full, first_date + datetime.timedelta(days=config["min_age_in_days"]))
deleted_in = "approx. %d days" % round((est_deleted_on-now).total_seconds()/60/60/24 + .5)
# When will a backup be deleted? Set the deleted_in field of each backup.
saw_full = False saw_full = False
days_ago = now - datetime.timedelta(days=config["min_age_in_days"])
for bak in backups: for bak in backups:
if deleted_in: if deleted_in:
# Subsequent backups are deleted when the most recent increment # The most recent increment in a chain and all of the previous backups
# in the chain would be deleted. # it relies on are deleted at the same time.
bak["deleted_in"] = deleted_in bak["deleted_in"] = deleted_in
if bak["full"]: if bak["full"]:
# Reset when we get to a full backup. A new chain starts next. # Reset when we get to a full backup. A new chain starts *next*.
saw_full = True saw_full = True
deleted_in = None deleted_in = None
elif saw_full and not deleted_in: elif saw_full and not deleted_in:
# Mark deleted_in only on the first increment after a full backup. # We're now on backups prior to the most recent full backup. These are
deleted_in = reldate(days_ago, dateutil.parser.parse(bak["date"]), "on next daily backup") # free to be deleted as soon as they are min_age_in_days old.
deleted_in = reldate(now, dateutil.parser.parse(bak["date"]) + datetime.timedelta(days=config["min_age_in_days"]), "on next daily backup")
bak["deleted_in"] = deleted_in bak["deleted_in"] = deleted_in
return { return {
"backups": backups, "backups": backups,
} }
def should_force_full(env): def should_force_full(config, env):
# Force a full backup when the total size of the increments # Force a full backup when the total size of the increments
# since the last full backup is greater than half the size # since the last full backup is greater than half the size
# of that full backup. # of that full backup.
@ -135,8 +152,14 @@ def should_force_full(env):
inc_size += bak["size"] inc_size += bak["size"]
else: else:
# ...until we reach the most recent full backup. # ...until we reach the most recent full backup.
# Return if we should do a full backup. # Return if we should do a full backup, which is based
return inc_size > .5*bak["size"] # on the size of the increments relative to the full
# backup, as well as the age of the full backup.
if inc_size > .5*bak["size"]:
return True
if dateutil.parser.parse(bak["date"]) + datetime.timedelta(days=config["min_age_in_days"]*10+1) > datetime.datetime.now(dateutil.tz.tzlocal()):
return True
return False
else: else:
# If we got here there are no (full) backups, so make one. # If we got here there are no (full) backups, so make one.
# (I love for/else blocks. Here it's just to show off.) # (I love for/else blocks. Here it's just to show off.)
@ -215,7 +238,7 @@ def perform_backup(full_backup):
# the increments since the most recent full backup are # the increments since the most recent full backup are
# large. # large.
try: try:
full_backup = full_backup or should_force_full(env) full_backup = full_backup or should_force_full(config, env)
except Exception as e: except Exception as e:
# This was the first call to duplicity, and there might # This was the first call to duplicity, and there might
# be an error already. # be an error already.