Compare commits
34 Commits
706284d590
...
main
| Author | SHA1 | Date | |
|---|---|---|---|
| 017ad7d3a6 | |||
| fd759a921a | |||
| a84b911155 | |||
| 5ce1dfa39e | |||
| aa3e8f081b | |||
| 1f22b9c603 | |||
| 60abeb636a | |||
| 7ae49a1b2c | |||
| a92b1a82ad | |||
| 39d5d21d48 | |||
| 44c9008e7e | |||
| 8036d93914 | |||
| b55d5763ad | |||
| 53febb48d2 | |||
| d55ddc5914 | |||
| d9319dc0fb | |||
| af330f2777 | |||
| 683fb0b103 | |||
| cf8a527d35 | |||
| a63c729fbc | |||
| a1065d4f1f | |||
| 0e9c96c8b5 | |||
| cafae65f61 | |||
| 7a0d5bfd73 | |||
| 8d1c8982d7 | |||
| e75367c594 | |||
| 64c69cd8e3 | |||
| 132f7149ca | |||
| f1ce085972 | |||
| d8edf90fac | |||
| 301ea217e8 | |||
| 9f537b9c4c | |||
| cf5b643bee | |||
| 3113014b58 |
118
README.md
118
README.md
@@ -78,7 +78,7 @@ vaultik snapshot verify <snapshot-id>
|
||||
VAULTIK_AGE_SECRET_KEY='AGE-SECRET-KEY-...' vaultik snapshot verify --deep <snapshot-id>
|
||||
|
||||
# restore (requires the private key)
|
||||
VAULTIK_AGE_SECRET_KEY='AGE-SECRET-KEY-...' vaultik restore <snapshot-id> /tmp/restored
|
||||
VAULTIK_AGE_SECRET_KEY='AGE-SECRET-KEY-...' vaultik snapshot restore <snapshot-id> /tmp/restored
|
||||
|
||||
# daily cron job: back up, keep a 4-week rolling window of snapshots
|
||||
# 0 3 * * * vaultik snapshot create --cron --prune --keep-newer-than 4w
|
||||
@@ -95,17 +95,17 @@ vaultik [--config <path>] config init
|
||||
vaultik [--config <path>] config edit
|
||||
vaultik [--config <path>] config get <key>
|
||||
vaultik [--config <path>] config set <key> <value>
|
||||
vaultik [--config <path>] snapshot create [snapshot-names...] [--cron] [--prune] [--keep-newer-than <duration>] [--skip-errors]
|
||||
vaultik [--config <path>] snapshot create [snapshot-names...] [--cron] [--prune] [--keep-newer-than <duration>]
|
||||
vaultik [--config <path>] snapshot list [--json]
|
||||
vaultik [--config <path>] snapshot verify <snapshot-id> [--deep] [--json]
|
||||
vaultik [--config <path>] snapshot purge [--keep-latest | --older-than <duration>] [--snapshot <name>...] [--force]
|
||||
vaultik [--config <path>] snapshot remove <snapshot-id|--all> [--dry-run] [--force] [--remote] [--json]
|
||||
vaultik [--config <path>] snapshot prune
|
||||
vaultik [--config <path>] snapshot cleanup
|
||||
vaultik [--config <path>] restore <snapshot-id> <target-dir> [paths...] [--verify]
|
||||
vaultik [--config <path>] snapshot restore <snapshot-id> <target-dir> [paths...] [--verify]
|
||||
vaultik [--config <path>] prune [--force] [--json]
|
||||
vaultik [--config <path>] info
|
||||
vaultik [--config <path>] remote info [--json]
|
||||
vaultik [--config <path>] remote nuke --force
|
||||
vaultik [--config <path>] store info
|
||||
vaultik [--config <path>] database purge [--force]
|
||||
vaultik completion <bash|zsh|fish|powershell>
|
||||
@@ -117,11 +117,12 @@ vaultik version
|
||||
* `--config <path>`: Path to config file (default: `$VAULTIK_CONFIG`, then platform config dir, then `/etc/vaultik/config.yml`)
|
||||
* `--verbose`, `-v`: Enable verbose output
|
||||
* `--debug`: Enable debug output
|
||||
* `--quiet`, `-q`: Suppress non-error output
|
||||
* `--quiet`, `-q`: Suppress non-error output (also suppresses startup banner)
|
||||
* `--skip-errors`: Continue past per-file errors instead of aborting (applies to `snapshot create` and `restore`)
|
||||
|
||||
### environment variables
|
||||
|
||||
* `VAULTIK_AGE_SECRET_KEY`: Age private key for decryption (required for `restore` and `verify --deep`)
|
||||
* `VAULTIK_AGE_SECRET_KEY`: Age private key for decryption (required for `snapshot restore` and `snapshot verify --deep`)
|
||||
* `VAULTIK_CONFIG`: Path to config file (overridden by `--config`)
|
||||
* `VAULTIK_INDEX_PATH`: Override local SQLite index path
|
||||
|
||||
@@ -155,11 +156,13 @@ existing file. Created with mode `0600` since it will contain credentials.
|
||||
**`config edit`**: Open the config file in `$EDITOR` (falls back to `vi`).
|
||||
|
||||
**`config get`**: Print a config value addressed by dotted YAML path
|
||||
(e.g. `vaultik config get s3.bucket`). Non-scalar values print as YAML.
|
||||
(e.g. `vaultik config get storage_url`). Non-scalar values print as YAML.
|
||||
|
||||
**`config set`**: Set a scalar config value by dotted YAML path
|
||||
(e.g. `vaultik config set compression_level 9`). Comments and formatting
|
||||
in the file are preserved; intermediate maps are created as needed.
|
||||
(e.g. `vaultik config set compression_level 9`,
|
||||
`vaultik config set storage_url "file:///mnt/backups"`). Comments and
|
||||
formatting in the file are preserved; intermediate maps are created as
|
||||
needed.
|
||||
|
||||
**`snapshot create`**: Perform incremental backup of configured snapshots.
|
||||
* Optional snapshot names argument to create specific snapshots (default: all)
|
||||
@@ -173,9 +176,12 @@ in the file are preserved; intermediate maps are created as needed.
|
||||
snapshot per name; use `--keep-newer-than` for a rolling window.
|
||||
* `--keep-newer-than <duration>`: With `--prune`, keep snapshots newer than
|
||||
this duration instead of only the latest (e.g. `4w`, `30d`, `6mo`, `1y`)
|
||||
* `--skip-errors`: Skip file read errors (log them loudly but continue)
|
||||
|
||||
**`snapshot list`**: List all snapshots with their timestamps and sizes.
|
||||
**`snapshot list`**: Show every snapshot known to the destination
|
||||
store with timestamps and three sizes per snapshot (compressed
|
||||
remote size; total uncompressed chunk size; size of chunks newly
|
||||
referenced by that snapshot). The uncompressed and "new chunk"
|
||||
columns show `<remote only>` for snapshots not in the local index.
|
||||
* `--json`: Output in JSON format
|
||||
|
||||
**`snapshot verify`**: Verify snapshot integrity.
|
||||
@@ -193,28 +199,31 @@ latest globally).
|
||||
* `--force`: Skip confirmation prompt
|
||||
|
||||
**`snapshot remove`**: Remove a specific snapshot from the local database.
|
||||
Automatically cleans up local rows (files, chunks, blobs) that the removed
|
||||
snapshot was the last referrer for — you don't need a separate prune step
|
||||
after removal.
|
||||
* `--remote`: Also remove snapshot metadata from remote storage
|
||||
* `--all`: Remove all snapshots (requires `--force`)
|
||||
* `--dry-run`: Show what would be deleted without deleting
|
||||
* `--force`: Skip confirmation prompt
|
||||
* `--json`: Output result as JSON
|
||||
|
||||
**`snapshot prune`**: Clean orphaned data from the local database (files,
|
||||
chunks, blobs not referenced by any snapshot).
|
||||
|
||||
**`snapshot cleanup`**: Remove stale local snapshot records that have no
|
||||
corresponding metadata in remote storage. These are typically left behind
|
||||
by incomplete or interrupted backups. Does not touch remote storage.
|
||||
|
||||
**`restore`**: Restore files from a backup snapshot.
|
||||
**`snapshot restore`**: Restore files from a backup snapshot.
|
||||
* Requires `VAULTIK_AGE_SECRET_KEY` environment variable
|
||||
* Optional path arguments to restore specific files/directories (default: all)
|
||||
* Preserves file permissions, timestamps, ownership (ownership requires root),
|
||||
symlinks, and empty directories
|
||||
* `--verify`: After restoring, verify every file's chunk hashes match
|
||||
|
||||
**`prune`**: Remove unreferenced blobs from remote storage.
|
||||
* Scans all snapshot manifests for referenced blobs, deletes any blob not referenced
|
||||
**`prune`**: Tidy up everything that isn't needed. Removes orphaned local
|
||||
database rows (files, chunks, blobs no longer referenced by any completed
|
||||
snapshot) AND deletes unreferenced blobs from remote storage. `snapshot
|
||||
create --prune`, `snapshot remove`, and `snapshot purge` run the same
|
||||
cleanup automatically; this is the manual entry point for the same work.
|
||||
* `--force`: Skip confirmation prompt
|
||||
* `--json`: Output stats as JSON
|
||||
|
||||
@@ -225,6 +234,11 @@ recipients, and local database statistics.
|
||||
metadata sizes, blob counts, and orphaned blob detection.
|
||||
* `--json`: Output as JSON
|
||||
|
||||
**`remote nuke`**: Delete every snapshot's metadata and every blob from the
|
||||
backup destination store, leaving the bucket prefix empty. Destructive and
|
||||
irreversible.
|
||||
* `--force`: Required to confirm destruction.
|
||||
|
||||
**`store info`**: Display storage backend type and statistics.
|
||||
|
||||
**`database purge`**: Delete the local SQLite state database entirely. Remote
|
||||
@@ -379,13 +393,71 @@ Key fields:
|
||||
|
||||
## roadmap
|
||||
|
||||
Items for future releases:
|
||||
Items still to do before / shortly after 1.0. Loosely ordered by
|
||||
priority.
|
||||
|
||||
* Error-condition tests (network failures, disk full, corrupted/missing blobs)
|
||||
* Parallel blob downloads during restore
|
||||
* Bandwidth limiting (`--bwlimit`)
|
||||
* Security audit of encryption implementation
|
||||
* Man pages and richer `--help` examples
|
||||
### correctness and operability
|
||||
|
||||
* **Security audit of the encryption implementation.** Pre-1.0
|
||||
blocker if we're advertising "secure" at the top of this README.
|
||||
age + zstd + content-defined chunking is mostly off-the-shelf
|
||||
pieces, but the seams (key handling, recipient parsing, manifest
|
||||
trust boundary, restore-time identity validation) need an outside
|
||||
read.
|
||||
* **Error-condition tests.** Today's coverage is the happy path
|
||||
plus a few specific regressions. Need fault-injection coverage:
|
||||
network failures mid-blob, disk-full during restore, corrupted /
|
||||
truncated / missing blobs, partial uploads, kill -9 between
|
||||
manifest and db.zst.age writes.
|
||||
* **Verify restored content end-to-end in CI.** The current
|
||||
integration test does this for a small synthetic snapshot but
|
||||
not at scale. A nightly job against a multi-GB representative
|
||||
snapshot would catch silent regressions in the chunker, packer,
|
||||
or restore planner.
|
||||
|
||||
### performance
|
||||
|
||||
* **Parallel blob downloads during restore.** Single-stream right
|
||||
now. With a fast S3 endpoint and a multi-core machine restore is
|
||||
bound by per-blob fetch + decrypt + decompress; running N of
|
||||
those in parallel against the disk cache would close most of the
|
||||
remaining gap. Needs to interact correctly with the locality
|
||||
planner and sweeper.
|
||||
* **Bandwidth limiting (`--bwlimit`).** Both upload and download.
|
||||
Useful for backing up over a shared link. Tricky to make work
|
||||
correctly with the parallel-download story.
|
||||
* **Restart of interrupted restore.** Today restore is restartable
|
||||
in the sense that re-running it overwrites partial output; it
|
||||
doesn't resume from where it stopped or skip already-present
|
||||
files. A `--resume` mode that checks targets before fetching
|
||||
blobs would matter for very large restores.
|
||||
|
||||
### usability
|
||||
|
||||
* **Man pages and richer `--help` examples.** Cobra generates
|
||||
basic help; man pages would be a separate target.
|
||||
* **`--bwlimit` style human-readable size flags** across the
|
||||
command surface where they're currently raw integers.
|
||||
* **`vaultik snapshot diff <a> <b>`** — show which files changed
|
||||
between two snapshots without restoring either.
|
||||
* **Status reporting hook for `--cron`.** When a backup fails
|
||||
silently in cron, the user has no idea. A configurable
|
||||
webhook / email / `notify-send` hook on completion (success and
|
||||
failure) would close the loop.
|
||||
|
||||
### infrastructure
|
||||
|
||||
* **Cross-machine restore documentation.** The "restore from
|
||||
another host" workflow works but isn't documented as a
|
||||
first-class operation in this README. Worth a dedicated section
|
||||
once it's settled.
|
||||
* **Schema migrations.** Currently nonexistent — pre-1.0 schema
|
||||
changes are handled by `vaultik database purge` plus a full
|
||||
re-scan. Post-1.0 we'll need a migration story to keep existing
|
||||
index databases usable across upgrades.
|
||||
* **Storage backend coverage tests.** S3, file://, and rclone://
|
||||
all share the Storer interface but the rclone path is the least
|
||||
exercised in CI.
|
||||
|
||||
---
|
||||
|
||||
|
||||
3
go.mod
3
go.mod
@@ -18,7 +18,6 @@ require (
|
||||
github.com/johannesboyne/gofakes3 v0.0.0-20250603205740-ed9094be7668
|
||||
github.com/klauspost/compress v1.18.1
|
||||
github.com/rclone/rclone v1.72.1
|
||||
github.com/schollz/progressbar/v3 v3.19.0
|
||||
github.com/spf13/afero v1.15.0
|
||||
github.com/spf13/cobra v1.10.1
|
||||
github.com/stretchr/testify v1.11.1
|
||||
@@ -186,7 +185,6 @@ require (
|
||||
github.com/mattn/go-colorable v0.1.14 // indirect
|
||||
github.com/mattn/go-isatty v0.0.20 // indirect
|
||||
github.com/mattn/go-runewidth v0.0.19 // indirect
|
||||
github.com/mitchellh/colorstring v0.0.0-20190213212951-d06e56a500db // indirect
|
||||
github.com/mitchellh/go-homedir v1.1.0 // indirect
|
||||
github.com/mitchellh/mapstructure v1.5.0 // indirect
|
||||
github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect
|
||||
@@ -217,7 +215,6 @@ require (
|
||||
github.com/relvacode/iso8601 v1.7.0 // indirect
|
||||
github.com/remyoudompheng/bigfft v0.0.0-20230129092748-24d4a6f8daec // indirect
|
||||
github.com/rfjakob/eme v1.1.2 // indirect
|
||||
github.com/rivo/uniseg v0.4.7 // indirect
|
||||
github.com/ryanuber/go-glob v1.0.0 // indirect
|
||||
github.com/ryszard/goskiplist v0.0.0-20150312221310-2dfbae5fcf46 // indirect
|
||||
github.com/sabhiram/go-gitignore v0.0.0-20210923224102-525f6e181f06 // indirect
|
||||
|
||||
8
go.sum
8
go.sum
@@ -202,8 +202,6 @@ github.com/cespare/xxhash/v2 v2.3.0 h1:UL815xU9SqsFlibzuggzjXhog7bL6oX9BbNZnL2UF
|
||||
github.com/cespare/xxhash/v2 v2.3.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs=
|
||||
github.com/cevatbarisyilmaz/ara v0.0.4 h1:SGH10hXpBJhhTlObuZzTuFn1rrdmjQImITXnZVPSodc=
|
||||
github.com/cevatbarisyilmaz/ara v0.0.4/go.mod h1:BfFOxnUd6Mj6xmcvRxHN3Sr21Z1T3U2MYkYOmoQe4Ts=
|
||||
github.com/chengxilo/virtualterm v1.0.4 h1:Z6IpERbRVlfB8WkOmtbHiDbBANU7cimRIof7mk9/PwM=
|
||||
github.com/chengxilo/virtualterm v1.0.4/go.mod h1:DyxxBZz/x1iqJjFxTFcr6/x+jSpqN0iwWCOK1q10rlY=
|
||||
github.com/chilts/sid v0.0.0-20190607042430-660e94789ec9 h1:z0uK8UQqjMVYzvk4tiiu3obv2B44+XBsvgEJREQfnO8=
|
||||
github.com/chilts/sid v0.0.0-20190607042430-660e94789ec9/go.mod h1:Jl2neWsQaDanWORdqZ4emBl50J4/aRBBS4FyyG9/PFo=
|
||||
github.com/chzyer/logex v1.1.10/go.mod h1:+Ywpsq7O8HXn0nuIou7OrIPyXbp3wmkHB+jjWRnGsAI=
|
||||
@@ -599,8 +597,6 @@ github.com/miekg/dns v1.1.41 h1:WMszZWJG0XmzbK9FEmzH2TVcqYzFesusSIB41b8KHxY=
|
||||
github.com/miekg/dns v1.1.41/go.mod h1:p6aan82bvRIyn+zDIv9xYNUpwa73JcSh9BKwknJysuI=
|
||||
github.com/mitchellh/cli v1.0.0/go.mod h1:hNIlj7HEI86fIcpObd7a0FcrxTWetlwJDGcceTlRvqc=
|
||||
github.com/mitchellh/cli v1.1.0/go.mod h1:xcISNoH86gajksDmfB23e/pu+B+GeFRMYmoHXxx3xhI=
|
||||
github.com/mitchellh/colorstring v0.0.0-20190213212951-d06e56a500db h1:62I3jR2EmQ4l5rM/4FEfDWcRD+abF5XlKShorW5LRoQ=
|
||||
github.com/mitchellh/colorstring v0.0.0-20190213212951-d06e56a500db/go.mod h1:l0dey0ia/Uv7NcFFVbCLtqEBQbrT4OCwCSKTEv6enCw=
|
||||
github.com/mitchellh/go-homedir v1.1.0 h1:lukF9ziXFxDFPkA1vsr5zpc1XuPDn/wFntq5mG+4E0Y=
|
||||
github.com/mitchellh/go-homedir v1.1.0/go.mod h1:SfyaCUpYCn1Vlf4IUYiD9fPX4A5wJrkLzIz1N1q0pr0=
|
||||
github.com/mitchellh/go-wordwrap v1.0.0/go.mod h1:ZXFpozHsX6DPmq2I0TCekCxypsnAUbP2oI0UX1GXzOo=
|
||||
@@ -705,8 +701,6 @@ github.com/remyoudompheng/bigfft v0.0.0-20230129092748-24d4a6f8daec h1:W09IVJc94
|
||||
github.com/remyoudompheng/bigfft v0.0.0-20230129092748-24d4a6f8daec/go.mod h1:qqbHyh8v60DhA7CoWK5oRCqLrMHRGoxYCSS9EjAz6Eo=
|
||||
github.com/rfjakob/eme v1.1.2 h1:SxziR8msSOElPayZNFfQw4Tjx/Sbaeeh3eRvrHVMUs4=
|
||||
github.com/rfjakob/eme v1.1.2/go.mod h1:cVvpasglm/G3ngEfcfT/Wt0GwhkuO32pf/poW6Nyk1k=
|
||||
github.com/rivo/uniseg v0.4.7 h1:WUdvkW8uEhrYfLC4ZzdpI2ztxP1I582+49Oc5Mq64VQ=
|
||||
github.com/rivo/uniseg v0.4.7/go.mod h1:FN3SvrM+Zdj16jyLfmOkMNblXMcoc8DfTHruCPUcx88=
|
||||
github.com/rogpeppe/go-internal v1.3.0/go.mod h1:M8bDsm7K2OlrFYOpmOWEs/qY81heoFRclV5y23lUDJ4=
|
||||
github.com/rogpeppe/go-internal v1.14.1 h1:UQB4HGPB6osV0SQTLymcB4TgvyWu6ZyliaW0tI/otEQ=
|
||||
github.com/rogpeppe/go-internal v1.14.1/go.mod h1:MaRKkUm5W0goXpeCfT7UZI6fk/L7L7so1lCWt35ZSgc=
|
||||
@@ -721,8 +715,6 @@ github.com/sabhiram/go-gitignore v0.0.0-20210923224102-525f6e181f06 h1:OkMGxebDj
|
||||
github.com/sabhiram/go-gitignore v0.0.0-20210923224102-525f6e181f06/go.mod h1:+ePHsJ1keEjQtpvf9HHw0f4ZeJ0TLRsxhunSI2hYJSs=
|
||||
github.com/samber/lo v1.52.0 h1:Rvi+3BFHES3A8meP33VPAxiBZX/Aws5RxrschYGjomw=
|
||||
github.com/samber/lo v1.52.0/go.mod h1:4+MXEGsJzbKGaUEQFKBq2xtfuznW9oz/WrgyzMzRoM0=
|
||||
github.com/schollz/progressbar/v3 v3.19.0 h1:Ea18xuIRQXLAUidVDox3AbwfUhD0/1IvohyTutOIFoc=
|
||||
github.com/schollz/progressbar/v3 v3.19.0/go.mod h1:IsO3lpbaGuzh8zIMzgY3+J8l4C8GjO0Y9S69eFvNsec=
|
||||
github.com/sean-/seed v0.0.0-20170313163322-e2103e2c3529 h1:nn5Wsu0esKSJiIVhscUtVbo7ada43DJhG55ua/hjS5I=
|
||||
github.com/sean-/seed v0.0.0-20170313163322-e2103e2c3529/go.mod h1:DxrIzT+xaE7yg65j358z/aeFdxmN0P9QXhEzd20vsDc=
|
||||
github.com/sergi/go-diff v1.0.0/go.mod h1:0CfEIISq7TuYL3j771MWULgwwjU+GofnZX9QAmXWZgo=
|
||||
|
||||
@@ -4,7 +4,6 @@ import (
|
||||
"context"
|
||||
"errors"
|
||||
"fmt"
|
||||
"io"
|
||||
"os"
|
||||
"os/signal"
|
||||
"path/filepath"
|
||||
@@ -35,29 +34,36 @@ type AppOptions struct {
|
||||
Invokes []fx.Option
|
||||
}
|
||||
|
||||
// setupGlobals records the startup time and prints the startup banner.
|
||||
// In --cron mode the banner is suppressed (LogOptions.Cron == true).
|
||||
// setupGlobals records the startup time and, when an output-suppression
|
||||
// flag is active, marks the UI writer quiet so that Begin/Complete/
|
||||
// Info/Notice/Detail/Progress are silenced. Warning and Error are NOT
|
||||
// silenced — per the documented convention that --quiet suppresses
|
||||
// non-error output only. The startup banner is printed by CLIEntry
|
||||
// before cobra parses arguments, gated by the same arg-level check.
|
||||
func setupGlobals(lc fx.Lifecycle, g *globals.Globals, v *vaultik.Vaultik, opts log.LogOptions) {
|
||||
lc.Append(fx.Hook{
|
||||
OnStart: func(ctx context.Context) error {
|
||||
g.StartTime = time.Now().UTC()
|
||||
if opts.Cron || opts.Quiet {
|
||||
// Replace UI writer with a discarding one so all
|
||||
// user-facing output is suppressed.
|
||||
v.UI = ui.NewWithColor(io.Discard, false)
|
||||
} else {
|
||||
v.UI.Banner("%s %s by %s (commit %s, built on %s) starting up at %s.",
|
||||
g.Appname, g.Version, globals.Author,
|
||||
g.ShortCommit(), g.CommitDate,
|
||||
g.StartTime.Format(time.RFC3339))
|
||||
v.UI.Banner("%s", globals.Homepage)
|
||||
v.UI.Banner("")
|
||||
v.UI.SetQuiet(true)
|
||||
}
|
||||
return nil
|
||||
},
|
||||
})
|
||||
}
|
||||
|
||||
// writeStartupBanner prints the two-line application banner followed by a
|
||||
// blank line. Used both from the fx hook (for subcommand invocations) and
|
||||
// from the root cobra Run handler (for `vaultik` with no subcommand).
|
||||
func writeStartupBanner(w *ui.Writer, startTime time.Time, shortCommit string) {
|
||||
w.Banner("%s %s by %s (commit %s, built on %s) starting up at %s.",
|
||||
globals.Appname, globals.Version, globals.Author,
|
||||
shortCommit, globals.CommitDate,
|
||||
startTime.Format(time.RFC3339))
|
||||
w.Banner("%s", globals.Homepage)
|
||||
w.Banner("")
|
||||
}
|
||||
|
||||
// NewApp creates a new fx application with common modules.
|
||||
// It sets up the base modules (config, database, logging, globals) and
|
||||
// combines them with any additional modules specified in the options.
|
||||
|
||||
@@ -285,7 +285,7 @@ func newConfigEditCommand() *cobra.Command {
|
||||
func newConfigGetCommand() *cobra.Command {
|
||||
return &cobra.Command{
|
||||
Use: "get <key>",
|
||||
Short: "Print a config value by dotted path (e.g. s3.bucket)",
|
||||
Short: "Print a config value by dotted path (e.g. storage_url, compression_level)",
|
||||
Args: cobra.ExactArgs(1),
|
||||
RunE: func(cmd *cobra.Command, args []string) error {
|
||||
path, err := ResolveConfigPath()
|
||||
@@ -328,9 +328,10 @@ the file back, preserving comments and formatting. Intermediate maps
|
||||
are created as needed.
|
||||
|
||||
Examples:
|
||||
vaultik config set storage_url "file:///mnt/backups"
|
||||
vaultik config set storage_url "s3://bucket/prefix?endpoint=host&region=us-east-1"
|
||||
vaultik config set compression_level 9
|
||||
vaultik config set s3.bucket mybucket
|
||||
vaultik config set storage_url "file:///mnt/backups"`,
|
||||
vaultik config set s3.bucket mybucket # legacy S3 fields still supported`,
|
||||
Args: cobra.ExactArgs(2),
|
||||
RunE: func(cmd *cobra.Command, args []string) error {
|
||||
path, err := ResolveConfigPath()
|
||||
|
||||
@@ -2,14 +2,67 @@ package cli
|
||||
|
||||
import (
|
||||
"os"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"sneak.berlin/go/vaultik/internal/globals"
|
||||
"sneak.berlin/go/vaultik/internal/ui"
|
||||
)
|
||||
|
||||
// CLIEntry is the main entry point for the CLI application.
|
||||
// It creates the root command, executes it, and exits with status 1
|
||||
// if an error occurs. This function should be called from main().
|
||||
// It prints the startup banner (unless a quiet flag is present in os.Args),
|
||||
// executes the root cobra command, and routes any returned error through
|
||||
// the ui.Writer so the user sees a properly formatted "🛑 ERROR:" line.
|
||||
func CLIEntry() {
|
||||
if !bannerSuppressedInArgs(os.Args[1:]) {
|
||||
short := globals.Commit
|
||||
if len(short) > 12 {
|
||||
short = short[:12]
|
||||
}
|
||||
writeStartupBanner(ui.New(os.Stdout), time.Now().UTC(), short)
|
||||
}
|
||||
|
||||
rootCmd := NewRootCommand()
|
||||
rootCmd.SilenceErrors = true
|
||||
|
||||
if err := rootCmd.Execute(); err != nil {
|
||||
ReportError("%s", err.Error())
|
||||
os.Exit(1)
|
||||
}
|
||||
}
|
||||
|
||||
// ReportError emits a user-facing error to stderr in the standard
|
||||
// 🛑 ERROR: format. Use it from goroutine error paths (where returning
|
||||
// an error to cobra isn't an option) and anywhere else a CLI command
|
||||
// must surface a failure outside the normal RunE return path.
|
||||
func ReportError(format string, args ...any) {
|
||||
ui.New(os.Stderr).Error(format, args...)
|
||||
}
|
||||
|
||||
// bannerSuppressedInArgs reports whether args contains a flag that should
// suppress the startup banner: --quiet, -q or --cron.
//
// It is a lightweight pre-scan that runs before cobra parses anything, so
// it works on the raw argument strings:
//
//   - Scanning stops at the "--" argument terminator; anything after it is
//     positional and never suppresses the banner.
//   - Long forms are recognized bare ("--quiet") and with a value
//     ("--quiet=true"). A value that explicitly turns the flag off
//     ("--quiet=false", "--cron=0") does NOT suppress the banner.
//   - Short flags are recognized alone ("-q") and inside a cluster ("-qv",
//     "-vq"). Only the flag letters are inspected, never the text after
//     "=", so a value that merely contains the letter q (e.g. "-v=q") is
//     not mistaken for -q. An "=value" binds to the last letter of the
//     cluster, so "-q=false" and "-vq=false" do not suppress the banner.
//
// The first argument that requests suppression wins; later arguments are
// not consulted.
func bannerSuppressedInArgs(args []string) bool {
	for _, a := range args {
		if a == "--" {
			return false
		}

		// Split "--flag=value" / "-f=value" into the flag part and its value.
		name, val, hasVal := strings.Cut(a, "=")
		turnedOff := hasVal && isFalseFlagValue(val)

		switch {
		case name == "--quiet" || name == "--cron":
			if !turnedOff {
				return true
			}
		case len(name) > 1 && name[0] == '-' && name[1] != '-':
			// Short flag or cluster of short flags, e.g. -q, -qv, -vq.
			cluster := name[1:]
			if !strings.Contains(cluster, "q") {
				continue
			}
			// An explicit false value only applies when q is the flag the
			// value is attached to, i.e. the last letter of the cluster.
			if turnedOff && strings.HasSuffix(cluster, "q") {
				continue
			}
			return true
		}
	}
	return false
}

// isFalseFlagValue reports whether v is one of the spellings that
// strconv.ParseBool accepts as false. Anything else (including malformed
// values) is treated as "not explicitly false".
func isFalseFlagValue(v string) bool {
	switch v {
	case "0", "f", "F", "false", "FALSE", "False":
		return true
	}
	return false
}
|
||||
|
||||
@@ -18,7 +18,7 @@ func TestCLIEntry(t *testing.T) {
|
||||
}
|
||||
|
||||
// Verify all subcommands are registered
|
||||
expectedCommands := []string{"config", "snapshot", "store", "restore", "prune", "info", "version", "remote", "database"}
|
||||
expectedCommands := []string{"config", "snapshot", "store", "prune", "info", "version", "remote", "database"}
|
||||
for _, expected := range expectedCommands {
|
||||
found := false
|
||||
for _, cmd := range cmd.Commands() {
|
||||
@@ -38,7 +38,7 @@ func TestCLIEntry(t *testing.T) {
|
||||
t.Errorf("Failed to find snapshot command: %v", err)
|
||||
} else {
|
||||
// Check snapshot subcommands
|
||||
expectedSubCommands := []string{"create", "list", "purge", "verify", "cleanup"}
|
||||
expectedSubCommands := []string{"create", "list", "purge", "verify", "cleanup", "restore"}
|
||||
for _, expected := range expectedSubCommands {
|
||||
found := false
|
||||
for _, subcmd := range snapshotCmd.Commands() {
|
||||
|
||||
@@ -47,6 +47,7 @@ func NewInfoCommand() *cobra.Command {
|
||||
if err := v.ShowInfo(); err != nil {
|
||||
if err != context.Canceled {
|
||||
log.Error("Failed to show info", "error", err)
|
||||
ReportError("Failed to show info: %v", err)
|
||||
os.Exit(1)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -16,14 +16,19 @@ func NewPruneCommand() *cobra.Command {
|
||||
|
||||
cmd := &cobra.Command{
|
||||
Use: "prune",
|
||||
Short: "Remove unreferenced blobs",
|
||||
Long: `Removes blobs that are not referenced by any snapshot.
|
||||
Short: "Tidy local database and remote storage",
|
||||
Long: `Removes orphaned data from both the local index database and
|
||||
unreferenced blobs from the backup destination store.
|
||||
|
||||
This command scans all snapshots and their manifests to build a list of
|
||||
referenced blobs, then removes any blobs in storage that are not in this list.
|
||||
Local cleanup drops incomplete snapshots and any files, chunks, or
|
||||
blobs no longer referenced by a completed snapshot. Remote cleanup
|
||||
scans every snapshot manifest in the destination store, builds the
|
||||
set of still-referenced blob hashes, and deletes any blob not in that
|
||||
set.
|
||||
|
||||
Use this command after deleting snapshots with 'vaultik purge' to reclaim
|
||||
storage space.`,
|
||||
Snapshot create --prune and snapshot remove run the same cleanup
|
||||
automatically; this command is the manual entry point for the same
|
||||
work (e.g. after a crashed backup or to reclaim storage).`,
|
||||
Args: cobra.NoArgs,
|
||||
RunE: func(cmd *cobra.Command, args []string) error {
|
||||
// Use unified config resolution
|
||||
@@ -49,10 +54,11 @@ storage space.`,
|
||||
// Start the prune operation in a goroutine
|
||||
go func() {
|
||||
// Run the prune operation
|
||||
if err := v.PruneBlobs(opts); err != nil {
|
||||
if err := v.Prune(opts); err != nil {
|
||||
if err != context.Canceled {
|
||||
if !opts.JSON {
|
||||
log.Error("Prune operation failed", "error", err)
|
||||
ReportError("Prune failed: %v", err)
|
||||
}
|
||||
os.Exit(1)
|
||||
}
|
||||
|
||||
@@ -2,6 +2,7 @@ package cli
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"os"
|
||||
|
||||
"github.com/spf13/cobra"
|
||||
@@ -20,6 +21,73 @@ func NewRemoteCommand() *cobra.Command {
|
||||
|
||||
// Add subcommands
|
||||
cmd.AddCommand(newRemoteInfoCommand())
|
||||
cmd.AddCommand(newRemoteNukeCommand())
|
||||
|
||||
return cmd
|
||||
}
|
||||
|
||||
// newRemoteNukeCommand creates the 'remote nuke' subcommand.
|
||||
func newRemoteNukeCommand() *cobra.Command {
|
||||
var force bool
|
||||
|
||||
cmd := &cobra.Command{
|
||||
Use: "nuke",
|
||||
Short: "Delete ALL snapshot metadata and blobs from the backup destination store",
|
||||
Long: `Removes every snapshot's metadata and every blob from remote
|
||||
storage. After this command completes successfully the bucket prefix is
|
||||
empty and the next backup starts from scratch.
|
||||
|
||||
This is destructive and irreversible. Requires --force.`,
|
||||
Args: cobra.NoArgs,
|
||||
RunE: func(cmd *cobra.Command, args []string) error {
|
||||
if !force {
|
||||
return fmt.Errorf("remote nuke requires --force (this deletes ALL remote snapshots and blobs)")
|
||||
}
|
||||
|
||||
configPath, err := ResolveConfigPath()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
rootFlags := GetRootFlags()
|
||||
return RunWithApp(cmd.Context(), AppOptions{
|
||||
ConfigPath: configPath,
|
||||
LogOptions: log.LogOptions{
|
||||
Verbose: rootFlags.Verbose,
|
||||
Debug: rootFlags.Debug,
|
||||
Quiet: rootFlags.Quiet,
|
||||
},
|
||||
Modules: []fx.Option{},
|
||||
Invokes: []fx.Option{
|
||||
fx.Invoke(func(v *vaultik.Vaultik, lc fx.Lifecycle) {
|
||||
lc.Append(fx.Hook{
|
||||
OnStart: func(ctx context.Context) error {
|
||||
go func() {
|
||||
if err := v.NukeRemote(true); err != nil {
|
||||
if err != context.Canceled {
|
||||
log.Error("Remote nuke failed", "error", err)
|
||||
ReportError("Remote nuke failed: %v", err)
|
||||
os.Exit(1)
|
||||
}
|
||||
}
|
||||
if err := v.Shutdowner.Shutdown(); err != nil {
|
||||
log.Error("Failed to shutdown", "error", err)
|
||||
}
|
||||
}()
|
||||
return nil
|
||||
},
|
||||
OnStop: func(ctx context.Context) error {
|
||||
v.Cancel()
|
||||
return nil
|
||||
},
|
||||
})
|
||||
}),
|
||||
},
|
||||
})
|
||||
},
|
||||
}
|
||||
|
||||
cmd.Flags().BoolVar(&force, "force", false, "Required: confirm destruction of ALL remote data")
|
||||
|
||||
return cmd
|
||||
}
|
||||
@@ -62,6 +130,7 @@ func newRemoteInfoCommand() *cobra.Command {
|
||||
if err != context.Canceled {
|
||||
if !jsonOutput {
|
||||
log.Error("Failed to get remote info", "error", err)
|
||||
ReportError("Failed to get remote info: %v", err)
|
||||
}
|
||||
os.Exit(1)
|
||||
}
|
||||
|
||||
@@ -17,6 +17,7 @@ type RootFlags struct {
|
||||
Verbose bool
|
||||
Debug bool
|
||||
Quiet bool
|
||||
SkipErrors bool
|
||||
}
|
||||
|
||||
var rootFlags RootFlags
|
||||
@@ -32,6 +33,13 @@ func NewRootCommand() *cobra.Command {
|
||||
public keys and uploads to S3-compatible storage. No private keys are needed
|
||||
on the source system.`,
|
||||
SilenceUsage: true,
|
||||
// Bare 'vaultik' (no subcommand): print help. The banner is
|
||||
// printed once at process startup by CLIEntry, before cobra
|
||||
// parses arguments, so it appears even when cobra rejects
|
||||
// args (e.g. "requires at least 2 arg(s)") and on --help.
|
||||
Run: func(cmd *cobra.Command, args []string) {
|
||||
_ = cmd.Help()
|
||||
},
|
||||
}
|
||||
|
||||
// Add global flags
|
||||
@@ -39,11 +47,11 @@ on the source system.`,
|
||||
cmd.PersistentFlags().BoolVarP(&rootFlags.Verbose, "verbose", "v", false, "Enable verbose output")
|
||||
cmd.PersistentFlags().BoolVar(&rootFlags.Debug, "debug", false, "Enable debug output")
|
||||
cmd.PersistentFlags().BoolVarP(&rootFlags.Quiet, "quiet", "q", false, "Suppress non-error output")
|
||||
cmd.PersistentFlags().BoolVar(&rootFlags.SkipErrors, "skip-errors", false, "Continue past per-file errors instead of aborting (applies to snapshot create and restore)")
|
||||
|
||||
// Add subcommands
|
||||
cmd.AddCommand(
|
||||
NewConfigCommand(),
|
||||
NewRestoreCommand(),
|
||||
NewPruneCommand(),
|
||||
NewStoreCommand(),
|
||||
NewSnapshotCommand(),
|
||||
|
||||
@@ -25,8 +25,8 @@ func NewSnapshotCommand() *cobra.Command {
|
||||
cmd.AddCommand(newSnapshotPurgeCommand())
|
||||
cmd.AddCommand(newSnapshotVerifyCommand())
|
||||
cmd.AddCommand(newSnapshotRemoveCommand())
|
||||
cmd.AddCommand(newSnapshotPruneCommand())
|
||||
cmd.AddCommand(newSnapshotCleanupCommand())
|
||||
cmd.AddCommand(newSnapshotRestoreCommand())
|
||||
|
||||
return cmd
|
||||
}
|
||||
@@ -49,6 +49,8 @@ specifying a path using --config or by setting VAULTIK_CONFIG to a path.`,
|
||||
RunE: func(cmd *cobra.Command, args []string) error {
|
||||
// Pass snapshot names from args
|
||||
opts.Snapshots = args
|
||||
// --skip-errors is a global flag on the root command.
|
||||
opts.SkipErrors = rootFlags.SkipErrors
|
||||
// Use unified config resolution
|
||||
configPath, err := ResolveConfigPath()
|
||||
if err != nil {
|
||||
@@ -76,6 +78,7 @@ specifying a path using --config or by setting VAULTIK_CONFIG to a path.`,
|
||||
if err := v.CreateSnapshot(opts); err != nil {
|
||||
if err != context.Canceled {
|
||||
log.Error("Snapshot creation failed", "error", err)
|
||||
ReportError("Snapshot creation failed: %v", err)
|
||||
os.Exit(1)
|
||||
}
|
||||
}
|
||||
@@ -103,7 +106,6 @@ specifying a path using --config or by setting VAULTIK_CONFIG to a path.`,
|
||||
cmd.Flags().BoolVar(&opts.Cron, "cron", false, "Run in cron mode (silent unless error)")
|
||||
cmd.Flags().BoolVar(&opts.Prune, "prune", false, "After backup, drop older snapshots of the same name and remove orphaned blobs")
|
||||
cmd.Flags().StringVar(&opts.KeepNewerThan, "keep-newer-than", "", "With --prune: keep snapshots newer than this duration (e.g. 4w, 30d, 6mo) instead of only the latest")
|
||||
cmd.Flags().BoolVar(&opts.SkipErrors, "skip-errors", false, "Skip file read errors (log them loudly but continue)")
|
||||
|
||||
return cmd
|
||||
}
|
||||
@@ -142,6 +144,7 @@ func newSnapshotListCommand() *cobra.Command {
|
||||
if err := v.ListSnapshots(jsonOutput); err != nil {
|
||||
if err != context.Canceled {
|
||||
log.Error("Failed to list snapshots", "error", err)
|
||||
ReportError("Failed to list snapshots: %v", err)
|
||||
os.Exit(1)
|
||||
}
|
||||
}
|
||||
@@ -212,6 +215,7 @@ restrict the operation to specific snapshot names.`,
|
||||
if err := v.PurgeSnapshotsWithOptions(opts); err != nil {
|
||||
if err != context.Canceled {
|
||||
log.Error("Failed to purge snapshots", "error", err)
|
||||
ReportError("Failed to purge snapshots: %v", err)
|
||||
os.Exit(1)
|
||||
}
|
||||
}
|
||||
@@ -285,6 +289,7 @@ func newSnapshotVerifyCommand() *cobra.Command {
|
||||
if err != context.Canceled {
|
||||
if !opts.JSON {
|
||||
log.Error("Verification failed", "error", err)
|
||||
ReportError("Verification failed: %v", err)
|
||||
}
|
||||
os.Exit(1)
|
||||
}
|
||||
@@ -378,6 +383,7 @@ Use --all --force to remove all snapshots.`,
|
||||
if err != context.Canceled {
|
||||
if !opts.JSON {
|
||||
log.Error("Failed to remove snapshot", "error", err)
|
||||
ReportError("Failed to remove snapshot: %v", err)
|
||||
}
|
||||
os.Exit(1)
|
||||
}
|
||||
@@ -408,63 +414,6 @@ Use --all --force to remove all snapshots.`,
|
||||
return cmd
|
||||
}
|
||||
|
||||
// newSnapshotPruneCommand creates the 'snapshot prune' subcommand
|
||||
func newSnapshotPruneCommand() *cobra.Command {
|
||||
cmd := &cobra.Command{
|
||||
Use: "prune",
|
||||
Short: "Remove orphaned data from local database",
|
||||
Long: `Removes orphaned files, chunks, and blobs from the local database.
|
||||
|
||||
This cleans up data that is no longer referenced by any snapshot, which can
|
||||
accumulate from incomplete backups or deleted snapshots.`,
|
||||
Args: cobra.NoArgs,
|
||||
RunE: func(cmd *cobra.Command, args []string) error {
|
||||
// Use unified config resolution
|
||||
configPath, err := ResolveConfigPath()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
rootFlags := GetRootFlags()
|
||||
return RunWithApp(cmd.Context(), AppOptions{
|
||||
ConfigPath: configPath,
|
||||
LogOptions: log.LogOptions{
|
||||
Verbose: rootFlags.Verbose,
|
||||
Debug: rootFlags.Debug,
|
||||
Quiet: rootFlags.Quiet,
|
||||
},
|
||||
Modules: []fx.Option{},
|
||||
Invokes: []fx.Option{
|
||||
fx.Invoke(func(v *vaultik.Vaultik, lc fx.Lifecycle) {
|
||||
lc.Append(fx.Hook{
|
||||
OnStart: func(ctx context.Context) error {
|
||||
go func() {
|
||||
if _, err := v.PruneDatabase(); err != nil {
|
||||
if err != context.Canceled {
|
||||
log.Error("Failed to prune database", "error", err)
|
||||
os.Exit(1)
|
||||
}
|
||||
}
|
||||
if err := v.Shutdowner.Shutdown(); err != nil {
|
||||
log.Error("Failed to shutdown", "error", err)
|
||||
}
|
||||
}()
|
||||
return nil
|
||||
},
|
||||
OnStop: func(ctx context.Context) error {
|
||||
v.Cancel()
|
||||
return nil
|
||||
},
|
||||
})
|
||||
}),
|
||||
},
|
||||
})
|
||||
},
|
||||
}
|
||||
|
||||
return cmd
|
||||
}
|
||||
|
||||
// newSnapshotCleanupCommand creates the 'snapshot cleanup' subcommand
|
||||
func newSnapshotCleanupCommand() *cobra.Command {
|
||||
cmd := &cobra.Command{
|
||||
@@ -499,6 +448,7 @@ This command does not delete anything from remote storage.`,
|
||||
if err := v.CleanupLocalSnapshots(); err != nil {
|
||||
if err != context.Canceled {
|
||||
log.Error("Cleanup failed", "error", err)
|
||||
ReportError("Cleanup failed: %v", err)
|
||||
os.Exit(1)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -29,13 +29,13 @@ type RestoreApp struct {
|
||||
Shutdowner fx.Shutdowner
|
||||
}
|
||||
|
||||
// NewRestoreCommand creates the restore command
|
||||
func NewRestoreCommand() *cobra.Command {
|
||||
// newSnapshotRestoreCommand creates the 'snapshot restore' subcommand
|
||||
func newSnapshotRestoreCommand() *cobra.Command {
|
||||
opts := &RestoreOptions{}
|
||||
|
||||
cmd := &cobra.Command{
|
||||
Use: "restore <snapshot-id> <target-dir> [paths...]",
|
||||
Short: "Restore files from backup",
|
||||
Short: "Restore files from a snapshot",
|
||||
Long: `Download and decrypt files from a backup snapshot.
|
||||
|
||||
This command will restore files from the specified snapshot to the target directory.
|
||||
@@ -46,16 +46,16 @@ Requires the VAULTIK_AGE_SECRET_KEY environment variable to be set with the age
|
||||
|
||||
Examples:
|
||||
# Restore entire snapshot
|
||||
vaultik restore myhost_docs_2025-01-01T12:00:00Z /restore
|
||||
vaultik snapshot restore myhost_docs_2025-01-01T12:00:00Z /restore
|
||||
|
||||
# Restore specific file
|
||||
vaultik restore myhost_docs_2025-01-01T12:00:00Z /restore /home/user/important.txt
|
||||
vaultik snapshot restore myhost_docs_2025-01-01T12:00:00Z /restore /home/user/important.txt
|
||||
|
||||
# Restore specific directory
|
||||
vaultik restore myhost_docs_2025-01-01T12:00:00Z /restore /home/user/documents/
|
||||
vaultik snapshot restore myhost_docs_2025-01-01T12:00:00Z /restore /home/user/documents/
|
||||
|
||||
# Restore and verify all files
|
||||
vaultik restore --verify myhost_docs_2025-01-01T12:00:00Z /restore`,
|
||||
vaultik snapshot restore --verify myhost_docs_2025-01-01T12:00:00Z /restore`,
|
||||
Args: cobra.MinimumNArgs(2),
|
||||
RunE: func(cmd *cobra.Command, args []string) error {
|
||||
return runRestore(cmd, args, opts)
|
||||
@@ -127,10 +127,12 @@ func buildRestoreInvokes(snapshotID string, opts *RestoreOptions) []fx.Option {
|
||||
TargetDir: opts.TargetDir,
|
||||
Paths: opts.Paths,
|
||||
Verify: opts.Verify,
|
||||
SkipErrors: GetRootFlags().SkipErrors,
|
||||
}
|
||||
if err := app.Vaultik.Restore(restoreOpts); err != nil {
|
||||
if err != context.Canceled {
|
||||
log.Error("Restore operation failed", "error", err)
|
||||
ReportError("Restore failed: %v", err)
|
||||
os.Exit(1)
|
||||
}
|
||||
}
|
||||
@@ -130,6 +130,51 @@ func (r *BlobRepository) GetByID(ctx context.Context, id string) (*Blob, error)
|
||||
return &blob, nil
|
||||
}
|
||||
|
||||
// GetAll returns every blob row keyed by blob ID. Useful at restore
|
||||
// start to translate the per-chunk blob_id references in chunkToBlobMap
|
||||
// into blob hashes without doing one GetByID query per chunk.
|
||||
func (r *BlobRepository) GetAll(ctx context.Context) (map[string]*Blob, error) {
|
||||
query := `
|
||||
SELECT id, blob_hash, created_ts, finished_ts, uncompressed_size, compressed_size, uploaded_ts
|
||||
FROM blobs
|
||||
`
|
||||
|
||||
rows, err := r.db.conn.QueryContext(ctx, query)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("querying blobs: %w", err)
|
||||
}
|
||||
defer CloseRows(rows)
|
||||
|
||||
out := make(map[string]*Blob)
|
||||
for rows.Next() {
|
||||
var blob Blob
|
||||
var createdTSUnix int64
|
||||
var finishedTSUnix, uploadedTSUnix sql.NullInt64
|
||||
if err := rows.Scan(
|
||||
&blob.ID,
|
||||
&blob.Hash,
|
||||
&createdTSUnix,
|
||||
&finishedTSUnix,
|
||||
&blob.UncompressedSize,
|
||||
&blob.CompressedSize,
|
||||
&uploadedTSUnix,
|
||||
); err != nil {
|
||||
return nil, fmt.Errorf("scanning blob: %w", err)
|
||||
}
|
||||
blob.CreatedTS = time.Unix(createdTSUnix, 0).UTC()
|
||||
if finishedTSUnix.Valid {
|
||||
ts := time.Unix(finishedTSUnix.Int64, 0).UTC()
|
||||
blob.FinishedTS = &ts
|
||||
}
|
||||
if uploadedTSUnix.Valid {
|
||||
ts := time.Unix(uploadedTSUnix.Int64, 0).UTC()
|
||||
blob.UploadedTS = &ts
|
||||
}
|
||||
out[blob.ID.String()] = &blob
|
||||
}
|
||||
return out, rows.Err()
|
||||
}
|
||||
|
||||
// UpdateFinished updates a blob when it's finalized
|
||||
func (r *BlobRepository) UpdateFinished(ctx context.Context, tx *sql.Tx, id string, hash string, uncompressedSize, compressedSize int64) error {
|
||||
query := `
|
||||
|
||||
@@ -331,6 +331,43 @@ func (r *SnapshotRepository) AddFilesByIDBatch(ctx context.Context, tx *sql.Tx,
|
||||
return nil
|
||||
}
|
||||
|
||||
// PopulateReferencedBlobs ensures snapshot_blobs contains an entry for
|
||||
// every blob that holds a chunk referenced by any file in the snapshot.
|
||||
// This is necessary because the AddBlob hook only runs when a blob is
|
||||
// newly uploaded during a snapshot — fully-deduplicated snapshots (where
|
||||
// every chunk already exists in storage from a prior run) would otherwise
|
||||
// have an empty snapshot_blobs set and be impossible to restore.
|
||||
//
|
||||
// Returns the number of rows inserted (i.e. blobs that were previously
|
||||
// referenced indirectly via file_chunks but not yet recorded in
|
||||
// snapshot_blobs for this snapshot).
|
||||
func (r *SnapshotRepository) PopulateReferencedBlobs(ctx context.Context, tx *sql.Tx, snapshotID string) (int64, error) {
|
||||
query := `
|
||||
INSERT OR IGNORE INTO snapshot_blobs (snapshot_id, blob_id, blob_hash)
|
||||
SELECT DISTINCT ?, blobs.id, blobs.blob_hash
|
||||
FROM blobs
|
||||
JOIN blob_chunks ON blob_chunks.blob_id = blobs.id
|
||||
JOIN file_chunks ON file_chunks.chunk_hash = blob_chunks.chunk_hash
|
||||
JOIN snapshot_files ON snapshot_files.file_id = file_chunks.file_id
|
||||
WHERE snapshot_files.snapshot_id = ?
|
||||
AND blobs.blob_hash IS NOT NULL
|
||||
`
|
||||
|
||||
var result sql.Result
|
||||
var err error
|
||||
if tx != nil {
|
||||
result, err = tx.ExecContext(ctx, query, snapshotID, snapshotID)
|
||||
} else {
|
||||
result, err = r.db.ExecWithLog(ctx, query, snapshotID, snapshotID)
|
||||
}
|
||||
if err != nil {
|
||||
return 0, fmt.Errorf("populating referenced blobs: %w", err)
|
||||
}
|
||||
|
||||
n, _ := result.RowsAffected()
|
||||
return n, nil
|
||||
}
|
||||
|
||||
// AddBlob adds a blob to a snapshot
|
||||
func (r *SnapshotRepository) AddBlob(ctx context.Context, tx *sql.Tx, snapshotID string, blobID types.BlobID, blobHash types.BlobHash) error {
|
||||
query := `
|
||||
@@ -397,6 +434,65 @@ func (r *SnapshotRepository) GetSnapshotTotalCompressedSize(ctx context.Context,
|
||||
return totalSize, nil
|
||||
}
|
||||
|
||||
// GetSnapshotUncompressedChunkSize returns the sum of plaintext sizes of all unique
|
||||
// chunks referenced by a snapshot (via snapshot_files → file_chunks → chunks).
|
||||
func (r *SnapshotRepository) GetSnapshotUncompressedChunkSize(ctx context.Context, snapshotID string) (int64, error) {
|
||||
query := `
|
||||
SELECT COALESCE(SUM(c.size), 0)
|
||||
FROM (
|
||||
SELECT DISTINCT fc.chunk_hash
|
||||
FROM snapshot_files sf
|
||||
JOIN file_chunks fc ON sf.file_id = fc.file_id
|
||||
WHERE sf.snapshot_id = ?
|
||||
) sc
|
||||
JOIN chunks c ON sc.chunk_hash = c.chunk_hash
|
||||
`
|
||||
|
||||
var totalSize int64
|
||||
err := r.db.conn.QueryRowContext(ctx, query, snapshotID).Scan(&totalSize)
|
||||
if err != nil {
|
||||
return 0, fmt.Errorf("querying uncompressed chunk size: %w", err)
|
||||
}
|
||||
|
||||
return totalSize, nil
|
||||
}
|
||||
|
||||
// GetSnapshotNewChunkSize returns the sum of plaintext sizes of chunks that are
|
||||
// referenced by this snapshot but not by any earlier completed snapshot known to
|
||||
// the local database. The result is the marginal uncompressed data this snapshot
|
||||
// added to the dedup pool — i.e., the delta from prior snapshots.
|
||||
func (r *SnapshotRepository) GetSnapshotNewChunkSize(ctx context.Context, snapshotID string) (int64, error) {
|
||||
query := `
|
||||
WITH this_snap_chunks AS (
|
||||
SELECT DISTINCT fc.chunk_hash
|
||||
FROM snapshot_files sf
|
||||
JOIN file_chunks fc ON sf.file_id = fc.file_id
|
||||
WHERE sf.snapshot_id = ?
|
||||
),
|
||||
prior_chunks AS (
|
||||
SELECT DISTINCT fc.chunk_hash
|
||||
FROM snapshots s
|
||||
JOIN snapshot_files sf ON sf.snapshot_id = s.id
|
||||
JOIN file_chunks fc ON fc.file_id = sf.file_id
|
||||
WHERE s.completed_at IS NOT NULL
|
||||
AND s.id != ?
|
||||
AND s.started_at < (SELECT started_at FROM snapshots WHERE id = ?)
|
||||
)
|
||||
SELECT COALESCE(SUM(c.size), 0)
|
||||
FROM chunks c
|
||||
JOIN this_snap_chunks t ON c.chunk_hash = t.chunk_hash
|
||||
WHERE c.chunk_hash NOT IN (SELECT chunk_hash FROM prior_chunks)
|
||||
`
|
||||
|
||||
var totalSize int64
|
||||
err := r.db.conn.QueryRowContext(ctx, query, snapshotID, snapshotID, snapshotID).Scan(&totalSize)
|
||||
if err != nil {
|
||||
return 0, fmt.Errorf("querying new chunk size: %w", err)
|
||||
}
|
||||
|
||||
return totalSize, nil
|
||||
}
|
||||
|
||||
// GetIncompleteSnapshots returns all snapshots that haven't been completed
|
||||
func (r *SnapshotRepository) GetIncompleteSnapshots(ctx context.Context) ([]*Snapshot, error) {
|
||||
query := `
|
||||
|
||||
@@ -46,8 +46,12 @@ func Initialize(cfg Config) {
|
||||
var level slog.Level
|
||||
|
||||
if cfg.Cron || cfg.Quiet {
|
||||
// In quiet/cron mode, only show errors
|
||||
level = slog.LevelError
|
||||
// In cron/quiet mode keep warnings and errors visible — the
|
||||
// whole point of --cron is to stay silent only on total
|
||||
// success, so that anything cron emails to root is genuinely
|
||||
// "something went wrong, look at it." A backup with stuck
|
||||
// permission errors or skipped files should NOT be silent.
|
||||
level = slog.LevelWarn
|
||||
} else if cfg.Debug || strings.Contains(os.Getenv("GODEBUG"), "vaultik") {
|
||||
level = slog.LevelDebug
|
||||
} else if cfg.Verbose {
|
||||
|
||||
40
internal/snapshot/remotekey.go
Normal file
40
internal/snapshot/remotekey.go
Normal file
@@ -0,0 +1,40 @@
|
||||
package snapshot
|
||||
|
||||
import (
|
||||
"crypto/sha256"
|
||||
"encoding/hex"
|
||||
)
|
||||
|
||||
// remoteKeyPrefix is prepended to the snapshot ID before hashing so the
// resulting digest is domain-separated from any other "double SHA256 of
// a string" identifier the user might also use. This value is a hard
// compatibility requirement: changing it invalidates every existing
// snapshot's remote storage path.
const remoteKeyPrefix = "vaultik|"

// RemoteSnapshotKey maps a human-readable snapshot ID to its storage-side
// identifier: the lowercase hex encoding of SHA256(SHA256(prefix + id)),
// always 64 characters long. The two rounds follow Bitcoin's "hash256"
// convention, so the output shows no exploitable structure to a remote
// observer.
//
// The key is used in three places:
//
//   - the "metadata/<remote-key>/..." subdirectory on the storage
//     backend, so a listing of the bucket / file:// destination does not
//     reveal hostnames, configured snapshot names, or backup timestamps;
//   - the `snapshot_id` field of the unencrypted manifest.json.zst, for
//     the same reason;
//   - any code path that needs to translate a known local snapshot ID
//     into the path it would occupy on remote storage.
//
// The human ID stays the user-visible handle everywhere else (local
// database joins, CLI arguments, summary lines, log fields) because it is
// never written to the public bytes once this function gates every
// storage-path construction.
func RemoteSnapshotKey(snapshotID string) string {
	// Round one: stream the prefix and the ID through one hasher, which is
	// equivalent to hashing their concatenation.
	hasher := sha256.New()
	// hash.Hash documents that Write never returns an error.
	_, _ = hasher.Write([]byte(remoteKeyPrefix))
	_, _ = hasher.Write([]byte(snapshotID))
	inner := hasher.Sum(nil)

	// Round two: hash the 32-byte digest of round one.
	outer := sha256.Sum256(inner)
	return hex.EncodeToString(outer[:])
}
|
||||
@@ -224,13 +224,13 @@ func (s *Scanner) Scan(ctx context.Context, path string, snapshotID string) (*Sc
|
||||
|
||||
// Phase 2: Process files and create chunks
|
||||
if len(filesToProcess) > 0 {
|
||||
s.ui.Begin("Processing %s snapshot source files (chunking, compressing, encrypting, uploading).", s.ui.Count(len(filesToProcess)))
|
||||
s.ui.Begin("Backing up %s snapshot source files (chunking, compressing, encrypting, uploading).", s.ui.Count(len(filesToProcess)))
|
||||
log.Info("Phase 2/3: Creating snapshot (chunking, compressing, encrypting, and uploading blobs)")
|
||||
if err := s.processPhase(ctx, filesToProcess, result); err != nil {
|
||||
return nil, fmt.Errorf("process phase failed: %w", err)
|
||||
}
|
||||
} else {
|
||||
s.ui.Info("Snapshot file processing skipped: no changed files (creating metadata-only snapshot).")
|
||||
s.ui.Info("Snapshot file backup skipped: no changed files (creating metadata-only snapshot).")
|
||||
log.Info("Phase 2/3: Skipping (no files need processing, metadata-only snapshot)")
|
||||
}
|
||||
|
||||
@@ -278,7 +278,7 @@ func (s *Scanner) summarizeScanPhase(result *ScanResult, filesToProcess []*FileT
|
||||
"files_skipped", result.FilesSkipped,
|
||||
"bytes_skipped", humanize.Bytes(uint64(result.BytesSkipped)))
|
||||
|
||||
msg := fmt.Sprintf("Enumerated %s snapshot source files (%s total), %s to process (%s)",
|
||||
msg := fmt.Sprintf("Enumerated %s snapshot source files (%s total), %s to back up (%s)",
|
||||
s.ui.Count(result.FilesScanned),
|
||||
s.ui.Size(totalSizeToProcess+result.BytesSkipped),
|
||||
s.ui.Count(len(filesToProcess)),
|
||||
@@ -1067,7 +1067,7 @@ func (s *Scanner) printProcessingProgress(filesProcessed, totalFiles int, bytesP
|
||||
}
|
||||
|
||||
if eta > 0 {
|
||||
s.ui.Progress("Snapshot file processing: %s/%s files (%s), %s/%s, %s, %.0f files/sec, processing elapsed: %s, processing ETA: %s (est remain %s).",
|
||||
s.ui.Progress("Snapshot backup: %s/%s files (%s), %s/%s, %s, %.0f files/sec, backup elapsed: %s, backup ETA: %s (est remain %s).",
|
||||
s.ui.Count(filesProcessed),
|
||||
s.ui.Count(totalFiles),
|
||||
s.ui.Percent(pct),
|
||||
@@ -1079,7 +1079,7 @@ func (s *Scanner) printProcessingProgress(filesProcessed, totalFiles int, bytesP
|
||||
s.ui.Time(time.Now().Add(eta)),
|
||||
s.ui.Duration(eta))
|
||||
} else {
|
||||
s.ui.Progress("Snapshot file processing: %s/%s files (%s), %s/%s, %s, %.0f files/sec, processing elapsed: %s.",
|
||||
s.ui.Progress("Snapshot backup: %s/%s files (%s), %s/%s, %s, %.0f files/sec, backup elapsed: %s.",
|
||||
s.ui.Count(filesProcessed),
|
||||
s.ui.Count(totalFiles),
|
||||
s.ui.Percent(pct),
|
||||
@@ -1177,16 +1177,17 @@ func (s *Scanner) uploadBlobIfNeeded(ctx context.Context, blobPath string, blobW
|
||||
finishedBlob := blobWithReader.FinishedBlob
|
||||
|
||||
// Check if blob already exists (deduplication after restart)
|
||||
destination := s.storage.Info().Location
|
||||
if _, err := s.storage.Stat(ctx, blobPath); err == nil {
|
||||
log.Info("Blob already exists in storage, skipping upload",
|
||||
"hash", finishedBlob.Hash, "size", humanize.Bytes(uint64(finishedBlob.Compressed)))
|
||||
s.ui.Info("Blob %s (%s) already exists in backup destination store. Skipping upload.",
|
||||
s.ui.Hex(finishedBlob.Hash), s.ui.Size(finishedBlob.Compressed))
|
||||
s.ui.Info("Blob %s (%s) already exists at %s. Skipping upload.",
|
||||
s.ui.Hex(finishedBlob.Hash), s.ui.Size(finishedBlob.Compressed), s.ui.Path(destination))
|
||||
return true, nil
|
||||
}
|
||||
|
||||
s.ui.Begin("Uploading blob %s (%s) to backup destination store.",
|
||||
s.ui.Hex(finishedBlob.Hash), s.ui.Size(finishedBlob.Compressed))
|
||||
s.ui.Begin("Uploading blob %s (%s) to %s.",
|
||||
s.ui.Hex(finishedBlob.Hash), s.ui.Size(finishedBlob.Compressed), s.ui.Path(destination))
|
||||
|
||||
progressCallback := s.makeUploadProgressCallback(ctx, finishedBlob, startTime)
|
||||
|
||||
|
||||
@@ -180,10 +180,20 @@ func (sm *SnapshotManager) UpdateSnapshotStatsExtended(ctx context.Context, snap
|
||||
})
|
||||
}
|
||||
|
||||
// CompleteSnapshot marks a snapshot as completed and exports its metadata
|
||||
// CompleteSnapshot marks a snapshot as completed and ensures snapshot_blobs
|
||||
// is populated with every blob holding any chunk referenced by the
|
||||
// snapshot's files (including deduplicated blobs uploaded by prior
|
||||
// snapshots). Without this, fully-deduplicated snapshots are unrestorable.
|
||||
func (sm *SnapshotManager) CompleteSnapshot(ctx context.Context, snapshotID string) error {
|
||||
// Mark the snapshot as completed
|
||||
err := sm.repos.WithTx(ctx, func(ctx context.Context, tx *sql.Tx) error {
|
||||
added, err := sm.repos.Snapshots.PopulateReferencedBlobs(ctx, tx, snapshotID)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if added > 0 {
|
||||
log.Info("Populated snapshot_blobs with dedup-referenced blobs",
|
||||
"snapshot_id", snapshotID, "added", added)
|
||||
}
|
||||
return sm.repos.Snapshots.MarkComplete(ctx, tx, snapshotID)
|
||||
})
|
||||
|
||||
@@ -304,10 +314,17 @@ func (sm *SnapshotManager) prepareExportDB(ctx context.Context, dbPath, snapshot
|
||||
return finalData, tempDBPath, nil
|
||||
}
|
||||
|
||||
// uploadSnapshotArtifacts uploads the database backup and blob manifest to S3
|
||||
// uploadSnapshotArtifacts uploads the database backup and blob manifest
|
||||
// to remote storage at metadata/<remote-key>/, where remote-key is the
|
||||
// double-SHA256 derivation of the snapshot ID (see RemoteSnapshotKey).
|
||||
// We never write the human-readable snapshot ID into any unencrypted
|
||||
// part of remote storage so a listing of the destination bucket leaks
|
||||
// no host, configuration, or scheduling information.
|
||||
func (sm *SnapshotManager) uploadSnapshotArtifacts(ctx context.Context, snapshotID string, dbData, manifestData []byte) error {
|
||||
remoteKey := RemoteSnapshotKey(snapshotID)
|
||||
|
||||
// Upload database backup (compressed and encrypted)
|
||||
dbKey := fmt.Sprintf("metadata/%s/db.zst.age", snapshotID)
|
||||
dbKey := fmt.Sprintf("metadata/%s/db.zst.age", remoteKey)
|
||||
|
||||
dbUploadStart := time.Now()
|
||||
if err := sm.storage.Put(ctx, dbKey, bytes.NewReader(dbData)); err != nil {
|
||||
@@ -322,7 +339,7 @@ func (sm *SnapshotManager) uploadSnapshotArtifacts(ctx context.Context, snapshot
|
||||
"speed", humanize.SI(dbUploadSpeed, "bps"))
|
||||
|
||||
// Upload blob manifest (compressed only, not encrypted)
|
||||
manifestKey := fmt.Sprintf("metadata/%s/manifest.json.zst", snapshotID)
|
||||
manifestKey := fmt.Sprintf("metadata/%s/manifest.json.zst", remoteKey)
|
||||
manifestUploadStart := time.Now()
|
||||
if err := sm.storage.Put(ctx, manifestKey, bytes.NewReader(manifestData)); err != nil {
|
||||
return fmt.Errorf("uploading blob manifest: %w", err)
|
||||
@@ -597,9 +614,11 @@ func (sm *SnapshotManager) generateBlobManifest(ctx context.Context, dbPath stri
|
||||
}
|
||||
}
|
||||
|
||||
// Create manifest
|
||||
// Create manifest. SnapshotID in the unencrypted manifest is the
|
||||
// double-SHA256 remote key, not the human ID, so the public bytes
|
||||
// don't reveal hostname/snapshot-name/timestamp metadata.
|
||||
manifest := &Manifest{
|
||||
SnapshotID: snapshotID,
|
||||
SnapshotID: RemoteSnapshotKey(snapshotID),
|
||||
Timestamp: time.Now().UTC().Format(time.RFC3339),
|
||||
BlobCount: len(blobs),
|
||||
TotalCompressedSize: totalCompressedSize,
|
||||
@@ -670,8 +689,9 @@ func (sm *SnapshotManager) CleanupIncompleteSnapshots(ctx context.Context, hostn
|
||||
|
||||
// Check each incomplete snapshot for metadata in storage
|
||||
for _, snapshot := range incompleteSnapshots {
|
||||
// Check if metadata exists in storage
|
||||
metadataKey := fmt.Sprintf("metadata/%s/db.zst", snapshot.ID)
|
||||
// Check if metadata exists in storage (paths use the hashed
|
||||
// remote key so we don't leak host info to the listing).
|
||||
metadataKey := fmt.Sprintf("metadata/%s/db.zst", RemoteSnapshotKey(snapshot.ID.String()))
|
||||
_, err := sm.storage.Stat(ctx, metadataKey)
|
||||
|
||||
if err != nil {
|
||||
|
||||
@@ -19,15 +19,20 @@ type FileStorer struct {
|
||||
}
|
||||
|
||||
// NewFileStorer creates a new filesystem storage backend.
|
||||
// The basePath directory will be created if it doesn't exist.
|
||||
// Uses the real OS filesystem by default; call SetFilesystem to override for testing.
|
||||
//
|
||||
// Construction is intentionally cheap and does not touch the filesystem.
|
||||
// The basePath is recorded; the directory is created lazily on first
|
||||
// write. Reads (Get/Stat/List) tolerate a missing basePath — a missing
|
||||
// or unmounted destination during `snapshot list` should NOT block the
|
||||
// command, it should degrade to "no remote snapshots reachable" with a
|
||||
// warning. Write operations (Put/PutWithProgress) call MkdirAll for the
|
||||
// per-blob parent directory, which also covers basePath on first use.
|
||||
//
|
||||
// Uses the real OS filesystem by default; call SetFilesystem to
|
||||
// override for testing.
|
||||
func NewFileStorer(basePath string) (*FileStorer, error) {
|
||||
fs := afero.NewOsFs()
|
||||
if err := fs.MkdirAll(basePath, 0755); err != nil {
|
||||
return nil, fmt.Errorf("file:// storage: cannot create or access %s: %w (check that the volume is mounted and writable)", basePath, err)
|
||||
}
|
||||
return &FileStorer{
|
||||
fs: fs,
|
||||
fs: afero.NewOsFs(),
|
||||
basePath: basePath,
|
||||
}, nil
|
||||
}
|
||||
|
||||
@@ -49,9 +49,15 @@ const Marker = "》"
|
||||
// It also counts warnings and errors emitted so the caller can summarize at
|
||||
// the end of an operation ("Finished successfully." vs "Finished with
|
||||
// warnings.").
|
||||
//
|
||||
// When Quiet is set, Begin/Complete/Info/Notice/Detail/Progress/Banner
|
||||
// are silently dropped, but Warning and Error always emit. This honors
|
||||
// the convention that --quiet "Suppresses non-error output" — warnings
|
||||
// and errors are by definition not suppressible.
|
||||
type Writer struct {
|
||||
out io.Writer
|
||||
color bool
|
||||
quiet bool
|
||||
warnings int
|
||||
errors int
|
||||
}
|
||||
@@ -70,6 +76,13 @@ func NewWithColor(out io.Writer, color bool) *Writer {
|
||||
return &Writer{out: out, color: color}
|
||||
}
|
||||
|
||||
// SetQuiet toggles the writer's quiet mode. In quiet mode all message
|
||||
// classes are silenced except Warning and Error.
|
||||
func (w *Writer) SetQuiet(quiet bool) { w.quiet = quiet }
|
||||
|
||||
// Quiet reports whether the writer is in quiet mode.
|
||||
func (w *Writer) Quiet() bool { return w.quiet }
|
||||
|
||||
// Out returns the underlying writer.
|
||||
func (w *Writer) Out() io.Writer { return w.out }
|
||||
|
||||
@@ -100,21 +113,33 @@ func (w *Writer) paint(color, s string) string {
|
||||
|
||||
// Begin prints an operation-start line, left-aligned with a white marker.
|
||||
func (w *Writer) Begin(format string, args ...any) {
|
||||
if w.quiet {
|
||||
return
|
||||
}
|
||||
w.emit(ansiWhite, Marker, "", format, args)
|
||||
}
|
||||
|
||||
// Complete prints an operation-completion line in green, left-aligned.
|
||||
func (w *Writer) Complete(format string, args ...any) {
|
||||
if w.quiet {
|
||||
return
|
||||
}
|
||||
w.emit(ansiGreen, Marker, ansiGreen, format, args)
|
||||
}
|
||||
|
||||
// Info prints a neutral status line, left-aligned with a white marker.
|
||||
func (w *Writer) Info(format string, args ...any) {
|
||||
if w.quiet {
|
||||
return
|
||||
}
|
||||
w.emit(ansiWhite, Marker, "", format, args)
|
||||
}
|
||||
|
||||
// Notice prints an attention-worthy informational line, marker in cyan.
|
||||
func (w *Writer) Notice(format string, args ...any) {
|
||||
if w.quiet {
|
||||
return
|
||||
}
|
||||
w.emit(ansiCyan, Marker, "", format, args)
|
||||
}
|
||||
|
||||
@@ -139,6 +164,9 @@ func (w *Writer) Error(format string, args ...any) {
|
||||
// Distinct from Progress (semantically a "heartbeat") in usage but
|
||||
// visually identical.
|
||||
func (w *Writer) Detail(format string, args ...any) {
|
||||
if w.quiet {
|
||||
return
|
||||
}
|
||||
w.emit(ansiWhite, " "+Marker, "", format, args)
|
||||
}
|
||||
|
||||
@@ -150,12 +178,18 @@ func (w *Writer) ErrorCount() int { return w.errors }
|
||||
|
||||
// Progress prints an indented heartbeat / per-item update, marker in white.
|
||||
func (w *Writer) Progress(format string, args ...any) {
|
||||
if w.quiet {
|
||||
return
|
||||
}
|
||||
w.emit(ansiWhite, " "+Marker, "", format, args)
|
||||
}
|
||||
|
||||
// Banner prints a line with no marker, left-aligned. Bold when color
|
||||
// is enabled. Used for the application startup banner only.
|
||||
func (w *Writer) Banner(format string, args ...any) {
|
||||
if w.quiet {
|
||||
return
|
||||
}
|
||||
body := fmt.Sprintf(format, args...)
|
||||
if w.color {
|
||||
body = ansiBold + body + ansiReset
|
||||
@@ -203,9 +237,26 @@ func (w *Writer) Size(bytes int64) string {
|
||||
return w.paint(ansiMagenta, humanize.Bytes(uint64(bytes)))
|
||||
}
|
||||
|
||||
// Speed colorizes a byte-per-second value as "<size>/sec".
|
||||
// Speed colorizes a network transfer rate. Input is bytes/sec; output is
|
||||
// bits/sec with an appropriate SI unit (bit/sec, Kbit/sec, Mbit/sec, Gbit/sec) —
|
||||
// network transfer rates are conventionally expressed in bits.
|
||||
func (w *Writer) Speed(bytesPerSec float64) string {
|
||||
return w.paint(ansiMagenta, humanize.Bytes(uint64(bytesPerSec))+"/sec")
|
||||
if bytesPerSec <= 0 {
|
||||
return w.paint(ansiMagenta, "N/A")
|
||||
}
|
||||
bitsPerSec := bytesPerSec * 8
|
||||
var s string
|
||||
switch {
|
||||
case bitsPerSec >= 1e9:
|
||||
s = fmt.Sprintf("%.1f Gbit/sec", bitsPerSec/1e9)
|
||||
case bitsPerSec >= 1e6:
|
||||
s = fmt.Sprintf("%.0f Mbit/sec", bitsPerSec/1e6)
|
||||
case bitsPerSec >= 1e3:
|
||||
s = fmt.Sprintf("%.0f Kbit/sec", bitsPerSec/1e3)
|
||||
default:
|
||||
s = fmt.Sprintf("%.0f bit/sec", bitsPerSec)
|
||||
}
|
||||
return w.paint(ansiMagenta, s)
|
||||
}
|
||||
|
||||
// Duration colorizes a time.Duration rounded to the nearest second.
|
||||
|
||||
@@ -100,6 +100,17 @@ func TestValueFormattersPlain(t *testing.T) {
|
||||
t.Errorf("Percent: got %q", got)
|
||||
}
|
||||
|
||||
// Speed: input is bytes/sec, output is bits/sec.
|
||||
if got := w.Speed(0); got != "N/A" {
|
||||
t.Errorf("Speed(0): got %q, want N/A", got)
|
||||
}
|
||||
if got := w.Speed(125_000_000); got != "1.0 Gbit/sec" { // 1 Gbit/s = 125 MB/s
|
||||
t.Errorf("Speed(125e6): got %q", got)
|
||||
}
|
||||
if got := w.Speed(125_000); got != "1 Mbit/sec" {
|
||||
t.Errorf("Speed(125e3): got %q", got)
|
||||
}
|
||||
|
||||
// Time format: today → HH:MM:SS, other day → YYYY-MM-DD HH:MM:SS.
|
||||
today := time.Date(time.Now().Year(), time.Now().Month(), time.Now().Day(), 14, 30, 45, 0, time.Local)
|
||||
if got := w.Time(today); got != "14:30:45" {
|
||||
|
||||
@@ -6,9 +6,11 @@ import (
|
||||
"encoding/hex"
|
||||
"fmt"
|
||||
"io"
|
||||
"time"
|
||||
|
||||
"filippo.io/age"
|
||||
"sneak.berlin/go/vaultik/internal/blobgen"
|
||||
"sneak.berlin/go/vaultik/internal/log"
|
||||
)
|
||||
|
||||
// hashVerifyReader wraps a blobgen.Reader and verifies the double-SHA-256 hash
|
||||
@@ -75,19 +77,34 @@ func (v *Vaultik) FetchAndDecryptBlob(ctx context.Context, blobHash string, expe
|
||||
}
|
||||
|
||||
// FetchBlob downloads a blob and returns a reader for the encrypted data.
|
||||
// Times the Storage.Get and Storage.Stat round-trips separately at
|
||||
// debug level so we can see whether the size-only Stat (which is an
|
||||
// extra request on every fetch) is hurting throughput.
|
||||
func (v *Vaultik) FetchBlob(ctx context.Context, blobHash string, expectedSize int64) (io.ReadCloser, int64, error) {
|
||||
blobPath := fmt.Sprintf("blobs/%s/%s/%s", blobHash[:2], blobHash[2:4], blobHash)
|
||||
|
||||
t0 := time.Now()
|
||||
rc, err := v.Storage.Get(ctx, blobPath)
|
||||
getDur := time.Since(t0)
|
||||
if err != nil {
|
||||
return nil, 0, fmt.Errorf("downloading blob %s: %w", blobHash[:16], err)
|
||||
}
|
||||
|
||||
t0 = time.Now()
|
||||
info, err := v.Storage.Stat(ctx, blobPath)
|
||||
statDur := time.Since(t0)
|
||||
if err != nil {
|
||||
_ = rc.Close()
|
||||
return nil, 0, fmt.Errorf("stat blob %s: %w", blobHash[:16], err)
|
||||
}
|
||||
|
||||
log.Debug("FetchBlob round-trips",
|
||||
"hash", blobHash[:16],
|
||||
"ms_storage_get", getDur.Milliseconds(),
|
||||
"ms_storage_stat", statDur.Milliseconds(),
|
||||
"expected_size", expectedSize,
|
||||
"stat_size", info.Size,
|
||||
)
|
||||
|
||||
return rc, info.Size, nil
|
||||
}
|
||||
|
||||
@@ -2,6 +2,7 @@ package vaultik
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"io"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"sync"
|
||||
@@ -15,9 +16,22 @@ type blobDiskCacheEntry struct {
|
||||
next *blobDiskCacheEntry
|
||||
}
|
||||
|
||||
// blobDiskCache is an LRU cache that stores blobs on disk instead of in memory.
|
||||
// Blobs are written to a temp directory keyed by their hash. When total size
|
||||
// exceeds maxBytes, the least-recently-used entries are evicted (deleted from disk).
|
||||
// blobDiskCache stores blobs on disk keyed by hash. It exposes ReadAt
|
||||
// for slice reads (the restore path uses this so chunk extraction
|
||||
// never reads a whole blob into memory) plus Get/Put for whole-blob
|
||||
// access.
|
||||
//
|
||||
// Eviction policy is caller-controlled. The cache keeps an LRU list
|
||||
// internally and will fall back to LRU eviction if curBytes exceeds
|
||||
// maxBytes. Restore passes math.MaxInt64 as maxBytes and drives
|
||||
// eviction itself via Delete() through restoreSweeper, which deletes
|
||||
// each blob the moment every file that references its chunks has been
|
||||
// written. LRU never fires under that configuration; it is kept as a
|
||||
// safety net for callers that don't manage eviction themselves.
|
||||
//
|
||||
// Get/ReadAt/peak-Len counters are debugging instrumentation used by
|
||||
// tests to assert that the restore code path uses ReadAt rather than
|
||||
// Get and to bound peak disk-cache occupancy.
|
||||
type blobDiskCache struct {
|
||||
mu sync.Mutex
|
||||
dir string
|
||||
@@ -26,6 +40,11 @@ type blobDiskCache struct {
|
||||
items map[string]*blobDiskCacheEntry
|
||||
head *blobDiskCacheEntry // most recent
|
||||
tail *blobDiskCacheEntry // least recent
|
||||
|
||||
// Instrumentation. Mutated under mu; readable via the methods below.
|
||||
getCalls int
|
||||
readAtCalls int
|
||||
peakLen int
|
||||
}
|
||||
|
||||
// newBlobDiskCache creates a new disk-based blob cache with the given max size.
|
||||
@@ -115,12 +134,77 @@ func (c *blobDiskCache) Put(key string, data []byte) error {
|
||||
c.evictLRU()
|
||||
}
|
||||
|
||||
if n := len(c.items); n > c.peakLen {
|
||||
c.peakLen = n
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// PutFromReader streams r into the cache file for key, returning the
|
||||
// total number of bytes written. Unlike Put, the data never has to
|
||||
// reside fully in memory at any point — io.Copy uses an internal
|
||||
// 32 KiB buffer. Used by restore to land a freshly decrypted blob on
|
||||
// disk without buffering its entire plaintext (which may be tens of GB)
|
||||
// in RAM.
|
||||
func (c *blobDiskCache) PutFromReader(key string, r io.Reader) (int64, error) {
|
||||
c.mu.Lock()
|
||||
// Remove any prior entry first; we'll re-link after the file is
|
||||
// written successfully.
|
||||
if e, ok := c.items[key]; ok {
|
||||
c.unlink(e)
|
||||
c.curBytes -= e.size
|
||||
_ = os.Remove(c.path(key))
|
||||
delete(c.items, key)
|
||||
}
|
||||
c.mu.Unlock()
|
||||
|
||||
f, err := os.OpenFile(c.path(key), os.O_CREATE|os.O_TRUNC|os.O_WRONLY, 0o600)
|
||||
if err != nil {
|
||||
return 0, fmt.Errorf("creating cache file: %w", err)
|
||||
}
|
||||
written, copyErr := io.Copy(f, r)
|
||||
closeErr := f.Close()
|
||||
if copyErr != nil {
|
||||
_ = os.Remove(c.path(key))
|
||||
return written, fmt.Errorf("streaming to cache file: %w", copyErr)
|
||||
}
|
||||
if closeErr != nil {
|
||||
_ = os.Remove(c.path(key))
|
||||
return written, fmt.Errorf("closing cache file: %w", closeErr)
|
||||
}
|
||||
|
||||
c.mu.Lock()
|
||||
defer c.mu.Unlock()
|
||||
|
||||
// If the entry would exceed maxBytes outright, drop it on the
|
||||
// floor — but the restore path passes math.MaxInt64 as maxBytes
|
||||
// so this branch is effectively unreachable there.
|
||||
if written > c.maxBytes {
|
||||
_ = os.Remove(c.path(key))
|
||||
return written, nil
|
||||
}
|
||||
|
||||
e := &blobDiskCacheEntry{key: key, size: written}
|
||||
c.pushFront(e)
|
||||
c.items[key] = e
|
||||
c.curBytes += written
|
||||
|
||||
for c.curBytes > c.maxBytes && c.tail != nil {
|
||||
c.evictLRU()
|
||||
}
|
||||
|
||||
if n := len(c.items); n > c.peakLen {
|
||||
c.peakLen = n
|
||||
}
|
||||
|
||||
return written, nil
|
||||
}
|
||||
|
||||
// Get reads a cached blob from disk. Returns data and true on hit.
|
||||
func (c *blobDiskCache) Get(key string) ([]byte, bool) {
|
||||
c.mu.Lock()
|
||||
c.getCalls++
|
||||
e, ok := c.items[key]
|
||||
if !ok {
|
||||
c.mu.Unlock()
|
||||
@@ -147,6 +231,7 @@ func (c *blobDiskCache) Get(key string) ([]byte, bool) {
|
||||
// ReadAt reads a slice of a cached blob without loading the entire blob into memory.
|
||||
func (c *blobDiskCache) ReadAt(key string, offset, length int64) ([]byte, error) {
|
||||
c.mu.Lock()
|
||||
c.readAtCalls++
|
||||
e, ok := c.items[key]
|
||||
if !ok {
|
||||
c.mu.Unlock()
|
||||
@@ -181,6 +266,34 @@ func (c *blobDiskCache) Has(key string) bool {
|
||||
return ok
|
||||
}
|
||||
|
||||
// Delete removes a blob from the cache and its disk file. No-op if absent.
|
||||
// Used by restore's sweep logic to free blobs whose chunks have all been
|
||||
// restored (so they will never be needed again during this restore).
|
||||
func (c *blobDiskCache) Delete(key string) {
|
||||
c.mu.Lock()
|
||||
defer c.mu.Unlock()
|
||||
e, ok := c.items[key]
|
||||
if !ok {
|
||||
return
|
||||
}
|
||||
c.unlink(e)
|
||||
delete(c.items, key)
|
||||
c.curBytes -= e.size
|
||||
_ = os.Remove(c.path(key))
|
||||
}
|
||||
|
||||
// Keys returns a snapshot of all cached keys. Safe for iteration without
|
||||
// holding the cache lock; the cache may change concurrently.
|
||||
func (c *blobDiskCache) Keys() []string {
|
||||
c.mu.Lock()
|
||||
defer c.mu.Unlock()
|
||||
keys := make([]string, 0, len(c.items))
|
||||
for k := range c.items {
|
||||
keys = append(keys, k)
|
||||
}
|
||||
return keys
|
||||
}
|
||||
|
||||
// Size returns current total cached bytes.
|
||||
func (c *blobDiskCache) Size() int64 {
|
||||
c.mu.Lock()
|
||||
@@ -195,6 +308,28 @@ func (c *blobDiskCache) Len() int {
|
||||
return len(c.items)
|
||||
}
|
||||
|
||||
// GetCalls returns the number of times Get has been called.
|
||||
func (c *blobDiskCache) GetCalls() int {
|
||||
c.mu.Lock()
|
||||
defer c.mu.Unlock()
|
||||
return c.getCalls
|
||||
}
|
||||
|
||||
// ReadAtCalls returns the number of times ReadAt has been called.
|
||||
func (c *blobDiskCache) ReadAtCalls() int {
|
||||
c.mu.Lock()
|
||||
defer c.mu.Unlock()
|
||||
return c.readAtCalls
|
||||
}
|
||||
|
||||
// PeakLen returns the maximum number of cached entries ever held at
|
||||
// once during this cache's lifetime.
|
||||
func (c *blobDiskCache) PeakLen() int {
|
||||
c.mu.Lock()
|
||||
defer c.mu.Unlock()
|
||||
return c.peakLen
|
||||
}
|
||||
|
||||
// Close removes the cache directory and all cached blobs.
|
||||
func (c *blobDiskCache) Close() error {
|
||||
c.mu.Lock()
|
||||
|
||||
@@ -10,11 +10,17 @@ import (
|
||||
"sneak.berlin/go/vaultik/internal/types"
|
||||
)
|
||||
|
||||
// SnapshotInfo contains information about a snapshot
|
||||
// SnapshotInfo contains information about a snapshot.
|
||||
// UncompressedSize and NewChunkSize are populated only when the snapshot
|
||||
// is present in the local database; LocallyTracked indicates whether
|
||||
// those values are meaningful.
|
||||
type SnapshotInfo struct {
|
||||
ID types.SnapshotID `json:"id"`
|
||||
Timestamp time.Time `json:"timestamp"`
|
||||
CompressedSize int64 `json:"compressed_size"`
|
||||
ID types.SnapshotID `json:"id"`
|
||||
Timestamp time.Time `json:"timestamp"`
|
||||
CompressedSize int64 `json:"compressed_size"`
|
||||
UncompressedSize int64 `json:"uncompressed_size,omitempty"`
|
||||
NewChunkSize int64 `json:"new_chunk_size,omitempty"`
|
||||
LocallyTracked bool `json:"locally_tracked"`
|
||||
}
|
||||
|
||||
// formatBytes formats bytes in a human-readable format
|
||||
|
||||
@@ -22,14 +22,29 @@ func (v *Vaultik) ShowInfo() error {
|
||||
v.printfStdout("Go Version: %s\n", runtime.Version())
|
||||
v.printlnStdout()
|
||||
|
||||
// Storage Configuration
|
||||
// Storage Configuration. The backend is selected by storage_url
|
||||
// (s3://, file://, rclone://); the legacy s3.* fields are only
|
||||
// printed when they're actually populated, since the URL scheme
|
||||
// is the primary configuration.
|
||||
v.printfStdout("=== Storage Configuration ===\n")
|
||||
v.printfStdout("S3 Bucket: %s\n", v.Config.S3.Bucket)
|
||||
storageInfo := v.Storage.Info()
|
||||
v.printfStdout("Type: %s\n", storageInfo.Type)
|
||||
v.printfStdout("Location: %s\n", storageInfo.Location)
|
||||
if v.Config.StorageURL != "" {
|
||||
v.printfStdout("Storage URL: %s\n", v.Config.StorageURL)
|
||||
}
|
||||
if v.Config.S3.Bucket != "" {
|
||||
v.printfStdout("S3 Bucket: %s\n", v.Config.S3.Bucket)
|
||||
}
|
||||
if v.Config.S3.Prefix != "" {
|
||||
v.printfStdout("S3 Prefix: %s\n", v.Config.S3.Prefix)
|
||||
}
|
||||
v.printfStdout("S3 Endpoint: %s\n", v.Config.S3.Endpoint)
|
||||
v.printfStdout("S3 Region: %s\n", v.Config.S3.Region)
|
||||
if v.Config.S3.Endpoint != "" {
|
||||
v.printfStdout("S3 Endpoint: %s\n", v.Config.S3.Endpoint)
|
||||
}
|
||||
if v.Config.S3.Region != "" {
|
||||
v.printfStdout("S3 Region: %s\n", v.Config.S3.Region)
|
||||
}
|
||||
v.printlnStdout()
|
||||
|
||||
// Backup Settings
|
||||
@@ -337,7 +352,7 @@ func (v *Vaultik) printRemoteInfoTable(result *RemoteInfoResult) {
|
||||
humanize.Comma(int64(result.OrphanedBlobCount)), humanize.Bytes(uint64(result.OrphanedBlobSize)))
|
||||
|
||||
if result.OrphanedBlobCount > 0 {
|
||||
v.printfStdout("\nRun 'vaultik prune --remote' to remove orphaned blobs.\n")
|
||||
v.printfStdout("\nRun 'vaultik prune' to remove orphaned blobs.\n")
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -20,6 +20,7 @@ import (
|
||||
"sneak.berlin/go/vaultik/internal/snapshot"
|
||||
"sneak.berlin/go/vaultik/internal/storage"
|
||||
"sneak.berlin/go/vaultik/internal/types"
|
||||
"sneak.berlin/go/vaultik/internal/ui"
|
||||
"sneak.berlin/go/vaultik/internal/vaultik"
|
||||
)
|
||||
|
||||
@@ -520,6 +521,7 @@ func TestBackupAndRestore(t *testing.T) {
|
||||
Fs: fs,
|
||||
Stdout: io.Discard,
|
||||
Stderr: io.Discard,
|
||||
UI: ui.NewWithColor(io.Discard, false),
|
||||
}
|
||||
vaultikApp.SetContext(ctx)
|
||||
|
||||
@@ -649,10 +651,12 @@ func TestEndToEndFileStorage(t *testing.T) {
|
||||
require.NoError(t, sm.ExportSnapshotMetadata(ctx, dbPath, snapshotID))
|
||||
|
||||
// Verify the backup actually landed on disk under blobs/ and metadata/.
|
||||
// The metadata subdirectory uses the hashed remote key, not the human
|
||||
// snapshot ID, so the on-disk structure doesn't leak hostname/name/time.
|
||||
blobInfo, err := os.Stat(filepath.Join(storeDir, "blobs"))
|
||||
require.NoError(t, err)
|
||||
require.True(t, blobInfo.IsDir())
|
||||
metaInfo, err := os.Stat(filepath.Join(storeDir, "metadata", snapshotID))
|
||||
metaInfo, err := os.Stat(filepath.Join(storeDir, "metadata", snapshot.RemoteSnapshotKey(snapshotID)))
|
||||
require.NoError(t, err)
|
||||
require.True(t, metaInfo.IsDir())
|
||||
|
||||
@@ -666,6 +670,7 @@ func TestEndToEndFileStorage(t *testing.T) {
|
||||
Fs: fs,
|
||||
Stdout: io.Discard,
|
||||
Stderr: io.Discard,
|
||||
UI: ui.NewWithColor(io.Discard, false),
|
||||
}
|
||||
restoreVaultik.SetContext(ctx)
|
||||
|
||||
@@ -703,6 +708,124 @@ func TestEndToEndFileStorage(t *testing.T) {
|
||||
assert.Equal(t, "small.txt", target, "symlink target should be preserved")
|
||||
}
|
||||
|
||||
// TestDedupOnlySnapshotRestores backs up the same directory twice without
|
||||
// touching it between runs, then restores the SECOND (fully-deduplicated)
|
||||
// snapshot. The second snapshot uploads no new blobs — every chunk is
|
||||
// already in storage from the first run. This test guards against the
|
||||
// regression where snapshot_blobs was populated only for blobs uploaded
|
||||
// during the snapshot, leaving fully-deduplicated snapshots unrestorable
|
||||
// with "chunk X not found in any blob" errors.
|
||||
func TestDedupOnlySnapshotRestores(t *testing.T) {
|
||||
log.Initialize(log.Config{})
|
||||
|
||||
fs := afero.NewOsFs()
|
||||
tempDir, err := os.MkdirTemp("", "vaultik-dedup-")
|
||||
require.NoError(t, err)
|
||||
defer func() { _ = os.RemoveAll(tempDir) }()
|
||||
|
||||
dataDir := filepath.Join(tempDir, "source")
|
||||
storeDir := filepath.Join(tempDir, "remote")
|
||||
restoreDir := filepath.Join(tempDir, "restored")
|
||||
dbPath := filepath.Join(tempDir, "index.sqlite")
|
||||
|
||||
chunkSize := int64(64 * 1024)
|
||||
maxBlobSize := int64(512 * 1024)
|
||||
|
||||
testFiles := map[string][]byte{
|
||||
filepath.Join(dataDir, "a.bin"): bytesPattern("a-", int(chunkSize*3)),
|
||||
filepath.Join(dataDir, "b.bin"): bytesPattern("b-", int(chunkSize*2)),
|
||||
}
|
||||
for path, content := range testFiles {
|
||||
require.NoError(t, fs.MkdirAll(filepath.Dir(path), 0o755))
|
||||
require.NoError(t, afero.WriteFile(fs, path, content, 0o644))
|
||||
}
|
||||
|
||||
storer, err := storage.NewFileStorer(storeDir)
|
||||
require.NoError(t, err)
|
||||
|
||||
agePublicKey := "age1ezrjmfpwsc95svdg0y54mums3zevgzu0x0ecq2f7tp8a05gl0sjq9q9wjg"
|
||||
ageSecretKey := "AGE-SECRET-KEY-19CR5YSFW59HM4TLD6GXVEDMZFTVVF7PPHKUT68TXSFPK7APHXA2QS2NJA5"
|
||||
|
||||
cfg := &config.Config{
|
||||
AgeRecipients: []string{agePublicKey},
|
||||
AgeSecretKey: ageSecretKey,
|
||||
CompressionLevel: 3,
|
||||
Hostname: "test-host",
|
||||
}
|
||||
|
||||
ctx := context.Background()
|
||||
db, err := database.New(ctx, dbPath)
|
||||
require.NoError(t, err)
|
||||
defer func() { _ = db.Close() }()
|
||||
repos := database.NewRepositories(db)
|
||||
|
||||
makeScanner := func() *snapshot.Scanner {
|
||||
return snapshot.NewScanner(snapshot.ScannerConfig{
|
||||
FS: fs,
|
||||
Storage: storer,
|
||||
ChunkSize: chunkSize,
|
||||
MaxBlobSize: maxBlobSize,
|
||||
CompressionLevel: cfg.CompressionLevel,
|
||||
AgeRecipients: cfg.AgeRecipients,
|
||||
Repositories: repos,
|
||||
})
|
||||
}
|
||||
sm := snapshot.NewSnapshotManager(snapshot.SnapshotManagerParams{
|
||||
Repos: repos, Storage: storer, Config: cfg,
|
||||
})
|
||||
sm.SetFilesystem(fs)
|
||||
|
||||
// First snapshot — uploads all blobs.
|
||||
id1, err := sm.CreateSnapshotWithName(ctx, cfg.Hostname, "dedup", "v", "g")
|
||||
require.NoError(t, err)
|
||||
r1, err := makeScanner().Scan(ctx, dataDir, id1)
|
||||
require.NoError(t, err)
|
||||
require.Greater(t, r1.BlobsCreated, 0, "first snapshot should upload at least one blob")
|
||||
require.NoError(t, sm.CompleteSnapshot(ctx, id1))
|
||||
require.NoError(t, sm.ExportSnapshotMetadata(ctx, dbPath, id1))
|
||||
|
||||
// Second snapshot — same data, every chunk dedups. Sleep past the
|
||||
// second-precision timestamp so the snapshot IDs differ.
|
||||
time.Sleep(1100 * time.Millisecond)
|
||||
id2, err := sm.CreateSnapshotWithName(ctx, cfg.Hostname, "dedup", "v", "g")
|
||||
require.NoError(t, err)
|
||||
r2, err := makeScanner().Scan(ctx, dataDir, id2)
|
||||
require.NoError(t, err)
|
||||
require.Equal(t, 0, r2.BlobsCreated, "second snapshot should upload zero new blobs (fully dedup'd)")
|
||||
require.NoError(t, sm.CompleteSnapshot(ctx, id2))
|
||||
require.NoError(t, sm.ExportSnapshotMetadata(ctx, dbPath, id2))
|
||||
|
||||
// snapshot_blobs for id2 must be populated despite no uploads.
|
||||
blobHashes, err := repos.Snapshots.GetBlobHashes(ctx, id2)
|
||||
require.NoError(t, err)
|
||||
require.NotEmpty(t, blobHashes, "snapshot_blobs for fully-dedup'd snapshot must reference blobs uploaded by prior snapshot")
|
||||
|
||||
require.NoError(t, db.Close())
|
||||
|
||||
restoreVaultik := &vaultik.Vaultik{
|
||||
Config: cfg,
|
||||
Storage: storer,
|
||||
Fs: fs,
|
||||
Stdout: io.Discard,
|
||||
Stderr: io.Discard,
|
||||
UI: ui.NewWithColor(io.Discard, false),
|
||||
}
|
||||
restoreVaultik.SetContext(ctx)
|
||||
|
||||
require.NoError(t, restoreVaultik.Restore(&vaultik.RestoreOptions{
|
||||
SnapshotID: id2,
|
||||
TargetDir: restoreDir,
|
||||
Verify: true,
|
||||
}))
|
||||
|
||||
for origPath, expected := range testFiles {
|
||||
restoredPath := filepath.Join(restoreDir, origPath)
|
||||
got, err := afero.ReadFile(fs, restoredPath)
|
||||
require.NoError(t, err, "restored file missing: %s", restoredPath)
|
||||
require.Equalf(t, expected, got, "byte-equality failed for %s", origPath)
|
||||
}
|
||||
}
|
||||
|
||||
// bytesPattern returns a deterministic byte slice of length n with a tag prefix,
|
||||
// useful for forcing chunker behavior with reproducible content.
|
||||
func bytesPattern(tag string, n int) []byte {
|
||||
|
||||
@@ -15,6 +15,31 @@ type PruneOptions struct {
|
||||
JSON bool
|
||||
}
|
||||
|
||||
// NukeRemote deletes every snapshot's metadata and every blob from remote
|
||||
// storage. After this returns successfully the bucket prefix is empty and
|
||||
// the next backup starts from scratch.
|
||||
//
|
||||
// Refuses to run unless force is true. The caller is responsible for
|
||||
// confirming with the user.
|
||||
func (v *Vaultik) NukeRemote(force bool) error {
|
||||
if !force {
|
||||
return fmt.Errorf("nuke requires --force (this deletes ALL remote snapshots and blobs)")
|
||||
}
|
||||
|
||||
v.UI.Begin("Removing all snapshot metadata from backup destination store.")
|
||||
if _, err := v.RemoveAllSnapshots(&RemoveOptions{Force: true, Remote: true}); err != nil {
|
||||
return fmt.Errorf("removing all snapshots: %w", err)
|
||||
}
|
||||
|
||||
v.UI.Begin("Removing all blobs from backup destination store.")
|
||||
if err := v.PruneBlobs(&PruneOptions{Force: true}); err != nil {
|
||||
return fmt.Errorf("pruning blobs: %w", err)
|
||||
}
|
||||
|
||||
v.UI.Complete("Backup destination store is now empty.")
|
||||
return nil
|
||||
}
|
||||
|
||||
// PruneBlobsResult contains the result of a blob prune operation
|
||||
type PruneBlobsResult struct {
|
||||
BlobsFound int `json:"blobs_found"`
|
||||
@@ -23,6 +48,19 @@ type PruneBlobsResult struct {
|
||||
BytesFreed int64 `json:"bytes_freed"`
|
||||
}
|
||||
|
||||
// Prune removes orphaned data from the local index database AND
|
||||
// unreferenced blobs from the backup destination store. This is the
|
||||
// single user-facing prune entry point — the split between local and
|
||||
// remote cleanup is an implementation detail. Calling code should
|
||||
// prefer this method over PruneDatabase or PruneBlobs individually
|
||||
// unless it specifically wants one half.
|
||||
func (v *Vaultik) Prune(opts *PruneOptions) error {
|
||||
if _, err := v.PruneDatabase(); err != nil {
|
||||
return fmt.Errorf("pruning local database: %w", err)
|
||||
}
|
||||
return v.PruneBlobs(opts)
|
||||
}
|
||||
|
||||
// PruneBlobs removes unreferenced blobs from storage
|
||||
func (v *Vaultik) PruneBlobs(opts *PruneOptions) error {
|
||||
log.Info("Starting prune operation")
|
||||
@@ -85,20 +123,22 @@ func (v *Vaultik) PruneBlobs(opts *PruneOptions) error {
|
||||
// collectReferencedBlobs downloads all manifests and returns the set of referenced blob hashes
|
||||
func (v *Vaultik) collectReferencedBlobs() (map[string]bool, error) {
|
||||
log.Info("Listing remote snapshots")
|
||||
snapshotIDs, err := v.listUniqueSnapshotIDs()
|
||||
// IDs returned by listUniqueSnapshotIDs are remote keys (hashed
|
||||
// subdirectories under metadata/), not human snapshot IDs.
|
||||
remoteKeys, err := v.listUniqueSnapshotIDs()
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("listing snapshot IDs: %w", err)
|
||||
return nil, fmt.Errorf("listing snapshot keys: %w", err)
|
||||
}
|
||||
log.Info("Found manifests in remote storage", "count", len(snapshotIDs))
|
||||
log.Info("Found manifests in remote storage", "count", len(remoteKeys))
|
||||
|
||||
allBlobsReferenced := make(map[string]bool)
|
||||
manifestCount := 0
|
||||
|
||||
for _, snapshotID := range snapshotIDs {
|
||||
log.Debug("Processing manifest", "snapshot_id", snapshotID)
|
||||
manifest, err := v.downloadManifest(snapshotID)
|
||||
for _, remoteKey := range remoteKeys {
|
||||
log.Debug("Processing manifest", "remote_key", remoteKey)
|
||||
manifest, err := v.downloadManifestByKey(remoteKey)
|
||||
if err != nil {
|
||||
log.Error("Failed to download manifest", "snapshot_id", snapshotID, "error", err)
|
||||
log.Error("Failed to download manifest", "remote_key", remoteKey, "error", err)
|
||||
continue
|
||||
}
|
||||
for _, blob := range manifest.Blobs {
|
||||
|
||||
@@ -132,7 +132,9 @@ func (s *testStorer) Info() storage.StorageInfo {
|
||||
}
|
||||
}
|
||||
|
||||
// addManifest creates a compressed manifest in storage
|
||||
// addManifest creates a compressed manifest in storage at the same
|
||||
// hashed path the production code uses. snapshotID is the human ID;
|
||||
// the storage path uses RemoteSnapshotKey(id).
|
||||
func addManifest(t *testing.T, store *testStorer, snapshotID string, blobHashes []string) {
|
||||
t.Helper()
|
||||
|
||||
@@ -144,8 +146,9 @@ func addManifest(t *testing.T, store *testStorer, snapshotID string, blobHashes
|
||||
}
|
||||
}
|
||||
|
||||
remoteKey := snapshot.RemoteSnapshotKey(snapshotID)
|
||||
manifest := &snapshot.Manifest{
|
||||
SnapshotID: snapshotID,
|
||||
SnapshotID: remoteKey,
|
||||
BlobCount: len(blobs),
|
||||
Blobs: blobs,
|
||||
}
|
||||
@@ -153,11 +156,19 @@ func addManifest(t *testing.T, store *testStorer, snapshotID string, blobHashes
|
||||
data, err := snapshot.EncodeManifest(manifest, 3)
|
||||
require.NoError(t, err)
|
||||
|
||||
key := "metadata/" + snapshotID + "/manifest.json.zst"
|
||||
key := "metadata/" + remoteKey + "/manifest.json.zst"
|
||||
err = store.Put(context.Background(), key, bytes.NewReader(data))
|
||||
require.NoError(t, err)
|
||||
}
|
||||
|
||||
// remoteKeyPath returns the storage-relative path to a snapshot's
|
||||
// metadata directory or manifest under the hashed remote-key scheme.
|
||||
// Tests use this in hasKey/asserts to avoid scattering RemoteSnapshotKey
|
||||
// calls throughout.
|
||||
func remoteKeyPath(snapshotID, suffix string) string {
|
||||
return "metadata/" + snapshot.RemoteSnapshotKey(snapshotID) + "/" + suffix
|
||||
}
|
||||
|
||||
// addBlob adds a fake blob to storage
|
||||
func addBlob(t *testing.T, store *testStorer, hash string) {
|
||||
t.Helper()
|
||||
@@ -198,7 +209,7 @@ func TestRemoveSnapshot_LocalOnly(t *testing.T) {
|
||||
// Blobs should NOT be deleted (that's what prune is for)
|
||||
assert.True(t, store.hasKey("blobs/aa/aa/"+blobA))
|
||||
// Remote metadata should NOT be deleted (no --remote flag)
|
||||
assert.True(t, store.hasKey("metadata/snapshot-001/manifest.json.zst"))
|
||||
assert.True(t, store.hasKey(remoteKeyPath("snapshot-001", "manifest.json.zst")))
|
||||
|
||||
// Verify output
|
||||
assert.Contains(t, tv.Stdout.String(), "Removed snapshot 'snapshot-001' from local database")
|
||||
@@ -225,7 +236,7 @@ func TestRemoveSnapshot_WithRemote(t *testing.T) {
|
||||
// Blobs should NOT be deleted
|
||||
assert.True(t, store.hasKey("blobs/aa/aa/"+blobA))
|
||||
// Remote metadata SHOULD be deleted
|
||||
assert.False(t, store.hasKey("metadata/snapshot-001/manifest.json.zst"))
|
||||
assert.False(t, store.hasKey(remoteKeyPath("snapshot-001", "manifest.json.zst")))
|
||||
|
||||
// Verify output mentions prune
|
||||
assert.Contains(t, tv.Stdout.String(), "Removed snapshot 'snapshot-001' from local database")
|
||||
@@ -255,7 +266,7 @@ func TestRemoveSnapshot_DryRun(t *testing.T) {
|
||||
// Nothing should be deleted
|
||||
assert.Equal(t, initialCount, store.keyCount())
|
||||
assert.True(t, store.hasKey("blobs/aa/aa/"+blobA))
|
||||
assert.True(t, store.hasKey("metadata/snapshot-001/manifest.json.zst"))
|
||||
assert.True(t, store.hasKey(remoteKeyPath("snapshot-001", "manifest.json.zst")))
|
||||
|
||||
// Verify dry run message
|
||||
assert.Contains(t, tv.Stdout.String(), "[Dry run - no changes made]")
|
||||
@@ -299,8 +310,8 @@ func TestRemoveAllSnapshots_WithForce(t *testing.T) {
|
||||
// Blobs should NOT be deleted
|
||||
assert.True(t, store.hasKey("blobs/aa/aa/"+blobA))
|
||||
// Remote metadata SHOULD be deleted
|
||||
assert.False(t, store.hasKey("metadata/snapshot-001/manifest.json.zst"))
|
||||
assert.False(t, store.hasKey("metadata/snapshot-002/manifest.json.zst"))
|
||||
assert.False(t, store.hasKey(remoteKeyPath("snapshot-001", "manifest.json.zst")))
|
||||
assert.False(t, store.hasKey(remoteKeyPath("snapshot-002", "manifest.json.zst")))
|
||||
|
||||
// Verify output
|
||||
assert.Contains(t, tv.Stdout.String(), "Removed 2 snapshot(s)")
|
||||
@@ -318,7 +329,10 @@ func TestRemoveAllSnapshots_DryRun(t *testing.T) {
|
||||
|
||||
tv := vaultik.NewForTesting(store)
|
||||
|
||||
opts := &vaultik.RemoveOptions{All: true, Force: true, DryRun: true}
|
||||
// --remote is required to enumerate orphan remote keys; without
|
||||
// it, RemoveAll only acts on local snapshots, and NewForTesting
|
||||
// has no local DB.
|
||||
opts := &vaultik.RemoveOptions{All: true, Force: true, DryRun: true, Remote: true}
|
||||
result, err := tv.RemoveAllSnapshots(opts)
|
||||
|
||||
require.NoError(t, err)
|
||||
|
||||
@@ -7,34 +7,28 @@ import (
|
||||
"encoding/hex"
|
||||
"fmt"
|
||||
"io"
|
||||
"math"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"time"
|
||||
|
||||
"filippo.io/age"
|
||||
"github.com/dustin/go-humanize"
|
||||
"github.com/schollz/progressbar/v3"
|
||||
"github.com/spf13/afero"
|
||||
"golang.org/x/term"
|
||||
"sneak.berlin/go/vaultik/internal/blobgen"
|
||||
"sneak.berlin/go/vaultik/internal/database"
|
||||
"sneak.berlin/go/vaultik/internal/log"
|
||||
"sneak.berlin/go/vaultik/internal/snapshot"
|
||||
"sneak.berlin/go/vaultik/internal/types"
|
||||
)
|
||||
|
||||
const (
|
||||
// progressBarWidth is the character width of the progress bar display.
|
||||
progressBarWidth = 40
|
||||
// progressBarThrottle is the minimum interval between progress bar redraws.
|
||||
progressBarThrottle = 100 * time.Millisecond
|
||||
)
|
||||
|
||||
// RestoreOptions carries the caller-supplied settings for one restore
// operation.
type RestoreOptions struct {
	// SnapshotID identifies the snapshot to restore.
	SnapshotID string
	// TargetDir is the directory the restored files are written under.
	TargetDir string
	// Paths optionally limits the restore to these paths; empty means
	// restore everything.
	Paths []string
	// Verify requests verification of restored files by checking chunk
	// hashes.
	Verify bool
	// SkipErrors continues past file-restore errors instead of aborting.
	SkipErrors bool
}
|
||||
|
||||
// RestoreResult contains statistics from a restore operation
|
||||
@@ -92,10 +86,12 @@ func (v *Vaultik) Restore(opts *RestoreOptions) error {
|
||||
|
||||
if len(files) == 0 {
|
||||
log.Warn("No files found to restore")
|
||||
v.UI.Warning("No files found to restore.")
|
||||
return nil
|
||||
}
|
||||
|
||||
log.Info("Found files to restore", "count", len(files))
|
||||
v.UI.Info("Found %s files to restore.", v.UI.Count(len(files)))
|
||||
|
||||
// Step 3: Create target directory
|
||||
if err := v.Fs.MkdirAll(opts.TargetDir, 0755); err != nil {
|
||||
@@ -124,16 +120,16 @@ func (v *Vaultik) Restore(opts *RestoreOptions) error {
|
||||
"duration", result.Duration,
|
||||
)
|
||||
|
||||
v.printfStdout("Restored %d files (%s) in %s\n",
|
||||
result.FilesRestored,
|
||||
humanize.Bytes(uint64(result.BytesRestored)),
|
||||
result.Duration.Round(time.Second),
|
||||
v.UI.Complete("Restored %s files (%s) in %s.",
|
||||
v.UI.Count(result.FilesRestored),
|
||||
v.UI.Size(result.BytesRestored),
|
||||
v.UI.Duration(result.Duration),
|
||||
)
|
||||
|
||||
if result.FilesFailed > 0 {
|
||||
_, _ = fmt.Fprintf(v.Stdout, "\nWARNING: %d file(s) failed to restore:\n", result.FilesFailed)
|
||||
v.UI.Warning("%d file(s) failed to restore:", result.FilesFailed)
|
||||
for _, path := range result.FailedFiles {
|
||||
_, _ = fmt.Fprintf(v.Stdout, " - %s\n", path)
|
||||
v.UI.Detail("%s", v.UI.Path(path))
|
||||
}
|
||||
}
|
||||
|
||||
@@ -164,7 +160,12 @@ func (v *Vaultik) prepareRestoreIdentity() (age.Identity, error) {
|
||||
return identity, nil
|
||||
}
|
||||
|
||||
// restoreAllFiles iterates over files and restores each one, tracking progress and failures
|
||||
// restoreAllFiles processes files in blob-locality order: drain every
|
||||
// file whose blob set is on disk, download the missing blobs for the
|
||||
// pending file with the smallest uncached count, repeat. This keeps
|
||||
// peak cache occupancy near 1 even on snapshots whose path order
|
||||
// interleaves blobs, and lets the sweeper free each blob the moment
|
||||
// its file set is exhausted.
|
||||
func (v *Vaultik) restoreAllFiles(
|
||||
files []*database.File,
|
||||
repos *database.Repositories,
|
||||
@@ -173,56 +174,199 @@ func (v *Vaultik) restoreAllFiles(
|
||||
chunkToBlobMap map[string]*database.BlobChunk,
|
||||
) (*RestoreResult, error) {
|
||||
result := &RestoreResult{}
|
||||
blobCache, err := newBlobDiskCache(4 * v.Config.BlobSizeLimit.Int64())
|
||||
|
||||
// The restore-side blob cache is unbounded — restores may read any
|
||||
// blob many times across deduplicated files and we want to avoid
|
||||
// re-downloading until we can prove a blob is no longer needed.
|
||||
// Cleanup is driven by the sweeper below, not by LRU.
|
||||
blobCache, err := newBlobDiskCache(math.MaxInt64)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("creating blob cache: %w", err)
|
||||
}
|
||||
defer func() { _ = blobCache.Close() }()
|
||||
if v.restoreCacheObserver != nil {
|
||||
v.restoreCacheObserver(blobCache)
|
||||
}
|
||||
defer func() {
|
||||
if v.restoreCacheObserver != nil {
|
||||
v.restoreCacheObserver(blobCache)
|
||||
}
|
||||
_ = blobCache.Close()
|
||||
}()
|
||||
|
||||
// Calculate total bytes for progress bar
|
||||
// Per-restore sweep state: every blob_size_limit/100 bytes written,
|
||||
// scan the cache and delete any blob whose remaining file references
|
||||
// are all already restored.
|
||||
sweeper := newRestoreSweeper(v.ctx, repos, blobCache, v.Config.BlobSizeLimit.Int64()/100)
|
||||
|
||||
// Pre-fetch every blob row once so chunk extraction can map a
|
||||
// blob_id to its hash without a DB round-trip per chunk.
|
||||
blobsByID, err := repos.Blobs.GetAll(v.ctx)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("fetching blob index: %w", err)
|
||||
}
|
||||
blobIDToHash := make(map[string]string, len(blobsByID))
|
||||
blobByHash := make(map[string]*database.Blob, len(blobsByID))
|
||||
for id, blob := range blobsByID {
|
||||
hash := blob.Hash.String()
|
||||
blobIDToHash[id] = hash
|
||||
blobByHash[hash] = blob
|
||||
}
|
||||
|
||||
plan, err := newRestorePlan(v.ctx, repos, files, chunkToBlobMap, blobIDToHash)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("building restore plan: %w", err)
|
||||
}
|
||||
|
||||
// Index files by ID so the loop can look them up by the IDs the
|
||||
// plan hands back.
|
||||
filesByID := make(map[types.FileID]*database.File, len(files))
|
||||
for _, f := range files {
|
||||
filesByID[f.ID] = f
|
||||
}
|
||||
|
||||
// Calculate total bytes expected for percentage / ETA arithmetic.
|
||||
var totalBytesExpected int64
|
||||
for _, file := range files {
|
||||
totalBytesExpected += file.Size
|
||||
}
|
||||
|
||||
// Create progress bar if output is a terminal
|
||||
bar := v.newProgressBar("Restoring", totalBytesExpected)
|
||||
v.UI.Begin("Restoring %s files (%s) to %s.",
|
||||
v.UI.Count(len(files)),
|
||||
v.UI.Size(totalBytesExpected),
|
||||
v.UI.Path(opts.TargetDir))
|
||||
|
||||
for i, file := range files {
|
||||
session := &restoreSession{
|
||||
v: v,
|
||||
ctx: v.ctx,
|
||||
repos: repos,
|
||||
opts: opts,
|
||||
identity: identity,
|
||||
chunkToBlobMap: chunkToBlobMap,
|
||||
blobByHash: blobByHash,
|
||||
blobIDToHash: blobIDToHash,
|
||||
blobCache: blobCache,
|
||||
sweeper: sweeper,
|
||||
result: result,
|
||||
}
|
||||
|
||||
// Periodic progress output, matching the snapshot create cadence.
|
||||
startTime := time.Now()
|
||||
lastStatusTime := startTime
|
||||
const statusInterval = 15 * time.Second
|
||||
|
||||
processed := 0
|
||||
for plan.hasPending() {
|
||||
if v.ctx.Err() != nil {
|
||||
return nil, v.ctx.Err()
|
||||
}
|
||||
|
||||
if err := v.restoreFile(v.ctx, repos, file, opts.TargetDir, identity, chunkToBlobMap, blobCache, result); err != nil {
|
||||
log.Error("Failed to restore file", "path", file.Path, "error", err)
|
||||
result.FilesFailed++
|
||||
result.FailedFiles = append(result.FailedFiles, file.Path.String())
|
||||
// Update progress bar even on failure
|
||||
if bar != nil {
|
||||
_ = bar.Add64(file.Size)
|
||||
fileID, ready := plan.popReady()
|
||||
if !ready {
|
||||
// No file is fully cache-served. First free any blobs
|
||||
// whose file sets are exhausted — without this, the
|
||||
// blob whose last file we just finished would still be
|
||||
// cached when we Put the next one, briefly pushing
|
||||
// peak occupancy from 1 to 2.
|
||||
sweeper.sweep()
|
||||
|
||||
// Pick the pending file with the smallest uncached
|
||||
// blob set and download its blobs. After each blob
|
||||
// lands, the plan moves any pending file whose set
|
||||
// just emptied onto the ready queue.
|
||||
next := plan.pickNextDownload()
|
||||
if next.IsZero() {
|
||||
break
|
||||
}
|
||||
for _, hash := range plan.blobsNeeded(next) {
|
||||
blob, ok := blobByHash[hash]
|
||||
if !ok {
|
||||
return nil, fmt.Errorf("blob hash %s missing from blob index", hash[:16])
|
||||
}
|
||||
if err := session.downloadBlobToCache(hash, blob.CompressedSize); err != nil {
|
||||
return nil, fmt.Errorf("downloading blob %s: %w", hash[:16], err)
|
||||
}
|
||||
result.BlobsDownloaded++
|
||||
result.BytesDownloaded += blob.CompressedSize
|
||||
plan.markBlobCached(hash)
|
||||
}
|
||||
continue
|
||||
}
|
||||
|
||||
// Update progress bar
|
||||
if bar != nil {
|
||||
_ = bar.Add64(file.Size)
|
||||
file := filesByID[fileID]
|
||||
if err := session.restoreFile(file); err != nil {
|
||||
log.Error("Failed to restore file", "path", file.Path, "error", err)
|
||||
if !opts.SkipErrors {
|
||||
return nil, fmt.Errorf("restoring %s: %w (pass --skip-errors to continue past restore failures)", file.Path, err)
|
||||
}
|
||||
v.UI.Error("Failed to restore %s: %v. Skipping (--skip-errors).", v.UI.Path(file.Path.String()), err)
|
||||
result.FilesFailed++
|
||||
result.FailedFiles = append(result.FailedFiles, file.Path.String())
|
||||
plan.finishFile(fileID)
|
||||
continue
|
||||
}
|
||||
|
||||
// Progress logging (for non-terminal or structured logs)
|
||||
if (i+1)%100 == 0 || i+1 == len(files) {
|
||||
// Record the file as restored so the sweeper can free blobs
|
||||
// once all referencing files are done, and drop it from the
|
||||
// plan's indexes so future picks ignore it.
|
||||
sweeper.fileRestored(fileID.String())
|
||||
plan.finishFile(fileID)
|
||||
processed++
|
||||
|
||||
if time.Since(lastStatusTime) >= statusInterval {
|
||||
v.printRestoreProgress(processed, len(files), result.BytesRestored, totalBytesExpected, startTime)
|
||||
lastStatusTime = time.Now()
|
||||
}
|
||||
|
||||
// Structured progress log for --verbose / JSON consumers.
|
||||
if processed%100 == 0 || processed == len(files) {
|
||||
log.Info("Restore progress",
|
||||
"files", fmt.Sprintf("%d/%d", i+1, len(files)),
|
||||
"files", fmt.Sprintf("%d/%d", processed, len(files)),
|
||||
"bytes", humanize.Bytes(uint64(result.BytesRestored)),
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
if bar != nil {
|
||||
_ = bar.Finish()
|
||||
return result, nil
|
||||
}
|
||||
|
||||
// printRestoreProgress emits a periodic restore-phase status line via
|
||||
// the UI writer, mirroring scanner.printProcessingProgress so the two
|
||||
// long-running commands have the same on-screen rhythm.
|
||||
func (v *Vaultik) printRestoreProgress(filesDone, totalFiles int, bytesDone, totalBytes int64, startTime time.Time) {
|
||||
elapsed := time.Since(startTime)
|
||||
pct := float64(bytesDone) / float64(totalBytes) * 100
|
||||
byteRate := float64(bytesDone) / elapsed.Seconds()
|
||||
fileRate := float64(filesDone) / elapsed.Seconds()
|
||||
|
||||
remainingBytes := totalBytes - bytesDone
|
||||
var eta time.Duration
|
||||
if byteRate > 0 && remainingBytes > 0 {
|
||||
eta = time.Duration(float64(remainingBytes)/byteRate) * time.Second
|
||||
}
|
||||
|
||||
return result, nil
|
||||
if eta > 0 {
|
||||
v.UI.Progress("Restore: %s/%s files (%s), %s/%s, %s, %.0f files/sec, restore elapsed: %s, restore ETA: %s (est remain %s).",
|
||||
v.UI.Count(filesDone),
|
||||
v.UI.Count(totalFiles),
|
||||
v.UI.Percent(pct),
|
||||
v.UI.Size(bytesDone),
|
||||
v.UI.Size(totalBytes),
|
||||
v.UI.Speed(byteRate),
|
||||
fileRate,
|
||||
v.UI.Duration(elapsed),
|
||||
v.UI.Time(time.Now().Add(eta)),
|
||||
v.UI.Duration(eta))
|
||||
return
|
||||
}
|
||||
v.UI.Progress("Restore: %s/%s files (%s), %s/%s, %s, %.0f files/sec, restore elapsed: %s.",
|
||||
v.UI.Count(filesDone),
|
||||
v.UI.Count(totalFiles),
|
||||
v.UI.Percent(pct),
|
||||
v.UI.Size(bytesDone),
|
||||
v.UI.Size(totalBytes),
|
||||
v.UI.Speed(byteRate),
|
||||
fileRate,
|
||||
v.UI.Duration(elapsed))
|
||||
}
|
||||
|
||||
// handleRestoreVerification runs post-restore verification if requested
|
||||
@@ -237,24 +381,26 @@ func (v *Vaultik) handleRestoreVerification(
|
||||
}
|
||||
|
||||
if result.FilesFailed > 0 {
|
||||
v.printfStdout("\nVerification FAILED: %d files did not match expected checksums\n", result.FilesFailed)
|
||||
v.UI.Error("Verification failed: %s files did not match expected checksums.",
|
||||
v.UI.Count(result.FilesFailed))
|
||||
for _, path := range result.FailedFiles {
|
||||
v.printfStdout(" - %s\n", path)
|
||||
v.UI.Detail("%s", v.UI.Path(path))
|
||||
}
|
||||
return fmt.Errorf("%d files failed verification", result.FilesFailed)
|
||||
}
|
||||
|
||||
v.printfStdout("Verified %d files (%s)\n",
|
||||
result.FilesVerified,
|
||||
humanize.Bytes(uint64(result.BytesVerified)),
|
||||
)
|
||||
v.UI.Complete("Verified %s files (%s).",
|
||||
v.UI.Count(result.FilesVerified),
|
||||
v.UI.Size(result.BytesVerified))
|
||||
return nil
|
||||
}
|
||||
|
||||
// downloadSnapshotDB downloads and decrypts the snapshot metadata database
|
||||
// downloadSnapshotDB downloads and decrypts the snapshot metadata
|
||||
// database. The snapshotID is the human ID; we hash it to the remote
|
||||
// key for the storage path.
|
||||
func (v *Vaultik) downloadSnapshotDB(snapshotID string, identity age.Identity) (*database.DB, error) {
|
||||
// Download encrypted database from storage
|
||||
dbKey := fmt.Sprintf("metadata/%s/db.zst.age", snapshotID)
|
||||
dbKey := fmt.Sprintf("metadata/%s/db.zst.age", snapshot.RemoteSnapshotKey(snapshotID))
|
||||
|
||||
reader, err := v.Storage.Get(v.ctx, dbKey)
|
||||
if err != nil {
|
||||
@@ -372,209 +518,211 @@ func (v *Vaultik) buildChunkToBlobMap(ctx context.Context, repos *database.Repos
|
||||
return result, rows.Err()
|
||||
}
|
||||
|
||||
// restoreFile restores a single file
|
||||
func (v *Vaultik) restoreFile(
|
||||
ctx context.Context,
|
||||
repos *database.Repositories,
|
||||
file *database.File,
|
||||
targetDir string,
|
||||
identity age.Identity,
|
||||
chunkToBlobMap map[string]*database.BlobChunk,
|
||||
blobCache *blobDiskCache,
|
||||
result *RestoreResult,
|
||||
) error {
|
||||
// Calculate target path - use full original path under target directory
|
||||
targetPath := filepath.Join(targetDir, file.Path.String())
|
||||
|
||||
// Create parent directories
|
||||
parentDir := filepath.Dir(targetPath)
|
||||
if err := v.Fs.MkdirAll(parentDir, 0755); err != nil {
|
||||
return fmt.Errorf("creating parent directory: %w", err)
|
||||
}
|
||||
|
||||
// Handle symlinks
|
||||
if file.IsSymlink() {
|
||||
return v.restoreSymlink(file, targetPath, result)
|
||||
}
|
||||
|
||||
// Handle directories
|
||||
if file.Mode&uint32(os.ModeDir) != 0 {
|
||||
return v.restoreDirectory(file, targetPath, result)
|
||||
}
|
||||
|
||||
// Handle regular files
|
||||
return v.restoreRegularFile(ctx, repos, file, targetPath, identity, chunkToBlobMap, blobCache, result)
|
||||
// restoreSession holds every piece of per-restore state shared by the
|
||||
// restore-time methods. Each restore builds one of these from the
|
||||
// snapshot's metadata and then drives the file loop through methods on
|
||||
// it. Keeping this state on the struct rather than threading it
|
||||
// through every function signature keeps the inner-loop call sites
|
||||
// readable: restoreFile(file) instead of a ten-argument helper.
|
||||
type restoreSession struct {
|
||||
v *Vaultik
|
||||
ctx context.Context
|
||||
repos *database.Repositories
|
||||
opts *RestoreOptions
|
||||
identity age.Identity
|
||||
chunkToBlobMap map[string]*database.BlobChunk
|
||||
blobByHash map[string]*database.Blob
|
||||
blobIDToHash map[string]string
|
||||
blobCache *blobDiskCache
|
||||
sweeper *restoreSweeper
|
||||
result *RestoreResult
|
||||
}
|
||||
|
||||
// restoreSymlink restores a symbolic link
|
||||
func (v *Vaultik) restoreSymlink(file *database.File, targetPath string, result *RestoreResult) error {
|
||||
// Remove existing file if it exists
|
||||
_ = v.Fs.Remove(targetPath)
|
||||
// restoreFile dispatches to the right per-kind restorer.
|
||||
func (s *restoreSession) restoreFile(file *database.File) error {
|
||||
targetPath := filepath.Join(s.opts.TargetDir, file.Path.String())
|
||||
parentDir := filepath.Dir(targetPath)
|
||||
if err := s.v.Fs.MkdirAll(parentDir, 0755); err != nil {
|
||||
return fmt.Errorf("creating parent directory: %w", err)
|
||||
}
|
||||
if file.IsSymlink() {
|
||||
return s.restoreSymlink(file, targetPath)
|
||||
}
|
||||
if file.Mode&uint32(os.ModeDir) != 0 {
|
||||
return s.restoreDirectory(file, targetPath)
|
||||
}
|
||||
return s.restoreRegularFile(file, targetPath)
|
||||
}
|
||||
|
||||
// Create symlink
|
||||
// Note: afero.MemMapFs doesn't support symlinks, so we use os for real filesystems
|
||||
if osFs, ok := v.Fs.(*afero.OsFs); ok {
|
||||
_ = osFs // silence unused variable warning
|
||||
// restoreSymlink restores a symbolic link.
|
||||
func (s *restoreSession) restoreSymlink(file *database.File, targetPath string) error {
|
||||
_ = s.v.Fs.Remove(targetPath)
|
||||
// afero.MemMapFs doesn't support symlinks, so route real-FS
|
||||
// symlinks through os.
|
||||
if _, ok := s.v.Fs.(*afero.OsFs); ok {
|
||||
if err := os.Symlink(file.LinkTarget.String(), targetPath); err != nil {
|
||||
return fmt.Errorf("creating symlink: %w", err)
|
||||
}
|
||||
} else {
|
||||
log.Debug("Symlink creation not supported on this filesystem", "path", file.Path, "target", file.LinkTarget)
|
||||
}
|
||||
|
||||
result.FilesRestored++
|
||||
s.result.FilesRestored++
|
||||
log.Debug("Restored symlink", "path", file.Path, "target", file.LinkTarget)
|
||||
return nil
|
||||
}
|
||||
|
||||
// restoreDirectory restores a directory with proper permissions
|
||||
func (v *Vaultik) restoreDirectory(file *database.File, targetPath string, result *RestoreResult) error {
|
||||
// Create directory
|
||||
if err := v.Fs.MkdirAll(targetPath, os.FileMode(file.Mode)); err != nil {
|
||||
// restoreDirectory restores a directory with its permissions, mtime,
|
||||
// and (on real filesystems, with sufficient privileges) ownership.
|
||||
func (s *restoreSession) restoreDirectory(file *database.File, targetPath string) error {
|
||||
if err := s.v.Fs.MkdirAll(targetPath, os.FileMode(file.Mode)); err != nil {
|
||||
return fmt.Errorf("creating directory: %w", err)
|
||||
}
|
||||
|
||||
// Set permissions
|
||||
if err := v.Fs.Chmod(targetPath, os.FileMode(file.Mode)); err != nil {
|
||||
if err := s.v.Fs.Chmod(targetPath, os.FileMode(file.Mode)); err != nil {
|
||||
log.Debug("Failed to set directory permissions", "path", targetPath, "error", err)
|
||||
}
|
||||
|
||||
// Set ownership (requires root)
|
||||
if osFs, ok := v.Fs.(*afero.OsFs); ok {
|
||||
_ = osFs
|
||||
if _, ok := s.v.Fs.(*afero.OsFs); ok {
|
||||
if err := os.Chown(targetPath, int(file.UID), int(file.GID)); err != nil {
|
||||
log.Debug("Failed to set directory ownership", "path", targetPath, "error", err)
|
||||
}
|
||||
}
|
||||
|
||||
// Set mtime
|
||||
if err := v.Fs.Chtimes(targetPath, file.MTime, file.MTime); err != nil {
|
||||
if err := s.v.Fs.Chtimes(targetPath, file.MTime, file.MTime); err != nil {
|
||||
log.Debug("Failed to set directory mtime", "path", targetPath, "error", err)
|
||||
}
|
||||
|
||||
result.FilesRestored++
|
||||
s.result.FilesRestored++
|
||||
return nil
|
||||
}
|
||||
|
||||
// restoreRegularFile restores a regular file by reconstructing it from chunks
|
||||
func (v *Vaultik) restoreRegularFile(
|
||||
ctx context.Context,
|
||||
repos *database.Repositories,
|
||||
file *database.File,
|
||||
targetPath string,
|
||||
identity age.Identity,
|
||||
chunkToBlobMap map[string]*database.BlobChunk,
|
||||
blobCache *blobDiskCache,
|
||||
result *RestoreResult,
|
||||
) error {
|
||||
// Get file chunks in order
|
||||
fileChunks, err := repos.FileChunks.GetByFileID(ctx, file.ID)
|
||||
// restoreRegularFile reconstructs a regular file by reading chunks
|
||||
// directly out of cached blobs via ReadAt. The expectation when this
|
||||
// method runs is that every blob this file needs is already in the
|
||||
// disk cache — the planner guarantees that by only marking files
|
||||
// "ready" once their full blob set is on disk.
|
||||
func (s *restoreSession) restoreRegularFile(file *database.File, targetPath string) error {
|
||||
fileStart := time.Now()
|
||||
|
||||
t0 := time.Now()
|
||||
fileChunks, err := s.repos.FileChunks.GetByFileID(s.ctx, file.ID)
|
||||
fileChunksQueryDur := time.Since(t0)
|
||||
if err != nil {
|
||||
return fmt.Errorf("getting file chunks: %w", err)
|
||||
}
|
||||
|
||||
// Create output file
|
||||
outFile, err := v.Fs.Create(targetPath)
|
||||
t0 = time.Now()
|
||||
outFile, err := s.v.Fs.Create(targetPath)
|
||||
createDur := time.Since(t0)
|
||||
if err != nil {
|
||||
return fmt.Errorf("creating output file: %w", err)
|
||||
}
|
||||
defer func() { _ = outFile.Close() }()
|
||||
|
||||
// Write chunks in order
|
||||
var bytesWritten int64
|
||||
var (
|
||||
readAtDur time.Duration
|
||||
writeDur time.Duration
|
||||
sweeperDur time.Duration
|
||||
bytesWritten int64
|
||||
)
|
||||
|
||||
for _, fc := range fileChunks {
|
||||
// Find which blob contains this chunk
|
||||
chunkHashStr := fc.ChunkHash.String()
|
||||
blobChunk, ok := chunkToBlobMap[chunkHashStr]
|
||||
blobChunk, ok := s.chunkToBlobMap[chunkHashStr]
|
||||
if !ok {
|
||||
return fmt.Errorf("chunk %s not found in any blob", chunkHashStr[:16])
|
||||
}
|
||||
|
||||
// Get the blob's hash from the database
|
||||
blob, err := repos.Blobs.GetByID(ctx, blobChunk.BlobID.String())
|
||||
if err != nil {
|
||||
return fmt.Errorf("getting blob %s: %w", blobChunk.BlobID, err)
|
||||
}
|
||||
|
||||
// Download and decrypt blob if not cached
|
||||
blobHashStr := blob.Hash.String()
|
||||
blobData, ok := blobCache.Get(blobHashStr)
|
||||
blobHash, ok := s.blobIDToHash[blobChunk.BlobID.String()]
|
||||
if !ok {
|
||||
blobData, err = v.downloadBlob(ctx, blobHashStr, blob.CompressedSize, identity)
|
||||
if err != nil {
|
||||
return fmt.Errorf("downloading blob %s: %w", blobHashStr[:16], err)
|
||||
}
|
||||
if putErr := blobCache.Put(blobHashStr, blobData); putErr != nil {
|
||||
log.Debug("Failed to cache blob on disk", "hash", blobHashStr[:16], "error", putErr)
|
||||
}
|
||||
result.BlobsDownloaded++
|
||||
result.BytesDownloaded += blob.CompressedSize
|
||||
return fmt.Errorf("blob id %s missing from hash index", blobChunk.BlobID)
|
||||
}
|
||||
|
||||
// Extract chunk from blob
|
||||
if blobChunk.Offset+blobChunk.Length > int64(len(blobData)) {
|
||||
return fmt.Errorf("chunk %s extends beyond blob data (offset=%d, length=%d, blob_size=%d)",
|
||||
fc.ChunkHash[:16], blobChunk.Offset, blobChunk.Length, len(blobData))
|
||||
t0 = time.Now()
|
||||
chunkData, err := s.blobCache.ReadAt(blobHash, blobChunk.Offset, blobChunk.Length)
|
||||
readAtDur += time.Since(t0)
|
||||
if err != nil {
|
||||
return fmt.Errorf("reading chunk %s from cached blob %s: %w", fc.ChunkHash[:16], blobHash[:16], err)
|
||||
}
|
||||
chunkData := blobData[blobChunk.Offset : blobChunk.Offset+blobChunk.Length]
|
||||
|
||||
// Write chunk to output file
|
||||
t0 = time.Now()
|
||||
n, err := outFile.Write(chunkData)
|
||||
writeDur += time.Since(t0)
|
||||
if err != nil {
|
||||
return fmt.Errorf("writing chunk: %w", err)
|
||||
}
|
||||
bytesWritten += int64(n)
|
||||
|
||||
t0 = time.Now()
|
||||
s.sweeper.chunkRestored(int64(n))
|
||||
sweeperDur += time.Since(t0)
|
||||
}
|
||||
|
||||
// Close file before setting metadata
|
||||
log.Debug("Restored regular file (timings)",
|
||||
"path", file.Path,
|
||||
"chunks", len(fileChunks),
|
||||
"bytes_written", bytesWritten,
|
||||
"ms_total", time.Since(fileStart).Milliseconds(),
|
||||
"ms_file_chunks_query", fileChunksQueryDur.Milliseconds(),
|
||||
"ms_create", createDur.Milliseconds(),
|
||||
"ms_readat", readAtDur.Milliseconds(),
|
||||
"ms_writes", writeDur.Milliseconds(),
|
||||
"ms_sweeper", sweeperDur.Milliseconds(),
|
||||
)
|
||||
|
||||
if err := outFile.Close(); err != nil {
|
||||
return fmt.Errorf("closing output file: %w", err)
|
||||
}
|
||||
|
||||
// Set permissions
|
||||
if err := v.Fs.Chmod(targetPath, os.FileMode(file.Mode)); err != nil {
|
||||
if err := s.v.Fs.Chmod(targetPath, os.FileMode(file.Mode)); err != nil {
|
||||
log.Debug("Failed to set file permissions", "path", targetPath, "error", err)
|
||||
}
|
||||
|
||||
// Set ownership (requires root)
|
||||
if osFs, ok := v.Fs.(*afero.OsFs); ok {
|
||||
_ = osFs
|
||||
if _, ok := s.v.Fs.(*afero.OsFs); ok {
|
||||
if err := os.Chown(targetPath, int(file.UID), int(file.GID)); err != nil {
|
||||
log.Debug("Failed to set file ownership", "path", targetPath, "error", err)
|
||||
}
|
||||
}
|
||||
|
||||
// Set mtime
|
||||
if err := v.Fs.Chtimes(targetPath, file.MTime, file.MTime); err != nil {
|
||||
if err := s.v.Fs.Chtimes(targetPath, file.MTime, file.MTime); err != nil {
|
||||
log.Debug("Failed to set file mtime", "path", targetPath, "error", err)
|
||||
}
|
||||
|
||||
result.FilesRestored++
|
||||
result.BytesRestored += bytesWritten
|
||||
s.result.FilesRestored++
|
||||
s.result.BytesRestored += bytesWritten
|
||||
|
||||
log.Debug("Restored file", "path", file.Path, "size", humanize.Bytes(uint64(bytesWritten)))
|
||||
return nil
|
||||
}
|
||||
|
||||
// downloadBlob downloads and decrypts a blob
|
||||
func (v *Vaultik) downloadBlob(ctx context.Context, blobHash string, expectedSize int64, identity age.Identity) ([]byte, error) {
|
||||
rc, err := v.FetchAndDecryptBlob(ctx, blobHash, expectedSize, identity)
|
||||
// downloadBlobToCache streams a blob from remote storage straight into
|
||||
// the disk cache, decrypting and decompressing on the fly. The
|
||||
// plaintext never lives fully in memory — io.Copy through
|
||||
// blobDiskCache.PutFromReader uses a 32 KiB buffer regardless of blob
|
||||
// size, which is what makes multi-GB blobs tractable on machines with
|
||||
// less RAM than the blob.
|
||||
func (s *restoreSession) downloadBlobToCache(blobHash string, expectedSize int64) error {
|
||||
start := time.Now()
|
||||
|
||||
t0 := time.Now()
|
||||
rc, err := s.v.FetchAndDecryptBlob(s.ctx, blobHash, expectedSize, s.identity)
|
||||
fetchSetupDur := time.Since(t0)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
return err
|
||||
}
|
||||
|
||||
data, err := io.ReadAll(rc)
|
||||
if err != nil {
|
||||
_ = rc.Close()
|
||||
return nil, fmt.Errorf("reading blob data: %w", err)
|
||||
t0 = time.Now()
|
||||
written, copyErr := s.blobCache.PutFromReader(blobHash, rc)
|
||||
streamDur := time.Since(t0)
|
||||
closeErr := rc.Close()
|
||||
if copyErr != nil {
|
||||
return copyErr
|
||||
}
|
||||
if closeErr != nil {
|
||||
return closeErr
|
||||
}
|
||||
|
||||
// Close triggers hash verification
|
||||
if err := rc.Close(); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return data, nil
|
||||
log.Debug("Streamed blob into disk cache",
|
||||
"hash", blobHash[:16],
|
||||
"compressed_bytes", expectedSize,
|
||||
"plaintext_bytes", written,
|
||||
"ms_total", time.Since(start).Milliseconds(),
|
||||
"ms_fetch_setup", fetchSetupDur.Milliseconds(),
|
||||
"ms_stream_decrypt_decompress", streamDur.Milliseconds(),
|
||||
)
|
||||
return nil
|
||||
}
|
||||
|
||||
// verifyRestoredFiles verifies that all restored files match their expected chunk hashes
|
||||
@@ -606,16 +754,16 @@ func (v *Vaultik) verifyRestoredFiles(
|
||||
"files", len(regularFiles),
|
||||
"bytes", humanize.Bytes(uint64(totalBytes)),
|
||||
)
|
||||
v.printfStdout("\nVerifying %d files (%s)...\n",
|
||||
len(regularFiles),
|
||||
humanize.Bytes(uint64(totalBytes)),
|
||||
)
|
||||
v.UI.Begin("Verifying %s files (%s).",
|
||||
v.UI.Count(len(regularFiles)),
|
||||
v.UI.Size(totalBytes))
|
||||
|
||||
// Create progress bar if output is a terminal
|
||||
bar := v.newProgressBar("Verifying", totalBytes)
|
||||
startTime := time.Now()
|
||||
lastStatusTime := startTime
|
||||
const statusInterval = 15 * time.Second
|
||||
|
||||
// Verify each file
|
||||
for _, file := range regularFiles {
|
||||
var bytesProcessed int64
|
||||
for i, file := range regularFiles {
|
||||
if ctx.Err() != nil {
|
||||
return ctx.Err()
|
||||
}
|
||||
@@ -630,17 +778,14 @@ func (v *Vaultik) verifyRestoredFiles(
|
||||
result.FilesVerified++
|
||||
result.BytesVerified += bytesVerified
|
||||
}
|
||||
bytesProcessed += file.Size
|
||||
|
||||
// Update progress bar
|
||||
if bar != nil {
|
||||
_ = bar.Add64(file.Size)
|
||||
if time.Since(lastStatusTime) >= statusInterval {
|
||||
v.printVerifyProgress(i+1, len(regularFiles), bytesProcessed, totalBytes, startTime)
|
||||
lastStatusTime = time.Now()
|
||||
}
|
||||
}
|
||||
|
||||
if bar != nil {
|
||||
_ = bar.Finish()
|
||||
}
|
||||
|
||||
log.Info("Verification complete",
|
||||
"files_verified", result.FilesVerified,
|
||||
"bytes_verified", humanize.Bytes(uint64(result.BytesVerified)),
|
||||
@@ -650,6 +795,46 @@ func (v *Vaultik) verifyRestoredFiles(
|
||||
return nil
|
||||
}
|
||||
|
||||
// printVerifyProgress emits a periodic verify-phase status line. Same
|
||||
// shape as the restore progress line so user-facing pacing is uniform
|
||||
// across the two phases.
|
||||
func (v *Vaultik) printVerifyProgress(filesDone, totalFiles int, bytesDone, totalBytes int64, startTime time.Time) {
|
||||
elapsed := time.Since(startTime)
|
||||
pct := float64(bytesDone) / float64(totalBytes) * 100
|
||||
byteRate := float64(bytesDone) / elapsed.Seconds()
|
||||
fileRate := float64(filesDone) / elapsed.Seconds()
|
||||
|
||||
remainingBytes := totalBytes - bytesDone
|
||||
var eta time.Duration
|
||||
if byteRate > 0 && remainingBytes > 0 {
|
||||
eta = time.Duration(float64(remainingBytes)/byteRate) * time.Second
|
||||
}
|
||||
|
||||
if eta > 0 {
|
||||
v.UI.Progress("Verify: %s/%s files (%s), %s/%s, %s, %.0f files/sec, verify elapsed: %s, verify ETA: %s (est remain %s).",
|
||||
v.UI.Count(filesDone),
|
||||
v.UI.Count(totalFiles),
|
||||
v.UI.Percent(pct),
|
||||
v.UI.Size(bytesDone),
|
||||
v.UI.Size(totalBytes),
|
||||
v.UI.Speed(byteRate),
|
||||
fileRate,
|
||||
v.UI.Duration(elapsed),
|
||||
v.UI.Time(time.Now().Add(eta)),
|
||||
v.UI.Duration(eta))
|
||||
return
|
||||
}
|
||||
v.UI.Progress("Verify: %s/%s files (%s), %s/%s, %s, %.0f files/sec, verify elapsed: %s.",
|
||||
v.UI.Count(filesDone),
|
||||
v.UI.Count(totalFiles),
|
||||
v.UI.Percent(pct),
|
||||
v.UI.Size(bytesDone),
|
||||
v.UI.Size(totalBytes),
|
||||
v.UI.Speed(byteRate),
|
||||
fileRate,
|
||||
v.UI.Duration(elapsed))
|
||||
}
|
||||
|
||||
// verifyFile verifies a single restored file by checking its chunk hashes
|
||||
func (v *Vaultik) verifyFile(
|
||||
ctx context.Context,
|
||||
@@ -705,38 +890,3 @@ func (v *Vaultik) verifyFile(
|
||||
log.Debug("File verified", "path", file.Path, "bytes", bytesVerified, "chunks", len(fileChunks))
|
||||
return bytesVerified, nil
|
||||
}
|
||||
|
||||
// newProgressBar creates a terminal-aware progress bar with standard options.
|
||||
// It returns nil if stdout is not a terminal.
|
||||
func (v *Vaultik) newProgressBar(description string, total int64) *progressbar.ProgressBar {
|
||||
if !v.isTerminal() {
|
||||
return nil
|
||||
}
|
||||
return progressbar.NewOptions64(
|
||||
total,
|
||||
progressbar.OptionSetDescription(description),
|
||||
progressbar.OptionSetWriter(v.Stderr),
|
||||
progressbar.OptionShowBytes(true),
|
||||
progressbar.OptionShowCount(),
|
||||
progressbar.OptionSetWidth(progressBarWidth),
|
||||
progressbar.OptionThrottle(progressBarThrottle),
|
||||
progressbar.OptionOnCompletion(func() {
|
||||
v.printfStderr("\n")
|
||||
}),
|
||||
progressbar.OptionSetRenderBlankState(true),
|
||||
)
|
||||
}
|
||||
|
||||
// isTerminal returns true if stdout is a terminal.
|
||||
// It checks whether v.Stdout implements Fd() (i.e. is an *os.File),
|
||||
// and falls back to false for non-file writers (e.g. in tests).
|
||||
func (v *Vaultik) isTerminal() bool {
|
||||
type fder interface {
|
||||
Fd() uintptr
|
||||
}
|
||||
f, ok := v.Stdout.(fder)
|
||||
if !ok {
|
||||
return false
|
||||
}
|
||||
return term.IsTerminal(int(f.Fd()))
|
||||
}
|
||||
|
||||
315
internal/vaultik/restore_locality_test.go
Normal file
315
internal/vaultik/restore_locality_test.go
Normal file
@@ -0,0 +1,315 @@
|
||||
package vaultik
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"context"
|
||||
"crypto/rand"
|
||||
"fmt"
|
||||
"io"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"sort"
|
||||
"sync"
|
||||
"testing"
|
||||
|
||||
"github.com/spf13/afero"
|
||||
"github.com/stretchr/testify/assert"
|
||||
"github.com/stretchr/testify/require"
|
||||
"sneak.berlin/go/vaultik/internal/config"
|
||||
"sneak.berlin/go/vaultik/internal/database"
|
||||
"sneak.berlin/go/vaultik/internal/log"
|
||||
"sneak.berlin/go/vaultik/internal/snapshot"
|
||||
"sneak.berlin/go/vaultik/internal/storage"
|
||||
"sneak.berlin/go/vaultik/internal/ui"
|
||||
)
|
||||
|
||||
// TestRestoreLocalityAndReadAt asserts three properties of the restore
|
||||
// hot path that together produce acceptable throughput on real-world
|
||||
// snapshots. All three currently fail on main:
|
||||
//
|
||||
// 1. Peak blob cache occupancy ≤ 1.
|
||||
// Restore order must respect blob locality: every file fully
|
||||
// contained within the currently cached blob should be restored
|
||||
// before any other blob is downloaded. The sweeper then frees
|
||||
// each blob as soon as its file set is exhausted. Without smart
|
||||
// ordering, path-order interleaves blobs and the cache holds
|
||||
// every touched blob until the last file referencing it lands.
|
||||
//
|
||||
// 2. Each remote blob is fetched exactly once.
|
||||
// Counted via wrapping the Storer.
|
||||
//
|
||||
// 3. blobDiskCache.Get is never called during restore.
|
||||
// Chunk extraction from a cached blob must go through ReadAt,
|
||||
// which reads only the chunk's bytes from disk. Get reads the
|
||||
// entire blob (up to 50 GB in production) into memory just to
|
||||
// slice out a few KB — currently the dominant cost in restore.
|
||||
//
|
||||
// The test deliberately constructs an adversarial scenario: three
|
||||
// blobs of up to 5 MiB each (five 1 MiB source files per blob), plus
|
||||
// nine copy files whose path-ordered names interleave the blobs
|
||||
// (cp-001-A, cp-002-B, cp-003-C, cp-004-A, …) so naive path-order
|
||||
// processing would touch every blob before finishing any of them.
|
||||
func TestRestoreLocalityAndReadAt(t *testing.T) {
|
||||
log.Initialize(log.Config{})
|
||||
|
||||
fs := afero.NewOsFs()
|
||||
tempDir, err := os.MkdirTemp("", "vaultik-locality-")
|
||||
require.NoError(t, err)
|
||||
defer func() { _ = os.RemoveAll(tempDir) }()
|
||||
|
||||
dataDir := filepath.Join(tempDir, "source")
|
||||
storeDir := filepath.Join(tempDir, "remote")
|
||||
restoreDir := filepath.Join(tempDir, "restored")
|
||||
dbPath := filepath.Join(tempDir, "index.sqlite")
|
||||
|
||||
require.NoError(t, fs.MkdirAll(dataDir, 0o755))
|
||||
|
||||
// Layout: 15 source files of exactly 1 MiB each. With
|
||||
// chunkSize (avg) = 4 MiB the chunker's minSize is 1 MiB, so any
|
||||
// file of 1 MiB becomes a single chunk. With a 5 MiB blob limit
|
||||
// the packer fits exactly 5 chunks per blob, producing 3 blobs
|
||||
// containing src-001..005, src-006..010, src-011..015.
|
||||
//
|
||||
// Then add 9 "copy" files — byte-for-byte clones of three of the
|
||||
// sources (one from each blob group) — with interleaved names
|
||||
// (cp-001-A, cp-002-B, cp-003-C, cp-004-A, …) so a naive
|
||||
// path-ordered restore would touch all three blobs before
|
||||
// finishing any of them.
|
||||
const (
|
||||
srcBytes = 1024 * 1024
|
||||
srcCount = 15
|
||||
blobsCount = 3
|
||||
perBlob = srcCount / blobsCount
|
||||
)
|
||||
|
||||
type source struct {
|
||||
path string
|
||||
data []byte
|
||||
}
|
||||
sources := make([]*source, srcCount)
|
||||
for i := 0; i < srcCount; i++ {
|
||||
s := &source{
|
||||
path: fmt.Sprintf("src-%03d.bin", i+1),
|
||||
data: randomBytes(t, srcBytes),
|
||||
}
|
||||
sources[i] = s
|
||||
require.NoError(t, afero.WriteFile(fs, filepath.Join(dataDir, s.path), s.data, 0o644))
|
||||
}
|
||||
|
||||
// Pick one representative source per blob group (src-001 → blob
|
||||
// 1, src-006 → blob 2, src-011 → blob 3) and create 3 copies of
|
||||
// each with interleaved alphabetical names.
|
||||
type copyFile struct {
|
||||
path string
|
||||
data []byte
|
||||
sourceBlob int // 0, 1, or 2
|
||||
sourceIndex int // index into sources slice
|
||||
}
|
||||
groupReps := []int{0, perBlob, 2 * perBlob} // 0, 5, 10
|
||||
letters := []byte{'A', 'B', 'C'}
|
||||
var copies []copyFile
|
||||
for i := 0; i < 3; i++ {
|
||||
for j := 0; j < blobsCount; j++ {
|
||||
seq := i*blobsCount + j + 1
|
||||
name := fmt.Sprintf("cp-%03d-%c.bin", seq, letters[j])
|
||||
path := filepath.Join(dataDir, name)
|
||||
src := sources[groupReps[j]]
|
||||
require.NoError(t, afero.WriteFile(fs, path, src.data, 0o644))
|
||||
copies = append(copies, copyFile{path: path, data: src.data, sourceBlob: j, sourceIndex: groupReps[j]})
|
||||
}
|
||||
}
|
||||
|
||||
// chunkSize avg = 4 MiB makes minSize = 1 MiB, so a 1 MiB file
|
||||
// becomes one chunk. maxBlobSize = 5 MiB packs exactly 5 chunks
|
||||
// per blob, yielding 3 blobs from 15 source files.
|
||||
chunkSize := int64(4 * 1024 * 1024)
|
||||
maxBlobSize := int64(5 * 1024 * 1024)
|
||||
|
||||
storer, err := storage.NewFileStorer(storeDir)
|
||||
require.NoError(t, err)
|
||||
|
||||
agePublicKey := "age1ezrjmfpwsc95svdg0y54mums3zevgzu0x0ecq2f7tp8a05gl0sjq9q9wjg"
|
||||
ageSecretKey := "AGE-SECRET-KEY-19CR5YSFW59HM4TLD6GXVEDMZFTVVF7PPHKUT68TXSFPK7APHXA2QS2NJA5"
|
||||
|
||||
cfg := &config.Config{
|
||||
AgeRecipients: []string{agePublicKey},
|
||||
AgeSecretKey: ageSecretKey,
|
||||
CompressionLevel: 3,
|
||||
Hostname: "test-host",
|
||||
BlobSizeLimit: config.Size(maxBlobSize),
|
||||
}
|
||||
|
||||
ctx := context.Background()
|
||||
|
||||
db, err := database.New(ctx, dbPath)
|
||||
require.NoError(t, err)
|
||||
defer func() { _ = db.Close() }()
|
||||
|
||||
repos := database.NewRepositories(db)
|
||||
|
||||
sm := snapshot.NewSnapshotManager(snapshot.SnapshotManagerParams{
|
||||
Repos: repos,
|
||||
Storage: storer,
|
||||
Config: cfg,
|
||||
})
|
||||
sm.SetFilesystem(fs)
|
||||
|
||||
scanner := snapshot.NewScanner(snapshot.ScannerConfig{
|
||||
FS: fs,
|
||||
Storage: storer,
|
||||
ChunkSize: chunkSize,
|
||||
MaxBlobSize: maxBlobSize,
|
||||
CompressionLevel: cfg.CompressionLevel,
|
||||
AgeRecipients: cfg.AgeRecipients,
|
||||
Repositories: repos,
|
||||
})
|
||||
|
||||
snapshotID, err := sm.CreateSnapshotWithName(ctx, cfg.Hostname, "locality", "test-version", "test-git")
|
||||
require.NoError(t, err)
|
||||
|
||||
_, err = scanner.Scan(ctx, dataDir, snapshotID)
|
||||
require.NoError(t, err)
|
||||
|
||||
require.NoError(t, sm.CompleteSnapshot(ctx, snapshotID))
|
||||
require.NoError(t, sm.ExportSnapshotMetadata(ctx, dbPath, snapshotID))
|
||||
|
||||
blobsOnDisk := listBlobKeys(t, storeDir)
|
||||
t.Logf("backup produced %d blobs", len(blobsOnDisk))
|
||||
require.GreaterOrEqual(t, len(blobsOnDisk), 3, "expected at least 3 blobs from 3 filler groups")
|
||||
|
||||
require.NoError(t, db.Close())
|
||||
|
||||
// Wrap the storer so we can count downloads per blob key.
|
||||
counter := newCountingStorer(storer)
|
||||
|
||||
// Capture the restore-side cache for instrumentation inspection.
|
||||
// The observer fires twice (immediately after creation and
|
||||
// immediately before close) so we read PeakLen and call counters
|
||||
// from the same instance the production code used.
|
||||
var cacheRef *blobDiskCache
|
||||
v := &Vaultik{
|
||||
Config: cfg,
|
||||
Storage: counter,
|
||||
Fs: fs,
|
||||
Stdout: io.Discard,
|
||||
Stderr: io.Discard,
|
||||
UI: ui.NewWithColor(io.Discard, false),
|
||||
restoreCacheObserver: func(c *blobDiskCache) {
|
||||
cacheRef = c
|
||||
},
|
||||
}
|
||||
v.SetContext(ctx)
|
||||
|
||||
require.NoError(t, v.Restore(&RestoreOptions{
|
||||
SnapshotID: snapshotID,
|
||||
TargetDir: restoreDir,
|
||||
}))
|
||||
|
||||
require.NotNil(t, cacheRef, "restoreCacheObserver must fire during restore")
|
||||
|
||||
// Verify restored content matches.
|
||||
for _, s := range sources {
|
||||
restored := filepath.Join(restoreDir, dataDir, s.path)
|
||||
got, err := afero.ReadFile(fs, restored)
|
||||
require.NoErrorf(t, err, "source missing after restore: %s", s.path)
|
||||
require.Truef(t, bytes.Equal(got, s.data), "byte mismatch for source %s", s.path)
|
||||
}
|
||||
for _, c := range copies {
|
||||
restored := filepath.Join(restoreDir, c.path)
|
||||
got, err := afero.ReadFile(fs, restored)
|
||||
require.NoErrorf(t, err, "copy missing after restore: %s", c.path)
|
||||
require.Truef(t, bytes.Equal(got, c.data), "byte mismatch for copy %s", c.path)
|
||||
}
|
||||
|
||||
// (1) Each blob fetched exactly once.
|
||||
for key, n := range counter.snapshot() {
|
||||
if !filterBlobKey(key) {
|
||||
continue
|
||||
}
|
||||
assert.Equalf(t, 1, n, "blob %s fetched %d times, want exactly 1", key, n)
|
||||
}
|
||||
|
||||
// (2) Peak cache size ≤ 1. The sweeper plus locality-aware
|
||||
// ordering should free each blob before the next one downloads.
|
||||
assert.LessOrEqualf(t, cacheRef.PeakLen(), 1,
|
||||
"peak cached blobs was %d; expected ≤ 1 with locality-ordered restore", cacheRef.PeakLen())
|
||||
|
||||
// (3) Cache.Get must never be called during restore — chunk
|
||||
// extraction has to go through ReadAt so we never read the whole
|
||||
// blob from disk to grab a few KB slice.
|
||||
assert.Equalf(t, 0, cacheRef.GetCalls(),
|
||||
"blobDiskCache.Get was called %d times during restore; restore must use ReadAt exclusively", cacheRef.GetCalls())
|
||||
|
||||
t.Logf("blob cache stats: peak_len=%d get_calls=%d readat_calls=%d",
|
||||
cacheRef.PeakLen(), cacheRef.GetCalls(), cacheRef.ReadAtCalls())
|
||||
}
|
||||
|
||||
// randomBytes returns n bytes of random data. Used to make sure the
|
||||
// chunker picks non-degenerate FastCDC boundaries.
|
||||
func randomBytes(t *testing.T, n int) []byte {
|
||||
t.Helper()
|
||||
b := make([]byte, n)
|
||||
_, err := rand.Read(b)
|
||||
require.NoError(t, err)
|
||||
return b
|
||||
}
|
||||
|
||||
// listBlobKeys walks the FileStorer blobs/ tree and returns the
|
||||
// relative keys for every blob file present.
|
||||
func listBlobKeys(t *testing.T, storeDir string) []string {
|
||||
t.Helper()
|
||||
var keys []string
|
||||
root := filepath.Join(storeDir, "blobs")
|
||||
err := filepath.Walk(root, func(p string, info os.FileInfo, err error) error {
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if info.IsDir() {
|
||||
return nil
|
||||
}
|
||||
rel, _ := filepath.Rel(storeDir, p)
|
||||
keys = append(keys, rel)
|
||||
return nil
|
||||
})
|
||||
require.NoError(t, err)
|
||||
sort.Strings(keys)
|
||||
return keys
|
||||
}
|
||||
|
||||
// filterBlobKey reports whether key names an object underneath the
// blobs/ prefix, as opposed to a snapshot metadata path. The bare
// prefix "blobs/" with nothing after it is not treated as a blob key.
func filterBlobKey(key string) bool {
	const blobPrefix = "blobs/"
	if len(key) <= len(blobPrefix) {
		return false
	}
	return key[:len(blobPrefix)] == blobPrefix
}
|
||||
|
||||
// countingStorerInternal wraps a storage.Storer and records the number
|
||||
// of Get calls per key, so the locality test can assert each blob is
|
||||
// fetched exactly once. Defined here (rather than reusing the one in
|
||||
// the integration_test package) because this test lives in package
|
||||
// vaultik for access to unexported cache internals.
|
||||
type countingStorerInternal struct {
|
||||
storage.Storer
|
||||
mu sync.Mutex
|
||||
counts map[string]int
|
||||
}
|
||||
|
||||
func newCountingStorer(inner storage.Storer) *countingStorerInternal {
|
||||
return &countingStorerInternal{Storer: inner, counts: make(map[string]int)}
|
||||
}
|
||||
|
||||
func (c *countingStorerInternal) Get(ctx context.Context, key string) (io.ReadCloser, error) {
|
||||
c.mu.Lock()
|
||||
c.counts[key]++
|
||||
c.mu.Unlock()
|
||||
return c.Storer.Get(ctx, key)
|
||||
}
|
||||
|
||||
func (c *countingStorerInternal) snapshot() map[string]int {
|
||||
c.mu.Lock()
|
||||
defer c.mu.Unlock()
|
||||
out := make(map[string]int, len(c.counts))
|
||||
for k, v := range c.counts {
|
||||
out[k] = v
|
||||
}
|
||||
return out
|
||||
}
|
||||
185
internal/vaultik/restore_plan.go
Normal file
185
internal/vaultik/restore_plan.go
Normal file
@@ -0,0 +1,185 @@
|
||||
package vaultik
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"math"
|
||||
"os"
|
||||
|
||||
"sneak.berlin/go/vaultik/internal/database"
|
||||
"sneak.berlin/go/vaultik/internal/types"
|
||||
)
|
||||
|
||||
// restorePlan orders restore-time file processing by blob locality. The
|
||||
// goal is to keep the blob disk cache occupancy as small as possible:
|
||||
// download one blob, drain every file referencing only that blob, let
|
||||
// the sweeper free the blob, then move on. Files that span multiple
|
||||
// blobs are processed when their full blob set is on disk.
|
||||
//
|
||||
// The plan keeps two indexes:
|
||||
//
|
||||
// - fileBlobs: for each pending file, the set of blob hashes it
|
||||
// still needs that are NOT yet in the cache. Files with an empty
|
||||
// set are "ready" — they can be restored from the current cache
|
||||
// with no further downloads.
|
||||
// - blobFiles: for each blob, the set of pending files referencing
|
||||
// it. Used to short-circuit "when this blob lands, which files
|
||||
// become ready" without a global scan.
|
||||
type restorePlan struct {
|
||||
fileBlobs map[types.FileID]map[string]struct{}
|
||||
blobFiles map[string]map[types.FileID]struct{}
|
||||
ready []types.FileID
|
||||
cached map[string]struct{}
|
||||
}
|
||||
|
||||
// newRestorePlan builds the file→blob index for the given files. Files
|
||||
// whose chunks reference no blobs (symlinks, directories) start in the
|
||||
// ready queue immediately.
|
||||
func newRestorePlan(
|
||||
ctx context.Context,
|
||||
repos *database.Repositories,
|
||||
files []*database.File,
|
||||
chunkToBlobMap map[string]*database.BlobChunk,
|
||||
blobIDToHash map[string]string,
|
||||
) (*restorePlan, error) {
|
||||
p := &restorePlan{
|
||||
fileBlobs: make(map[types.FileID]map[string]struct{}, len(files)),
|
||||
blobFiles: make(map[string]map[types.FileID]struct{}),
|
||||
ready: make([]types.FileID, 0, len(files)),
|
||||
cached: make(map[string]struct{}),
|
||||
}
|
||||
for _, f := range files {
|
||||
if f.IsSymlink() || f.Mode&uint32(os.ModeDir) != 0 {
|
||||
// No chunks to fetch — restore can run immediately.
|
||||
p.fileBlobs[f.ID] = nil
|
||||
p.ready = append(p.ready, f.ID)
|
||||
continue
|
||||
}
|
||||
fileChunks, err := repos.FileChunks.GetByFileID(ctx, f.ID)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("planning %s: %w", f.Path, err)
|
||||
}
|
||||
blobs := make(map[string]struct{})
|
||||
for _, fc := range fileChunks {
|
||||
bc, ok := chunkToBlobMap[fc.ChunkHash.String()]
|
||||
if !ok {
|
||||
return nil, fmt.Errorf("planning %s: chunk %s missing from blob map",
|
||||
f.Path, fc.ChunkHash.String()[:16])
|
||||
}
|
||||
hash, ok := blobIDToHash[bc.BlobID.String()]
|
||||
if !ok {
|
||||
return nil, fmt.Errorf("planning %s: blob id %s missing from id-to-hash map",
|
||||
f.Path, bc.BlobID)
|
||||
}
|
||||
blobs[hash] = struct{}{}
|
||||
}
|
||||
p.fileBlobs[f.ID] = blobs
|
||||
for hash := range blobs {
|
||||
set, ok := p.blobFiles[hash]
|
||||
if !ok {
|
||||
set = make(map[types.FileID]struct{})
|
||||
p.blobFiles[hash] = set
|
||||
}
|
||||
set[f.ID] = struct{}{}
|
||||
}
|
||||
if len(blobs) == 0 {
|
||||
p.ready = append(p.ready, f.ID)
|
||||
}
|
||||
}
|
||||
return p, nil
|
||||
}
|
||||
|
||||
// markBlobCached records that the named blob is now resident in the
|
||||
// disk cache and moves any pending file whose remaining-uncached-blobs
|
||||
// set just dropped to empty onto the ready queue.
|
||||
func (p *restorePlan) markBlobCached(blobHash string) {
|
||||
if _, already := p.cached[blobHash]; already {
|
||||
return
|
||||
}
|
||||
p.cached[blobHash] = struct{}{}
|
||||
for fileID := range p.blobFiles[blobHash] {
|
||||
blobs := p.fileBlobs[fileID]
|
||||
delete(blobs, blobHash)
|
||||
if len(blobs) == 0 {
|
||||
p.ready = append(p.ready, fileID)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// popReady returns the next ready file, removing it from the queue. If
|
||||
// no file is ready, the second return value is false.
|
||||
func (p *restorePlan) popReady() (types.FileID, bool) {
|
||||
if len(p.ready) == 0 {
|
||||
return types.FileID{}, false
|
||||
}
|
||||
id := p.ready[0]
|
||||
p.ready = p.ready[1:]
|
||||
return id, true
|
||||
}
|
||||
|
||||
// finishFile drops a restored file from both indexes so subsequent
|
||||
// planning calls don't reconsider it.
|
||||
func (p *restorePlan) finishFile(fileID types.FileID) {
|
||||
for hash := range p.fileBlobs[fileID] {
|
||||
if set, ok := p.blobFiles[hash]; ok {
|
||||
delete(set, fileID)
|
||||
if len(set) == 0 {
|
||||
delete(p.blobFiles, hash)
|
||||
}
|
||||
}
|
||||
}
|
||||
delete(p.fileBlobs, fileID)
|
||||
// Also scrub the file from any blobFiles entries where it might
|
||||
// still appear even after its uncached-blob set was emptied.
|
||||
for hash, set := range p.blobFiles {
|
||||
if _, ok := set[fileID]; ok {
|
||||
delete(set, fileID)
|
||||
if len(set) == 0 {
|
||||
delete(p.blobFiles, hash)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// pickNextDownload returns the pending file whose remaining-uncached
|
||||
// blob set is smallest (with ties broken by FileID string compare so
|
||||
// the choice is deterministic across runs). This file's blobs are
|
||||
// downloaded next, after which it — together with any other pending
|
||||
// files whose blob sets become empty — moves to the ready queue.
|
||||
//
|
||||
// The zero FileID return means nothing is pending.
|
||||
func (p *restorePlan) pickNextDownload() types.FileID {
|
||||
var best types.FileID
|
||||
bestCount := math.MaxInt
|
||||
var bestID string
|
||||
for id, blobs := range p.fileBlobs {
|
||||
n := len(blobs)
|
||||
if n == 0 {
|
||||
// Already-ready files should have been popped via
|
||||
// popReady; ignore here just in case.
|
||||
continue
|
||||
}
|
||||
idStr := id.String()
|
||||
if n < bestCount || (n == bestCount && (best.IsZero() || idStr < bestID)) {
|
||||
best = id
|
||||
bestCount = n
|
||||
bestID = idStr
|
||||
}
|
||||
}
|
||||
return best
|
||||
}
|
||||
|
||||
// blobsNeeded returns the uncached blob hashes for fileID in any order.
|
||||
func (p *restorePlan) blobsNeeded(fileID types.FileID) []string {
|
||||
blobs := p.fileBlobs[fileID]
|
||||
out := make([]string, 0, len(blobs))
|
||||
for h := range blobs {
|
||||
out = append(out, h)
|
||||
}
|
||||
return out
|
||||
}
|
||||
|
||||
// hasPending reports whether any unfinished files remain.
|
||||
func (p *restorePlan) hasPending() bool {
|
||||
return len(p.fileBlobs) > 0
|
||||
}
|
||||
118
internal/vaultik/restore_sweeper.go
Normal file
118
internal/vaultik/restore_sweeper.go
Normal file
@@ -0,0 +1,118 @@
|
||||
package vaultik
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
|
||||
"sneak.berlin/go/vaultik/internal/database"
|
||||
"sneak.berlin/go/vaultik/internal/log"
|
||||
)
|
||||
|
||||
// restoreSweeper frees cached blobs once all files that reference any of
|
||||
// their chunks have been restored. It works as follows:
|
||||
//
|
||||
// 1. Callers add a file's ID to an in-memory restored set via
|
||||
// fileRestored once the file is fully written to disk.
|
||||
// 2. After each chunk is restored, chunkRestored accumulates a running
|
||||
// byte count.
|
||||
// 3. When the accumulator crosses a threshold (one hundredth of the
|
||||
// configured blob size — so a sweep runs about a hundred times per
|
||||
// blob's worth of restored bytes), the sweeper iterates every key in
|
||||
// the cache. For each cached blob it asks the DB which files
|
||||
// reference any chunk in that blob, then compares that list against
|
||||
// the in-memory restored set. If any referencing file is missing
|
||||
// from the set the blob is kept; otherwise the cache entry is
|
||||
// deleted.
|
||||
//
|
||||
// All DB reads happen against the snapshot's temporary metadata DB,
|
||||
// which is local, indexed, and not under contention — the queries are
|
||||
// cheap and run at most once per blob per sweep interval.
|
||||
type restoreSweeper struct {
|
||||
ctx context.Context
|
||||
repos *database.Repositories
|
||||
cache *blobDiskCache
|
||||
threshold int64
|
||||
bytesAccum int64
|
||||
restored map[string]struct{}
|
||||
}
|
||||
|
||||
// newRestoreSweeper returns a sweeper that triggers eviction every
|
||||
// `threshold` bytes restored. Callers should pass blob_size_limit/100.
|
||||
func newRestoreSweeper(ctx context.Context, repos *database.Repositories, cache *blobDiskCache, threshold int64) *restoreSweeper {
|
||||
if threshold <= 0 {
|
||||
threshold = 1
|
||||
}
|
||||
return &restoreSweeper{
|
||||
ctx: ctx,
|
||||
repos: repos,
|
||||
cache: cache,
|
||||
threshold: threshold,
|
||||
restored: make(map[string]struct{}),
|
||||
}
|
||||
}
|
||||
|
||||
// fileRestored records a file as fully restored. After this call, any
|
||||
// blob whose only remaining references come from files in the restored
|
||||
// set may be evicted on the next sweep.
|
||||
func (s *restoreSweeper) fileRestored(fileID string) {
|
||||
s.restored[fileID] = struct{}{}
|
||||
}
|
||||
|
||||
// chunkRestored accounts n bytes against the sweep threshold and runs a
|
||||
// sweep if the threshold has been crossed since the last sweep.
|
||||
func (s *restoreSweeper) chunkRestored(n int64) {
|
||||
s.bytesAccum += n
|
||||
if s.bytesAccum < s.threshold {
|
||||
return
|
||||
}
|
||||
s.bytesAccum = 0
|
||||
s.sweep()
|
||||
}
|
||||
|
||||
// sweep deletes any cached blob whose chunks are no longer referenced
|
||||
// by an unrestored file. Per-blob DB failures are logged and the blob
|
||||
// is kept — we'd rather hold a blob longer than risk a re-download.
|
||||
func (s *restoreSweeper) sweep() {
|
||||
for _, blobHash := range s.cache.Keys() {
|
||||
needed, err := s.blobStillNeeded(blobHash)
|
||||
if err != nil {
|
||||
log.Debug("sweeper referencing-files query failed", "blob_hash", blobHash[:16], "error", err)
|
||||
continue
|
||||
}
|
||||
if !needed {
|
||||
s.cache.Delete(blobHash)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// blobStillNeeded returns true if any file that references a chunk in
|
||||
// this blob has not yet been restored. On any error the function
|
||||
// returns true — keeping the blob is always the safe answer because we
|
||||
// can't prove we're done with it.
|
||||
func (s *restoreSweeper) blobStillNeeded(blobHash string) (bool, error) {
|
||||
rows, err := s.repos.DB().Conn().QueryContext(s.ctx, `
|
||||
SELECT DISTINCT fc.file_id
|
||||
FROM file_chunks fc
|
||||
JOIN blob_chunks bc ON bc.chunk_hash = fc.chunk_hash
|
||||
JOIN blobs b ON b.id = bc.blob_id
|
||||
WHERE b.blob_hash = ?
|
||||
`, blobHash)
|
||||
if err != nil {
|
||||
return true, fmt.Errorf("querying referencing files: %w", err)
|
||||
}
|
||||
defer func() { _ = rows.Close() }()
|
||||
|
||||
for rows.Next() {
|
||||
var fileID string
|
||||
if err := rows.Scan(&fileID); err != nil {
|
||||
return true, fmt.Errorf("scanning file_id: %w", err)
|
||||
}
|
||||
if _, ok := s.restored[fileID]; !ok {
|
||||
return true, nil
|
||||
}
|
||||
}
|
||||
if err := rows.Err(); err != nil {
|
||||
return true, err
|
||||
}
|
||||
return false, nil
|
||||
}
|
||||
248
internal/vaultik/restore_sweeper_integration_test.go
Normal file
248
internal/vaultik/restore_sweeper_integration_test.go
Normal file
@@ -0,0 +1,248 @@
|
||||
package vaultik_test
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"io"
|
||||
"math/rand"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
"sync"
|
||||
"testing"
|
||||
|
||||
"github.com/spf13/afero"
|
||||
"github.com/stretchr/testify/assert"
|
||||
"github.com/stretchr/testify/require"
|
||||
"sneak.berlin/go/vaultik/internal/config"
|
||||
"sneak.berlin/go/vaultik/internal/database"
|
||||
"sneak.berlin/go/vaultik/internal/log"
|
||||
"sneak.berlin/go/vaultik/internal/snapshot"
|
||||
"sneak.berlin/go/vaultik/internal/storage"
|
||||
"sneak.berlin/go/vaultik/internal/ui"
|
||||
"sneak.berlin/go/vaultik/internal/vaultik"
|
||||
)
|
||||
|
||||
// TestRestoreSweeperEvictsBlobs exercises the reference-counted blob
|
||||
// disk cache eviction during restore.
|
||||
//
|
||||
// The scenario: 30 unique 1 MB random files plus 10 duplicates of those
|
||||
// (40 files total, 30 MB of unique content) get backed up with a 10 MB
|
||||
// blob_size_limit. After backup the snapshot's encrypted blobs are
|
||||
// restored through Vaultik.Restore, and per-key Get counts on the
|
||||
// storage layer are recorded. Each blob in the snapshot MUST be
|
||||
// downloaded exactly once — re-downloads would mean the sweeper either
|
||||
// evicted a blob that was still needed (LRU regression) or that the
|
||||
// cache held nothing at all (broken cache).
|
||||
//
|
||||
// The duplicates ensure deduplicated files share blobs with their
|
||||
// originals; the sweeper must keep each blob alive until BOTH the
|
||||
// original AND every duplicate referencing its chunks have been
|
||||
// restored.
|
||||
func TestRestoreSweeperEvictsBlobs(t *testing.T) {
|
||||
log.Initialize(log.Config{})
|
||||
|
||||
fs := afero.NewOsFs()
|
||||
tempDir, err := os.MkdirTemp("", "vaultik-sweeper-")
|
||||
require.NoError(t, err)
|
||||
defer func() { _ = os.RemoveAll(tempDir) }()
|
||||
|
||||
dataDir := filepath.Join(tempDir, "source")
|
||||
storeDir := filepath.Join(tempDir, "remote")
|
||||
restoreDir := filepath.Join(tempDir, "restored")
|
||||
dbPath := filepath.Join(tempDir, "index.sqlite")
|
||||
|
||||
require.NoError(t, fs.MkdirAll(dataDir, 0o755))
|
||||
|
||||
// Generate 30 unique 1 MB random files. The PRNG seed is fixed so
|
||||
// failures are reproducible; the entropy is what matters here — the
|
||||
// FastCDC chunker needs realistic-looking data to pick chunk
|
||||
// boundaries naturally.
|
||||
const (
|
||||
uniqueFiles = 30
|
||||
duplicateFiles = 10
|
||||
fileSize = 1 * 1024 * 1024
|
||||
)
|
||||
rng := rand.New(rand.NewSource(42))
|
||||
|
||||
type sourceFile struct {
|
||||
path string
|
||||
data []byte
|
||||
}
|
||||
uniques := make([]sourceFile, 0, uniqueFiles)
|
||||
expected := make(map[string][]byte, uniqueFiles+duplicateFiles)
|
||||
|
||||
for i := 0; i < uniqueFiles; i++ {
|
||||
data := make([]byte, fileSize)
|
||||
_, err := rng.Read(data)
|
||||
require.NoError(t, err)
|
||||
path := filepath.Join(dataDir, fmt.Sprintf("unique-%02d.bin", i))
|
||||
require.NoError(t, afero.WriteFile(fs, path, data, 0o644))
|
||||
uniques = append(uniques, sourceFile{path: path, data: data})
|
||||
expected[path] = data
|
||||
}
|
||||
|
||||
// Pick 10 of the originals and copy each to a fresh path so the
|
||||
// chunker dedups them against the originals' blobs.
|
||||
for i, idx := range rng.Perm(uniqueFiles)[:duplicateFiles] {
|
||||
src := uniques[idx]
|
||||
dstPath := filepath.Join(dataDir, fmt.Sprintf("dup-%02d.bin", i))
|
||||
require.NoError(t, afero.WriteFile(fs, dstPath, src.data, 0o644))
|
||||
expected[dstPath] = src.data
|
||||
}
|
||||
|
||||
chunkSize := int64(64 * 1024)
|
||||
maxBlobSize := int64(10 * 1024 * 1024)
|
||||
|
||||
storer, err := storage.NewFileStorer(storeDir)
|
||||
require.NoError(t, err)
|
||||
|
||||
agePublicKey := "age1ezrjmfpwsc95svdg0y54mums3zevgzu0x0ecq2f7tp8a05gl0sjq9q9wjg"
|
||||
ageSecretKey := "AGE-SECRET-KEY-19CR5YSFW59HM4TLD6GXVEDMZFTVVF7PPHKUT68TXSFPK7APHXA2QS2NJA5"
|
||||
|
||||
cfg := &config.Config{
|
||||
AgeRecipients: []string{agePublicKey},
|
||||
AgeSecretKey: ageSecretKey,
|
||||
CompressionLevel: 3,
|
||||
Hostname: "test-host",
|
||||
BlobSizeLimit: config.Size(maxBlobSize),
|
||||
}
|
||||
|
||||
ctx := context.Background()
|
||||
|
||||
db, err := database.New(ctx, dbPath)
|
||||
require.NoError(t, err)
|
||||
defer func() { _ = db.Close() }()
|
||||
|
||||
repos := database.NewRepositories(db)
|
||||
|
||||
sm := snapshot.NewSnapshotManager(snapshot.SnapshotManagerParams{
|
||||
Repos: repos,
|
||||
Storage: storer,
|
||||
Config: cfg,
|
||||
})
|
||||
sm.SetFilesystem(fs)
|
||||
|
||||
scanner := snapshot.NewScanner(snapshot.ScannerConfig{
|
||||
FS: fs,
|
||||
Storage: storer,
|
||||
ChunkSize: chunkSize,
|
||||
MaxBlobSize: maxBlobSize,
|
||||
CompressionLevel: cfg.CompressionLevel,
|
||||
AgeRecipients: cfg.AgeRecipients,
|
||||
Repositories: repos,
|
||||
})
|
||||
|
||||
snapshotID, err := sm.CreateSnapshotWithName(ctx, cfg.Hostname, "sweeper", "test-version", "test-git")
|
||||
require.NoError(t, err)
|
||||
|
||||
scanResult, err := scanner.Scan(ctx, dataDir, snapshotID)
|
||||
require.NoError(t, err)
|
||||
require.Equal(t, uniqueFiles+duplicateFiles, scanResult.FilesScanned)
|
||||
require.Greater(t, scanResult.BlobsCreated, 1, "30 MB of unique data at 10 MB blob size should yield multiple blobs")
|
||||
|
||||
require.NoError(t, sm.CompleteSnapshot(ctx, snapshotID))
|
||||
require.NoError(t, sm.ExportSnapshotMetadata(ctx, dbPath, snapshotID))
|
||||
|
||||
// Count blobs actually present on disk; this is the ground-truth
|
||||
// figure each blob's GET count must equal exactly once.
|
||||
blobCount := countBlobsOnDisk(t, storeDir)
|
||||
require.Greater(t, blobCount, 1, "expected more than one blob")
|
||||
t.Logf("backup produced %d blobs from %d files (%d unique + %d duplicates)",
|
||||
blobCount, uniqueFiles+duplicateFiles, uniqueFiles, duplicateFiles)
|
||||
|
||||
// Force restore to operate without the source-side index, exactly
|
||||
// as a real restore on a fresh machine would.
|
||||
require.NoError(t, db.Close())
|
||||
|
||||
counter := newCountingStorer(storer)
|
||||
|
||||
restoreVaultik := &vaultik.Vaultik{
|
||||
Config: cfg,
|
||||
Storage: counter,
|
||||
Fs: fs,
|
||||
Stdout: io.Discard,
|
||||
Stderr: io.Discard,
|
||||
UI: ui.NewWithColor(io.Discard, false),
|
||||
}
|
||||
restoreVaultik.SetContext(ctx)
|
||||
|
||||
require.NoError(t, restoreVaultik.Restore(&vaultik.RestoreOptions{
|
||||
SnapshotID: snapshotID,
|
||||
TargetDir: restoreDir,
|
||||
}))
|
||||
|
||||
// Verify every restored file byte-matches its source.
|
||||
for origPath, want := range expected {
|
||||
restoredPath := filepath.Join(restoreDir, origPath)
|
||||
got, err := afero.ReadFile(fs, restoredPath)
|
||||
require.NoErrorf(t, err, "restored file missing: %s", restoredPath)
|
||||
require.Equalf(t, want, got, "byte mismatch for %s", origPath)
|
||||
}
|
||||
|
||||
// Each blob must have been downloaded exactly once. >1 means the
|
||||
// sweeper evicted a still-needed blob; 0 means the cache silently
|
||||
// stopped being consulted.
|
||||
blobDownloads := 0
|
||||
for key, count := range counter.snapshot() {
|
||||
if !strings.HasPrefix(key, "blobs/") {
|
||||
continue
|
||||
}
|
||||
assert.Equalf(t, 1, count,
|
||||
"blob %s should have been downloaded exactly once during restore, got %d", key, count)
|
||||
blobDownloads++
|
||||
}
|
||||
assert.Equal(t, blobCount, blobDownloads,
|
||||
"every blob on disk should have been fetched exactly once during restore")
|
||||
t.Logf("restore downloaded %d blobs, each exactly once", blobDownloads)
|
||||
}
|
||||
|
||||
// countingStorer wraps a Storer and records the number of Get calls per
|
||||
// key. Used to verify that the restore-side blob cache + sweeper avoid
|
||||
// re-downloading blobs that are evicted while still needed.
|
||||
type countingStorer struct {
|
||||
storage.Storer
|
||||
mu sync.Mutex
|
||||
counts map[string]int
|
||||
}
|
||||
|
||||
func newCountingStorer(inner storage.Storer) *countingStorer {
|
||||
return &countingStorer{Storer: inner, counts: make(map[string]int)}
|
||||
}
|
||||
|
||||
func (c *countingStorer) Get(ctx context.Context, key string) (io.ReadCloser, error) {
|
||||
c.mu.Lock()
|
||||
c.counts[key]++
|
||||
c.mu.Unlock()
|
||||
return c.Storer.Get(ctx, key)
|
||||
}
|
||||
|
||||
func (c *countingStorer) snapshot() map[string]int {
|
||||
c.mu.Lock()
|
||||
defer c.mu.Unlock()
|
||||
out := make(map[string]int, len(c.counts))
|
||||
for k, v := range c.counts {
|
||||
out[k] = v
|
||||
}
|
||||
return out
|
||||
}
|
||||
|
||||
// countBlobsOnDisk walks the blobs/ tree of a FileStorer-backed store
|
||||
// and returns the total number of blob files. Used to ground-truth the
|
||||
// expected number of restore-time downloads.
|
||||
func countBlobsOnDisk(t *testing.T, storeDir string) int {
|
||||
t.Helper()
|
||||
count := 0
|
||||
root := filepath.Join(storeDir, "blobs")
|
||||
err := filepath.Walk(root, func(_ string, info os.FileInfo, err error) error {
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if !info.IsDir() {
|
||||
count++
|
||||
}
|
||||
return nil
|
||||
})
|
||||
require.NoError(t, err)
|
||||
return count
|
||||
}
|
||||
@@ -8,16 +8,13 @@ import (
|
||||
"regexp"
|
||||
"sort"
|
||||
"strings"
|
||||
"sync"
|
||||
"text/tabwriter"
|
||||
"time"
|
||||
|
||||
"github.com/dustin/go-humanize"
|
||||
"golang.org/x/sync/errgroup"
|
||||
"sneak.berlin/go/vaultik/internal/database"
|
||||
"sneak.berlin/go/vaultik/internal/log"
|
||||
"sneak.berlin/go/vaultik/internal/snapshot"
|
||||
"sneak.berlin/go/vaultik/internal/types"
|
||||
)
|
||||
|
||||
// SnapshotCreateOptions contains options for the snapshot create command
|
||||
@@ -92,8 +89,13 @@ func (v *Vaultik) CreateSnapshot(opts *SnapshotCreateOptions) error {
|
||||
}
|
||||
}
|
||||
|
||||
// Terminus must obey the --cron invariant: silent on total
|
||||
// success only. UI.Complete is dropped in cron/quiet mode (that's
|
||||
// the success path), but if any warnings fired during the run we
|
||||
// emit the summary via UI.Warning so cron actually delivers
|
||||
// something for the user to look at.
|
||||
if v.UI.WarningCount() > 0 {
|
||||
v.UI.Complete("Finished (with %d warnings).", v.UI.WarningCount())
|
||||
v.UI.Warning("Finished with %d warning(s) — review the output above.", v.UI.WarningCount())
|
||||
} else {
|
||||
v.UI.Complete("Finished successfully.")
|
||||
}
|
||||
@@ -306,23 +308,13 @@ func (v *Vaultik) finalizeSnapshotMetadata(snapshotID string, stats *snapshotSta
|
||||
return nil
|
||||
}
|
||||
|
||||
// formatUploadSpeed formats bytes uploaded and duration into a human-readable speed string
|
||||
func formatUploadSpeed(bytesUploaded int64, duration time.Duration) string {
|
||||
// uploadSpeed returns the average network upload rate as a colorized
|
||||
// bits/sec string, or "N/A" when there's no usable data.
|
||||
func (v *Vaultik) uploadSpeed(bytesUploaded int64, duration time.Duration) string {
|
||||
if bytesUploaded <= 0 || duration <= 0 {
|
||||
return "N/A"
|
||||
}
|
||||
bytesPerSec := float64(bytesUploaded) / duration.Seconds()
|
||||
bitsPerSec := bytesPerSec * 8
|
||||
switch {
|
||||
case bitsPerSec >= 1e9:
|
||||
return fmt.Sprintf("%.1f Gbit/s", bitsPerSec/1e9)
|
||||
case bitsPerSec >= 1e6:
|
||||
return fmt.Sprintf("%.0f Mbit/s", bitsPerSec/1e6)
|
||||
case bitsPerSec >= 1e3:
|
||||
return fmt.Sprintf("%.0f Kbit/s", bitsPerSec/1e3)
|
||||
default:
|
||||
return fmt.Sprintf("%.0f bit/s", bitsPerSec)
|
||||
return v.UI.Speed(0)
|
||||
}
|
||||
return v.UI.Speed(float64(bytesUploaded) / duration.Seconds())
|
||||
}
|
||||
|
||||
// printSnapshotSummary prints the comprehensive snapshot completion summary
|
||||
@@ -342,7 +334,7 @@ func (v *Vaultik) printSnapshotSummary(snapshotID string, startTime time.Time, s
|
||||
}
|
||||
|
||||
v.UI.Complete("Created snapshot %s.", v.UI.Snapshot(snapshotID))
|
||||
filesMsg := fmt.Sprintf("Files: %s examined, %s processed, %s unchanged",
|
||||
filesMsg := fmt.Sprintf("Files: %s examined, %s backed up, %s unchanged",
|
||||
v.UI.Count(stats.totalFiles),
|
||||
v.UI.Count(totalFilesChanged),
|
||||
v.UI.Count(stats.totalFilesSkipped))
|
||||
@@ -351,7 +343,7 @@ func (v *Vaultik) printSnapshotSummary(snapshotID string, startTime time.Time, s
|
||||
}
|
||||
v.UI.Detail("%s.", filesMsg)
|
||||
|
||||
dataMsg := fmt.Sprintf("Data: %s total (%s processed)",
|
||||
dataMsg := fmt.Sprintf("Data: %s total (%s backed up)",
|
||||
v.UI.Size(totalBytesAll),
|
||||
v.UI.Size(stats.totalBytes))
|
||||
if stats.totalBytesDeleted > 0 {
|
||||
@@ -368,7 +360,7 @@ func (v *Vaultik) printSnapshotSummary(snapshotID string, startTime time.Time, s
|
||||
stats.totalBlobsUploaded,
|
||||
v.UI.Size(stats.totalBytesUploaded),
|
||||
v.UI.Duration(stats.uploadDuration),
|
||||
formatUploadSpeed(stats.totalBytesUploaded, stats.uploadDuration))
|
||||
v.uploadSpeed(stats.totalBytesUploaded, stats.uploadDuration))
|
||||
}
|
||||
v.UI.Detail("Snapshot create duration: %s.", v.UI.Duration(snapshotDuration))
|
||||
}
|
||||
@@ -388,25 +380,43 @@ func (v *Vaultik) getSnapshotBlobSizes(snapshotID string) (compressed int64, unc
|
||||
return compressed, uncompressed
|
||||
}
|
||||
|
||||
// ListSnapshots lists all snapshots
|
||||
// ListSnapshots prints the table of snapshots, plus any reconciliation
|
||||
// warnings/notes between the local index and the backup destination
|
||||
// store.
|
||||
//
|
||||
// The local index database is always the primary source for the
|
||||
// table — it has the human snapshot IDs, timestamps, and per-snapshot
|
||||
// stats.
|
||||
//
|
||||
// If an age secret key is configured AND remote listing succeeds, we
|
||||
// cross-reference: any local snapshot whose hashed key isn't visible
|
||||
// remotely gets a "local-only" cleanup hint, and any remote key that
|
||||
// doesn't correspond to a known local snapshot gets reported in a
|
||||
// NOTE.
|
||||
//
|
||||
// If no age key is set the local machine is assumed write-only
|
||||
// (backup-only), so we skip remote listing entirely — there's no
|
||||
// value showing keys the user couldn't restore anyway.
|
||||
//
|
||||
// If remote listing fails (unmounted volume, permission denied,
|
||||
// network), we degrade to local-only with a warning. List never
|
||||
// fails just because the destination is unreachable.
|
||||
func (v *Vaultik) ListSnapshots(jsonOutput bool) error {
|
||||
log.Info("Listing snapshots")
|
||||
remoteSnapshots, err := v.listRemoteSnapshotIDs()
|
||||
|
||||
localSnaps, err := v.Repositories.Snapshots.ListRecent(v.ctx, 10000)
|
||||
if err != nil {
|
||||
return err
|
||||
return fmt.Errorf("listing local snapshots: %w", err)
|
||||
}
|
||||
|
||||
localSnapshotMap, err := v.reconcileLocalWithRemote(remoteSnapshots)
|
||||
if err != nil {
|
||||
return err
|
||||
snapshots := make([]SnapshotInfo, 0, len(localSnaps))
|
||||
for _, ls := range localSnaps {
|
||||
if ls.CompletedAt == nil {
|
||||
continue
|
||||
}
|
||||
snapshots = append(snapshots, v.snapshotInfoFromLocal(ls))
|
||||
}
|
||||
|
||||
snapshots, err := v.buildSnapshotInfoList(remoteSnapshots, localSnapshotMap)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// Sort by timestamp (newest first)
|
||||
sort.Slice(snapshots, func(i, j int) bool {
|
||||
return snapshots[i].Timestamp.After(snapshots[j].Timestamp)
|
||||
})
|
||||
@@ -421,159 +431,85 @@ func (v *Vaultik) ListSnapshots(jsonOutput bool) error {
|
||||
return err
|
||||
}
|
||||
|
||||
// Warn about local snapshots that don't exist in remote storage.
|
||||
var stale []string
|
||||
for id := range localSnapshotMap {
|
||||
if !remoteSnapshots[id] {
|
||||
stale = append(stale, id)
|
||||
if v.Config.AgeSecretKey == "" {
|
||||
return nil
|
||||
}
|
||||
|
||||
remoteKeys, err := v.listAllRemoteSnapshotKeys()
|
||||
if err != nil {
|
||||
v.UI.Warning("Could not list backup destination store: %v.", err)
|
||||
return nil
|
||||
}
|
||||
|
||||
localKeys := make(map[string]string, len(localSnaps))
|
||||
for _, ls := range localSnaps {
|
||||
if ls.CompletedAt == nil {
|
||||
continue
|
||||
}
|
||||
localKeys[snapshot.RemoteSnapshotKey(ls.ID.String())] = ls.ID.String()
|
||||
}
|
||||
remoteSet := make(map[string]bool, len(remoteKeys))
|
||||
for _, k := range remoteKeys {
|
||||
remoteSet[k] = true
|
||||
}
|
||||
|
||||
var localOnly []string
|
||||
for key, humanID := range localKeys {
|
||||
if !remoteSet[key] {
|
||||
localOnly = append(localOnly, humanID)
|
||||
}
|
||||
}
|
||||
if len(stale) > 0 {
|
||||
v.UI.Warning("%d local snapshot record(s) not found in backup destination store:", len(stale))
|
||||
for _, id := range stale {
|
||||
var remoteOnlyCount int
|
||||
for key := range remoteSet {
|
||||
if _, ok := localKeys[key]; !ok {
|
||||
remoteOnlyCount++
|
||||
}
|
||||
}
|
||||
|
||||
if len(localOnly) > 0 {
|
||||
v.UI.Warning("%d local snapshot record(s) not found in backup destination store:", len(localOnly))
|
||||
for _, id := range localOnly {
|
||||
v.UI.Info("%s", v.UI.Snapshot(id))
|
||||
}
|
||||
v.UI.Info("Run 'vaultik snapshot cleanup' to remove stale local records.")
|
||||
}
|
||||
if remoteOnlyCount > 0 {
|
||||
v.UI.Notice("NOTE: %d remote snapshot(s) found in backup destination store but not in local database.", remoteOnlyCount)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// listRemoteSnapshotIDs returns a set of snapshot IDs found in remote storage
|
||||
func (v *Vaultik) listRemoteSnapshotIDs() (map[string]bool, error) {
|
||||
remoteSnapshots := make(map[string]bool)
|
||||
objectCh := v.Storage.ListStream(v.ctx, "metadata/")
|
||||
// snapshotInfoFromLocal builds a SnapshotInfo row from a local snapshot
|
||||
// record. Failures from any per-snapshot stat query degrade that
|
||||
// column to its snapshot-row fallback but never fail the listing.
|
||||
func (v *Vaultik) snapshotInfoFromLocal(ls *database.Snapshot) SnapshotInfo {
|
||||
idStr := ls.ID.String()
|
||||
|
||||
for object := range objectCh {
|
||||
if object.Err != nil {
|
||||
return nil, fmt.Errorf("listing remote snapshots: %w", object.Err)
|
||||
}
|
||||
|
||||
parts := strings.Split(object.Key, "/")
|
||||
if len(parts) >= 2 && parts[0] == "metadata" && parts[1] != "" {
|
||||
if strings.HasPrefix(parts[1], ".") {
|
||||
continue
|
||||
}
|
||||
remoteSnapshots[parts[1]] = true
|
||||
}
|
||||
}
|
||||
|
||||
return remoteSnapshots, nil
|
||||
}
|
||||
|
||||
// reconcileLocalWithRemote builds a map of local snapshots keyed by ID for cross-referencing with remote
|
||||
func (v *Vaultik) reconcileLocalWithRemote(remoteSnapshots map[string]bool) (map[string]*database.Snapshot, error) {
|
||||
localSnapshots, err := v.Repositories.Snapshots.ListRecent(v.ctx, 10000)
|
||||
totalSize, err := v.Repositories.Snapshots.GetSnapshotTotalCompressedSize(v.ctx, idStr)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("listing local snapshots: %w", err)
|
||||
log.Warn("Failed to get total compressed size", "id", idStr, "error", err)
|
||||
totalSize = ls.BlobSize
|
||||
}
|
||||
|
||||
localSnapshotMap := make(map[string]*database.Snapshot)
|
||||
for _, s := range localSnapshots {
|
||||
localSnapshotMap[s.ID.String()] = s
|
||||
uncompressedSize, err := v.Repositories.Snapshots.GetSnapshotUncompressedChunkSize(v.ctx, idStr)
|
||||
if err != nil {
|
||||
log.Warn("Failed to get uncompressed chunk size", "id", idStr, "error", err)
|
||||
}
|
||||
|
||||
return localSnapshotMap, nil
|
||||
}
|
||||
|
||||
// buildSnapshotInfoList constructs SnapshotInfo entries from remote IDs and local data
|
||||
func (v *Vaultik) buildSnapshotInfoList(remoteSnapshots map[string]bool, localSnapshotMap map[string]*database.Snapshot) ([]SnapshotInfo, error) {
|
||||
snapshots := make([]SnapshotInfo, 0, len(remoteSnapshots))
|
||||
|
||||
// remoteOnly collects snapshot IDs that need a manifest download.
|
||||
var remoteOnly []string
|
||||
|
||||
for snapshotID := range remoteSnapshots {
|
||||
if localSnap, exists := localSnapshotMap[snapshotID]; exists && localSnap.CompletedAt != nil {
|
||||
totalSize, err := v.Repositories.Snapshots.GetSnapshotTotalCompressedSize(v.ctx, snapshotID)
|
||||
if err != nil {
|
||||
log.Warn("Failed to get total compressed size", "id", snapshotID, "error", err)
|
||||
totalSize = localSnap.BlobSize
|
||||
}
|
||||
|
||||
snapshots = append(snapshots, SnapshotInfo{
|
||||
ID: localSnap.ID,
|
||||
Timestamp: localSnap.StartedAt,
|
||||
CompressedSize: totalSize,
|
||||
})
|
||||
} else {
|
||||
timestamp, err := parseSnapshotTimestamp(snapshotID)
|
||||
if err != nil {
|
||||
log.Warn("Failed to parse snapshot timestamp", "id", snapshotID, "error", err)
|
||||
continue
|
||||
}
|
||||
|
||||
// Pre-add with zero size; will be filled by concurrent downloads.
|
||||
snapshots = append(snapshots, SnapshotInfo{
|
||||
ID: types.SnapshotID(snapshotID),
|
||||
Timestamp: timestamp,
|
||||
CompressedSize: 0,
|
||||
})
|
||||
remoteOnly = append(remoteOnly, snapshotID)
|
||||
}
|
||||
newChunkSize, err := v.Repositories.Snapshots.GetSnapshotNewChunkSize(v.ctx, idStr)
|
||||
if err != nil {
|
||||
log.Warn("Failed to get new chunk size", "id", idStr, "error", err)
|
||||
}
|
||||
|
||||
// Download manifests concurrently for remote-only snapshots.
|
||||
if len(remoteOnly) > 0 {
|
||||
// maxConcurrentManifestDownloads bounds parallel manifest fetches to
|
||||
// avoid overwhelming the S3 endpoint while still being much faster
|
||||
// than serial downloads.
|
||||
const maxConcurrentManifestDownloads = 10
|
||||
|
||||
type manifestResult struct {
|
||||
snapshotID string
|
||||
size int64
|
||||
}
|
||||
|
||||
var (
|
||||
mu sync.Mutex
|
||||
results []manifestResult
|
||||
)
|
||||
|
||||
g, gctx := errgroup.WithContext(v.ctx)
|
||||
g.SetLimit(maxConcurrentManifestDownloads)
|
||||
|
||||
for _, sid := range remoteOnly {
|
||||
g.Go(func() error {
|
||||
manifestPath := fmt.Sprintf("metadata/%s/manifest.json.zst", sid)
|
||||
reader, err := v.Storage.Get(gctx, manifestPath)
|
||||
if err != nil {
|
||||
return fmt.Errorf("downloading manifest for %s: %w", sid, err)
|
||||
}
|
||||
defer func() { _ = reader.Close() }()
|
||||
|
||||
manifest, err := snapshot.DecodeManifest(reader)
|
||||
if err != nil {
|
||||
return fmt.Errorf("decoding manifest for %s: %w", sid, err)
|
||||
}
|
||||
|
||||
mu.Lock()
|
||||
results = append(results, manifestResult{
|
||||
snapshotID: sid,
|
||||
size: manifest.TotalCompressedSize,
|
||||
})
|
||||
mu.Unlock()
|
||||
return nil
|
||||
})
|
||||
}
|
||||
|
||||
if err := g.Wait(); err != nil {
|
||||
return nil, fmt.Errorf("fetching manifest sizes: %w", err)
|
||||
}
|
||||
|
||||
// Build a lookup from results and patch the pre-added entries.
|
||||
sizeMap := make(map[string]int64, len(results))
|
||||
for _, r := range results {
|
||||
sizeMap[r.snapshotID] = r.size
|
||||
}
|
||||
for i := range snapshots {
|
||||
if sz, ok := sizeMap[string(snapshots[i].ID)]; ok {
|
||||
snapshots[i].CompressedSize = sz
|
||||
}
|
||||
}
|
||||
return SnapshotInfo{
|
||||
ID: ls.ID,
|
||||
Timestamp: ls.StartedAt,
|
||||
CompressedSize: totalSize,
|
||||
UncompressedSize: uncompressedSize,
|
||||
NewChunkSize: newChunkSize,
|
||||
LocallyTracked: true,
|
||||
}
|
||||
|
||||
return snapshots, nil
|
||||
}
|
||||
|
||||
// printSnapshotTable renders the snapshot list as a formatted table
|
||||
@@ -603,18 +539,27 @@ func (v *Vaultik) printSnapshotTable(snapshots []SnapshotInfo) error {
|
||||
if _, err := fmt.Fprintln(w, "REMOTE SNAPSHOTS:"); err != nil {
|
||||
return err
|
||||
}
|
||||
if _, err := fmt.Fprintln(w, "SNAPSHOT ID\tTIMESTAMP\tCOMPRESSED SIZE"); err != nil {
|
||||
if _, err := fmt.Fprintln(w, "SNAPSHOT ID\tTIMESTAMP\tCOMPRESSED SIZE\tUNCOMPRESSED SIZE\tNEW CHUNK SIZE"); err != nil {
|
||||
return err
|
||||
}
|
||||
if _, err := fmt.Fprintln(w, "───────────\t─────────\t───────────────"); err != nil {
|
||||
if _, err := fmt.Fprintln(w, "───────────\t─────────\t───────────────\t─────────────────\t──────────────"); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
const remoteOnlyCell = "<remote only>"
|
||||
for _, snap := range snapshots {
|
||||
if _, err := fmt.Fprintf(w, "%s\t%s\t%s\n",
|
||||
uncompressed := remoteOnlyCell
|
||||
newChunks := remoteOnlyCell
|
||||
if snap.LocallyTracked {
|
||||
uncompressed = formatBytes(snap.UncompressedSize)
|
||||
newChunks = formatBytes(snap.NewChunkSize)
|
||||
}
|
||||
if _, err := fmt.Fprintf(w, "%s\t%s\t%s\t%s\t%s\n",
|
||||
snap.ID,
|
||||
snap.Timestamp.Format("2006-01-02 15:04:05"),
|
||||
formatBytes(snap.CompressedSize)); err != nil {
|
||||
formatBytes(snap.CompressedSize),
|
||||
uncompressed,
|
||||
newChunks); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
@@ -750,14 +695,23 @@ func (v *Vaultik) confirmAndExecutePurge(toDelete []SnapshotInfo, force, quiet b
|
||||
if err := v.deleteSnapshotFromLocalDB(snapshotID); err != nil {
|
||||
log.Error("Failed to delete from local database", "snapshot_id", snapshotID, "error", err)
|
||||
}
|
||||
if err := v.deleteSnapshotFromRemote(snapshotID); err != nil {
|
||||
if err := v.deleteRemoteSnapshotByKey(snapshot.RemoteSnapshotKey(snapshotID)); err != nil {
|
||||
return fmt.Errorf("deleting snapshot %s from remote: %w", snapshotID, err)
|
||||
}
|
||||
}
|
||||
|
||||
// Tidy up local DB orphans now so users don't have to run a
|
||||
// separate command after a purge. Guarded against nil for tests
|
||||
// that don't wire up a SnapshotManager.
|
||||
if v.SnapshotManager != nil {
|
||||
if err := v.SnapshotManager.CleanupOrphanedData(v.ctx); err != nil {
|
||||
log.Warn("Failed to clean up orphaned local data after purge", "error", err)
|
||||
}
|
||||
}
|
||||
|
||||
if !quiet {
|
||||
v.printfStdout("Deleted %d snapshot(s)\n", len(toDelete))
|
||||
v.printlnStdout("\nNote: Run 'vaultik prune' to clean up unreferenced blobs.")
|
||||
v.printlnStdout("\nNote: Run 'vaultik prune' to clean up unreferenced remote blobs.")
|
||||
}
|
||||
|
||||
return nil
|
||||
@@ -786,8 +740,9 @@ func (v *Vaultik) VerifySnapshotWithOptions(snapshotID string, opts *VerifyOptio
|
||||
|
||||
v.printVerifyHeader(snapshotID, opts)
|
||||
|
||||
// Download and parse manifest
|
||||
manifest, err := v.downloadManifest(snapshotID)
|
||||
// Download and parse manifest. The caller supplies a human
|
||||
// snapshot ID; we hash it to address remote storage.
|
||||
manifest, err := v.downloadManifestByKey(snapshot.RemoteSnapshotKey(snapshotID))
|
||||
if err != nil {
|
||||
if opts.JSON {
|
||||
result.Status = "failed"
|
||||
@@ -902,12 +857,18 @@ func (v *Vaultik) outputVerifyJSON(result *VerifyResult) error {
|
||||
|
||||
// CleanupLocalSnapshots removes local snapshot records that have no
|
||||
// corresponding metadata in remote storage. These are typically left
|
||||
// behind by incomplete or interrupted backups.
|
||||
// behind by incomplete or interrupted backups. Each local snapshot's
|
||||
// human ID is hashed via RemoteSnapshotKey and compared against the
|
||||
// remote listing.
|
||||
func (v *Vaultik) CleanupLocalSnapshots() error {
|
||||
remoteSnapshots, err := v.listRemoteSnapshotIDs()
|
||||
remoteKeys, err := v.listAllRemoteSnapshotKeys()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
remoteSet := make(map[string]bool, len(remoteKeys))
|
||||
for _, k := range remoteKeys {
|
||||
remoteSet[k] = true
|
||||
}
|
||||
|
||||
localSnapshots, err := v.Repositories.Snapshots.ListRecent(v.ctx, 10000)
|
||||
if err != nil {
|
||||
@@ -917,7 +878,7 @@ func (v *Vaultik) CleanupLocalSnapshots() error {
|
||||
var removed int
|
||||
for _, snap := range localSnapshots {
|
||||
id := snap.ID.String()
|
||||
if !remoteSnapshots[id] {
|
||||
if !remoteSet[snapshot.RemoteSnapshotKey(id)] {
|
||||
v.printfStdout("Removing stale local record: %s\n", id)
|
||||
if err := v.deleteSnapshotFromLocalDB(id); err != nil {
|
||||
log.Error("Failed to delete local snapshot", "snapshot_id", id, "error", err)
|
||||
@@ -937,8 +898,12 @@ func (v *Vaultik) CleanupLocalSnapshots() error {
|
||||
|
||||
// Helper methods that were previously on SnapshotApp
|
||||
|
||||
func (v *Vaultik) downloadManifest(snapshotID string) (*snapshot.Manifest, error) {
|
||||
manifestPath := fmt.Sprintf("metadata/%s/manifest.json.zst", snapshotID)
|
||||
// downloadManifestByKey fetches the manifest at
|
||||
// metadata/<remoteKey>/manifest.json.zst. The remoteKey is the double-
|
||||
// SHA256 derivation produced by snapshot.RemoteSnapshotKey, not the
|
||||
// human snapshot ID. Callers that have a human ID must hash first.
|
||||
func (v *Vaultik) downloadManifestByKey(remoteKey string) (*snapshot.Manifest, error) {
|
||||
manifestPath := fmt.Sprintf("metadata/%s/manifest.json.zst", remoteKey)
|
||||
|
||||
reader, err := v.Storage.Get(v.ctx, manifestPath)
|
||||
if err != nil {
|
||||
@@ -1073,12 +1038,22 @@ func (v *Vaultik) RemoveSnapshot(snapshotID string, opts *RemoveOptions) (*Remov
|
||||
// If --remote, also remove from remote storage
|
||||
if opts.Remote {
|
||||
log.Info("Removing snapshot metadata from remote storage", "snapshot_id", snapshotID)
|
||||
if err := v.deleteSnapshotFromRemote(snapshotID); err != nil {
|
||||
if err := v.deleteRemoteSnapshotByKey(snapshot.RemoteSnapshotKey(snapshotID)); err != nil {
|
||||
return result, fmt.Errorf("removing from remote storage: %w", err)
|
||||
}
|
||||
result.RemoteRemoved = true
|
||||
}
|
||||
|
||||
// Clean up the local rows that just became orphaned (files, chunks,
|
||||
// blob_chunks, blobs no longer referenced by any snapshot). This
|
||||
// used to be a separate `vaultik snapshot prune` step; running it
|
||||
// inline means `snapshot remove` leaves no ghost rows behind.
|
||||
if v.SnapshotManager != nil {
|
||||
if err := v.SnapshotManager.CleanupOrphanedData(v.ctx); err != nil {
|
||||
log.Warn("Failed to clean up orphaned local data after removal", "error", err)
|
||||
}
|
||||
}
|
||||
|
||||
// Output result
|
||||
if opts.JSON {
|
||||
return result, v.outputRemoveJSON(result)
|
||||
@@ -1088,20 +1063,48 @@ func (v *Vaultik) RemoveSnapshot(snapshotID string, opts *RemoveOptions) (*Remov
|
||||
v.printfStdout("Removed snapshot '%s' from local database\n", snapshotID)
|
||||
if opts.Remote {
|
||||
v.printlnStdout("Removed snapshot metadata from remote storage")
|
||||
v.printlnStdout("\nNote: Blobs were not removed. Run 'vaultik prune' to remove orphaned blobs.")
|
||||
v.printlnStdout("\nNote: Remote blobs were not removed. Run 'vaultik prune' to remove orphaned blobs.")
|
||||
}
|
||||
|
||||
return result, nil
|
||||
}
|
||||
|
||||
// RemoveAllSnapshots removes all snapshots from local database and optionally from remote
|
||||
// RemoveAllSnapshots removes every snapshot known to the local
|
||||
// database from the local index, and (with --remote) every snapshot
|
||||
// metadata directory in remote storage. Both sides are processed so a
|
||||
// "remove --all" leaves nothing behind, even when the local DB and
|
||||
// remote storage have diverged.
|
||||
func (v *Vaultik) RemoveAllSnapshots(opts *RemoveOptions) (*RemoveResult, error) {
|
||||
snapshotIDs, err := v.listAllRemoteSnapshotIDs()
|
||||
localSnaps, err := v.localSnapshotIDs()
|
||||
if err != nil {
|
||||
return nil, err
|
||||
return nil, fmt.Errorf("listing local snapshots: %w", err)
|
||||
}
|
||||
|
||||
if len(snapshotIDs) == 0 {
|
||||
// remoteKeys is the set of metadata/<key>/ subdirectories on the
|
||||
// destination store; failures are downgraded to a warning so a
|
||||
// permission-denied or unreachable remote can't block a local-only
|
||||
// remove.
|
||||
remoteKeys, remoteErr := v.listAllRemoteSnapshotKeys()
|
||||
if remoteErr != nil {
|
||||
log.Warn("Could not list remote snapshots", "error", remoteErr)
|
||||
v.UI.Warning("Could not list remote snapshots: %v.", remoteErr)
|
||||
}
|
||||
|
||||
// Anything visible on the remote that doesn't correspond to a
|
||||
// known local human ID is treated as an orphan key — handled only
|
||||
// when --remote is in effect.
|
||||
knownLocalKeys := make(map[string]string, len(localSnaps))
|
||||
for _, id := range localSnaps {
|
||||
knownLocalKeys[snapshot.RemoteSnapshotKey(id)] = id
|
||||
}
|
||||
var orphanRemoteKeys []string
|
||||
for _, key := range remoteKeys {
|
||||
if _, known := knownLocalKeys[key]; !known {
|
||||
orphanRemoteKeys = append(orphanRemoteKeys, key)
|
||||
}
|
||||
}
|
||||
|
||||
if len(localSnaps) == 0 && len(orphanRemoteKeys) == 0 {
|
||||
if !opts.JSON {
|
||||
v.printlnStdout("No snapshots found")
|
||||
}
|
||||
@@ -1109,19 +1112,42 @@ func (v *Vaultik) RemoveAllSnapshots(opts *RemoveOptions) (*RemoveResult, error)
|
||||
}
|
||||
|
||||
if opts.DryRun {
|
||||
return v.handleRemoveAllDryRun(snapshotIDs, opts)
|
||||
return v.handleRemoveAllDryRun(localSnaps, orphanRemoteKeys, opts)
|
||||
}
|
||||
|
||||
return v.executeRemoveAll(snapshotIDs, opts)
|
||||
return v.executeRemoveAll(localSnaps, orphanRemoteKeys, opts)
|
||||
}
|
||||
|
||||
// listAllRemoteSnapshotIDs collects all unique snapshot IDs from remote storage
|
||||
func (v *Vaultik) listAllRemoteSnapshotIDs() ([]string, error) {
|
||||
log.Info("Listing all snapshots")
|
||||
// localSnapshotIDs returns every snapshot ID present in the local
|
||||
// index database, sorted for deterministic iteration. Empty slice if
|
||||
// the database has no Repositories (e.g. tests).
|
||||
func (v *Vaultik) localSnapshotIDs() ([]string, error) {
|
||||
if v.Repositories == nil {
|
||||
return nil, nil
|
||||
}
|
||||
snaps, err := v.Repositories.Snapshots.ListRecent(v.ctx, 100000)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
ids := make([]string, 0, len(snaps))
|
||||
for _, s := range snaps {
|
||||
ids = append(ids, s.ID.String())
|
||||
}
|
||||
sort.Strings(ids)
|
||||
return ids, nil
|
||||
}
|
||||
|
||||
// listAllRemoteSnapshotKeys collects the hashed remote keys
|
||||
// (subdirectories under metadata/) currently present in the
|
||||
// destination store. Returns (nil, err) when the store cannot be
|
||||
// listed; callers must treat that as "no remote info available," not
|
||||
// fatal.
|
||||
func (v *Vaultik) listAllRemoteSnapshotKeys() ([]string, error) {
|
||||
log.Info("Listing all remote snapshots")
|
||||
objectCh := v.Storage.ListStream(v.ctx, "metadata/")
|
||||
|
||||
seen := make(map[string]bool)
|
||||
var snapshotIDs []string
|
||||
var keys []string
|
||||
for object := range objectCh {
|
||||
if object.Err != nil {
|
||||
return nil, fmt.Errorf("listing remote snapshots: %w", object.Err)
|
||||
@@ -1134,30 +1160,36 @@ func (v *Vaultik) listAllRemoteSnapshotIDs() ([]string, error) {
|
||||
continue
|
||||
}
|
||||
if strings.HasSuffix(object.Key, "/") || strings.Contains(object.Key, "/manifest.json.zst") {
|
||||
sid := parts[1]
|
||||
if !seen[sid] {
|
||||
seen[sid] = true
|
||||
snapshotIDs = append(snapshotIDs, sid)
|
||||
key := parts[1]
|
||||
if !seen[key] {
|
||||
seen[key] = true
|
||||
keys = append(keys, key)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return snapshotIDs, nil
|
||||
return keys, nil
|
||||
}
|
||||
|
||||
// handleRemoveAllDryRun handles the dry-run mode for removing all snapshots
|
||||
func (v *Vaultik) handleRemoveAllDryRun(snapshotIDs []string, opts *RemoveOptions) (*RemoveResult, error) {
|
||||
result := &RemoveResult{
|
||||
DryRun: true,
|
||||
SnapshotsRemoved: snapshotIDs,
|
||||
func (v *Vaultik) handleRemoveAllDryRun(localSnaps, orphanRemoteKeys []string, opts *RemoveOptions) (*RemoveResult, error) {
|
||||
result := &RemoveResult{DryRun: true}
|
||||
result.SnapshotsRemoved = append(result.SnapshotsRemoved, localSnaps...)
|
||||
if opts.Remote {
|
||||
result.SnapshotsRemoved = append(result.SnapshotsRemoved, orphanRemoteKeys...)
|
||||
}
|
||||
if !opts.JSON {
|
||||
v.printfStdout("Would remove %d snapshot(s):\n", len(snapshotIDs))
|
||||
for _, id := range snapshotIDs {
|
||||
v.printfStdout("Would remove %d local snapshot(s):\n", len(localSnaps))
|
||||
for _, id := range localSnaps {
|
||||
v.printfStdout(" %s\n", id)
|
||||
}
|
||||
if opts.Remote {
|
||||
if opts.Remote && len(orphanRemoteKeys) > 0 {
|
||||
v.printfStdout("Would also remove %d orphan remote snapshot key(s):\n", len(orphanRemoteKeys))
|
||||
for _, key := range orphanRemoteKeys {
|
||||
v.printfStdout(" %s\n", key)
|
||||
}
|
||||
} else if opts.Remote {
|
||||
v.printlnStdout("Would also remove from remote storage")
|
||||
}
|
||||
v.printlnStdout("[Dry run - no changes made]")
|
||||
@@ -1168,17 +1200,19 @@ func (v *Vaultik) handleRemoveAllDryRun(snapshotIDs []string, opts *RemoveOption
|
||||
return result, nil
|
||||
}
|
||||
|
||||
// executeRemoveAll removes all snapshots from local database and optionally from remote storage
|
||||
func (v *Vaultik) executeRemoveAll(snapshotIDs []string, opts *RemoveOptions) (*RemoveResult, error) {
|
||||
// executeRemoveAll deletes every local snapshot (and, with --remote,
|
||||
// every corresponding remote metadata directory plus any orphan remote
|
||||
// keys that don't match a local snapshot).
|
||||
func (v *Vaultik) executeRemoveAll(localSnaps, orphanRemoteKeys []string, opts *RemoveOptions) (*RemoveResult, error) {
|
||||
// --all requires --force
|
||||
if !opts.Force {
|
||||
return nil, fmt.Errorf("--all requires --force")
|
||||
}
|
||||
|
||||
log.Info("Removing all snapshots", "count", len(snapshotIDs))
|
||||
log.Info("Removing all snapshots", "local_count", len(localSnaps), "orphan_remote_count", len(orphanRemoteKeys))
|
||||
|
||||
result := &RemoveResult{}
|
||||
for _, snapshotID := range snapshotIDs {
|
||||
for _, snapshotID := range localSnaps {
|
||||
log.Info("Removing snapshot", "snapshot_id", snapshotID)
|
||||
|
||||
if err := v.deleteSnapshotFromLocalDB(snapshotID); err != nil {
|
||||
@@ -1187,7 +1221,7 @@ func (v *Vaultik) executeRemoveAll(snapshotIDs []string, opts *RemoveOptions) (*
|
||||
}
|
||||
|
||||
if opts.Remote {
|
||||
if err := v.deleteSnapshotFromRemote(snapshotID); err != nil {
|
||||
if err := v.deleteRemoteSnapshotByKey(snapshot.RemoteSnapshotKey(snapshotID)); err != nil {
|
||||
log.Error("Failed to remove from remote", "snapshot_id", snapshotID, "error", err)
|
||||
continue
|
||||
}
|
||||
@@ -1196,10 +1230,29 @@ func (v *Vaultik) executeRemoveAll(snapshotIDs []string, opts *RemoveOptions) (*
|
||||
result.SnapshotsRemoved = append(result.SnapshotsRemoved, snapshotID)
|
||||
}
|
||||
|
||||
if opts.Remote {
|
||||
for _, key := range orphanRemoteKeys {
|
||||
log.Info("Removing orphan remote snapshot", "remote_key", key)
|
||||
if err := v.deleteRemoteSnapshotByKey(key); err != nil {
|
||||
log.Error("Failed to remove orphan from remote", "remote_key", key, "error", err)
|
||||
continue
|
||||
}
|
||||
result.SnapshotsRemoved = append(result.SnapshotsRemoved, key)
|
||||
}
|
||||
}
|
||||
|
||||
if opts.Remote {
|
||||
result.RemoteRemoved = true
|
||||
}
|
||||
|
||||
// Clean up everything that just became orphaned locally so the
|
||||
// index database doesn't carry 39k ghost rows after a wipe.
|
||||
if v.SnapshotManager != nil {
|
||||
if err := v.SnapshotManager.CleanupOrphanedData(v.ctx); err != nil {
|
||||
log.Warn("Failed to clean up orphaned local data after bulk removal", "error", err)
|
||||
}
|
||||
}
|
||||
|
||||
if opts.JSON {
|
||||
return result, v.outputRemoveJSON(result)
|
||||
}
|
||||
@@ -1207,7 +1260,7 @@ func (v *Vaultik) executeRemoveAll(snapshotIDs []string, opts *RemoveOptions) (*
|
||||
v.printfStdout("Removed %d snapshot(s)\n", len(result.SnapshotsRemoved))
|
||||
if opts.Remote {
|
||||
v.printlnStdout("Removed snapshot metadata from remote storage")
|
||||
v.printlnStdout("\nNote: Blobs were not removed. Run 'vaultik prune' to remove orphaned blobs.")
|
||||
v.printlnStdout("\nNote: Remote blobs were not removed. Run 'vaultik prune' to remove orphaned blobs.")
|
||||
}
|
||||
|
||||
return result, nil
|
||||
@@ -1236,9 +1289,13 @@ func (v *Vaultik) deleteSnapshotFromLocalDB(snapshotID string) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
// deleteSnapshotFromRemote removes snapshot metadata files from remote storage
|
||||
func (v *Vaultik) deleteSnapshotFromRemote(snapshotID string) error {
|
||||
prefix := fmt.Sprintf("metadata/%s/", snapshotID)
|
||||
// deleteRemoteSnapshotByKey removes everything under
|
||||
// metadata/<remoteKey>/ on the destination store. The argument is a
|
||||
// remote key (double-SHA256 derivation), not a human snapshot ID;
|
||||
// callers that have a human ID must hash via snapshot.RemoteSnapshotKey
|
||||
// first.
|
||||
func (v *Vaultik) deleteRemoteSnapshotByKey(remoteKey string) error {
|
||||
prefix := fmt.Sprintf("metadata/%s/", remoteKey)
|
||||
objectCh := v.Storage.ListStream(v.ctx, prefix)
|
||||
|
||||
var objectsToDelete []string
|
||||
|
||||
@@ -44,6 +44,13 @@ type Vaultik struct {
|
||||
// writer wrapping Stdout; the cli layer replaces it with a discarding
|
||||
// writer in --cron mode.
|
||||
UI *ui.Writer
|
||||
|
||||
// restoreCacheObserver, if non-nil, is invoked once with the
|
||||
// restore-side blob disk cache immediately after the cache is
|
||||
// created and again immediately before it is closed. Only
|
||||
// internal-package tests set this; the type is unexported so
|
||||
// callers outside this package can't reach it.
|
||||
restoreCacheObserver func(*blobDiskCache)
|
||||
}
|
||||
|
||||
// VaultikParams contains all parameters for New that can be provided by fx
|
||||
@@ -147,11 +154,6 @@ func (v *Vaultik) printlnStdout(args ...any) {
|
||||
_, _ = fmt.Fprintln(v.Stdout, args...)
|
||||
}
|
||||
|
||||
// printfStderr writes formatted output to stderr.
|
||||
func (v *Vaultik) printfStderr(format string, args ...any) {
|
||||
_, _ = fmt.Fprintf(v.Stderr, format, args...)
|
||||
}
|
||||
|
||||
// scanStdin reads a line of input from stdin.
|
||||
func (v *Vaultik) scanStdin(a ...any) (int, error) {
|
||||
return fmt.Fscanln(v.Stdin, a...)
|
||||
|
||||
@@ -106,8 +106,11 @@ func (v *Vaultik) RunDeepVerify(snapshotID string, opts *VerifyOptions) error {
|
||||
|
||||
// loadVerificationData downloads manifest, database, and blob list for verification
|
||||
func (v *Vaultik) loadVerificationData(snapshotID string, opts *VerifyOptions, result *VerifyResult) (*snapshot.Manifest, *tempDB, []snapshot.BlobInfo, error) {
|
||||
// All remote paths use the hashed key derived from the human ID.
|
||||
remoteKey := snapshot.RemoteSnapshotKey(snapshotID)
|
||||
|
||||
// Download manifest
|
||||
manifestPath := fmt.Sprintf("metadata/%s/manifest.json.zst", snapshotID)
|
||||
manifestPath := fmt.Sprintf("metadata/%s/manifest.json.zst", remoteKey)
|
||||
log.Info("Downloading manifest", "path", manifestPath)
|
||||
if !opts.JSON {
|
||||
v.printfStdout("Downloading manifest...\n")
|
||||
@@ -136,7 +139,7 @@ func (v *Vaultik) loadVerificationData(snapshotID string, opts *VerifyOptions, r
|
||||
}
|
||||
|
||||
// Download and decrypt database
|
||||
dbPath := fmt.Sprintf("metadata/%s/db.zst.age", snapshotID)
|
||||
dbPath := fmt.Sprintf("metadata/%s/db.zst.age", remoteKey)
|
||||
log.Info("Downloading encrypted database", "path", dbPath)
|
||||
dbReader, err := v.Storage.Get(v.ctx, dbPath)
|
||||
if err != nil {
|
||||
|
||||
Reference in New Issue
Block a user