From 1f22b9c603db0f395832562186771b5fa68d4302 Mon Sep 17 00:00:00 2001 From: sneak Date: Wed, 24 Jun 2026 08:55:00 +0200 Subject: [PATCH] Collapse snapshot prune into vaultik prune; auto-clean on removal MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The CLI had two commands named "prune" doing different jobs (local DB orphan cleanup vs. remote blob garbage collection), which was confusing and forced a manual two-step workflow after deleting any snapshot. Single user-facing prune surface is now `vaultik prune`, which calls PruneDatabase (local orphan cleanup) then PruneBlobs (remote unref blob GC). Snapshot deletion paths (snapshot remove, snapshot remove --all, snapshot purge) auto-run CleanupOrphanedData inline so the local index database doesn't accumulate ghost rows after every removal — the user observed ~39k orphaned files and 2 orphaned blobs after a remove --all because that cleanup was previously a separate opt-in command. `snapshot prune` is removed. Also addresses the doc/help-string drift the user audit caught: * cli/prune.go help text used to reference a non-existent `vaultik purge` command. * cli/config.go get/set short/long examples were S3-specific (s3.bucket) when the primary storage configuration is storage_url. * vaultik/info.go printed S3 Bucket/Endpoint/Region labels unconditionally; for file:// or rclone:// users those rows were empty. The Storage Configuration block now prints the storer's Type+Location first, the storage_url string when set, and only emits S3 rows that are actually populated. * vaultik/info.go's "Run 'vaultik prune --remote'" hint referenced a flag that doesn't exist. * vaultik/blobcache.go's doc comment claimed LRU eviction, which is no longer the restore-time policy (the sweeper drives eviction; LRU is the safety-net fallback when maxBytes is finite). * README.md listed `vaultik restore`, `vaultik snapshot prune`, and `s3.bucket` example, all out of date. README's roadmap section is rewritten with concrete pre-1.0 items (security audit, error-condition tests, parallel blob downloads, restart of interrupted restore, …) so the next-steps surface matches what the project actually still needs. The cleanup calls are guarded against a nil SnapshotManager so tests that construct a bare Vaultik struct continue to work. --- README.md | 104 +++++++++++++++++++++++++++------- internal/cli/config.go | 7 ++- internal/cli/prune.go | 19 ++++--- internal/cli/snapshot.go | 59 ------------------- internal/vaultik/blobcache.go | 22 ++++--- internal/vaultik/info.go | 25 ++++++-- internal/vaultik/prune.go | 13 +++++ internal/vaultik/snapshot.go | 33 ++++++++++- 8 files changed, 179 insertions(+), 103 deletions(-) diff --git a/README.md b/README.md index bd5cf65..9bdf67f 100644 --- a/README.md +++ b/README.md @@ -100,9 +100,8 @@ vaultik [--config ] snapshot list [--json] vaultik [--config ] snapshot verify [--deep] [--json] vaultik [--config ] snapshot purge [--keep-latest | --older-than ] [--snapshot ...] [--force] vaultik [--config ] snapshot remove [--dry-run] [--force] [--remote] [--json] -vaultik [--config ] snapshot prune vaultik [--config ] snapshot cleanup -vaultik [--config ] restore [paths...] [--verify] +vaultik [--config ] snapshot restore [paths...] [--verify] vaultik [--config ] prune [--force] [--json] vaultik [--config ] info vaultik [--config ] remote info [--json] @@ -123,7 +122,7 @@ vaultik version ### environment variables -* `VAULTIK_AGE_SECRET_KEY`: Age private key for decryption (required for `restore` and `verify --deep`) +* `VAULTIK_AGE_SECRET_KEY`: Age private key for decryption (required for `snapshot restore` and `snapshot verify --deep`) * `VAULTIK_CONFIG`: Path to config file (overridden by `--config`) * `VAULTIK_INDEX_PATH`: Override local SQLite index path @@ -157,11 +156,13 @@ existing file. Created with mode `0600` since it will contain credentials. **`config edit`**: Open the config file in `$EDITOR` (falls back to `vi`). **`config get`**: Print a config value addressed by dotted YAML path -(e.g. `vaultik config get s3.bucket`). Non-scalar values print as YAML. +(e.g. `vaultik config get storage_url`). Non-scalar values print as YAML. **`config set`**: Set a scalar config value by dotted YAML path -(e.g. `vaultik config set compression_level 9`). Comments and formatting -in the file are preserved; intermediate maps are created as needed. +(e.g. `vaultik config set compression_level 9`, +`vaultik config set storage_url "file:///mnt/backups"`). Comments and +formatting in the file are preserved; intermediate maps are created as +needed. **`snapshot create`**: Perform incremental backup of configured snapshots. * Optional snapshot names argument to create specific snapshots (default: all) @@ -176,7 +177,11 @@ in the file are preserved; intermediate maps are created as needed. * `--keep-newer-than `: With `--prune`, keep snapshots newer than this duration instead of only the latest (e.g. `4w`, `30d`, `6mo`, `1y`) -**`snapshot list`**: List all snapshots with their timestamps and sizes. +**`snapshot list`**: Show every snapshot known to the destination +store with timestamps and three sizes per snapshot (compressed +remote size; total uncompressed chunk size; size of chunks newly +referenced by that snapshot). The uncompressed and "new chunk" +columns show `` for snapshots not in the local index. * `--json`: Output in JSON format **`snapshot verify`**: Verify snapshot integrity. @@ -194,28 +199,31 @@ latest globally). * `--force`: Skip confirmation prompt **`snapshot remove`**: Remove a specific snapshot from the local database. +Automatically cleans up local rows (files, chunks, blobs) that the removed +snapshot was the last referrer for — you don't need a separate prune step +after removal. * `--remote`: Also remove snapshot metadata from remote storage * `--all`: Remove all snapshots (requires `--force`) * `--dry-run`: Show what would be deleted without deleting * `--force`: Skip confirmation prompt * `--json`: Output result as JSON -**`snapshot prune`**: Clean orphaned data from the local database (files, -chunks, blobs not referenced by any snapshot). - **`snapshot cleanup`**: Remove stale local snapshot records that have no corresponding metadata in remote storage. These are typically left behind by incomplete or interrupted backups. Does not touch remote storage. -**`restore`**: Restore files from a backup snapshot. +**`snapshot restore`**: Restore files from a backup snapshot. * Requires `VAULTIK_AGE_SECRET_KEY` environment variable * Optional path arguments to restore specific files/directories (default: all) * Preserves file permissions, timestamps, ownership (ownership requires root), symlinks, and empty directories * `--verify`: After restoring, verify every file's chunk hashes match -**`prune`**: Remove unreferenced blobs from remote storage. -* Scans all snapshot manifests for referenced blobs, deletes any blob not referenced +**`prune`**: Tidy up everything that isn't needed. Removes orphaned local +database rows (files, chunks, blobs no longer referenced by any completed +snapshot) AND deletes unreferenced blobs from remote storage. `snapshot +create --prune`, `snapshot remove`, and `snapshot purge` run the same +cleanup automatically; this is the manual entry point for the same work. * `--force`: Skip confirmation prompt * `--json`: Output stats as JSON @@ -385,13 +393,71 @@ Key fields: ## roadmap -Items for future releases: +Items still to do before / shortly after 1.0. Loosely ordered by +priority. -* Error-condition tests (network failures, disk full, corrupted/missing blobs) -* Parallel blob downloads during restore -* Bandwidth limiting (`--bwlimit`) -* Security audit of encryption implementation -* Man pages and richer `--help` examples +### correctness and operability + +* **Security audit of the encryption implementation.** Pre-1.0 + blocker if we're advertising "secure" at the top of this README. + age + zstd + content-defined chunking is mostly off-the-shelf + pieces, but the seams (key handling, recipient parsing, manifest + trust boundary, restore-time identity validation) need an outside + read. +* **Error-condition tests.** Today's coverage is the happy path + plus a few specific regressions. Need fault-injection coverage: + network failures mid-blob, disk-full during restore, corrupted / + truncated / missing blobs, partial uploads, kill -9 between + manifest and db.zst.age writes. +* **Verify restored content end-to-end in CI.** The current + integration test does this for a small synthetic snapshot but + not at scale. A nightly job against a multi-GB representative + snapshot would catch silent regressions in the chunker, packer, + or restore planner. + +### performance + +* **Parallel blob downloads during restore.** Single-stream right + now. With a fast S3 endpoint and a multi-core machine restore is + bound by per-blob fetch + decrypt + decompress; running N of + those in parallel against the disk cache would close most of the + remaining gap. Needs to interact correctly with the locality + planner and sweeper. +* **Bandwidth limiting (`--bwlimit`).** Both upload and download. + Useful for backing up over a shared link. Tricky to make work + correctly with the parallel-download story. +* **Restart of interrupted restore.** Today restore is restartable + in the sense that re-running it overwrites partial output; it + doesn't resume from where it stopped or skip already-present + files. A `--resume` mode that checks targets before fetching + blobs would matter for very large restores. + +### usability + +* **Man pages and richer `--help` examples.** Cobra generates + basic help; man pages would be a separate target. +* **`--bwlimit` style human-readable size flags** across the + command surface where they're currently raw integers. +* **`vaultik snapshot diff `** — show which files changed + between two snapshots without restoring either. +* **Status reporting hook for `--cron`.** When a backup fails + silently in cron, the user has no idea. A configurable + webhook / email / `notify-send` hook on completion (success and + failure) would close the loop. + +### infrastructure + +* **Cross-machine restore documentation.** The "restore from + another host" workflow works but isn't documented as a + first-class operation in this README. Worth a dedicated section + once it's settled. +* **Schema migrations.** Currently nonexistent — pre-1.0 schema + changes are handled by `vaultik database purge` plus a full + re-scan. Post-1.0 we'll need a migration story to keep existing + index databases usable across upgrades. +* **Storage backend coverage tests.** S3, file://, and rclone:// + all share the Storer interface but the rclone path is the least + exercised in CI. --- diff --git a/internal/cli/config.go b/internal/cli/config.go index 7ac890c..1dc2c4f 100644 --- a/internal/cli/config.go +++ b/internal/cli/config.go @@ -285,7 +285,7 @@ func newConfigEditCommand() *cobra.Command { func newConfigGetCommand() *cobra.Command { return &cobra.Command{ Use: "get ", - Short: "Print a config value by dotted path (e.g. s3.bucket)", + Short: "Print a config value by dotted path (e.g. storage_url, compression_level)", Args: cobra.ExactArgs(1), RunE: func(cmd *cobra.Command, args []string) error { path, err := ResolveConfigPath() @@ -328,9 +328,10 @@ the file back, preserving comments and formatting. Intermediate maps are created as needed. Examples: + vaultik config set storage_url "file:///mnt/backups" + vaultik config set storage_url "s3://bucket/prefix?endpoint=host®ion=us-east-1" vaultik config set compression_level 9 - vaultik config set s3.bucket mybucket - vaultik config set storage_url "file:///mnt/backups"`, + vaultik config set s3.bucket mybucket # legacy S3 fields still supported`, Args: cobra.ExactArgs(2), RunE: func(cmd *cobra.Command, args []string) error { path, err := ResolveConfigPath() diff --git a/internal/cli/prune.go b/internal/cli/prune.go index f4d76b6..b367e9c 100644 --- a/internal/cli/prune.go +++ b/internal/cli/prune.go @@ -16,14 +16,19 @@ func NewPruneCommand() *cobra.Command { cmd := &cobra.Command{ Use: "prune", - Short: "Remove unreferenced blobs", - Long: `Removes blobs that are not referenced by any snapshot. + Short: "Tidy local database and remote storage", + Long: `Removes orphaned data from both the local index database and +unreferenced blobs from the backup destination store. -This command scans all snapshots and their manifests to build a list of -referenced blobs, then removes any blobs in storage that are not in this list. +Local cleanup drops incomplete snapshots and any files, chunks, or +blobs no longer referenced by a completed snapshot. Remote cleanup +scans every snapshot manifest in the destination store, builds the +set of still-referenced blob hashes, and deletes any blob not in that +set. -Use this command after deleting snapshots with 'vaultik purge' to reclaim -storage space.`, +Snapshot create --prune and snapshot remove run the same cleanup +automatically; this command is the manual entry point for the same +work (e.g. after a crashed backup or to reclaim storage).`, Args: cobra.NoArgs, RunE: func(cmd *cobra.Command, args []string) error { // Use unified config resolution @@ -49,7 +54,7 @@ storage space.`, // Start the prune operation in a goroutine go func() { // Run the prune operation - if err := v.PruneBlobs(opts); err != nil { + if err := v.Prune(opts); err != nil { if err != context.Canceled { if !opts.JSON { log.Error("Prune operation failed", "error", err) diff --git a/internal/cli/snapshot.go b/internal/cli/snapshot.go index 376f4c2..c6bf575 100644 --- a/internal/cli/snapshot.go +++ b/internal/cli/snapshot.go @@ -25,7 +25,6 @@ func NewSnapshotCommand() *cobra.Command { cmd.AddCommand(newSnapshotPurgeCommand()) cmd.AddCommand(newSnapshotVerifyCommand()) cmd.AddCommand(newSnapshotRemoveCommand()) - cmd.AddCommand(newSnapshotPruneCommand()) cmd.AddCommand(newSnapshotCleanupCommand()) cmd.AddCommand(newSnapshotRestoreCommand()) @@ -415,64 +414,6 @@ Use --all --force to remove all snapshots.`, return cmd } -// newSnapshotPruneCommand creates the 'snapshot prune' subcommand -func newSnapshotPruneCommand() *cobra.Command { - cmd := &cobra.Command{ - Use: "prune", - Short: "Remove orphaned data from local database", - Long: `Removes orphaned files, chunks, and blobs from the local database. - -This cleans up data that is no longer referenced by any snapshot, which can -accumulate from incomplete backups or deleted snapshots.`, - Args: cobra.NoArgs, - RunE: func(cmd *cobra.Command, args []string) error { - // Use unified config resolution - configPath, err := ResolveConfigPath() - if err != nil { - return err - } - - rootFlags := GetRootFlags() - return RunWithApp(cmd.Context(), AppOptions{ - ConfigPath: configPath, - LogOptions: log.LogOptions{ - Verbose: rootFlags.Verbose, - Debug: rootFlags.Debug, - Quiet: rootFlags.Quiet, - }, - Modules: []fx.Option{}, - Invokes: []fx.Option{ - fx.Invoke(func(v *vaultik.Vaultik, lc fx.Lifecycle) { - lc.Append(fx.Hook{ - OnStart: func(ctx context.Context) error { - go func() { - if _, err := v.PruneDatabase(); err != nil { - if err != context.Canceled { - log.Error("Failed to prune database", "error", err) - ReportError("Failed to prune database: %v", err) - os.Exit(1) - } - } - if err := v.Shutdowner.Shutdown(); err != nil { - log.Error("Failed to shutdown", "error", err) - } - }() - return nil - }, - OnStop: func(ctx context.Context) error { - v.Cancel() - return nil - }, - }) - }), - }, - }) - }, - } - - return cmd -} - // newSnapshotCleanupCommand creates the 'snapshot cleanup' subcommand func newSnapshotCleanupCommand() *cobra.Command { cmd := &cobra.Command{ diff --git a/internal/vaultik/blobcache.go b/internal/vaultik/blobcache.go index 4fbc3ff..7ebbefe 100644 --- a/internal/vaultik/blobcache.go +++ b/internal/vaultik/blobcache.go @@ -16,14 +16,22 @@ type blobDiskCacheEntry struct { next *blobDiskCacheEntry } -// blobDiskCache is an LRU cache that stores blobs on disk instead of in memory. -// Blobs are written to a temp directory keyed by their hash. When total size -// exceeds maxBytes, the least-recently-used entries are evicted (deleted from disk). +// blobDiskCache stores blobs on disk keyed by hash. It exposes ReadAt +// for slice reads (the restore path uses this so chunk extraction +// never reads a whole blob into memory) plus Get/Put for whole-blob +// access. // -// The Get/ReadAt/peak-Len counters are debugging instrumentation used by -// tests to assert that the restore code path uses ReadAt (which reads -// only the requested slice of a blob) rather than Get (which reads the -// full blob into memory). +// Eviction policy is caller-controlled. The cache keeps an LRU list +// internally and will fall back to LRU eviction if curBytes exceeds +// maxBytes. Restore passes math.MaxInt64 as maxBytes and drives +// eviction itself via Delete() through restoreSweeper, which deletes +// each blob the moment every file that references its chunks has been +// written. LRU never fires under that configuration; it is kept as a +// safety net for callers that don't manage eviction themselves. +// +// Get/ReadAt/peak-Len counters are debugging instrumentation used by +// tests to assert that the restore code path uses ReadAt rather than +// Get and to bound peak disk-cache occupancy. type blobDiskCache struct { mu sync.Mutex dir string diff --git a/internal/vaultik/info.go b/internal/vaultik/info.go index 00f8819..ac5f76f 100644 --- a/internal/vaultik/info.go +++ b/internal/vaultik/info.go @@ -22,14 +22,29 @@ func (v *Vaultik) ShowInfo() error { v.printfStdout("Go Version: %s\n", runtime.Version()) v.printlnStdout() - // Storage Configuration + // Storage Configuration. The backend is selected by storage_url + // (s3://, file://, rclone://); the legacy s3.* fields are only + // printed when they're actually populated, since the URL scheme + // is the primary configuration. v.printfStdout("=== Storage Configuration ===\n") - v.printfStdout("S3 Bucket: %s\n", v.Config.S3.Bucket) + storageInfo := v.Storage.Info() + v.printfStdout("Type: %s\n", storageInfo.Type) + v.printfStdout("Location: %s\n", storageInfo.Location) + if v.Config.StorageURL != "" { + v.printfStdout("Storage URL: %s\n", v.Config.StorageURL) + } + if v.Config.S3.Bucket != "" { + v.printfStdout("S3 Bucket: %s\n", v.Config.S3.Bucket) + } if v.Config.S3.Prefix != "" { v.printfStdout("S3 Prefix: %s\n", v.Config.S3.Prefix) } - v.printfStdout("S3 Endpoint: %s\n", v.Config.S3.Endpoint) - v.printfStdout("S3 Region: %s\n", v.Config.S3.Region) + if v.Config.S3.Endpoint != "" { + v.printfStdout("S3 Endpoint: %s\n", v.Config.S3.Endpoint) + } + if v.Config.S3.Region != "" { + v.printfStdout("S3 Region: %s\n", v.Config.S3.Region) + } v.printlnStdout() // Backup Settings @@ -337,7 +352,7 @@ func (v *Vaultik) printRemoteInfoTable(result *RemoteInfoResult) { humanize.Comma(int64(result.OrphanedBlobCount)), humanize.Bytes(uint64(result.OrphanedBlobSize))) if result.OrphanedBlobCount > 0 { - v.printfStdout("\nRun 'vaultik prune --remote' to remove orphaned blobs.\n") + v.printfStdout("\nRun 'vaultik prune' to remove orphaned blobs.\n") } } diff --git a/internal/vaultik/prune.go b/internal/vaultik/prune.go index 4490364..ef86027 100644 --- a/internal/vaultik/prune.go +++ b/internal/vaultik/prune.go @@ -48,6 +48,19 @@ type PruneBlobsResult struct { BytesFreed int64 `json:"bytes_freed"` } +// Prune removes orphaned data from the local index database AND +// unreferenced blobs from the backup destination store. This is the +// single user-facing prune entry point — the split between local and +// remote cleanup is an implementation detail. Calling code should +// prefer this method over PruneDatabase or PruneBlobs individually +// unless it specifically wants one half. +func (v *Vaultik) Prune(opts *PruneOptions) error { + if _, err := v.PruneDatabase(); err != nil { + return fmt.Errorf("pruning local database: %w", err) + } + return v.PruneBlobs(opts) +} + // PruneBlobs removes unreferenced blobs from storage func (v *Vaultik) PruneBlobs(opts *PruneOptions) error { log.Info("Starting prune operation") diff --git a/internal/vaultik/snapshot.go b/internal/vaultik/snapshot.go index fedec9a..1607049 100644 --- a/internal/vaultik/snapshot.go +++ b/internal/vaultik/snapshot.go @@ -768,9 +768,18 @@ func (v *Vaultik) confirmAndExecutePurge(toDelete []SnapshotInfo, force, quiet b } } + // Tidy up local DB orphans now so users don't have to run a + // separate command after a purge. Guarded against nil for tests + // that don't wire up a SnapshotManager. + if v.SnapshotManager != nil { + if err := v.SnapshotManager.CleanupOrphanedData(v.ctx); err != nil { + log.Warn("Failed to clean up orphaned local data after purge", "error", err) + } + } + if !quiet { v.printfStdout("Deleted %d snapshot(s)\n", len(toDelete)) - v.printlnStdout("\nNote: Run 'vaultik prune' to clean up unreferenced blobs.") + v.printlnStdout("\nNote: Run 'vaultik prune' to clean up unreferenced remote blobs.") } return nil @@ -1092,6 +1101,16 @@ func (v *Vaultik) RemoveSnapshot(snapshotID string, opts *RemoveOptions) (*Remov result.RemoteRemoved = true } + // Clean up the local rows that just became orphaned (files, chunks, + // blob_chunks, blobs no longer referenced by any snapshot). This + // used to be a separate `vaultik snapshot prune` step; running it + // inline means `snapshot remove` leaves no ghost rows behind. + if v.SnapshotManager != nil { + if err := v.SnapshotManager.CleanupOrphanedData(v.ctx); err != nil { + log.Warn("Failed to clean up orphaned local data after removal", "error", err) + } + } + // Output result if opts.JSON { return result, v.outputRemoveJSON(result) @@ -1101,7 +1120,7 @@ func (v *Vaultik) RemoveSnapshot(snapshotID string, opts *RemoveOptions) (*Remov v.printfStdout("Removed snapshot '%s' from local database\n", snapshotID) if opts.Remote { v.printlnStdout("Removed snapshot metadata from remote storage") - v.printlnStdout("\nNote: Blobs were not removed. Run 'vaultik prune' to remove orphaned blobs.") + v.printlnStdout("\nNote: Remote blobs were not removed. Run 'vaultik prune' to remove orphaned blobs.") } return result, nil @@ -1213,6 +1232,14 @@ func (v *Vaultik) executeRemoveAll(snapshotIDs []string, opts *RemoveOptions) (* result.RemoteRemoved = true } + // Clean up everything that just became orphaned locally so the + // index database doesn't carry 39k ghost rows after a wipe. + if v.SnapshotManager != nil { + if err := v.SnapshotManager.CleanupOrphanedData(v.ctx); err != nil { + log.Warn("Failed to clean up orphaned local data after bulk removal", "error", err) + } + } + if opts.JSON { return result, v.outputRemoveJSON(result) } @@ -1220,7 +1247,7 @@ func (v *Vaultik) executeRemoveAll(snapshotIDs []string, opts *RemoveOptions) (* v.printfStdout("Removed %d snapshot(s)\n", len(result.SnapshotsRemoved)) if opts.Remote { v.printlnStdout("Removed snapshot metadata from remote storage") - v.printlnStdout("\nNote: Blobs were not removed. Run 'vaultik prune' to remove orphaned blobs.") + v.printlnStdout("\nNote: Remote blobs were not removed. Run 'vaultik prune' to remove orphaned blobs.") } return result, nil