Merge fix/info-and-doc-drift

2026-06-24 08:55:04 +02:00
parent 60abeb636a 1f22b9c603
commit aa3e8f081b
8 changed files with 179 additions and 103 deletions
--- a/README.md
+++ b/README.md
@@ -100,9 +100,8 @@ vaultik [--config <path>] snapshot list [--json]
 vaultik [--config <path>] snapshot verify <snapshot-id> [--deep] [--json]
 vaultik [--config <path>] snapshot purge [--keep-latest | --older-than <duration>] [--snapshot <name>...] [--force]
 vaultik [--config <path>] snapshot remove <snapshot-id|--all> [--dry-run] [--force] [--remote] [--json]
-vaultik [--config <path>] snapshot prune
 vaultik [--config <path>] snapshot cleanup
-vaultik [--config <path>] restore <snapshot-id> <target-dir> [paths...] [--verify]
+vaultik [--config <path>] snapshot restore <snapshot-id> <target-dir> [paths...] [--verify]
 vaultik [--config <path>] prune [--force] [--json]
 vaultik [--config <path>] info
 vaultik [--config <path>] remote info [--json]
@@ -123,7 +122,7 @@ vaultik version

 ### environment variables

-* `VAULTIK_AGE_SECRET_KEY`: Age private key for decryption (required for `restore` and `verify --deep`)
+* `VAULTIK_AGE_SECRET_KEY`: Age private key for decryption (required for `snapshot restore` and `snapshot verify --deep`)
 * `VAULTIK_CONFIG`: Path to config file (overridden by `--config`)
 * `VAULTIK_INDEX_PATH`: Override local SQLite index path

@@ -157,11 +156,13 @@ existing file. Created with mode `0600` since it will contain credentials.
 **`config edit`**: Open the config file in `$EDITOR` (falls back to `vi`).

 **`config get`**: Print a config value addressed by dotted YAML path
-(e.g. `vaultik config get s3.bucket`). Non-scalar values print as YAML.
+(e.g. `vaultik config get storage_url`). Non-scalar values print as YAML.

 **`config set`**: Set a scalar config value by dotted YAML path
-(e.g. `vaultik config set compression_level 9`). Comments and formatting
-in the file are preserved; intermediate maps are created as needed.
+(e.g. `vaultik config set compression_level 9`,
+`vaultik config set storage_url "file:///mnt/backups"`). Comments and
+formatting in the file are preserved; intermediate maps are created as
+needed.

 **`snapshot create`**: Perform incremental backup of configured snapshots.
 * Optional snapshot names argument to create specific snapshots (default: all)
@@ -176,7 +177,11 @@ in the file are preserved; intermediate maps are created as needed.
 * `--keep-newer-than <duration>`: With `--prune`, keep snapshots newer than
  this duration instead of only the latest (e.g. `4w`, `30d`, `6mo`, `1y`)

-**`snapshot list`**: List all snapshots with their timestamps and sizes.
+**`snapshot list`**: Show every snapshot known to the destination
+store with timestamps and three sizes per snapshot (compressed
+remote size; total uncompressed chunk size; size of chunks newly
+referenced by that snapshot). The uncompressed and "new chunk"
+columns show `<remote only>` for snapshots not in the local index.
 * `--json`: Output in JSON format

 **`snapshot verify`**: Verify snapshot integrity.
@@ -194,28 +199,31 @@ latest globally).
 * `--force`: Skip confirmation prompt

 **`snapshot remove`**: Remove a specific snapshot from the local database.
+Automatically cleans up local rows (files, chunks, blobs) that the removed
+snapshot was the last referrer for — you don't need a separate prune step
+after removal.
 * `--remote`: Also remove snapshot metadata from remote storage
 * `--all`: Remove all snapshots (requires `--force`)
 * `--dry-run`: Show what would be deleted without deleting
 * `--force`: Skip confirmation prompt
 * `--json`: Output result as JSON

-**`snapshot prune`**: Clean orphaned data from the local database (files,
-chunks, blobs not referenced by any snapshot).
-
 **`snapshot cleanup`**: Remove stale local snapshot records that have no
 corresponding metadata in remote storage. These are typically left behind
 by incomplete or interrupted backups. Does not touch remote storage.

-**`restore`**: Restore files from a backup snapshot.
+**`snapshot restore`**: Restore files from a backup snapshot.
 * Requires `VAULTIK_AGE_SECRET_KEY` environment variable
 * Optional path arguments to restore specific files/directories (default: all)
 * Preserves file permissions, timestamps, ownership (ownership requires root),
  symlinks, and empty directories
 * `--verify`: After restoring, verify every file's chunk hashes match

-**`prune`**: Remove unreferenced blobs from remote storage.
-* Scans all snapshot manifests for referenced blobs, deletes any blob not referenced
+**`prune`**: Tidy up everything that isn't needed. Removes orphaned local
+database rows (files, chunks, blobs no longer referenced by any completed
+snapshot) AND deletes unreferenced blobs from remote storage. `snapshot
+create --prune`, `snapshot remove`, and `snapshot purge` run the same
+cleanup automatically; this is the manual entry point for the same work.
 * `--force`: Skip confirmation prompt
 * `--json`: Output stats as JSON

@@ -385,13 +393,71 @@ Key fields:

 ## roadmap

-Items for future releases:
+Items still to do before / shortly after 1.0. Loosely ordered by
+priority.

-* Error-condition tests (network failures, disk full, corrupted/missing blobs)
-* Parallel blob downloads during restore
-* Bandwidth limiting (`--bwlimit`)
-* Security audit of encryption implementation
-* Man pages and richer `--help` examples
+### correctness and operability
+
+* **Security audit of the encryption implementation.** Pre-1.0
+  blocker if we're advertising "secure" at the top of this README.
+  age + zstd + content-defined chunking is mostly off-the-shelf
+  pieces, but the seams (key handling, recipient parsing, manifest
+  trust boundary, restore-time identity validation) need an outside
+  read.
+* **Error-condition tests.** Today's coverage is the happy path
+  plus a few specific regressions. Need fault-injection coverage:
+  network failures mid-blob, disk-full during restore, corrupted /
+  truncated / missing blobs, partial uploads, kill -9 between
+  manifest and db.zst.age writes.
+* **Verify restored content end-to-end in CI.** The current
+  integration test does this for a small synthetic snapshot but
+  not at scale. A nightly job against a multi-GB representative
+  snapshot would catch silent regressions in the chunker, packer,
+  or restore planner.
+
+### performance
+
+* **Parallel blob downloads during restore.** Single-stream right
+  now. With a fast S3 endpoint and a multi-core machine restore is
+  bound by per-blob fetch + decrypt + decompress; running N of
+  those in parallel against the disk cache would close most of the
+  remaining gap. Needs to interact correctly with the locality
+  planner and sweeper.
+* **Bandwidth limiting (`--bwlimit`).** Both upload and download.
+  Useful for backing up over a shared link. Tricky to make work
+  correctly with the parallel-download story.
+* **Restart of interrupted restore.** Today restore is restartable
+  in the sense that re-running it overwrites partial output; it
+  doesn't resume from where it stopped or skip already-present
+  files. A `--resume` mode that checks targets before fetching
+  blobs would matter for very large restores.
+
+### usability
+
+* **Man pages and richer `--help` examples.** Cobra generates
+  basic help; man pages would be a separate target.
+* **`--bwlimit` style human-readable size flags** across the
+  command surface where they're currently raw integers.
+* **`vaultik snapshot diff <a> <b>`** — show which files changed
+  between two snapshots without restoring either.
+* **Status reporting hook for `--cron`.** When a backup fails
+  silently in cron, the user has no idea. A configurable
+  webhook / email / `notify-send` hook on completion (success and
+  failure) would close the loop.
+
+### infrastructure
+
+* **Cross-machine restore documentation.** The "restore from
+  another host" workflow works but isn't documented as a
+  first-class operation in this README. Worth a dedicated section
+  once it's settled.
+* **Schema migrations.** Currently nonexistent — pre-1.0 schema
+  changes are handled by `vaultik database purge` plus a full
+  re-scan. Post-1.0 we'll need a migration story to keep existing
+  index databases usable across upgrades.
+* **Storage backend coverage tests.** S3, file://, and rclone://
+  all share the Storer interface but the rclone path is the least
+  exercised in CI.

 ---

--- a/internal/cli/config.go
+++ b/internal/cli/config.go
@@ -285,7 +285,7 @@ func newConfigEditCommand() *cobra.Command {
 func newConfigGetCommand() *cobra.Command {
 	return &cobra.Command{
 		Use:   "get <key>",
-		Short: "Print a config value by dotted path (e.g. s3.bucket)",
+		Short: "Print a config value by dotted path (e.g. storage_url, compression_level)",
 		Args:  cobra.ExactArgs(1),
 		RunE: func(cmd *cobra.Command, args []string) error {
 			path, err := ResolveConfigPath()
@@ -328,9 +328,10 @@ the file back, preserving comments and formatting. Intermediate maps
 are created as needed.

 Examples:
+  vaultik config set storage_url "file:///mnt/backups"
+  vaultik config set storage_url "s3://bucket/prefix?endpoint=host&region=us-east-1"
  vaultik config set compression_level 9
-  vaultik config set s3.bucket mybucket
-  vaultik config set storage_url "file:///mnt/backups"`,
+  vaultik config set s3.bucket mybucket  # legacy S3 fields still supported`,
 		Args: cobra.ExactArgs(2),
 		RunE: func(cmd *cobra.Command, args []string) error {
 			path, err := ResolveConfigPath()
--- a/internal/cli/prune.go
+++ b/internal/cli/prune.go
@@ -16,14 +16,19 @@ func NewPruneCommand() *cobra.Command {

 	cmd := &cobra.Command{
 		Use:   "prune",
-		Short: "Remove unreferenced blobs",
-		Long: `Removes blobs that are not referenced by any snapshot.
+		Short: "Tidy local database and remote storage",
+		Long: `Removes orphaned data from both the local index database and
+unreferenced blobs from the backup destination store.

-This command scans all snapshots and their manifests to build a list of
-referenced blobs, then removes any blobs in storage that are not in this list.
+Local cleanup drops incomplete snapshots and any files, chunks, or
+blobs no longer referenced by a completed snapshot. Remote cleanup
+scans every snapshot manifest in the destination store, builds the
+set of still-referenced blob hashes, and deletes any blob not in that
+set.

-Use this command after deleting snapshots with 'vaultik purge' to reclaim
-storage space.`,
+Snapshot create --prune and snapshot remove run the same cleanup
+automatically; this command is the manual entry point for the same
+work (e.g. after a crashed backup or to reclaim storage).`,
 		Args: cobra.NoArgs,
 		RunE: func(cmd *cobra.Command, args []string) error {
 			// Use unified config resolution
@@ -49,7 +54,7 @@ storage space.`,
 								// Start the prune operation in a goroutine
 								go func() {
 									// Run the prune operation
-									if err := v.PruneBlobs(opts); err != nil {
+									if err := v.Prune(opts); err != nil {
 										if err != context.Canceled {
 											if !opts.JSON {
 												log.Error("Prune operation failed", "error", err)
--- a/internal/cli/snapshot.go
+++ b/internal/cli/snapshot.go
@@ -25,7 +25,6 @@ func NewSnapshotCommand() *cobra.Command {
 	cmd.AddCommand(newSnapshotPurgeCommand())
 	cmd.AddCommand(newSnapshotVerifyCommand())
 	cmd.AddCommand(newSnapshotRemoveCommand())
-	cmd.AddCommand(newSnapshotPruneCommand())
 	cmd.AddCommand(newSnapshotCleanupCommand())
 	cmd.AddCommand(newSnapshotRestoreCommand())

@@ -415,64 +414,6 @@ Use --all --force to remove all snapshots.`,
 	return cmd
 }

-// newSnapshotPruneCommand creates the 'snapshot prune' subcommand
-func newSnapshotPruneCommand() *cobra.Command {
-	cmd := &cobra.Command{
-		Use:   "prune",
-		Short: "Remove orphaned data from local database",
-		Long: `Removes orphaned files, chunks, and blobs from the local database.
-
-This cleans up data that is no longer referenced by any snapshot, which can
-accumulate from incomplete backups or deleted snapshots.`,
-		Args: cobra.NoArgs,
-		RunE: func(cmd *cobra.Command, args []string) error {
-			// Use unified config resolution
-			configPath, err := ResolveConfigPath()
-			if err != nil {
-				return err
-			}
-
-			rootFlags := GetRootFlags()
-			return RunWithApp(cmd.Context(), AppOptions{
-				ConfigPath: configPath,
-				LogOptions: log.LogOptions{
-					Verbose: rootFlags.Verbose,
-					Debug:   rootFlags.Debug,
-					Quiet:   rootFlags.Quiet,
-				},
-				Modules: []fx.Option{},
-				Invokes: []fx.Option{
-					fx.Invoke(func(v *vaultik.Vaultik, lc fx.Lifecycle) {
-						lc.Append(fx.Hook{
-							OnStart: func(ctx context.Context) error {
-								go func() {
-									if _, err := v.PruneDatabase(); err != nil {
-										if err != context.Canceled {
-											log.Error("Failed to prune database", "error", err)
-											ReportError("Failed to prune database: %v", err)
-											os.Exit(1)
-										}
-									}
-									if err := v.Shutdowner.Shutdown(); err != nil {
-										log.Error("Failed to shutdown", "error", err)
-									}
-								}()
-								return nil
-							},
-							OnStop: func(ctx context.Context) error {
-								v.Cancel()
-								return nil
-							},
-						})
-					}),
-				},
-			})
-		},
-	}
-
-	return cmd
-}
-
 // newSnapshotCleanupCommand creates the 'snapshot cleanup' subcommand
 func newSnapshotCleanupCommand() *cobra.Command {
 	cmd := &cobra.Command{
--- a/internal/vaultik/blobcache.go
+++ b/internal/vaultik/blobcache.go
@@ -16,14 +16,22 @@ type blobDiskCacheEntry struct {
 	next *blobDiskCacheEntry
 }

-// blobDiskCache is an LRU cache that stores blobs on disk instead of in memory.
-// Blobs are written to a temp directory keyed by their hash. When total size
-// exceeds maxBytes, the least-recently-used entries are evicted (deleted from disk).
+// blobDiskCache stores blobs on disk keyed by hash. It exposes ReadAt
+// for slice reads (the restore path uses this so chunk extraction
+// never reads a whole blob into memory) plus Get/Put for whole-blob
+// access.
 //
-// The Get/ReadAt/peak-Len counters are debugging instrumentation used by
-// tests to assert that the restore code path uses ReadAt (which reads
-// only the requested slice of a blob) rather than Get (which reads the
-// full blob into memory).
+// Eviction policy is caller-controlled. The cache keeps an LRU list
+// internally and will fall back to LRU eviction if curBytes exceeds
+// maxBytes. Restore passes math.MaxInt64 as maxBytes and drives
+// eviction itself via Delete() through restoreSweeper, which deletes
+// each blob the moment every file that references its chunks has been
+// written. LRU never fires under that configuration; it is kept as a
+// safety net for callers that don't manage eviction themselves.
+//
+// Get/ReadAt/peak-Len counters are debugging instrumentation used by
+// tests to assert that the restore code path uses ReadAt rather than
+// Get and to bound peak disk-cache occupancy.
 type blobDiskCache struct {
 	mu       sync.Mutex
 	dir      string
--- a/internal/vaultik/info.go
+++ b/internal/vaultik/info.go
@@ -22,14 +22,29 @@ func (v *Vaultik) ShowInfo() error {
 	v.printfStdout("Go Version:      %s\n", runtime.Version())
 	v.printlnStdout()

-	// Storage Configuration
+	// Storage Configuration. The backend is selected by storage_url
+	// (s3://, file://, rclone://); the legacy s3.* fields are only
+	// printed when they're actually populated, since the URL scheme
+	// is the primary configuration.
 	v.printfStdout("=== Storage Configuration ===\n")
+	storageInfo := v.Storage.Info()
+	v.printfStdout("Type:            %s\n", storageInfo.Type)
+	v.printfStdout("Location:        %s\n", storageInfo.Location)
+	if v.Config.StorageURL != "" {
+		v.printfStdout("Storage URL:     %s\n", v.Config.StorageURL)
+	}
+	if v.Config.S3.Bucket != "" {
 		v.printfStdout("S3 Bucket:       %s\n", v.Config.S3.Bucket)
+	}
 	if v.Config.S3.Prefix != "" {
 		v.printfStdout("S3 Prefix:       %s\n", v.Config.S3.Prefix)
 	}
+	if v.Config.S3.Endpoint != "" {
 		v.printfStdout("S3 Endpoint:     %s\n", v.Config.S3.Endpoint)
+	}
+	if v.Config.S3.Region != "" {
 		v.printfStdout("S3 Region:       %s\n", v.Config.S3.Region)
+	}
 	v.printlnStdout()

 	// Backup Settings
@@ -337,7 +352,7 @@ func (v *Vaultik) printRemoteInfoTable(result *RemoteInfoResult) {
 		humanize.Comma(int64(result.OrphanedBlobCount)), humanize.Bytes(uint64(result.OrphanedBlobSize)))

 	if result.OrphanedBlobCount > 0 {
-		v.printfStdout("\nRun 'vaultik prune --remote' to remove orphaned blobs.\n")
+		v.printfStdout("\nRun 'vaultik prune' to remove orphaned blobs.\n")
 	}
 }

--- a/internal/vaultik/prune.go
+++ b/internal/vaultik/prune.go
@@ -48,6 +48,19 @@ type PruneBlobsResult struct {
 	BytesFreed   int64 `json:"bytes_freed"`
 }

+// Prune removes orphaned data from the local index database AND
+// unreferenced blobs from the backup destination store. This is the
+// single user-facing prune entry point — the split between local and
+// remote cleanup is an implementation detail. Calling code should
+// prefer this method over PruneDatabase or PruneBlobs individually
+// unless it specifically wants one half.
+func (v *Vaultik) Prune(opts *PruneOptions) error {
+	if _, err := v.PruneDatabase(); err != nil {
+		return fmt.Errorf("pruning local database: %w", err)
+	}
+	return v.PruneBlobs(opts)
+}
+
 // PruneBlobs removes unreferenced blobs from storage
 func (v *Vaultik) PruneBlobs(opts *PruneOptions) error {
 	log.Info("Starting prune operation")
--- a/internal/vaultik/snapshot.go
+++ b/internal/vaultik/snapshot.go
@@ -768,9 +768,18 @@ func (v *Vaultik) confirmAndExecutePurge(toDelete []SnapshotInfo, force, quiet b
 		}
 	}

+	// Tidy up local DB orphans now so users don't have to run a
+	// separate command after a purge. Guarded against nil for tests
+	// that don't wire up a SnapshotManager.
+	if v.SnapshotManager != nil {
+		if err := v.SnapshotManager.CleanupOrphanedData(v.ctx); err != nil {
+			log.Warn("Failed to clean up orphaned local data after purge", "error", err)
+		}
+	}
+
 	if !quiet {
 		v.printfStdout("Deleted %d snapshot(s)\n", len(toDelete))
-		v.printlnStdout("\nNote: Run 'vaultik prune' to clean up unreferenced blobs.")
+		v.printlnStdout("\nNote: Run 'vaultik prune' to clean up unreferenced remote blobs.")
 	}

 	return nil
@@ -1092,6 +1101,16 @@ func (v *Vaultik) RemoveSnapshot(snapshotID string, opts *RemoveOptions) (*Remov
 		result.RemoteRemoved = true
 	}

+	// Clean up the local rows that just became orphaned (files, chunks,
+	// blob_chunks, blobs no longer referenced by any snapshot). This
+	// used to be a separate `vaultik snapshot prune` step; running it
+	// inline means `snapshot remove` leaves no ghost rows behind.
+	if v.SnapshotManager != nil {
+		if err := v.SnapshotManager.CleanupOrphanedData(v.ctx); err != nil {
+			log.Warn("Failed to clean up orphaned local data after removal", "error", err)
+		}
+	}
+
 	// Output result
 	if opts.JSON {
 		return result, v.outputRemoveJSON(result)
@@ -1101,7 +1120,7 @@ func (v *Vaultik) RemoveSnapshot(snapshotID string, opts *RemoveOptions) (*Remov
 	v.printfStdout("Removed snapshot '%s' from local database\n", snapshotID)
 	if opts.Remote {
 		v.printlnStdout("Removed snapshot metadata from remote storage")
-		v.printlnStdout("\nNote: Blobs were not removed. Run 'vaultik prune' to remove orphaned blobs.")
+		v.printlnStdout("\nNote: Remote blobs were not removed. Run 'vaultik prune' to remove orphaned blobs.")
 	}

 	return result, nil
@@ -1213,6 +1232,14 @@ func (v *Vaultik) executeRemoveAll(snapshotIDs []string, opts *RemoveOptions) (*
 		result.RemoteRemoved = true
 	}

+	// Clean up everything that just became orphaned locally so the
+	// index database doesn't carry 39k ghost rows after a wipe.
+	if v.SnapshotManager != nil {
+		if err := v.SnapshotManager.CleanupOrphanedData(v.ctx); err != nil {
+			log.Warn("Failed to clean up orphaned local data after bulk removal", "error", err)
+		}
+	}
+
 	if opts.JSON {
 		return result, v.outputRemoveJSON(result)
 	}
@@ -1220,7 +1247,7 @@ func (v *Vaultik) executeRemoveAll(snapshotIDs []string, opts *RemoveOptions) (*
 	v.printfStdout("Removed %d snapshot(s)\n", len(result.SnapshotsRemoved))
 	if opts.Remote {
 		v.printlnStdout("Removed snapshot metadata from remote storage")
-		v.printlnStdout("\nNote: Blobs were not removed. Run 'vaultik prune' to remove orphaned blobs.")
+		v.printlnStdout("\nNote: Remote blobs were not removed. Run 'vaultik prune' to remove orphaned blobs.")
 	}

 	return result, nil