diff --git a/DESIGN.md b/DESIGN.md index 167ffd8..3bb3118 100644 --- a/DESIGN.md +++ b/DESIGN.md @@ -58,12 +58,14 @@ Surprisingly, no existing tool meets these requirements, so I wrote `vaultik`. ## S3 Bucket Layout -S3 stores only three things: +S3 stores only four things: 1) Blobs: encrypted, compressed packs of file chunks. 2) Metadata: encrypted SQLite databases containing the current state of the filesystem at the time of the snapshot. 3) Metadata hashes: encrypted hashes of the metadata SQLite databases. +4) Blob manifests: unencrypted compressed JSON files listing all blob hashes + referenced in the snapshot, enabling pruning without decryption. ``` s3://// @@ -73,6 +75,7 @@ s3://// │ ├── .sqlite.age │ ├── .sqlite.00.age │ ├── .sqlite.01.age +│ ├── .manifest.json.zst ``` To retrieve a given file, you would: @@ -99,6 +102,23 @@ memory (<10GB). * ``: UTC timestamp in iso860 format, e.g. `2023-10-01T12:00:00Z`. These are lexicographically sortable. * `blobs///...`: where `aa` and `bb` are the first 2 hex bytes of the blob hash. +### Blob Manifest Format + +The `.manifest.json.zst` file is an unencrypted, compressed JSON file containing: + +```json +{ + "snapshot_id": "2023-10-01T12:00:00Z", + "blob_hashes": [ + "aa1234567890abcdef...", + "bb2345678901bcdef0...", + ... + ] +} +``` + +This allows pruning operations to determine which blobs are referenced without requiring decryption keys. + --- ## 3. Local SQLite Index Schema (source host) @@ -223,18 +243,20 @@ metadata/.sqlite.01.age 9. Compress, encrypt, split, and upload to S3 10. Encrypt the hash of the snapshot database to the backup age key 11. Upload the encrypted hash to S3 as `metadata/.hash.age` -12. Optionally prune remote blobs that are no longer referenced in the +12. Create blob manifest JSON listing all blob hashes referenced in snapshot +13. Compress manifest with zstd and upload as `metadata/.manifest.json.zst` +14. 
Optionally prune remote blobs that are no longer referenced in the snapshot, based on local state db ### 5.2 Manual Prune 1. List all objects under `metadata/` 2. Determine the latest valid `snapshot_id` by timestamp -3. Download, decrypt, and reconstruct the latest snapshot SQLite database -4. Extract set of referenced blob hashes +3. Download and decompress the latest `.manifest.json.zst` +4. Extract set of referenced blob hashes from manifest (no decryption needed) 5. List all blob objects under `blobs/` 6. For each blob: - * If the hash is not in the latest snapshot: + * If the hash is not in the manifest: * Issue `DeleteObject` to remove it ### 5.3 Verify @@ -261,11 +283,14 @@ Verify runs on a host that has no state, but access to the bucket. ## 6. CLI Commands ``` -vaultik backup [--config ] [--cron] [--daemon] +vaultik backup [--config ] [--cron] [--daemon] [--prune] vaultik restore --bucket --prefix --snapshot --target vaultik prune --bucket --prefix [--dry-run] vaultik verify --bucket --prefix [--snapshot ] [--quick] vaultik fetch --bucket --prefix --snapshot --file --target +vaultik snapshot list --bucket --prefix [--limit ] +vaultik snapshot rm --bucket --prefix --snapshot +vaultik snapshot latest --bucket --prefix ``` * `VAULTIK_PRIVATE_KEY` is required for `restore`, `prune`, `verify`, and diff --git a/internal/cli/backup.go b/internal/cli/backup.go index f6f24c2..32cd56a 100644 --- a/internal/cli/backup.go +++ b/internal/cli/backup.go @@ -17,6 +17,7 @@ type BackupOptions struct { ConfigPath string Daemon bool Cron bool + Prune bool } // NewBackupCommand creates the backup command @@ -52,6 +53,7 @@ a path using --config or by setting VAULTIK_CONFIG to a path.`, cmd.Flags().StringVar(&opts.ConfigPath, "config", "", "Path to config file") cmd.Flags().BoolVar(&opts.Daemon, "daemon", false, "Run in daemon mode with inotify monitoring") cmd.Flags().BoolVar(&opts.Cron, "cron", false, "Run in cron mode (silent unless error)") + 
cmd.Flags().BoolVar(&opts.Prune, "prune", false, "Delete all previous snapshots and unreferenced blobs after backup")
 
 	return cmd
 }
@@ -71,6 +73,9 @@ func runBackup(ctx context.Context, opts *BackupOptions) error {
 				if opts.Cron {
 					fmt.Println("Running in cron mode")
 				}
+				if opts.Prune {
+					fmt.Println("Pruning enabled - will delete old snapshots after backup")
+				}
 				return nil
 			}),
 		},
diff --git a/internal/cli/root.go b/internal/cli/root.go
index 020034a..8e49fae 100644
--- a/internal/cli/root.go
+++ b/internal/cli/root.go
@@ -22,6 +22,7 @@ on the source system.`,
 		NewPruneCommand(),
 		NewVerifyCommand(),
 		NewFetchCommand(),
+		SnapshotCmd(),
 	)
 
 	return cmd
diff --git a/internal/cli/snapshot.go b/internal/cli/snapshot.go
new file mode 100644
index 0000000..835df20
--- /dev/null
+++ b/internal/cli/snapshot.go
@@ -0,0 +1,103 @@
+package cli
+
+import (
+	"errors"
+
+	"github.com/spf13/cobra"
+)
+
+// SnapshotCmd returns the "snapshot" parent command and registers its list,
+// rm, and latest subcommands.
+func SnapshotCmd() *cobra.Command {
+	cmd := &cobra.Command{
+		Use:   "snapshot",
+		Short: "Manage snapshots",
+		Long:  "Commands for listing, removing, and querying snapshots",
+	}
+
+	cmd.AddCommand(snapshotListCmd())
+	cmd.AddCommand(snapshotRmCmd())
+	cmd.AddCommand(snapshotLatestCmd())
+
+	return cmd
+}
+
+// snapshotListCmd builds "snapshot list" with --bucket (required), --prefix,
+// and --limit. Listing is not implemented yet, so RunE returns an error.
+func snapshotListCmd() *cobra.Command {
+	var (
+		bucket string
+		prefix string
+		limit  int
+	)
+
+	cmd := &cobra.Command{
+		Use:   "list",
+		Short: "List snapshots",
+		Long:  "List all snapshots in the bucket, sorted by timestamp",
+		RunE: func(cmd *cobra.Command, args []string) error {
+			return errors.New("snapshot list: not implemented")
+		},
+	}
+
+	cmd.Flags().StringVar(&bucket, "bucket", "", "S3 bucket name")
+	cmd.Flags().StringVar(&prefix, "prefix", "", "S3 prefix")
+	cmd.Flags().IntVar(&limit, "limit", 10, "Maximum number of snapshots to list")
+	// MarkFlagRequired fails only for an unregistered flag; it is defined above.
+	_ = cmd.MarkFlagRequired("bucket")
+
+	return cmd
+}
+
+// snapshotRmCmd builds "snapshot rm" with --bucket and --snapshot (required)
+// and --prefix. Removal is not implemented yet, so RunE returns an error.
+func snapshotRmCmd() *cobra.Command {
+	var (
+		bucket   string
+		prefix   string
+		snapshot string
+	)
+
+	cmd := &cobra.Command{
+		Use:   "rm",
+		Short: "Remove a snapshot",
+		Long:  "Remove a snapshot and optionally its associated blobs",
+		RunE: func(cmd *cobra.Command, args []string) error {
+			return errors.New("snapshot rm: not implemented")
+		},
+	}
+
+	cmd.Flags().StringVar(&bucket, "bucket", "", "S3 bucket name")
+	cmd.Flags().StringVar(&prefix, "prefix", "", "S3 prefix")
+	cmd.Flags().StringVar(&snapshot, "snapshot", "", "Snapshot ID to remove")
+	// MarkFlagRequired fails only for an unregistered flag; both are defined above.
+	_ = cmd.MarkFlagRequired("bucket")
+	_ = cmd.MarkFlagRequired("snapshot")
+
+	return cmd
+}
+
+// snapshotLatestCmd builds "snapshot latest" with --bucket (required) and
+// --prefix. The lookup is not implemented yet, so RunE returns an error.
+func snapshotLatestCmd() *cobra.Command {
+	var (
+		bucket string
+		prefix string
+	)
+
+	cmd := &cobra.Command{
+		Use:   "latest",
+		Short: "Get the latest snapshot ID",
+		Long:  "Display the ID of the most recent snapshot",
+		RunE: func(cmd *cobra.Command, args []string) error {
+			return errors.New("snapshot latest: not implemented")
+		},
+	}
+
+	cmd.Flags().StringVar(&bucket, "bucket", "", "S3 bucket name")
+	cmd.Flags().StringVar(&prefix, "prefix", "", "S3 prefix")
+	// MarkFlagRequired fails only for an unregistered flag; it is defined above.
+	_ = cmd.MarkFlagRequired("bucket")
+
+	return cmd
+}