Add deterministic deduplication, rclone backend, and database purge command

- Implement deterministic blob hashing using double SHA256 of uncompressed
  plaintext data, enabling deduplication even after local DB is cleared
- Add Stat() check before blob upload to skip existing blobs in storage
- Add rclone storage backend for additional remote storage options
- Add 'vaultik database purge' command to erase local state DB
- Add 'vaultik remote check' command to verify remote connectivity
- Show configured snapshots in 'vaultik snapshot list' output
- Skip macOS resource fork files (._*) when listing remote snapshots
- Use multi-threaded zstd compression (CPUs - 2 threads)
- Add writer tests for double hashing behavior
This commit is contained in:
2026-01-28 15:50:17 -08:00
parent bdaaadf990
commit 470bf648c4
26 changed files with 2966 additions and 777 deletions

102
internal/cli/database.go Normal file
View File

@@ -0,0 +1,102 @@
package cli
import (
"fmt"
"os"
"git.eeqj.de/sneak/vaultik/internal/config"
"git.eeqj.de/sneak/vaultik/internal/log"
"github.com/spf13/cobra"
)
// NewDatabaseCommand creates the database command group
func NewDatabaseCommand() *cobra.Command {
cmd := &cobra.Command{
Use: "database",
Short: "Manage the local state database",
Long: `Commands for managing the local SQLite state database.`,
}
cmd.AddCommand(
newDatabasePurgeCommand(),
)
return cmd
}
// newDatabasePurgeCommand creates the database purge command
func newDatabasePurgeCommand() *cobra.Command {
var force bool
cmd := &cobra.Command{
Use: "purge",
Short: "Delete the local state database",
Long: `Completely removes the local SQLite state database.
This will erase all local tracking of:
- File metadata and change detection state
- Chunk and blob mappings
- Local snapshot records
The remote storage is NOT affected. After purging, the next backup will
perform a full scan and re-deduplicate against existing remote blobs.
Use --force to skip the confirmation prompt.`,
Args: cobra.NoArgs,
RunE: func(cmd *cobra.Command, args []string) error {
// Resolve config path
configPath, err := ResolveConfigPath()
if err != nil {
return err
}
// Load config to get database path
cfg, err := config.Load(configPath)
if err != nil {
return fmt.Errorf("failed to load config: %w", err)
}
dbPath := cfg.IndexPath
// Check if database exists
if _, err := os.Stat(dbPath); os.IsNotExist(err) {
fmt.Printf("Database does not exist: %s\n", dbPath)
return nil
}
// Confirm unless --force
if !force {
fmt.Printf("This will delete the local state database at:\n %s\n\n", dbPath)
fmt.Print("Are you sure? Type 'yes' to confirm: ")
var confirm string
if _, err := fmt.Scanln(&confirm); err != nil || confirm != "yes" {
fmt.Println("Aborted.")
return nil
}
}
// Delete the database file
if err := os.Remove(dbPath); err != nil {
return fmt.Errorf("failed to delete database: %w", err)
}
// Also delete WAL and SHM files if they exist
walPath := dbPath + "-wal"
shmPath := dbPath + "-shm"
_ = os.Remove(walPath) // Ignore errors - files may not exist
_ = os.Remove(shmPath)
rootFlags := GetRootFlags()
if !rootFlags.Quiet {
fmt.Printf("Database purged: %s\n", dbPath)
}
log.Info("Local state database purged", "path", dbPath)
return nil
},
}
cmd.Flags().BoolVar(&force, "force", false, "Skip confirmation prompt")
return cmd
}

89
internal/cli/remote.go Normal file
View File

@@ -0,0 +1,89 @@
package cli
import (
"context"
"os"
"git.eeqj.de/sneak/vaultik/internal/log"
"git.eeqj.de/sneak/vaultik/internal/vaultik"
"github.com/spf13/cobra"
"go.uber.org/fx"
)
// NewRemoteCommand creates the remote command and subcommands
func NewRemoteCommand() *cobra.Command {
cmd := &cobra.Command{
Use: "remote",
Short: "Remote storage management commands",
Long: "Commands for inspecting and managing remote storage",
}
// Add subcommands
cmd.AddCommand(newRemoteInfoCommand())
return cmd
}
// newRemoteInfoCommand creates the 'remote info' subcommand
func newRemoteInfoCommand() *cobra.Command {
var jsonOutput bool
cmd := &cobra.Command{
Use: "info",
Short: "Display remote storage information",
Long: `Shows detailed information about remote storage, including:
- Size of all snapshot metadata (per snapshot and total)
- Count and total size of all blobs
- Count and size of referenced blobs (from all manifests)
- Count and size of orphaned blobs (not referenced by any manifest)`,
Args: cobra.NoArgs,
RunE: func(cmd *cobra.Command, args []string) error {
// Use unified config resolution
configPath, err := ResolveConfigPath()
if err != nil {
return err
}
rootFlags := GetRootFlags()
return RunWithApp(cmd.Context(), AppOptions{
ConfigPath: configPath,
LogOptions: log.LogOptions{
Verbose: rootFlags.Verbose,
Debug: rootFlags.Debug,
Quiet: rootFlags.Quiet || jsonOutput,
},
Modules: []fx.Option{},
Invokes: []fx.Option{
fx.Invoke(func(v *vaultik.Vaultik, lc fx.Lifecycle) {
lc.Append(fx.Hook{
OnStart: func(ctx context.Context) error {
go func() {
if err := v.RemoteInfo(jsonOutput); err != nil {
if err != context.Canceled {
if !jsonOutput {
log.Error("Failed to get remote info", "error", err)
}
os.Exit(1)
}
}
if err := v.Shutdowner.Shutdown(); err != nil {
log.Error("Failed to shutdown", "error", err)
}
}()
return nil
},
OnStop: func(ctx context.Context) error {
v.Cancel()
return nil
},
})
}),
},
})
},
}
cmd.Flags().BoolVar(&jsonOutput, "json", false, "Output in JSON format")
return cmd
}

View File

@@ -46,6 +46,8 @@ on the source system.`,
NewSnapshotCommand(),
NewInfoCommand(),
NewVersionCommand(),
NewRemoteCommand(),
NewDatabaseCommand(),
)
return cmd

View File

@@ -111,10 +111,11 @@ func newSnapshotListCommand() *cobra.Command {
var jsonOutput bool
cmd := &cobra.Command{
Use: "list",
Short: "List all snapshots",
Long: "Lists all snapshots with their ID, timestamp, and compressed size",
Args: cobra.NoArgs,
Use: "list",
Aliases: []string{"ls"},
Short: "List all snapshots",
Long: "Lists all snapshots with their ID, timestamp, and compressed size",
Args: cobra.NoArgs,
RunE: func(cmd *cobra.Command, args []string) error {
// Use unified config resolution
configPath, err := ResolveConfigPath()
@@ -242,7 +243,16 @@ func newSnapshotVerifyCommand() *cobra.Command {
Use: "verify <snapshot-id>",
Short: "Verify snapshot integrity",
Long: "Verifies that all blobs referenced in a snapshot exist",
Args: cobra.ExactArgs(1),
Args: func(cmd *cobra.Command, args []string) error {
if len(args) != 1 {
_ = cmd.Help()
if len(args) == 0 {
return fmt.Errorf("snapshot ID required")
}
return fmt.Errorf("expected 1 argument, got %d", len(args))
}
return nil
},
RunE: func(cmd *cobra.Command, args []string) error {
snapshotID := args[0]
@@ -266,7 +276,13 @@ func newSnapshotVerifyCommand() *cobra.Command {
lc.Append(fx.Hook{
OnStart: func(ctx context.Context) error {
go func() {
if err := v.VerifySnapshotWithOptions(snapshotID, opts); err != nil {
var err error
if opts.Deep {
err = v.RunDeepVerify(snapshotID, opts)
} else {
err = v.VerifySnapshotWithOptions(snapshotID, opts)
}
if err != nil {
if err != context.Canceled {
if !opts.JSON {
log.Error("Verification failed", "error", err)
@@ -302,17 +318,37 @@ func newSnapshotRemoveCommand() *cobra.Command {
opts := &vaultik.RemoveOptions{}
cmd := &cobra.Command{
Use: "remove <snapshot-id>",
Use: "remove [snapshot-id]",
Aliases: []string{"rm"},
Short: "Remove a snapshot and its orphaned blobs",
Long: `Removes a snapshot and any blobs that are no longer referenced by other snapshots.
Short: "Remove a snapshot from the local database",
Long: `Removes a snapshot from the local database.
This command downloads manifests from all other snapshots to determine which blobs
are still in use, then deletes any blobs that would become orphaned.`,
Args: cobra.ExactArgs(1),
By default, only removes from the local database. Use --remote to also remove
the snapshot metadata from remote storage.
Note: This does NOT remove blobs. Use 'vaultik prune' to remove orphaned blobs
after removing snapshots.
Use --all --force to remove all snapshots.`,
Args: func(cmd *cobra.Command, args []string) error {
all, _ := cmd.Flags().GetBool("all")
if all {
if len(args) > 0 {
_ = cmd.Help()
return fmt.Errorf("--all cannot be used with a snapshot ID")
}
return nil
}
if len(args) != 1 {
_ = cmd.Help()
if len(args) == 0 {
return fmt.Errorf("snapshot ID required (or use --all --force)")
}
return fmt.Errorf("expected 1 argument, got %d", len(args))
}
return nil
},
RunE: func(cmd *cobra.Command, args []string) error {
snapshotID := args[0]
// Use unified config resolution
configPath, err := ResolveConfigPath()
if err != nil {
@@ -333,7 +369,13 @@ are still in use, then deletes any blobs that would become orphaned.`,
lc.Append(fx.Hook{
OnStart: func(ctx context.Context) error {
go func() {
if _, err := v.RemoveSnapshot(snapshotID, opts); err != nil {
var err error
if opts.All {
_, err = v.RemoveAllSnapshots(opts)
} else {
_, err = v.RemoveSnapshot(args[0], opts)
}
if err != nil {
if err != context.Canceled {
if !opts.JSON {
log.Error("Failed to remove snapshot", "error", err)
@@ -359,8 +401,10 @@ are still in use, then deletes any blobs that would become orphaned.`,
}
cmd.Flags().BoolVarP(&opts.Force, "force", "f", false, "Skip confirmation prompt")
cmd.Flags().BoolVar(&opts.DryRun, "dry-run", false, "Show what would be deleted without deleting")
cmd.Flags().BoolVar(&opts.JSON, "json", false, "Output deletion stats as JSON")
cmd.Flags().BoolVar(&opts.DryRun, "dry-run", false, "Show what would be removed without removing")
cmd.Flags().BoolVar(&opts.JSON, "json", false, "Output result as JSON")
cmd.Flags().BoolVar(&opts.Remote, "remote", false, "Also remove snapshot metadata from remote storage")
cmd.Flags().BoolVar(&opts.All, "all", false, "Remove all snapshots (requires --force)")
return cmd
}

View File

@@ -23,7 +23,7 @@ func NewStoreCommand() *cobra.Command {
cmd := &cobra.Command{
Use: "store",
Short: "Storage information commands",
Long: "Commands for viewing information about the S3 storage backend",
Long: "Commands for viewing information about the storage backend",
}
// Add subcommands
@@ -37,7 +37,7 @@ func newStoreInfoCommand() *cobra.Command {
return &cobra.Command{
Use: "info",
Short: "Display storage information",
Long: "Shows S3 bucket configuration and storage statistics including snapshots and blobs",
Long: "Shows storage configuration and statistics including snapshots and blobs",
RunE: func(cmd *cobra.Command, args []string) error {
return runWithApp(cmd.Context(), func(app *StoreApp) error {
return app.Info(cmd.Context())