Add deterministic deduplication, rclone backend, and database purge command

- Implement deterministic blob hashing using double SHA256 of uncompressed
  plaintext data, enabling deduplication even after local DB is cleared
- Add Stat() check before blob upload to skip existing blobs in storage
- Add rclone storage backend for additional remote storage options
- Add 'vaultik database purge' command to erase local state DB
- Add 'vaultik remote check' command to verify remote connectivity
- Show configured snapshots in 'vaultik snapshot list' output
- Skip macOS resource fork files (._*) when listing remote snapshots
- Use multi-threaded zstd compression (CPUs - 2 threads)
- Add writer tests for double hashing behavior
This commit is contained in:
2026-01-28 15:50:17 -08:00
parent bdaaadf990
commit 470bf648c4
26 changed files with 2966 additions and 777 deletions

View File

@@ -1,10 +1,14 @@
package vaultik
import (
"encoding/json"
"fmt"
"runtime"
"sort"
"strings"
"git.eeqj.de/sneak/vaultik/internal/log"
"git.eeqj.de/sneak/vaultik/internal/snapshot"
"github.com/dustin/go-humanize"
)
@@ -108,3 +112,237 @@ func (v *Vaultik) ShowInfo() error {
return nil
}
// SnapshotMetadataInfo contains information about a single snapshot's metadata
// as stored under metadata/<snapshot-id>/ on the remote, plus blob statistics
// read from that snapshot's manifest.
type SnapshotMetadataInfo struct {
	SnapshotID   string `json:"snapshot_id"`   // snapshot identifier (directory name under metadata/)
	ManifestSize int64  `json:"manifest_size"` // size in bytes of the manifest object
	DatabaseSize int64  `json:"database_size"` // size in bytes of the database object
	TotalSize    int64  `json:"total_size"`    // ManifestSize + DatabaseSize
	BlobCount    int    `json:"blob_count"`    // number of blobs recorded in the manifest
	BlobsSize    int64  `json:"blobs_size"`    // sum of compressed sizes of the manifest's blobs
}
// RemoteInfoResult contains all remote storage information gathered by
// RemoteInfo: backend identification, per-snapshot metadata sizes, and
// aggregate blob statistics (total, referenced, orphaned).
type RemoteInfoResult struct {
	// Storage info
	StorageType     string `json:"storage_type"`     // backend type as reported by Storage.Info()
	StorageLocation string `json:"storage_location"` // backend location as reported by Storage.Info()

	// Snapshot metadata
	Snapshots          []SnapshotMetadataInfo `json:"snapshots"`            // per-snapshot info, sorted by snapshot ID
	TotalMetadataSize  int64                  `json:"total_metadata_size"`  // sum of TotalSize across Snapshots
	TotalMetadataCount int                    `json:"total_metadata_count"` // number of snapshots found

	// All blobs on remote
	TotalBlobCount int   `json:"total_blob_count"` // count of objects under blobs/
	TotalBlobSize  int64 `json:"total_blob_size"`  // total stored size of objects under blobs/

	// Referenced blobs (from manifests)
	ReferencedBlobCount int   `json:"referenced_blob_count"` // unique blob hashes referenced by any manifest
	ReferencedBlobSize  int64 `json:"referenced_blob_size"`  // total compressed size of referenced blobs

	// Orphaned blobs
	OrphanedBlobCount int   `json:"orphaned_blob_count"` // blobs present on remote but referenced by no manifest
	OrphanedBlobSize  int64 `json:"orphaned_blob_size"`  // total stored size of orphaned blobs
}
// RemoteInfo displays information about remote storage: the backend in use,
// per-snapshot metadata sizes (manifest + database), aggregate blob counts,
// and which blobs are referenced by a manifest versus orphaned. When
// jsonOutput is true the collected RemoteInfoResult is written as indented
// JSON to v.Stdout; otherwise a human-readable report is printed.
//
// Fix: hidden entries (macOS resource fork files such as "._foo") are now
// skipped when listing metadata and blobs, consistent with ListSnapshots
// and syncWithRemote.
func (v *Vaultik) RemoteInfo(jsonOutput bool) error {
	result := &RemoteInfoResult{}

	// Get storage info
	storageInfo := v.Storage.Info()
	result.StorageType = storageInfo.Type
	result.StorageLocation = storageInfo.Location

	if !jsonOutput {
		fmt.Printf("=== Remote Storage ===\n")
		fmt.Printf("Type: %s\n", storageInfo.Type)
		fmt.Printf("Location: %s\n", storageInfo.Location)
		fmt.Println()
	}

	// List all snapshot metadata and accumulate per-snapshot sizes.
	if !jsonOutput {
		fmt.Printf("Scanning snapshot metadata...\n")
	}
	snapshotMetadata := make(map[string]*SnapshotMetadataInfo)

	metadataCh := v.Storage.ListStream(v.ctx, "metadata/")
	for obj := range metadataCh {
		if obj.Err != nil {
			return fmt.Errorf("listing metadata: %w", obj.Err)
		}

		// Parse key: metadata/<snapshot-id>/<filename>
		parts := strings.Split(obj.Key, "/")
		if len(parts) < 3 {
			continue
		}
		snapshotID := parts[1]

		// Skip macOS resource fork files (._*) and other hidden entries,
		// consistent with ListSnapshots and syncWithRemote.
		if strings.HasPrefix(snapshotID, ".") {
			continue
		}

		info, exists := snapshotMetadata[snapshotID]
		if !exists {
			info = &SnapshotMetadataInfo{SnapshotID: snapshotID}
			snapshotMetadata[snapshotID] = info
		}

		filename := parts[2]
		if strings.HasPrefix(filename, "manifest") {
			info.ManifestSize = obj.Size
		} else if strings.HasPrefix(filename, "db") {
			info.DatabaseSize = obj.Size
		}
		info.TotalSize = info.ManifestSize + info.DatabaseSize
	}

	// Sort snapshots by ID for consistent output
	snapshotIDs := make([]string, 0, len(snapshotMetadata))
	for id := range snapshotMetadata {
		snapshotIDs = append(snapshotIDs, id)
	}
	sort.Strings(snapshotIDs)

	// Download and parse all manifests to get referenced blobs
	if !jsonOutput {
		fmt.Printf("Downloading %d manifest(s)...\n", len(snapshotIDs))
	}
	referencedBlobs := make(map[string]int64) // hash -> compressed size
	for _, snapshotID := range snapshotIDs {
		manifestKey := fmt.Sprintf("metadata/%s/manifest.json.zst", snapshotID)
		reader, err := v.Storage.Get(v.ctx, manifestKey)
		if err != nil {
			// Best-effort: a missing or unreadable manifest must not abort
			// the whole report.
			log.Warn("Failed to get manifest", "snapshot", snapshotID, "error", err)
			continue
		}
		manifest, err := snapshot.DecodeManifest(reader)
		_ = reader.Close()
		if err != nil {
			log.Warn("Failed to decode manifest", "snapshot", snapshotID, "error", err)
			continue
		}

		// Record blob info from manifest
		info := snapshotMetadata[snapshotID]
		info.BlobCount = manifest.BlobCount
		var blobsSize int64
		for _, blob := range manifest.Blobs {
			referencedBlobs[blob.Hash] = blob.CompressedSize
			blobsSize += blob.CompressedSize
		}
		info.BlobsSize = blobsSize
	}

	// Build result snapshots in sorted order
	var totalMetadataSize int64
	for _, id := range snapshotIDs {
		info := snapshotMetadata[id]
		result.Snapshots = append(result.Snapshots, *info)
		totalMetadataSize += info.TotalSize
	}
	result.TotalMetadataSize = totalMetadataSize
	result.TotalMetadataCount = len(snapshotIDs)

	// Calculate referenced blob stats
	result.ReferencedBlobCount = len(referencedBlobs)
	for _, size := range referencedBlobs {
		result.ReferencedBlobSize += size
	}

	// List all blobs on remote
	if !jsonOutput {
		fmt.Printf("Scanning blobs...\n")
	}
	allBlobs := make(map[string]int64) // hash -> size from storage
	blobCh := v.Storage.ListStream(v.ctx, "blobs/")
	for obj := range blobCh {
		if obj.Err != nil {
			return fmt.Errorf("listing blobs: %w", obj.Err)
		}

		// Extract hash from key: blobs/xx/yy/hash
		parts := strings.Split(obj.Key, "/")
		if len(parts) < 4 {
			continue
		}
		hash := parts[3]
		// Skip macOS resource fork files (._*) and other hidden entries.
		if strings.HasPrefix(hash, ".") {
			continue
		}
		allBlobs[hash] = obj.Size
		result.TotalBlobCount++
		result.TotalBlobSize += obj.Size
	}

	// Calculate orphaned blobs: present on remote, referenced by no manifest.
	for hash, size := range allBlobs {
		if _, referenced := referencedBlobs[hash]; !referenced {
			result.OrphanedBlobCount++
			result.OrphanedBlobSize += size
		}
	}

	// Output results
	if jsonOutput {
		enc := json.NewEncoder(v.Stdout)
		enc.SetIndent("", " ")
		return enc.Encode(result)
	}

	// Human-readable output
	fmt.Printf("\n=== Snapshot Metadata ===\n")
	if len(result.Snapshots) == 0 {
		fmt.Printf("No snapshots found\n")
	} else {
		fmt.Printf("%-45s %12s %12s %12s %10s %12s\n", "SNAPSHOT", "MANIFEST", "DATABASE", "TOTAL", "BLOBS", "BLOB SIZE")
		fmt.Printf("%-45s %12s %12s %12s %10s %12s\n", strings.Repeat("-", 45), strings.Repeat("-", 12), strings.Repeat("-", 12), strings.Repeat("-", 12), strings.Repeat("-", 10), strings.Repeat("-", 12))
		for _, info := range result.Snapshots {
			fmt.Printf("%-45s %12s %12s %12s %10s %12s\n",
				truncateString(info.SnapshotID, 45),
				humanize.Bytes(uint64(info.ManifestSize)),
				humanize.Bytes(uint64(info.DatabaseSize)),
				humanize.Bytes(uint64(info.TotalSize)),
				humanize.Comma(int64(info.BlobCount)),
				humanize.Bytes(uint64(info.BlobsSize)),
			)
		}
		fmt.Printf("%-45s %12s %12s %12s %10s %12s\n", strings.Repeat("-", 45), strings.Repeat("-", 12), strings.Repeat("-", 12), strings.Repeat("-", 12), strings.Repeat("-", 10), strings.Repeat("-", 12))
		fmt.Printf("%-45s %12s %12s %12s\n", fmt.Sprintf("Total (%d snapshots)", result.TotalMetadataCount), "", "", humanize.Bytes(uint64(result.TotalMetadataSize)))
	}

	fmt.Printf("\n=== Blob Storage ===\n")
	fmt.Printf("Total blobs on remote: %s (%s)\n",
		humanize.Comma(int64(result.TotalBlobCount)),
		humanize.Bytes(uint64(result.TotalBlobSize)))
	fmt.Printf("Referenced by snapshots: %s (%s)\n",
		humanize.Comma(int64(result.ReferencedBlobCount)),
		humanize.Bytes(uint64(result.ReferencedBlobSize)))
	fmt.Printf("Orphaned (unreferenced): %s (%s)\n",
		humanize.Comma(int64(result.OrphanedBlobCount)),
		humanize.Bytes(uint64(result.OrphanedBlobSize)))

	if result.OrphanedBlobCount > 0 {
		fmt.Printf("\nRun 'vaultik prune --remote' to remove orphaned blobs.\n")
	}

	return nil
}
// truncateString shortens s to at most maxLen bytes. When the string is
// longer than maxLen and there is room for an ellipsis, the removed tail is
// replaced with "..."; otherwise the string is hard-cut at maxLen.
func truncateString(s string, maxLen int) string {
	if len(s) <= maxLen {
		return s
	}
	const ellipsis = "..."
	if maxLen > len(ellipsis) {
		return s[:maxLen-len(ellipsis)] + ellipsis
	}
	return s[:maxLen]
}

View File

@@ -5,6 +5,8 @@ import (
"context"
"database/sql"
"io"
"os"
"path/filepath"
"sync"
"testing"
"time"
@@ -15,6 +17,7 @@ import (
"git.eeqj.de/sneak/vaultik/internal/snapshot"
"git.eeqj.de/sneak/vaultik/internal/storage"
"git.eeqj.de/sneak/vaultik/internal/types"
"git.eeqj.de/sneak/vaultik/internal/vaultik"
"github.com/spf13/afero"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
@@ -403,3 +406,138 @@ func TestBackupAndVerify(t *testing.T) {
t.Logf("Backup and verify test completed successfully")
}
// TestBackupAndRestore tests the full backup and restore workflow
// This test verifies that the restore code correctly handles the binary SQLite
// database format that is exported by the snapshot manager.
//
// Flow: write files to a real temp dir -> create snapshot -> scan (backup)
// into mock storage -> complete snapshot -> export metadata -> restore into
// a fresh directory -> compare restored file contents against the originals.
func TestBackupAndRestore(t *testing.T) {
	// Initialize logger
	log.Initialize(log.Config{})

	// Create real temp directory for the database (SQLite needs real filesystem)
	realTempDir, err := os.MkdirTemp("", "vaultik-test-")
	require.NoError(t, err)
	defer func() { _ = os.RemoveAll(realTempDir) }()

	// Use real OS filesystem for this test
	fs := afero.NewOsFs()

	// Create test directory structure and files
	dataDir := filepath.Join(realTempDir, "data")
	testFiles := map[string]string{
		filepath.Join(dataDir, "file1.txt"):           "This is file 1 content",
		filepath.Join(dataDir, "file2.txt"):           "This is file 2 content with more data",
		filepath.Join(dataDir, "subdir", "file3.txt"): "This is file 3 in a subdirectory",
	}

	// Create directories and files
	for path, content := range testFiles {
		dir := filepath.Dir(path)
		if err := fs.MkdirAll(dir, 0755); err != nil {
			t.Fatalf("failed to create directory %s: %v", dir, err)
		}
		if err := afero.WriteFile(fs, path, []byte(content), 0644); err != nil {
			t.Fatalf("failed to create test file %s: %v", path, err)
		}
	}

	ctx := context.Background()

	// Create mock storage
	mockStorage := NewMockStorer()

	// Test keypair
	agePublicKey := "age1ezrjmfpwsc95svdg0y54mums3zevgzu0x0ecq2f7tp8a05gl0sjq9q9wjg"
	ageSecretKey := "AGE-SECRET-KEY-19CR5YSFW59HM4TLD6GXVEDMZFTVVF7PPHKUT68TXSFPK7APHXA2QS2NJA5"

	// Create database file
	dbPath := filepath.Join(realTempDir, "test.db")
	db, err := database.New(ctx, dbPath)
	require.NoError(t, err)
	defer func() { _ = db.Close() }()

	repos := database.NewRepositories(db)

	// Create config for snapshot manager
	cfg := &config.Config{
		AgeSecretKey:     ageSecretKey,
		AgeRecipients:    []string{agePublicKey},
		CompressionLevel: 3,
	}

	// Create snapshot manager
	sm := snapshot.NewSnapshotManager(snapshot.SnapshotManagerParams{
		Repos:   repos,
		Storage: mockStorage,
		Config:  cfg,
	})
	sm.SetFilesystem(fs)

	// Create scanner
	scanner := snapshot.NewScanner(snapshot.ScannerConfig{
		FS:               fs,
		Storage:          mockStorage,
		ChunkSize:        int64(16 * 1024),
		MaxBlobSize:      int64(100 * 1024),
		CompressionLevel: 3,
		AgeRecipients:    []string{agePublicKey},
		Repositories:     repos,
	})

	// Create a snapshot
	snapshotID, err := sm.CreateSnapshot(ctx, "test-host", "test-version", "test-git")
	require.NoError(t, err)
	t.Logf("Created snapshot: %s", snapshotID)

	// Run the backup (scan)
	result, err := scanner.Scan(ctx, dataDir, snapshotID)
	require.NoError(t, err)
	t.Logf("Scan complete: %d files, %d blobs", result.FilesScanned, result.BlobsCreated)

	// Complete the snapshot
	err = sm.CompleteSnapshot(ctx, snapshotID)
	require.NoError(t, err)

	// Export snapshot metadata (this uploads db.zst.age and manifest.json.zst)
	err = sm.ExportSnapshotMetadata(ctx, dbPath, snapshotID)
	require.NoError(t, err)
	t.Logf("Exported snapshot metadata")

	// Verify metadata was uploaded
	keys, err := mockStorage.List(ctx, "metadata/")
	require.NoError(t, err)
	t.Logf("Metadata keys: %v", keys)
	assert.GreaterOrEqual(t, len(keys), 2, "Should have at least db.zst.age and manifest.json.zst")

	// Close the source database
	// (the deferred Close above will run again at test end; its error is discarded)
	err = db.Close()
	require.NoError(t, err)

	// Create Vaultik instance for restore
	vaultikApp := &vaultik.Vaultik{
		Config:  cfg,
		Storage: mockStorage,
		Fs:      fs,
		Stdout:  io.Discard,
		Stderr:  io.Discard,
	}
	vaultikApp.SetContext(ctx)

	// Try to restore - this should work with binary SQLite format
	restoreDir := filepath.Join(realTempDir, "restored")
	err = vaultikApp.Restore(&vaultik.RestoreOptions{
		SnapshotID: snapshotID,
		TargetDir:  restoreDir,
	})
	require.NoError(t, err, "Restore should succeed with binary SQLite database format")

	// Verify restored files match originals
	for origPath, expectedContent := range testFiles {
		// origPath is absolute; joining re-roots it beneath restoreDir.
		// NOTE(review): assumes restore preserves the full original path
		// layout under the target directory — confirm against Restore.
		restoredPath := filepath.Join(restoreDir, origPath)
		restoredContent, err := afero.ReadFile(fs, restoredPath)
		require.NoError(t, err, "Should be able to read restored file: %s", restoredPath)
		assert.Equal(t, expectedContent, string(restoredContent), "Restored content should match original for: %s", origPath)
	}

	t.Log("Backup and restore test completed successfully")
}

View File

@@ -8,7 +8,6 @@ import (
"fmt"
"io"
"os"
"os/exec"
"path/filepath"
"time"
@@ -173,7 +172,7 @@ func (v *Vaultik) Restore(opts *RestoreOptions) error {
// downloadSnapshotDB downloads and decrypts the snapshot metadata database
func (v *Vaultik) downloadSnapshotDB(snapshotID string, identity age.Identity) (*database.DB, error) {
// Download encrypted database from S3
// Download encrypted database from storage
dbKey := fmt.Sprintf("metadata/%s/db.zst.age", snapshotID)
reader, err := v.Storage.Get(v.ctx, dbKey)
@@ -196,43 +195,30 @@ func (v *Vaultik) downloadSnapshotDB(snapshotID string, identity age.Identity) (
}
defer func() { _ = blobReader.Close() }()
// Read the SQL dump
sqlDump, err := io.ReadAll(blobReader)
// Read the binary SQLite database
dbData, err := io.ReadAll(blobReader)
if err != nil {
return nil, fmt.Errorf("decrypting and decompressing: %w", err)
}
log.Debug("Decrypted database SQL dump", "size", humanize.Bytes(uint64(len(sqlDump))))
log.Debug("Decrypted database", "size", humanize.Bytes(uint64(len(dbData))))
// Create a temporary database file
// Create a temporary database file and write the binary SQLite data directly
tempFile, err := afero.TempFile(v.Fs, "", "vaultik-restore-*.db")
if err != nil {
return nil, fmt.Errorf("creating temp file: %w", err)
}
tempPath := tempFile.Name()
// Write the binary SQLite database directly
if _, err := tempFile.Write(dbData); err != nil {
_ = tempFile.Close()
_ = v.Fs.Remove(tempPath)
return nil, fmt.Errorf("writing database file: %w", err)
}
if err := tempFile.Close(); err != nil {
_ = v.Fs.Remove(tempPath)
return nil, fmt.Errorf("closing temp file: %w", err)
}
// Write SQL to a temp file for sqlite3 to read
sqlTempFile, err := afero.TempFile(v.Fs, "", "vaultik-restore-*.sql")
if err != nil {
return nil, fmt.Errorf("creating SQL temp file: %w", err)
}
sqlTempPath := sqlTempFile.Name()
if _, err := sqlTempFile.Write(sqlDump); err != nil {
_ = sqlTempFile.Close()
return nil, fmt.Errorf("writing SQL dump: %w", err)
}
if err := sqlTempFile.Close(); err != nil {
return nil, fmt.Errorf("closing SQL temp file: %w", err)
}
defer func() { _ = v.Fs.Remove(sqlTempPath) }()
// Execute the SQL dump to create the database
cmd := exec.Command("sqlite3", tempPath, ".read "+sqlTempPath)
if output, err := cmd.CombinedOutput(); err != nil {
return nil, fmt.Errorf("executing SQL dump: %w\nOutput: %s", err, output)
}
log.Debug("Created restore database", "path", tempPath)
// Open the database
@@ -433,13 +419,13 @@ func (v *Vaultik) restoreRegularFile(
blobHashStr := blob.Hash.String()
blobData, ok := blobCache[blobHashStr]
if !ok {
blobData, err = v.downloadBlob(ctx, blobHashStr, identity)
blobData, err = v.downloadBlob(ctx, blobHashStr, blob.CompressedSize, identity)
if err != nil {
return fmt.Errorf("downloading blob %s: %w", blobHashStr[:16], err)
}
blobCache[blobHashStr] = blobData
result.BlobsDownloaded++
result.BytesDownloaded += int64(len(blobData))
result.BytesDownloaded += blob.CompressedSize
}
// Extract chunk from blob
@@ -488,41 +474,12 @@ func (v *Vaultik) restoreRegularFile(
}
// downloadBlob downloads and decrypts a blob
func (v *Vaultik) downloadBlob(ctx context.Context, blobHash string, identity age.Identity) ([]byte, error) {
// Construct blob path with sharding
blobPath := fmt.Sprintf("blobs/%s/%s/%s", blobHash[:2], blobHash[2:4], blobHash)
reader, err := v.Storage.Get(ctx, blobPath)
func (v *Vaultik) downloadBlob(ctx context.Context, blobHash string, expectedSize int64, identity age.Identity) ([]byte, error) {
result, err := v.FetchAndDecryptBlob(ctx, blobHash, expectedSize, identity)
if err != nil {
return nil, fmt.Errorf("downloading blob: %w", err)
return nil, err
}
defer func() { _ = reader.Close() }()
// Read encrypted data
encryptedData, err := io.ReadAll(reader)
if err != nil {
return nil, fmt.Errorf("reading blob data: %w", err)
}
// Decrypt and decompress
blobReader, err := blobgen.NewReader(bytes.NewReader(encryptedData), identity)
if err != nil {
return nil, fmt.Errorf("creating decryption reader: %w", err)
}
defer func() { _ = blobReader.Close() }()
data, err := io.ReadAll(blobReader)
if err != nil {
return nil, fmt.Errorf("decrypting blob: %w", err)
}
log.Debug("Downloaded and decrypted blob",
"hash", blobHash[:16],
"encrypted_size", humanize.Bytes(uint64(len(encryptedData))),
"decrypted_size", humanize.Bytes(uint64(len(data))),
)
return data, nil
return result.Data, nil
}
// verifyRestoredFiles verifies that all restored files match their expected chunk hashes

View File

@@ -327,6 +327,10 @@ func (v *Vaultik) ListSnapshots(jsonOutput bool) error {
// Extract snapshot ID from paths like metadata/hostname-20240115-143052Z/
parts := strings.Split(object.Key, "/")
if len(parts) >= 2 && parts[0] == "metadata" && parts[1] != "" {
// Skip macOS resource fork files (._*) and other hidden files
if strings.HasPrefix(parts[1], ".") {
continue
}
remoteSnapshots[parts[1]] = true
}
}
@@ -425,6 +429,32 @@ func (v *Vaultik) ListSnapshots(jsonOutput bool) error {
// Table output
w := tabwriter.NewWriter(os.Stdout, 0, 0, 3, ' ', 0)
// Show configured snapshots from config file
if _, err := fmt.Fprintln(w, "CONFIGURED SNAPSHOTS:"); err != nil {
return err
}
if _, err := fmt.Fprintln(w, "NAME\tPATHS"); err != nil {
return err
}
if _, err := fmt.Fprintln(w, "────\t─────"); err != nil {
return err
}
for _, name := range v.Config.SnapshotNames() {
snap := v.Config.Snapshots[name]
paths := strings.Join(snap.Paths, ", ")
if _, err := fmt.Fprintf(w, "%s\t%s\n", name, paths); err != nil {
return err
}
}
if _, err := fmt.Fprintln(w); err != nil {
return err
}
// Show remote snapshots
if _, err := fmt.Fprintln(w, "REMOTE SNAPSHOTS:"); err != nil {
return err
}
if _, err := fmt.Fprintln(w, "SNAPSHOT ID\tTIMESTAMP\tCOMPRESSED SIZE"); err != nil {
return err
}
@@ -527,11 +557,15 @@ func (v *Vaultik) PurgeSnapshots(keepLatest bool, olderThan string, force bool)
fmt.Printf("\nDeleting %d snapshot(s) (--force specified)\n", len(toDelete))
}
// Delete snapshots
// Delete snapshots (both local and remote)
for _, snap := range toDelete {
log.Info("Deleting snapshot", "id", snap.ID)
if err := v.deleteSnapshot(snap.ID.String()); err != nil {
return fmt.Errorf("deleting snapshot %s: %w", snap.ID, err)
snapshotID := snap.ID.String()
log.Info("Deleting snapshot", "id", snapshotID)
if err := v.deleteSnapshotFromLocalDB(snapshotID); err != nil {
log.Error("Failed to delete from local database", "snapshot_id", snapshotID, "error", err)
}
if err := v.deleteSnapshotFromRemote(snapshotID); err != nil {
return fmt.Errorf("deleting snapshot %s from remote: %w", snapshotID, err)
}
}
@@ -722,49 +756,6 @@ func (v *Vaultik) downloadManifest(snapshotID string) (*snapshot.Manifest, error
return manifest, nil
}
// deleteSnapshot removes a snapshot's metadata from remote storage and, when
// a local database is available, removes it from the local database as well.
//
// Remote removal collects all object keys under metadata/<snapshotID>/ first
// and deletes them afterwards (presumably to avoid mutating storage while the
// listing stream is still being consumed). Local removal deletes dependent
// rows (files, blobs, uploads) before the snapshot row; failures on the
// dependent rows are logged but do not abort, while a failure deleting the
// snapshot row itself is returned.
func (v *Vaultik) deleteSnapshot(snapshotID string) error {
	// First, delete from storage
	// List all objects under metadata/{snapshotID}/
	prefix := fmt.Sprintf("metadata/%s/", snapshotID)
	objectCh := v.Storage.ListStream(v.ctx, prefix)

	var objectsToDelete []string
	for object := range objectCh {
		if object.Err != nil {
			return fmt.Errorf("listing objects: %w", object.Err)
		}
		objectsToDelete = append(objectsToDelete, object.Key)
	}

	// Delete all objects
	for _, key := range objectsToDelete {
		if err := v.Storage.Delete(v.ctx, key); err != nil {
			return fmt.Errorf("removing %s: %w", key, err)
		}
	}

	// Then, delete from local database (if we have a local database)
	if v.Repositories != nil {
		// Delete related records first to avoid foreign key constraints
		if err := v.Repositories.Snapshots.DeleteSnapshotFiles(v.ctx, snapshotID); err != nil {
			log.Error("Failed to delete snapshot files", "snapshot_id", snapshotID, "error", err)
		}
		if err := v.Repositories.Snapshots.DeleteSnapshotBlobs(v.ctx, snapshotID); err != nil {
			log.Error("Failed to delete snapshot blobs", "snapshot_id", snapshotID, "error", err)
		}
		if err := v.Repositories.Snapshots.DeleteSnapshotUploads(v.ctx, snapshotID); err != nil {
			log.Error("Failed to delete snapshot uploads", "snapshot_id", snapshotID, "error", err)
		}
		// Now delete the snapshot itself
		if err := v.Repositories.Snapshots.Delete(v.ctx, snapshotID); err != nil {
			return fmt.Errorf("deleting snapshot from database: %w", err)
		}
	}

	return nil
}
func (v *Vaultik) syncWithRemote() error {
log.Info("Syncing with remote snapshots")
@@ -780,6 +771,10 @@ func (v *Vaultik) syncWithRemote() error {
// Extract snapshot ID from paths like metadata/hostname-20240115-143052Z/
parts := strings.Split(object.Key, "/")
if len(parts) >= 2 && parts[0] == "metadata" && parts[1] != "" {
// Skip macOS resource fork files (._*) and other hidden files
if strings.HasPrefix(parts[1], ".") {
continue
}
remoteSnapshots[parts[1]] = true
}
}
@@ -818,137 +813,47 @@ type RemoveOptions struct {
Force bool
DryRun bool
JSON bool
Remote bool // Also remove metadata from remote storage
All bool // Remove all snapshots (requires Force)
}
// RemoveResult contains the result of a snapshot removal
type RemoveResult struct {
SnapshotID string `json:"snapshot_id"`
BlobsDeleted int `json:"blobs_deleted"`
BytesFreed int64 `json:"bytes_freed"`
BlobsFailed int `json:"blobs_failed,omitempty"`
DryRun bool `json:"dry_run,omitempty"`
SnapshotID string `json:"snapshot_id,omitempty"`
SnapshotsRemoved []string `json:"snapshots_removed,omitempty"`
RemoteRemoved bool `json:"remote_removed,omitempty"`
DryRun bool `json:"dry_run,omitempty"`
}
// RemoveSnapshot removes a snapshot and any blobs that become orphaned
// RemoveSnapshot removes a snapshot from the local database and optionally from remote storage
// Note: This does NOT remove blobs. Use 'vaultik prune' to remove orphaned blobs.
func (v *Vaultik) RemoveSnapshot(snapshotID string, opts *RemoveOptions) (*RemoveResult, error) {
log.Info("Starting snapshot removal", "snapshot_id", snapshotID)
result := &RemoveResult{
SnapshotID: snapshotID,
}
// Step 1: List all snapshots in storage
log.Info("Listing remote snapshots")
objectCh := v.Storage.ListStream(v.ctx, "metadata/")
var allSnapshotIDs []string
targetExists := false
for object := range objectCh {
if object.Err != nil {
return nil, fmt.Errorf("listing remote snapshots: %w", object.Err)
}
// Extract snapshot ID from paths like metadata/hostname-20240115-143052Z/
parts := strings.Split(object.Key, "/")
if len(parts) >= 2 && parts[0] == "metadata" && parts[1] != "" {
if strings.HasSuffix(object.Key, "/") || strings.Contains(object.Key, "/manifest.json.zst") {
sid := parts[1]
// Only add unique snapshot IDs
found := false
for _, id := range allSnapshotIDs {
if id == sid {
found = true
break
}
}
if !found {
allSnapshotIDs = append(allSnapshotIDs, sid)
if sid == snapshotID {
targetExists = true
}
}
}
}
}
if !targetExists {
return nil, fmt.Errorf("snapshot not found: %s", snapshotID)
}
log.Info("Found snapshots", "total", len(allSnapshotIDs))
// Step 2: Download target snapshot's manifest
log.Info("Downloading target manifest", "snapshot_id", snapshotID)
targetManifest, err := v.downloadManifest(snapshotID)
if err != nil {
return nil, fmt.Errorf("downloading target manifest: %w", err)
}
// Build set of target blob hashes with sizes
targetBlobs := make(map[string]int64) // hash -> size
for _, blob := range targetManifest.Blobs {
targetBlobs[blob.Hash] = blob.CompressedSize
}
log.Info("Target snapshot has blobs", "count", len(targetBlobs))
// Step 3: Download manifests from all OTHER snapshots to build "in-use" set
inUseBlobs := make(map[string]bool)
otherCount := 0
for _, sid := range allSnapshotIDs {
if sid == snapshotID {
continue // Skip target snapshot
}
log.Debug("Processing manifest", "snapshot_id", sid)
manifest, err := v.downloadManifest(sid)
if err != nil {
log.Error("Failed to download manifest", "snapshot_id", sid, "error", err)
continue
}
for _, blob := range manifest.Blobs {
inUseBlobs[blob.Hash] = true
}
otherCount++
}
log.Info("Processed other manifests", "count", otherCount, "in_use_blobs", len(inUseBlobs))
// Step 4: Find orphaned blobs (in target but not in use by others)
var orphanedBlobs []string
var totalSize int64
for hash, size := range targetBlobs {
if !inUseBlobs[hash] {
orphanedBlobs = append(orphanedBlobs, hash)
totalSize += size
}
}
log.Info("Found orphaned blobs",
"count", len(orphanedBlobs),
"total_size", humanize.Bytes(uint64(totalSize)),
)
// Show summary (unless JSON mode)
if !opts.JSON {
_, _ = fmt.Fprintf(v.Stdout, "\nSnapshot: %s\n", snapshotID)
_, _ = fmt.Fprintf(v.Stdout, "Blobs in snapshot: %d\n", len(targetBlobs))
_, _ = fmt.Fprintf(v.Stdout, "Orphaned blobs to delete: %d (%s)\n", len(orphanedBlobs), humanize.Bytes(uint64(totalSize)))
}
if opts.DryRun {
result.DryRun = true
if !opts.JSON {
_, _ = fmt.Fprintf(v.Stdout, "Would remove snapshot: %s\n", snapshotID)
if opts.Remote {
_, _ = fmt.Fprintln(v.Stdout, "Would also remove from remote storage")
}
_, _ = fmt.Fprintln(v.Stdout, "[Dry run - no changes made]")
}
if opts.JSON {
return result, v.outputRemoveJSON(result)
}
_, _ = fmt.Fprintln(v.Stdout, "\n[Dry run - no changes made]")
return result, nil
}
// Confirm unless --force is used (skip in JSON mode - require --force)
if !opts.Force && !opts.JSON {
_, _ = fmt.Fprintf(v.Stdout, "\nDelete snapshot and %d orphaned blob(s)? [y/N] ", len(orphanedBlobs))
if opts.Remote {
_, _ = fmt.Fprintf(v.Stdout, "Remove snapshot '%s' from local database and remote storage? [y/N] ", snapshotID)
} else {
_, _ = fmt.Fprintf(v.Stdout, "Remove snapshot '%s' from local database? [y/N] ", snapshotID)
}
var confirm string
if _, err := fmt.Fscanln(v.Stdin, &confirm); err != nil {
_, _ = fmt.Fprintln(v.Stdout, "Cancelled")
@@ -960,36 +865,20 @@ func (v *Vaultik) RemoveSnapshot(snapshotID string, opts *RemoveOptions) (*Remov
}
}
// Step 5: Delete orphaned blobs
if len(orphanedBlobs) > 0 {
log.Info("Deleting orphaned blobs")
for i, hash := range orphanedBlobs {
blobPath := fmt.Sprintf("blobs/%s/%s/%s", hash[:2], hash[2:4], hash)
log.Info("Removing snapshot from local database", "snapshot_id", snapshotID)
if err := v.Storage.Delete(v.ctx, blobPath); err != nil {
log.Error("Failed to delete blob", "hash", hash, "error", err)
result.BlobsFailed++
continue
}
result.BlobsDeleted++
result.BytesFreed += targetBlobs[hash]
// Progress update every 100 blobs
if (i+1)%100 == 0 || i == len(orphanedBlobs)-1 {
log.Info("Deletion progress",
"deleted", i+1,
"total", len(orphanedBlobs),
"percent", fmt.Sprintf("%.1f%%", float64(i+1)/float64(len(orphanedBlobs))*100),
)
}
}
// Remove from local database
if err := v.deleteSnapshotFromLocalDB(snapshotID); err != nil {
return result, fmt.Errorf("removing from local database: %w", err)
}
// Step 6: Delete snapshot metadata
log.Info("Deleting snapshot metadata")
if err := v.deleteSnapshot(snapshotID); err != nil {
return result, fmt.Errorf("deleting snapshot metadata: %w", err)
// If --remote, also remove from remote storage
if opts.Remote {
log.Info("Removing snapshot metadata from remote storage", "snapshot_id", snapshotID)
if err := v.deleteSnapshotFromRemote(snapshotID); err != nil {
return result, fmt.Errorf("removing from remote storage: %w", err)
}
result.RemoteRemoved = true
}
// Output result
@@ -998,16 +887,165 @@ func (v *Vaultik) RemoveSnapshot(snapshotID string, opts *RemoveOptions) (*Remov
}
// Print summary
_, _ = fmt.Fprintf(v.Stdout, "\nRemoved snapshot %s\n", snapshotID)
_, _ = fmt.Fprintf(v.Stdout, " Blobs deleted: %d\n", result.BlobsDeleted)
_, _ = fmt.Fprintf(v.Stdout, " Storage freed: %s\n", humanize.Bytes(uint64(result.BytesFreed)))
if result.BlobsFailed > 0 {
_, _ = fmt.Fprintf(v.Stdout, " Blobs failed: %d\n", result.BlobsFailed)
_, _ = fmt.Fprintf(v.Stdout, "Removed snapshot '%s' from local database\n", snapshotID)
if opts.Remote {
_, _ = fmt.Fprintln(v.Stdout, "Removed snapshot metadata from remote storage")
_, _ = fmt.Fprintln(v.Stdout, "\nNote: Blobs were not removed. Run 'vaultik prune' to remove orphaned blobs.")
}
return result, nil
}
// RemoveAllSnapshots removes all snapshots from local database and optionally from remote
// storage (when opts.Remote is set). Blobs are never touched here; orphaned
// blobs are left for 'vaultik prune'. A dry run (opts.DryRun) only reports
// what would be removed; an actual removal requires opts.Force.
//
// Improvement over the previous version: snapshot-ID deduplication now uses
// a map-backed set instead of a linear scan per object (O(n) vs O(n²)).
func (v *Vaultik) RemoveAllSnapshots(opts *RemoveOptions) (*RemoveResult, error) {
	result := &RemoveResult{}

	// List all snapshots
	log.Info("Listing all snapshots")
	objectCh := v.Storage.ListStream(v.ctx, "metadata/")

	seen := make(map[string]bool) // snapshot IDs already recorded
	var snapshotIDs []string
	for object := range objectCh {
		if object.Err != nil {
			return nil, fmt.Errorf("listing remote snapshots: %w", object.Err)
		}

		// Keys look like metadata/<snapshot-id>/...
		parts := strings.Split(object.Key, "/")
		if len(parts) < 2 || parts[0] != "metadata" || parts[1] == "" {
			continue
		}
		// Skip macOS resource fork files (._*) and other hidden files
		if strings.HasPrefix(parts[1], ".") {
			continue
		}
		// A snapshot is evidenced by its directory marker or its manifest.
		if !strings.HasSuffix(object.Key, "/") && !strings.Contains(object.Key, "/manifest.json.zst") {
			continue
		}
		if sid := parts[1]; !seen[sid] {
			seen[sid] = true
			snapshotIDs = append(snapshotIDs, sid)
		}
	}

	if len(snapshotIDs) == 0 {
		if !opts.JSON {
			_, _ = fmt.Fprintln(v.Stdout, "No snapshots found")
		}
		return result, nil
	}

	// Dry run: report what would happen, change nothing. This is checked
	// before the --force requirement so a dry run never needs --force.
	if opts.DryRun {
		result.DryRun = true
		result.SnapshotsRemoved = snapshotIDs
		if !opts.JSON {
			_, _ = fmt.Fprintf(v.Stdout, "Would remove %d snapshot(s):\n", len(snapshotIDs))
			for _, id := range snapshotIDs {
				_, _ = fmt.Fprintf(v.Stdout, " %s\n", id)
			}
			if opts.Remote {
				_, _ = fmt.Fprintln(v.Stdout, "Would also remove from remote storage")
			}
			_, _ = fmt.Fprintln(v.Stdout, "[Dry run - no changes made]")
		}
		if opts.JSON {
			return result, v.outputRemoveJSON(result)
		}
		return result, nil
	}

	// --all requires --force
	if !opts.Force {
		return nil, fmt.Errorf("--all requires --force")
	}

	log.Info("Removing all snapshots", "count", len(snapshotIDs))
	for _, snapshotID := range snapshotIDs {
		log.Info("Removing snapshot", "snapshot_id", snapshotID)
		// Failures are logged and the snapshot is skipped; the loop
		// continues with the remaining snapshots.
		if err := v.deleteSnapshotFromLocalDB(snapshotID); err != nil {
			log.Error("Failed to remove from local database", "snapshot_id", snapshotID, "error", err)
			continue
		}
		if opts.Remote {
			if err := v.deleteSnapshotFromRemote(snapshotID); err != nil {
				log.Error("Failed to remove from remote", "snapshot_id", snapshotID, "error", err)
				continue
			}
		}
		result.SnapshotsRemoved = append(result.SnapshotsRemoved, snapshotID)
	}

	if opts.Remote {
		result.RemoteRemoved = true
	}

	if opts.JSON {
		return result, v.outputRemoveJSON(result)
	}

	_, _ = fmt.Fprintf(v.Stdout, "Removed %d snapshot(s)\n", len(result.SnapshotsRemoved))
	if opts.Remote {
		_, _ = fmt.Fprintln(v.Stdout, "Removed snapshot metadata from remote storage")
		_, _ = fmt.Fprintln(v.Stdout, "\nNote: Blobs were not removed. Run 'vaultik prune' to remove orphaned blobs.")
	}
	return result, nil
}
// deleteSnapshotFromLocalDB removes a snapshot from the local database only.
// Dependent rows (files, blobs, uploads) are deleted best-effort first to
// avoid foreign key constraint violations; errors there are logged and do
// not abort the removal. A failure to delete the snapshot row itself IS
// returned — previously it was only logged, so the error return was always
// nil and callers (e.g. RemoveSnapshot, which treats it as fatal) could
// never observe the failure. A nil Repositories (no local database) is a
// no-op.
func (v *Vaultik) deleteSnapshotFromLocalDB(snapshotID string) error {
	if v.Repositories == nil {
		return nil // No local database
	}

	// Delete related records first to avoid foreign key constraints
	if err := v.Repositories.Snapshots.DeleteSnapshotFiles(v.ctx, snapshotID); err != nil {
		log.Error("Failed to delete snapshot files", "snapshot_id", snapshotID, "error", err)
	}
	if err := v.Repositories.Snapshots.DeleteSnapshotBlobs(v.ctx, snapshotID); err != nil {
		log.Error("Failed to delete snapshot blobs", "snapshot_id", snapshotID, "error", err)
	}
	if err := v.Repositories.Snapshots.DeleteSnapshotUploads(v.ctx, snapshotID); err != nil {
		log.Error("Failed to delete snapshot uploads", "snapshot_id", snapshotID, "error", err)
	}

	// Now delete the snapshot row itself; this failure is significant and
	// must reach the caller.
	if err := v.Repositories.Snapshots.Delete(v.ctx, snapshotID); err != nil {
		return fmt.Errorf("deleting snapshot from database: %w", err)
	}
	return nil
}
// deleteSnapshotFromRemote removes snapshot metadata files from remote storage.
// It first collects every object key under the snapshot's metadata prefix,
// then deletes each one; the first listing or deletion error aborts.
func (v *Vaultik) deleteSnapshotFromRemote(snapshotID string) error {
	prefix := fmt.Sprintf("metadata/%s/", snapshotID)

	// Drain the listing stream into a slice of keys before deleting, so a
	// listing error is surfaced before any destructive operation.
	var keys []string
	for obj := range v.Storage.ListStream(v.ctx, prefix) {
		if obj.Err != nil {
			return fmt.Errorf("listing objects: %w", obj.Err)
		}
		keys = append(keys, obj.Key)
	}

	for _, key := range keys {
		if err := v.Storage.Delete(v.ctx, key); err != nil {
			return fmt.Errorf("removing %s: %w", key, err)
		}
		log.Debug("Deleted remote object", "key", key)
	}
	return nil
}
// outputRemoveJSON outputs the removal result as JSON
func (v *Vaultik) outputRemoveJSON(result *RemoveResult) error {
encoder := json.NewEncoder(os.Stdout)
@@ -1027,7 +1065,7 @@ type PruneResult struct {
// and blobs from the local database. This ensures database consistency
// before starting a new backup or on-demand via the prune command.
func (v *Vaultik) PruneDatabase() (*PruneResult, error) {
log.Info("Pruning database: removing incomplete snapshots and orphaned data")
log.Info("Pruning local database: removing incomplete snapshots and orphaned data")
result := &PruneResult{}
@@ -1076,7 +1114,7 @@ func (v *Vaultik) PruneDatabase() (*PruneResult, error) {
result.ChunksDeleted = chunkCountBefore - chunkCountAfter
result.BlobsDeleted = blobCountBefore - blobCountAfter
log.Info("Prune complete",
log.Info("Local database prune complete",
"incomplete_snapshots", result.SnapshotsDeleted,
"orphaned_files", result.FilesDeleted,
"orphaned_chunks", result.ChunksDeleted,
@@ -1084,7 +1122,7 @@ func (v *Vaultik) PruneDatabase() (*PruneResult, error) {
)
// Print summary
_, _ = fmt.Fprintf(v.Stdout, "Prune complete:\n")
_, _ = fmt.Fprintf(v.Stdout, "Local database prune complete:\n")
_, _ = fmt.Fprintf(v.Stdout, " Incomplete snapshots removed: %d\n", result.SnapshotsDeleted)
_, _ = fmt.Fprintf(v.Stdout, " Orphaned files removed: %d\n", result.FilesDeleted)
_, _ = fmt.Fprintf(v.Stdout, " Orphaned chunks removed: %d\n", result.ChunksDeleted)

View File

@@ -91,6 +91,11 @@ func (v *Vaultik) Context() context.Context {
return v.ctx
}
// SetContext sets the Vaultik's context (primarily for testing).
// NOTE(review): this replaces v.ctx without updating v.cancel, so the
// previously stored cancel function no longer governs the new context —
// confirm callers only use this in tests.
func (v *Vaultik) SetContext(ctx context.Context) {
v.ctx = ctx
}
// Cancel cancels the Vaultik's context
func (v *Vaultik) Cancel() {
v.cancel()
@@ -124,6 +129,12 @@ func (v *Vaultik) GetFilesystem() afero.Fs {
return v.Fs
}
// Outputf writes formatted output to stdout for user-facing messages.
// This should be used for all non-log user output. The write error is
// deliberately discarded: there is no sensible recovery for a failed
// write to the user's output stream.
func (v *Vaultik) Outputf(format string, args ...any) {
_, _ = fmt.Fprintf(v.Stdout, format, args...)
}
// TestVaultik wraps a Vaultik with captured stdout/stderr for testing
type TestVaultik struct {
*Vaultik

View File

@@ -7,6 +7,7 @@ import (
"fmt"
"io"
"os"
"time"
"git.eeqj.de/sneak/vaultik/internal/log"
"git.eeqj.de/sneak/vaultik/internal/snapshot"
@@ -36,22 +37,44 @@ type VerifyResult struct {
// RunDeepVerify executes deep verification operation
func (v *Vaultik) RunDeepVerify(snapshotID string, opts *VerifyOptions) error {
result := &VerifyResult{
SnapshotID: snapshotID,
Mode: "deep",
}
// Check for decryption capability
if !v.CanDecrypt() {
return fmt.Errorf("age_secret_key missing from config - required for deep verification")
result.Status = "failed"
result.ErrorMessage = "VAULTIK_AGE_SECRET_KEY environment variable not set - required for deep verification"
if opts.JSON {
return v.outputVerifyJSON(result)
}
return fmt.Errorf("VAULTIK_AGE_SECRET_KEY environment variable not set - required for deep verification")
}
log.Info("Starting snapshot verification",
"snapshot_id", snapshotID,
"mode", map[bool]string{true: "deep", false: "shallow"}[opts.Deep],
"mode", "deep",
)
if !opts.JSON {
v.Outputf("Deep verification of snapshot: %s\n\n", snapshotID)
}
// Step 1: Download manifest
manifestPath := fmt.Sprintf("metadata/%s/manifest.json.zst", snapshotID)
log.Info("Downloading manifest", "path", manifestPath)
if !opts.JSON {
v.Outputf("Downloading manifest...\n")
}
manifestReader, err := v.Storage.Get(v.ctx, manifestPath)
if err != nil {
result.Status = "failed"
result.ErrorMessage = fmt.Sprintf("failed to download manifest: %v", err)
if opts.JSON {
return v.outputVerifyJSON(result)
}
return fmt.Errorf("failed to download manifest: %w", err)
}
defer func() { _ = manifestReader.Close() }()
@@ -59,20 +82,36 @@ func (v *Vaultik) RunDeepVerify(snapshotID string, opts *VerifyOptions) error {
// Decompress manifest
manifest, err := snapshot.DecodeManifest(manifestReader)
if err != nil {
result.Status = "failed"
result.ErrorMessage = fmt.Sprintf("failed to decode manifest: %v", err)
if opts.JSON {
return v.outputVerifyJSON(result)
}
return fmt.Errorf("failed to decode manifest: %w", err)
}
log.Info("Manifest loaded",
"blob_count", manifest.BlobCount,
"total_size", humanize.Bytes(uint64(manifest.TotalCompressedSize)),
"manifest_blob_count", manifest.BlobCount,
"manifest_total_size", humanize.Bytes(uint64(manifest.TotalCompressedSize)),
)
if !opts.JSON {
v.Outputf("Manifest loaded: %d blobs (%s)\n", manifest.BlobCount, humanize.Bytes(uint64(manifest.TotalCompressedSize)))
}
// Step 2: Download and decrypt database
// Step 2: Download and decrypt database (authoritative source)
dbPath := fmt.Sprintf("metadata/%s/db.zst.age", snapshotID)
log.Info("Downloading encrypted database", "path", dbPath)
if !opts.JSON {
v.Outputf("Downloading and decrypting database...\n")
}
dbReader, err := v.Storage.Get(v.ctx, dbPath)
if err != nil {
result.Status = "failed"
result.ErrorMessage = fmt.Sprintf("failed to download database: %v", err)
if opts.JSON {
return v.outputVerifyJSON(result)
}
return fmt.Errorf("failed to download database: %w", err)
}
defer func() { _ = dbReader.Close() }()
@@ -80,6 +119,11 @@ func (v *Vaultik) RunDeepVerify(snapshotID string, opts *VerifyOptions) error {
// Decrypt and decompress database
tempDB, err := v.decryptAndLoadDatabase(dbReader, v.Config.AgeSecretKey)
if err != nil {
result.Status = "failed"
result.ErrorMessage = fmt.Sprintf("failed to decrypt database: %v", err)
if opts.JSON {
return v.outputVerifyJSON(result)
}
return fmt.Errorf("failed to decrypt database: %w", err)
}
defer func() {
@@ -88,28 +132,90 @@ func (v *Vaultik) RunDeepVerify(snapshotID string, opts *VerifyOptions) error {
}
}()
// Step 3: Compare blob lists
if err := v.verifyBlobLists(snapshotID, manifest, tempDB.DB); err != nil {
return err
}
// Step 4: Verify blob existence
if err := v.verifyBlobExistence(manifest); err != nil {
return err
}
// Step 5: Deep verification if requested
if opts.Deep {
if err := v.performDeepVerification(manifest, tempDB.DB); err != nil {
return err
// Step 3: Get authoritative blob list from database
dbBlobs, err := v.getBlobsFromDatabase(snapshotID, tempDB.DB)
if err != nil {
result.Status = "failed"
result.ErrorMessage = fmt.Sprintf("failed to get blobs from database: %v", err)
if opts.JSON {
return v.outputVerifyJSON(result)
}
return fmt.Errorf("failed to get blobs from database: %w", err)
}
result.BlobCount = len(dbBlobs)
var totalSize int64
for _, blob := range dbBlobs {
totalSize += blob.CompressedSize
}
result.TotalSize = totalSize
log.Info("Database loaded",
"db_blob_count", len(dbBlobs),
"db_total_size", humanize.Bytes(uint64(totalSize)),
)
if !opts.JSON {
v.Outputf("Database loaded: %d blobs (%s)\n", len(dbBlobs), humanize.Bytes(uint64(totalSize)))
v.Outputf("Verifying manifest against database...\n")
}
// Step 4: Verify manifest matches database
if err := v.verifyManifestAgainstDatabase(manifest, dbBlobs); err != nil {
result.Status = "failed"
result.ErrorMessage = err.Error()
if opts.JSON {
return v.outputVerifyJSON(result)
}
return err
}
// Step 5: Verify all blobs exist in S3 (using database as source)
if !opts.JSON {
v.Outputf("Manifest verified.\n")
v.Outputf("Checking blob existence in remote storage...\n")
}
if err := v.verifyBlobExistenceFromDB(dbBlobs); err != nil {
result.Status = "failed"
result.ErrorMessage = err.Error()
if opts.JSON {
return v.outputVerifyJSON(result)
}
return err
}
// Step 6: Deep verification - download and verify blob contents
if !opts.JSON {
v.Outputf("All blobs exist.\n")
v.Outputf("Downloading and verifying blob contents (%d blobs, %s)...\n", len(dbBlobs), humanize.Bytes(uint64(totalSize)))
}
if err := v.performDeepVerificationFromDB(dbBlobs, tempDB.DB, opts); err != nil {
result.Status = "failed"
result.ErrorMessage = err.Error()
if opts.JSON {
return v.outputVerifyJSON(result)
}
return err
}
// Success
result.Status = "ok"
result.Verified = len(dbBlobs)
if opts.JSON {
return v.outputVerifyJSON(result)
}
log.Info("✓ Verification completed successfully",
"snapshot_id", snapshotID,
"mode", map[bool]string{true: "deep", false: "shallow"}[opts.Deep],
"mode", "deep",
"blobs_verified", len(dbBlobs),
)
v.Outputf("\n✓ Verification completed successfully\n")
v.Outputf(" Snapshot: %s\n", snapshotID)
v.Outputf(" Blobs verified: %d\n", len(dbBlobs))
v.Outputf(" Total size: %s\n", humanize.Bytes(uint64(totalSize)))
return nil
}
@@ -125,7 +231,7 @@ func (t *tempDB) Close() error {
return err
}
// decryptAndLoadDatabase decrypts and loads the database from the encrypted stream
// decryptAndLoadDatabase decrypts and loads the binary SQLite database from the encrypted stream
func (v *Vaultik) decryptAndLoadDatabase(reader io.ReadCloser, secretKey string) (*tempDB, error) {
// Get decryptor
decryptor, err := v.GetDecryptor()
@@ -139,32 +245,31 @@ func (v *Vaultik) decryptAndLoadDatabase(reader io.ReadCloser, secretKey string)
return nil, fmt.Errorf("failed to decrypt database: %w", err)
}
// Decompress the database
// Decompress the binary database
decompressor, err := zstd.NewReader(decryptedReader)
if err != nil {
return nil, fmt.Errorf("failed to create decompressor: %w", err)
}
defer decompressor.Close()
// Create temporary file for database
// Create temporary file for the database
tempFile, err := os.CreateTemp("", "vaultik-verify-*.db")
if err != nil {
return nil, fmt.Errorf("failed to create temp file: %w", err)
}
tempPath := tempFile.Name()
// Copy decompressed data to temp file
if _, err := io.Copy(tempFile, decompressor); err != nil {
// Stream decompress directly to file
log.Info("Decompressing database...")
written, err := io.Copy(tempFile, decompressor)
if err != nil {
_ = tempFile.Close()
_ = os.Remove(tempPath)
return nil, fmt.Errorf("failed to write database: %w", err)
return nil, fmt.Errorf("failed to decompress database: %w", err)
}
_ = tempFile.Close()
// Close temp file before opening with sqlite
if err := tempFile.Close(); err != nil {
_ = os.Remove(tempPath)
return nil, fmt.Errorf("failed to close temp file: %w", err)
}
log.Info("Database decompressed", "size", humanize.Bytes(uint64(written)))
// Open the database
db, err := sql.Open("sqlite3", tempPath)
@@ -179,137 +284,10 @@ func (v *Vaultik) decryptAndLoadDatabase(reader io.ReadCloser, secretKey string)
}, nil
}
// verifyBlobLists compares the blob lists between manifest and database.
// Both sides must contain exactly the same blob hashes with identical
// compressed sizes; any discrepancy is returned as an error.
func (v *Vaultik) verifyBlobLists(snapshotID string, manifest *snapshot.Manifest, db *sql.DB) error {
	log.Info("Verifying blob lists match between manifest and database")

	// Load the snapshot's blob hashes and sizes from the local database.
	query := `
	SELECT b.blob_hash, b.compressed_size
	FROM snapshot_blobs sb
	JOIN blobs b ON sb.blob_hash = b.blob_hash
	WHERE sb.snapshot_id = ?
	ORDER BY b.blob_hash
	`
	rows, err := db.QueryContext(v.ctx, query, snapshotID)
	if err != nil {
		return fmt.Errorf("failed to query snapshot blobs: %w", err)
	}
	defer func() { _ = rows.Close() }()

	// hash -> compressed size, as recorded in the database
	fromDB := make(map[string]int64)
	for rows.Next() {
		var (
			hash string
			size int64
		)
		if err := rows.Scan(&hash, &size); err != nil {
			return fmt.Errorf("failed to scan blob row: %w", err)
		}
		fromDB[hash] = size
	}

	// hash -> compressed size, as recorded in the manifest
	fromManifest := make(map[string]int64, len(manifest.Blobs))
	for _, b := range manifest.Blobs {
		fromManifest[b.Hash] = b.CompressedSize
	}

	if len(fromDB) != len(fromManifest) {
		return fmt.Errorf("blob count mismatch: database has %d blobs, manifest has %d blobs",
			len(fromDB), len(fromManifest))
	}

	// Every database blob must appear in the manifest with the same size...
	for hash, dbSize := range fromDB {
		manifestSize, ok := fromManifest[hash]
		if !ok {
			return fmt.Errorf("blob %s exists in database but not in manifest", hash)
		}
		if dbSize != manifestSize {
			return fmt.Errorf("blob %s size mismatch: database has %d bytes, manifest has %d bytes",
				hash, dbSize, manifestSize)
		}
	}
	// ...and vice versa.
	for hash := range fromManifest {
		if _, ok := fromDB[hash]; !ok {
			return fmt.Errorf("blob %s exists in manifest but not in database", hash)
		}
	}

	log.Info("✓ Blob lists match", "blob_count", len(fromDB))
	return nil
}
// verifyBlobExistence checks that all blobs exist in S3.
// Each blob from the manifest is Stat'ed at its sharded path and its
// remote size compared against the manifest's compressed size.
func (v *Vaultik) verifyBlobExistence(manifest *snapshot.Manifest) error {
	total := len(manifest.Blobs)
	log.Info("Verifying blob existence in S3", "blob_count", total)

	for i, blob := range manifest.Blobs {
		// Blobs are sharded by the first two hex byte pairs of their hash.
		blobPath := fmt.Sprintf("blobs/%s/%s/%s", blob.Hash[:2], blob.Hash[2:4], blob.Hash)

		stat, err := v.Storage.Stat(v.ctx, blobPath)
		if err != nil {
			return fmt.Errorf("blob %s missing from storage: %w", blob.Hash, err)
		}
		if stat.Size != blob.CompressedSize {
			return fmt.Errorf("blob %s size mismatch: S3 has %d bytes, manifest has %d bytes",
				blob.Hash, stat.Size, blob.CompressedSize)
		}

		// Emit progress every 100 blobs and on the final blob.
		if (i+1)%100 == 0 || i == total-1 {
			log.Info("Blob existence check progress",
				"checked", i+1,
				"total", total,
				"percent", fmt.Sprintf("%.1f%%", float64(i+1)/float64(total)*100),
			)
		}
	}

	log.Info("✓ All blobs exist in storage")
	return nil
}
// performDeepVerification downloads and verifies the content of each blob.
// It delegates per-blob work to verifyBlob and logs running progress with
// cumulative downloaded bytes.
func (v *Vaultik) performDeepVerification(manifest *snapshot.Manifest, db *sql.DB) error {
	log.Info("Starting deep verification - downloading and verifying all blobs")

	count := len(manifest.Blobs)
	var downloaded int64
	for i, info := range manifest.Blobs {
		if err := v.verifyBlob(info, db); err != nil {
			return fmt.Errorf("blob %s verification failed: %w", info.Hash, err)
		}
		downloaded += info.CompressedSize

		log.Info("Deep verification progress",
			"blob", fmt.Sprintf("%d/%d", i+1, count),
			"total_downloaded", humanize.Bytes(uint64(downloaded)),
			"percent", fmt.Sprintf("%.1f%%", float64(i+1)/float64(count)*100),
		)
	}

	log.Info("✓ Deep verification completed successfully",
		"blobs_verified", count,
		"total_size", humanize.Bytes(uint64(downloaded)),
	)
	return nil
}
// verifyBlob downloads and verifies a single blob
func (v *Vaultik) verifyBlob(blobInfo snapshot.BlobInfo, db *sql.DB) error {
// Download blob
blobPath := fmt.Sprintf("blobs/%s/%s/%s", blobInfo.Hash[:2], blobInfo.Hash[2:4], blobInfo.Hash)
reader, err := v.Storage.Get(v.ctx, blobPath)
// Download blob using shared fetch method
reader, _, err := v.FetchBlob(v.ctx, blobInfo.Hash, blobInfo.CompressedSize)
if err != nil {
return fmt.Errorf("failed to download: %w", err)
}
@@ -321,8 +299,12 @@ func (v *Vaultik) verifyBlob(blobInfo snapshot.BlobInfo, db *sql.DB) error {
return fmt.Errorf("failed to get decryptor: %w", err)
}
// Decrypt blob
decryptedReader, err := decryptor.DecryptStream(reader)
// Hash the encrypted blob data as it streams through to decryption
blobHasher := sha256.New()
teeReader := io.TeeReader(reader, blobHasher)
// Decrypt blob (reading through teeReader to hash encrypted data)
decryptedReader, err := decryptor.DecryptStream(teeReader)
if err != nil {
return fmt.Errorf("failed to decrypt: %w", err)
}
@@ -400,11 +382,209 @@ func (v *Vaultik) verifyBlob(blobInfo snapshot.BlobInfo, db *sql.DB) error {
return fmt.Errorf("error iterating blob chunks: %w", err)
}
log.Debug("Blob verified",
"hash", blobInfo.Hash,
// Verify no remaining data in blob - if chunk list is accurate, blob should be fully consumed
remaining, err := io.Copy(io.Discard, decompressor)
if err != nil {
return fmt.Errorf("failed to check for remaining blob data: %w", err)
}
if remaining > 0 {
return fmt.Errorf("blob has %d unexpected trailing bytes not covered by chunk list", remaining)
}
// Verify blob hash matches the encrypted data we downloaded
calculatedBlobHash := hex.EncodeToString(blobHasher.Sum(nil))
if calculatedBlobHash != blobInfo.Hash {
return fmt.Errorf("blob hash mismatch: calculated %s, expected %s",
calculatedBlobHash, blobInfo.Hash)
}
log.Info("Blob verified",
"hash", blobInfo.Hash[:16]+"...",
"chunks", chunkCount,
"size", humanize.Bytes(uint64(blobInfo.CompressedSize)),
)
return nil
}
// getBlobsFromDatabase gets all blobs for the snapshot from the database.
// Returns the snapshot's blobs (hash plus compressed size) ordered by hash,
// using the downloaded snapshot database as the authoritative source.
func (v *Vaultik) getBlobsFromDatabase(snapshotID string, db *sql.DB) ([]snapshot.BlobInfo, error) {
	const query = `
	SELECT b.blob_hash, b.compressed_size
	FROM snapshot_blobs sb
	JOIN blobs b ON sb.blob_hash = b.blob_hash
	WHERE sb.snapshot_id = ?
	ORDER BY b.blob_hash
	`
	rows, err := db.QueryContext(v.ctx, query, snapshotID)
	if err != nil {
		return nil, fmt.Errorf("failed to query snapshot blobs: %w", err)
	}
	defer func() { _ = rows.Close() }()

	var result []snapshot.BlobInfo
	for rows.Next() {
		var info snapshot.BlobInfo
		// Scan directly into the BlobInfo fields.
		if err := rows.Scan(&info.Hash, &info.CompressedSize); err != nil {
			return nil, fmt.Errorf("failed to scan blob row: %w", err)
		}
		result = append(result, info)
	}
	if err := rows.Err(); err != nil {
		return nil, fmt.Errorf("error iterating blobs: %w", err)
	}
	return result, nil
}
// verifyManifestAgainstDatabase verifies the manifest matches the authoritative database.
// Every manifest entry must exist in the database with the same compressed
// size; a blob count mismatch alone is logged as a warning only, because the
// database is the authoritative source.
func (v *Vaultik) verifyManifestAgainstDatabase(manifest *snapshot.Manifest, dbBlobs []snapshot.BlobInfo) error {
	log.Info("Verifying manifest against database")

	// Index the authoritative database blobs by hash.
	byHash := make(map[string]int64, len(dbBlobs))
	for _, b := range dbBlobs {
		byHash[b.Hash] = b.CompressedSize
	}

	// Index the manifest the same way.
	manifestByHash := make(map[string]int64, len(manifest.Blobs))
	for _, b := range manifest.Blobs {
		manifestByHash[b.Hash] = b.CompressedSize
	}

	// Count mismatch is non-fatal: warn and continue.
	if len(byHash) != len(manifestByHash) {
		log.Warn("Manifest blob count mismatch",
			"database_blobs", len(byHash),
			"manifest_blobs", len(manifestByHash),
		)
	}

	// Each manifest blob must be present in the database with matching size.
	for hash, manifestSize := range manifestByHash {
		dbSize, ok := byHash[hash]
		if !ok {
			return fmt.Errorf("manifest contains blob %s not in database", hash)
		}
		if dbSize != manifestSize {
			return fmt.Errorf("blob %s size mismatch: database has %d bytes, manifest has %d bytes",
				hash, dbSize, manifestSize)
		}
	}

	log.Info("✓ Manifest verified against database",
		"manifest_blobs", len(manifestByHash),
		"database_blobs", len(byHash),
	)
	return nil
}
// verifyBlobExistenceFromDB checks that all blobs from database exist in S3.
// Each blob is Stat'ed at its sharded path and its remote size compared to
// the size recorded in the database.
func (v *Vaultik) verifyBlobExistenceFromDB(blobs []snapshot.BlobInfo) error {
	total := len(blobs)
	log.Info("Verifying blob existence in S3", "blob_count", total)

	for i, blob := range blobs {
		// Blobs are sharded by the first two hex byte pairs of their hash.
		blobPath := fmt.Sprintf("blobs/%s/%s/%s", blob.Hash[:2], blob.Hash[2:4], blob.Hash)

		stat, err := v.Storage.Stat(v.ctx, blobPath)
		if err != nil {
			return fmt.Errorf("blob %s missing from storage: %w", blob.Hash, err)
		}
		if stat.Size != blob.CompressedSize {
			return fmt.Errorf("blob %s size mismatch: S3 has %d bytes, database has %d bytes",
				blob.Hash, stat.Size, blob.CompressedSize)
		}

		// Emit progress every 100 blobs and on the final blob.
		if (i+1)%100 == 0 || i == total-1 {
			log.Info("Blob existence check progress",
				"checked", i+1,
				"total", total,
				"percent", fmt.Sprintf("%.1f%%", float64(i+1)/float64(total)*100),
			)
		}
	}

	log.Info("✓ All blobs exist in storage")
	return nil
}
// performDeepVerificationFromDB downloads and verifies the content of each blob using database as source.
// Progress (including a throughput-based ETA) is logged after each blob, and
// also printed to the user unless JSON output is requested.
func (v *Vaultik) performDeepVerificationFromDB(blobs []snapshot.BlobInfo, db *sql.DB, opts *VerifyOptions) error {
	// Sum expected bytes up front so progress reports can include an ETA.
	var expectedBytes int64
	for _, b := range blobs {
		expectedBytes += b.CompressedSize
	}
	log.Info("Starting deep verification - downloading and verifying all blobs",
		"blob_count", len(blobs),
		"total_size", humanize.Bytes(uint64(expectedBytes)),
	)

	start := time.Now()
	var doneBytes int64
	for i, info := range blobs {
		if err := v.verifyBlob(info, db); err != nil {
			return fmt.Errorf("blob %s verification failed: %w", info.Hash, err)
		}
		doneBytes += info.CompressedSize

		elapsed := time.Since(start)
		blobsLeft := len(blobs) - (i + 1)

		// Estimate time remaining from observed byte throughput so far.
		var eta time.Duration
		if doneBytes > 0 {
			rate := float64(doneBytes) / elapsed.Seconds()
			if rate > 0 {
				eta = time.Duration(float64(expectedBytes-doneBytes)/rate) * time.Second
			}
		}

		log.Info("Verification progress",
			"blobs_done", i+1,
			"blobs_total", len(blobs),
			"blobs_remaining", blobsLeft,
			"bytes_done", doneBytes,
			"bytes_done_human", humanize.Bytes(uint64(doneBytes)),
			"bytes_total", expectedBytes,
			"bytes_total_human", humanize.Bytes(uint64(expectedBytes)),
			"elapsed", elapsed.Round(time.Second),
			"eta", eta.Round(time.Second),
		)
		if !opts.JSON {
			v.Outputf(" Verified %d/%d blobs (%d remaining) - %s/%s - elapsed %s, eta %s\n",
				i+1, len(blobs), blobsLeft,
				humanize.Bytes(uint64(doneBytes)),
				humanize.Bytes(uint64(expectedBytes)),
				elapsed.Round(time.Second),
				eta.Round(time.Second))
		}
	}

	log.Info("✓ Deep verification completed successfully",
		"blobs_verified", len(blobs),
		"total_bytes", doneBytes,
		"total_bytes_human", humanize.Bytes(uint64(doneBytes)),
		"duration", time.Since(start).Round(time.Second),
	)
	return nil
}