diff --git a/internal/vaultik/blob_fetch_stub.go b/internal/vaultik/blob_fetch_stub.go new file mode 100644 index 0000000..e8ef0a7 --- /dev/null +++ b/internal/vaultik/blob_fetch_stub.go @@ -0,0 +1,55 @@ +package vaultik + +import ( + "context" + "fmt" + "io" + + "filippo.io/age" + "git.eeqj.de/sneak/vaultik/internal/blobgen" +) + +// FetchAndDecryptBlobResult holds the result of fetching and decrypting a blob. +type FetchAndDecryptBlobResult struct { + Data []byte +} + +// FetchAndDecryptBlob downloads a blob, decrypts it, and returns the plaintext data. +func (v *Vaultik) FetchAndDecryptBlob(ctx context.Context, blobHash string, expectedSize int64, identity age.Identity) (*FetchAndDecryptBlobResult, error) { + rc, _, err := v.FetchBlob(ctx, blobHash, expectedSize) + if err != nil { + return nil, err + } + defer func() { _ = rc.Close() }() + + reader, err := blobgen.NewReader(rc, identity) + if err != nil { + return nil, fmt.Errorf("creating blob reader: %w", err) + } + defer func() { _ = reader.Close() }() + + data, err := io.ReadAll(reader) + if err != nil { + return nil, fmt.Errorf("reading blob data: %w", err) + } + + return &FetchAndDecryptBlobResult{Data: data}, nil +} + +// FetchBlob downloads a blob and returns a reader for the encrypted data. 
+func (v *Vaultik) FetchBlob(ctx context.Context, blobHash string, expectedSize int64) (io.ReadCloser, int64, error) { + blobPath := fmt.Sprintf("blobs/%s/%s/%s", blobHash[:2], blobHash[2:4], blobHash) + + rc, err := v.Storage.Get(ctx, blobPath) + if err != nil { + return nil, 0, fmt.Errorf("downloading blob %s: %w", blobHash[:16], err) + } + + info, err := v.Storage.Stat(ctx, blobPath) + if err != nil { + _ = rc.Close() + return nil, 0, fmt.Errorf("stat blob %s: %w", blobHash[:16], err) + } + + return rc, info.Size, nil +} diff --git a/internal/vaultik/blobcache.go b/internal/vaultik/blobcache.go new file mode 100644 index 0000000..50b5565 --- /dev/null +++ b/internal/vaultik/blobcache.go @@ -0,0 +1,210 @@ +package vaultik + +import ( + "fmt" + "os" + "path/filepath" + "sync" +) + +// defaultMaxBlobCacheBytes is the default maximum size of the disk blob cache (10 GB). +const defaultMaxBlobCacheBytes = 10 << 30 // 10 GiB + +// blobDiskCacheEntry tracks a cached blob on disk. +type blobDiskCacheEntry struct { + key string + size int64 + prev *blobDiskCacheEntry + next *blobDiskCacheEntry +} + +// blobDiskCache is an LRU cache that stores blobs on disk instead of in memory. +// Blobs are written to a temp directory keyed by their hash. When total size +// exceeds maxBytes, the least-recently-used entries are evicted (deleted from disk). +type blobDiskCache struct { + mu sync.Mutex + dir string + maxBytes int64 + curBytes int64 + items map[string]*blobDiskCacheEntry + head *blobDiskCacheEntry // most recent + tail *blobDiskCacheEntry // least recent +} + +// newBlobDiskCache creates a new disk-based blob cache with the given max size. 
// defaultMaxBlobCacheBytes is the default maximum size of the disk blob cache (10 GB).
const defaultMaxBlobCacheBytes = 10 << 30 // 10 GiB

// blobDiskCacheEntry tracks a cached blob on disk. Entries form an intrusive
// doubly linked list ordered from most- to least-recently used.
type blobDiskCacheEntry struct {
	key  string
	size int64
	prev *blobDiskCacheEntry
	next *blobDiskCacheEntry
}

// blobDiskCache is an LRU cache that stores blobs on disk instead of in memory.
// Blobs are written to a temp directory keyed by their hash. When total size
// exceeds maxBytes, the least-recently-used entries are evicted (deleted from
// disk). All methods are safe for concurrent use.
//
// NOTE(review): keys are used directly as file names under dir; this assumes
// keys are filesystem-safe (hex blob hashes) — confirm callers never pass keys
// containing path separators.
type blobDiskCache struct {
	mu       sync.Mutex
	dir      string
	maxBytes int64
	curBytes int64
	items    map[string]*blobDiskCacheEntry
	head     *blobDiskCacheEntry // most recent
	tail     *blobDiskCacheEntry // least recent
}

// newBlobDiskCache creates a new disk-based blob cache with the given max size.
// The caller owns the cache and must Close it to remove the temp directory.
func newBlobDiskCache(maxBytes int64) (*blobDiskCache, error) {
	dir, err := os.MkdirTemp("", "vaultik-blobcache-*")
	if err != nil {
		return nil, fmt.Errorf("creating blob cache dir: %w", err)
	}
	return &blobDiskCache{
		dir:      dir,
		maxBytes: maxBytes,
		items:    make(map[string]*blobDiskCacheEntry),
	}, nil
}

// path returns the on-disk location for a cache key.
func (c *blobDiskCache) path(key string) string {
	return filepath.Join(c.dir, key)
}

// unlink removes e from the LRU list. Caller must hold c.mu.
func (c *blobDiskCache) unlink(e *blobDiskCacheEntry) {
	if e.prev != nil {
		e.prev.next = e.next
	} else {
		c.head = e.next
	}
	if e.next != nil {
		e.next.prev = e.prev
	} else {
		c.tail = e.prev
	}
	e.prev = nil
	e.next = nil
}

// pushFront inserts e at the most-recently-used end. Caller must hold c.mu.
func (c *blobDiskCache) pushFront(e *blobDiskCacheEntry) {
	e.prev = nil
	e.next = c.head
	if c.head != nil {
		c.head.prev = e
	}
	c.head = e
	if c.tail == nil {
		c.tail = e
	}
}

// evictLRU removes the least-recently-used entry and deletes its file.
// Caller must hold c.mu.
func (c *blobDiskCache) evictLRU() {
	if c.tail == nil {
		return
	}
	victim := c.tail
	c.unlink(victim)
	delete(c.items, victim.key)
	c.curBytes -= victim.size
	// Best-effort delete: a failure here only leaks a temp file, which
	// Close's RemoveAll reclaims.
	_ = os.Remove(c.path(victim.key))
}

// Put writes blob data to disk cache. Entries larger than maxBytes are
// silently skipped. Updating an existing key always removes the old entry
// first, so an oversized update can never leave stale data behind.
func (c *blobDiskCache) Put(key string, data []byte) error {
	entrySize := int64(len(data))

	c.mu.Lock()
	defer c.mu.Unlock()

	// Drop any existing entry before the size check. (Previously the
	// oversized early-return ran first, leaving stale bytes cached when a
	// key was updated with data exceeding maxBytes.)
	if e, ok := c.items[key]; ok {
		c.unlink(e)
		c.curBytes -= e.size
		_ = os.Remove(c.path(key))
		delete(c.items, key)
	}

	if entrySize > c.maxBytes {
		return nil
	}

	if err := os.WriteFile(c.path(key), data, 0600); err != nil {
		return fmt.Errorf("writing blob to cache: %w", err)
	}

	e := &blobDiskCacheEntry{key: key, size: entrySize}
	c.pushFront(e)
	c.items[key] = e
	c.curBytes += entrySize

	for c.curBytes > c.maxBytes && c.tail != nil {
		c.evictLRU()
	}

	return nil
}

// Get reads a cached blob from disk. Returns data and true on hit. A hit
// refreshes the entry's LRU position. If the backing file cannot be read
// (e.g. removed out from under us), the entry is dropped and a miss is
// reported.
func (c *blobDiskCache) Get(key string) ([]byte, bool) {
	c.mu.Lock()
	e, ok := c.items[key]
	if !ok {
		c.mu.Unlock()
		return nil, false
	}
	c.unlink(e)
	c.pushFront(e)
	// Read outside the lock so disk I/O does not serialize all cache users.
	c.mu.Unlock()

	data, err := os.ReadFile(c.path(key))
	if err != nil {
		c.mu.Lock()
		// Re-check identity: the entry may have been evicted or replaced
		// concurrently while we were reading.
		if e2, ok2 := c.items[key]; ok2 && e2 == e {
			c.unlink(e)
			delete(c.items, key)
			c.curBytes -= e.size
		}
		c.mu.Unlock()
		return nil, false
	}
	return data, true
}

// ReadAt reads a slice of a cached blob without loading the entire blob into
// memory. offset and length must be non-negative and within the blob's size.
// A successful lookup refreshes the entry's LRU position.
func (c *blobDiskCache) ReadAt(key string, offset, length int64) ([]byte, error) {
	if offset < 0 || length < 0 {
		return nil, fmt.Errorf("negative read range: offset=%d length=%d", offset, length)
	}

	c.mu.Lock()
	e, ok := c.items[key]
	if !ok {
		c.mu.Unlock()
		return nil, fmt.Errorf("key %q not in cache", key)
	}
	if offset+length > e.size {
		c.mu.Unlock()
		return nil, fmt.Errorf("read beyond blob size: offset=%d length=%d size=%d", offset, length, e.size)
	}
	c.unlink(e)
	c.pushFront(e)
	c.mu.Unlock()

	f, err := os.Open(c.path(key))
	if err != nil {
		return nil, err
	}
	defer func() { _ = f.Close() }()

	buf := make([]byte, length)
	if _, err := f.ReadAt(buf, offset); err != nil {
		return nil, err
	}
	return buf, nil
}

// Has returns whether a key exists in the cache.
func (c *blobDiskCache) Has(key string) bool {
	c.mu.Lock()
	defer c.mu.Unlock()
	_, ok := c.items[key]
	return ok
}

// Size returns current total cached bytes.
func (c *blobDiskCache) Size() int64 {
	c.mu.Lock()
	defer c.mu.Unlock()
	return c.curBytes
}

// Len returns number of cached entries.
func (c *blobDiskCache) Len() int {
	c.mu.Lock()
	defer c.mu.Unlock()
	return len(c.items)
}
+func (c *blobDiskCache) Close() error { + c.mu.Lock() + defer c.mu.Unlock() + c.items = nil + c.head = nil + c.tail = nil + c.curBytes = 0 + return os.RemoveAll(c.dir) +} diff --git a/internal/vaultik/blobcache_test.go b/internal/vaultik/blobcache_test.go new file mode 100644 index 0000000..778aadd --- /dev/null +++ b/internal/vaultik/blobcache_test.go @@ -0,0 +1,189 @@ +package vaultik + +import ( + "bytes" + "crypto/rand" + "fmt" + "testing" +) + +func TestBlobDiskCache_BasicGetPut(t *testing.T) { + cache, err := newBlobDiskCache(1 << 20) + if err != nil { + t.Fatal(err) + } + defer func() { _ = cache.Close() }() + + data := []byte("hello world") + if err := cache.Put("key1", data); err != nil { + t.Fatal(err) + } + + got, ok := cache.Get("key1") + if !ok { + t.Fatal("expected cache hit") + } + if !bytes.Equal(got, data) { + t.Fatalf("got %q, want %q", got, data) + } + + _, ok = cache.Get("nonexistent") + if ok { + t.Fatal("expected cache miss") + } +} + +func TestBlobDiskCache_EvictionUnderPressure(t *testing.T) { + maxBytes := int64(1000) + cache, err := newBlobDiskCache(maxBytes) + if err != nil { + t.Fatal(err) + } + defer func() { _ = cache.Close() }() + + for i := 0; i < 5; i++ { + data := make([]byte, 300) + if err := cache.Put(fmt.Sprintf("key%d", i), data); err != nil { + t.Fatal(err) + } + } + + if cache.Size() > maxBytes { + t.Fatalf("cache size %d exceeds max %d", cache.Size(), maxBytes) + } + + if !cache.Has("key4") { + t.Fatal("expected key4 to be cached") + } + if cache.Has("key0") { + t.Fatal("expected key0 to be evicted") + } +} + +func TestBlobDiskCache_OversizedEntryRejected(t *testing.T) { + cache, err := newBlobDiskCache(100) + if err != nil { + t.Fatal(err) + } + defer func() { _ = cache.Close() }() + + data := make([]byte, 200) + if err := cache.Put("big", data); err != nil { + t.Fatal(err) + } + + if cache.Has("big") { + t.Fatal("oversized entry should not be cached") + } +} + +func TestBlobDiskCache_UpdateInPlace(t *testing.T) { + cache, 
err := newBlobDiskCache(1 << 20) + if err != nil { + t.Fatal(err) + } + defer func() { _ = cache.Close() }() + + if err := cache.Put("key1", []byte("v1")); err != nil { + t.Fatal(err) + } + if err := cache.Put("key1", []byte("version2")); err != nil { + t.Fatal(err) + } + + got, ok := cache.Get("key1") + if !ok { + t.Fatal("expected hit") + } + if string(got) != "version2" { + t.Fatalf("got %q, want %q", got, "version2") + } + if cache.Len() != 1 { + t.Fatalf("expected 1 entry, got %d", cache.Len()) + } + if cache.Size() != int64(len("version2")) { + t.Fatalf("expected size %d, got %d", len("version2"), cache.Size()) + } +} + +func TestBlobDiskCache_ReadAt(t *testing.T) { + cache, err := newBlobDiskCache(1 << 20) + if err != nil { + t.Fatal(err) + } + defer func() { _ = cache.Close() }() + + data := make([]byte, 1024) + if _, err := rand.Read(data); err != nil { + t.Fatal(err) + } + if err := cache.Put("blob1", data); err != nil { + t.Fatal(err) + } + + chunk, err := cache.ReadAt("blob1", 100, 200) + if err != nil { + t.Fatal(err) + } + if !bytes.Equal(chunk, data[100:300]) { + t.Fatal("ReadAt returned wrong data") + } + + _, err = cache.ReadAt("blob1", 900, 200) + if err == nil { + t.Fatal("expected error for out-of-bounds read") + } + + _, err = cache.ReadAt("missing", 0, 10) + if err == nil { + t.Fatal("expected error for missing key") + } +} + +func TestBlobDiskCache_Close(t *testing.T) { + cache, err := newBlobDiskCache(1 << 20) + if err != nil { + t.Fatal(err) + } + + if err := cache.Put("key1", []byte("data")); err != nil { + t.Fatal(err) + } + if err := cache.Close(); err != nil { + t.Fatal(err) + } +} + +func TestBlobDiskCache_LRUOrder(t *testing.T) { + cache, err := newBlobDiskCache(200) + if err != nil { + t.Fatal(err) + } + defer func() { _ = cache.Close() }() + + d := make([]byte, 100) + if err := cache.Put("a", d); err != nil { + t.Fatal(err) + } + if err := cache.Put("b", d); err != nil { + t.Fatal(err) + } + + // Access "a" to make it most recently 
used + cache.Get("a") + + // Adding "c" should evict "b" (LRU), not "a" + if err := cache.Put("c", d); err != nil { + t.Fatal(err) + } + + if !cache.Has("a") { + t.Fatal("expected 'a' to survive") + } + if !cache.Has("c") { + t.Fatal("expected 'c' to be present") + } + if cache.Has("b") { + t.Fatal("expected 'b' to be evicted") + } +} diff --git a/internal/vaultik/info.go b/internal/vaultik/info.go index ff28859..53cfc2c 100644 --- a/internal/vaultik/info.go +++ b/internal/vaultik/info.go @@ -15,99 +15,99 @@ import ( // ShowInfo displays system and configuration information func (v *Vaultik) ShowInfo() error { // System Information - fmt.Printf("=== System Information ===\n") - fmt.Printf("OS/Architecture: %s/%s\n", runtime.GOOS, runtime.GOARCH) - fmt.Printf("Version: %s\n", v.Globals.Version) - fmt.Printf("Commit: %s\n", v.Globals.Commit) - fmt.Printf("Go Version: %s\n", runtime.Version()) - fmt.Println() + v.printfStdout("=== System Information ===\n") + v.printfStdout("OS/Architecture: %s/%s\n", runtime.GOOS, runtime.GOARCH) + v.printfStdout("Version: %s\n", v.Globals.Version) + v.printfStdout("Commit: %s\n", v.Globals.Commit) + v.printfStdout("Go Version: %s\n", runtime.Version()) + v.printlnStdout() // Storage Configuration - fmt.Printf("=== Storage Configuration ===\n") - fmt.Printf("S3 Bucket: %s\n", v.Config.S3.Bucket) + v.printfStdout("=== Storage Configuration ===\n") + v.printfStdout("S3 Bucket: %s\n", v.Config.S3.Bucket) if v.Config.S3.Prefix != "" { - fmt.Printf("S3 Prefix: %s\n", v.Config.S3.Prefix) + v.printfStdout("S3 Prefix: %s\n", v.Config.S3.Prefix) } - fmt.Printf("S3 Endpoint: %s\n", v.Config.S3.Endpoint) - fmt.Printf("S3 Region: %s\n", v.Config.S3.Region) - fmt.Println() + v.printfStdout("S3 Endpoint: %s\n", v.Config.S3.Endpoint) + v.printfStdout("S3 Region: %s\n", v.Config.S3.Region) + v.printlnStdout() // Backup Settings - fmt.Printf("=== Backup Settings ===\n") + v.printfStdout("=== Backup Settings ===\n") // Show configured snapshots - 
fmt.Printf("Snapshots:\n") + v.printfStdout("Snapshots:\n") for _, name := range v.Config.SnapshotNames() { snap := v.Config.Snapshots[name] - fmt.Printf(" %s:\n", name) + v.printfStdout(" %s:\n", name) for _, path := range snap.Paths { - fmt.Printf(" - %s\n", path) + v.printfStdout(" - %s\n", path) } if len(snap.Exclude) > 0 { - fmt.Printf(" exclude: %s\n", strings.Join(snap.Exclude, ", ")) + v.printfStdout(" exclude: %s\n", strings.Join(snap.Exclude, ", ")) } } // Global exclude patterns if len(v.Config.Exclude) > 0 { - fmt.Printf("Global Exclude: %s\n", strings.Join(v.Config.Exclude, ", ")) + v.printfStdout("Global Exclude: %s\n", strings.Join(v.Config.Exclude, ", ")) } - fmt.Printf("Compression: zstd level %d\n", v.Config.CompressionLevel) - fmt.Printf("Chunk Size: %s\n", humanize.Bytes(uint64(v.Config.ChunkSize))) - fmt.Printf("Blob Size Limit: %s\n", humanize.Bytes(uint64(v.Config.BlobSizeLimit))) - fmt.Println() + v.printfStdout("Compression: zstd level %d\n", v.Config.CompressionLevel) + v.printfStdout("Chunk Size: %s\n", humanize.Bytes(uint64(v.Config.ChunkSize))) + v.printfStdout("Blob Size Limit: %s\n", humanize.Bytes(uint64(v.Config.BlobSizeLimit))) + v.printlnStdout() // Encryption Configuration - fmt.Printf("=== Encryption Configuration ===\n") - fmt.Printf("Recipients:\n") + v.printfStdout("=== Encryption Configuration ===\n") + v.printfStdout("Recipients:\n") for _, recipient := range v.Config.AgeRecipients { - fmt.Printf(" - %s\n", recipient) + v.printfStdout(" - %s\n", recipient) } - fmt.Println() + v.printlnStdout() // Daemon Settings (if applicable) if v.Config.BackupInterval > 0 || v.Config.MinTimeBetweenRun > 0 { - fmt.Printf("=== Daemon Settings ===\n") + v.printfStdout("=== Daemon Settings ===\n") if v.Config.BackupInterval > 0 { - fmt.Printf("Backup Interval: %s\n", v.Config.BackupInterval) + v.printfStdout("Backup Interval: %s\n", v.Config.BackupInterval) } if v.Config.MinTimeBetweenRun > 0 { - fmt.Printf("Minimum Time: %s\n", 
v.Config.MinTimeBetweenRun) + v.printfStdout("Minimum Time: %s\n", v.Config.MinTimeBetweenRun) } - fmt.Println() + v.printlnStdout() } // Local Database - fmt.Printf("=== Local Database ===\n") - fmt.Printf("Index Path: %s\n", v.Config.IndexPath) + v.printfStdout("=== Local Database ===\n") + v.printfStdout("Index Path: %s\n", v.Config.IndexPath) // Check if index file exists and get its size if info, err := v.Fs.Stat(v.Config.IndexPath); err == nil { - fmt.Printf("Index Size: %s\n", humanize.Bytes(uint64(info.Size()))) + v.printfStdout("Index Size: %s\n", humanize.Bytes(uint64(info.Size()))) // Get snapshot count from database query := `SELECT COUNT(*) FROM snapshots WHERE completed_at IS NOT NULL` var snapshotCount int if err := v.DB.Conn().QueryRowContext(v.ctx, query).Scan(&snapshotCount); err == nil { - fmt.Printf("Snapshots: %d\n", snapshotCount) + v.printfStdout("Snapshots: %d\n", snapshotCount) } // Get blob count from database query = `SELECT COUNT(*) FROM blobs` var blobCount int if err := v.DB.Conn().QueryRowContext(v.ctx, query).Scan(&blobCount); err == nil { - fmt.Printf("Blobs: %d\n", blobCount) + v.printfStdout("Blobs: %d\n", blobCount) } // Get file count from database query = `SELECT COUNT(*) FROM files` var fileCount int if err := v.DB.Conn().QueryRowContext(v.ctx, query).Scan(&fileCount); err == nil { - fmt.Printf("Files: %d\n", fileCount) + v.printfStdout("Files: %d\n", fileCount) } } else { - fmt.Printf("Index Size: (not created)\n") + v.printfStdout("Index Size: (not created)\n") } return nil @@ -157,15 +157,15 @@ func (v *Vaultik) RemoteInfo(jsonOutput bool) error { result.StorageLocation = storageInfo.Location if !jsonOutput { - fmt.Printf("=== Remote Storage ===\n") - fmt.Printf("Type: %s\n", storageInfo.Type) - fmt.Printf("Location: %s\n", storageInfo.Location) - fmt.Println() + v.printfStdout("=== Remote Storage ===\n") + v.printfStdout("Type: %s\n", storageInfo.Type) + v.printfStdout("Location: %s\n", storageInfo.Location) + 
v.printlnStdout() } // List all snapshot metadata if !jsonOutput { - fmt.Printf("Scanning snapshot metadata...\n") + v.printfStdout("Scanning snapshot metadata...\n") } snapshotMetadata := make(map[string]*SnapshotMetadataInfo) @@ -210,7 +210,7 @@ func (v *Vaultik) RemoteInfo(jsonOutput bool) error { // Download and parse all manifests to get referenced blobs if !jsonOutput { - fmt.Printf("Downloading %d manifest(s)...\n", len(snapshotIDs)) + v.printfStdout("Downloading %d manifest(s)...\n", len(snapshotIDs)) } referencedBlobs := make(map[string]int64) // hash -> compressed size @@ -260,7 +260,7 @@ func (v *Vaultik) RemoteInfo(jsonOutput bool) error { // List all blobs on remote if !jsonOutput { - fmt.Printf("Scanning blobs...\n") + v.printfStdout("Scanning blobs...\n") } allBlobs := make(map[string]int64) // hash -> size from storage @@ -298,14 +298,14 @@ func (v *Vaultik) RemoteInfo(jsonOutput bool) error { } // Human-readable output - fmt.Printf("\n=== Snapshot Metadata ===\n") + v.printfStdout("\n=== Snapshot Metadata ===\n") if len(result.Snapshots) == 0 { - fmt.Printf("No snapshots found\n") + v.printfStdout("No snapshots found\n") } else { - fmt.Printf("%-45s %12s %12s %12s %10s %12s\n", "SNAPSHOT", "MANIFEST", "DATABASE", "TOTAL", "BLOBS", "BLOB SIZE") - fmt.Printf("%-45s %12s %12s %12s %10s %12s\n", strings.Repeat("-", 45), strings.Repeat("-", 12), strings.Repeat("-", 12), strings.Repeat("-", 12), strings.Repeat("-", 10), strings.Repeat("-", 12)) + v.printfStdout("%-45s %12s %12s %12s %10s %12s\n", "SNAPSHOT", "MANIFEST", "DATABASE", "TOTAL", "BLOBS", "BLOB SIZE") + v.printfStdout("%-45s %12s %12s %12s %10s %12s\n", strings.Repeat("-", 45), strings.Repeat("-", 12), strings.Repeat("-", 12), strings.Repeat("-", 12), strings.Repeat("-", 10), strings.Repeat("-", 12)) for _, info := range result.Snapshots { - fmt.Printf("%-45s %12s %12s %12s %10s %12s\n", + v.printfStdout("%-45s %12s %12s %12s %10s %12s\n", truncateString(info.SnapshotID, 45), 
humanize.Bytes(uint64(info.ManifestSize)), humanize.Bytes(uint64(info.DatabaseSize)), @@ -314,23 +314,23 @@ func (v *Vaultik) RemoteInfo(jsonOutput bool) error { humanize.Bytes(uint64(info.BlobsSize)), ) } - fmt.Printf("%-45s %12s %12s %12s %10s %12s\n", strings.Repeat("-", 45), strings.Repeat("-", 12), strings.Repeat("-", 12), strings.Repeat("-", 12), strings.Repeat("-", 10), strings.Repeat("-", 12)) - fmt.Printf("%-45s %12s %12s %12s\n", fmt.Sprintf("Total (%d snapshots)", result.TotalMetadataCount), "", "", humanize.Bytes(uint64(result.TotalMetadataSize))) + v.printfStdout("%-45s %12s %12s %12s %10s %12s\n", strings.Repeat("-", 45), strings.Repeat("-", 12), strings.Repeat("-", 12), strings.Repeat("-", 12), strings.Repeat("-", 10), strings.Repeat("-", 12)) + v.printfStdout("%-45s %12s %12s %12s\n", fmt.Sprintf("Total (%d snapshots)", result.TotalMetadataCount), "", "", humanize.Bytes(uint64(result.TotalMetadataSize))) } - fmt.Printf("\n=== Blob Storage ===\n") - fmt.Printf("Total blobs on remote: %s (%s)\n", + v.printfStdout("\n=== Blob Storage ===\n") + v.printfStdout("Total blobs on remote: %s (%s)\n", humanize.Comma(int64(result.TotalBlobCount)), humanize.Bytes(uint64(result.TotalBlobSize))) - fmt.Printf("Referenced by snapshots: %s (%s)\n", + v.printfStdout("Referenced by snapshots: %s (%s)\n", humanize.Comma(int64(result.ReferencedBlobCount)), humanize.Bytes(uint64(result.ReferencedBlobSize))) - fmt.Printf("Orphaned (unreferenced): %s (%s)\n", + v.printfStdout("Orphaned (unreferenced): %s (%s)\n", humanize.Comma(int64(result.OrphanedBlobCount)), humanize.Bytes(uint64(result.OrphanedBlobSize))) if result.OrphanedBlobCount > 0 { - fmt.Printf("\nRun 'vaultik prune --remote' to remove orphaned blobs.\n") + v.printfStdout("\nRun 'vaultik prune --remote' to remove orphaned blobs.\n") } return nil diff --git a/internal/vaultik/prune.go b/internal/vaultik/prune.go index 946461e..dff9dd9 100644 --- a/internal/vaultik/prune.go +++ b/internal/vaultik/prune.go @@ -3,7 
+3,6 @@ package vaultik import ( "encoding/json" "fmt" - "os" "strings" "git.eeqj.de/sneak/vaultik/internal/log" @@ -121,29 +120,29 @@ func (v *Vaultik) PruneBlobs(opts *PruneOptions) error { if len(unreferencedBlobs) == 0 { log.Info("No unreferenced blobs found") if opts.JSON { - return outputPruneBlobsJSON(result) + return v.outputPruneBlobsJSON(result) } - fmt.Println("No unreferenced blobs to remove.") + v.printlnStdout("No unreferenced blobs to remove.") return nil } // Show what will be deleted log.Info("Found unreferenced blobs", "count", len(unreferencedBlobs), "total_size", humanize.Bytes(uint64(totalSize))) if !opts.JSON { - fmt.Printf("Found %d unreferenced blob(s) totaling %s\n", len(unreferencedBlobs), humanize.Bytes(uint64(totalSize))) + v.printfStdout("Found %d unreferenced blob(s) totaling %s\n", len(unreferencedBlobs), humanize.Bytes(uint64(totalSize))) } // Confirm unless --force is used (skip in JSON mode - require --force) if !opts.Force && !opts.JSON { - fmt.Printf("\nDelete %d unreferenced blob(s)? [y/N] ", len(unreferencedBlobs)) + v.printfStdout("\nDelete %d unreferenced blob(s)? 
[y/N] ", len(unreferencedBlobs)) var confirm string - if _, err := fmt.Scanln(&confirm); err != nil { + if _, err := v.scanStdin(&confirm); err != nil { // Treat EOF or error as "no" - fmt.Println("Cancelled") + v.printlnStdout("Cancelled") return nil } if strings.ToLower(confirm) != "y" { - fmt.Println("Cancelled") + v.printlnStdout("Cancelled") return nil } } @@ -185,20 +184,20 @@ func (v *Vaultik) PruneBlobs(opts *PruneOptions) error { ) if opts.JSON { - return outputPruneBlobsJSON(result) + return v.outputPruneBlobsJSON(result) } - fmt.Printf("\nDeleted %d blob(s) totaling %s\n", deletedCount, humanize.Bytes(uint64(deletedSize))) + v.printfStdout("\nDeleted %d blob(s) totaling %s\n", deletedCount, humanize.Bytes(uint64(deletedSize))) if deletedCount < len(unreferencedBlobs) { - fmt.Printf("Failed to delete %d blob(s)\n", len(unreferencedBlobs)-deletedCount) + v.printfStdout("Failed to delete %d blob(s)\n", len(unreferencedBlobs)-deletedCount) } return nil } // outputPruneBlobsJSON outputs the prune result as JSON -func outputPruneBlobsJSON(result *PruneBlobsResult) error { - encoder := json.NewEncoder(os.Stdout) +func (v *Vaultik) outputPruneBlobsJSON(result *PruneBlobsResult) error { + encoder := json.NewEncoder(v.Stdout) encoder.SetIndent("", " ") return encoder.Encode(result) } diff --git a/internal/vaultik/restore.go b/internal/vaultik/restore.go index aa00a27..d5ac3a7 100644 --- a/internal/vaultik/restore.go +++ b/internal/vaultik/restore.go @@ -109,7 +109,11 @@ func (v *Vaultik) Restore(opts *RestoreOptions) error { // Step 5: Restore files result := &RestoreResult{} - blobCache := make(map[string][]byte) // Cache downloaded and decrypted blobs + blobCache, err := newBlobDiskCache(defaultMaxBlobCacheBytes) + if err != nil { + return fmt.Errorf("creating blob cache: %w", err) + } + defer func() { _ = blobCache.Close() }() for i, file := range files { if v.ctx.Err() != nil { @@ -141,7 +145,7 @@ func (v *Vaultik) Restore(opts *RestoreOptions) error { 
"duration", result.Duration, ) - _, _ = fmt.Fprintf(v.Stdout, "Restored %d files (%s) in %s\n", + v.printfStdout("Restored %d files (%s) in %s\n", result.FilesRestored, humanize.Bytes(uint64(result.BytesRestored)), result.Duration.Round(time.Second), @@ -154,14 +158,14 @@ func (v *Vaultik) Restore(opts *RestoreOptions) error { } if result.FilesFailed > 0 { - _, _ = fmt.Fprintf(v.Stdout, "\nVerification FAILED: %d files did not match expected checksums\n", result.FilesFailed) + v.printfStdout("\nVerification FAILED: %d files did not match expected checksums\n", result.FilesFailed) for _, path := range result.FailedFiles { - _, _ = fmt.Fprintf(v.Stdout, " - %s\n", path) + v.printfStdout(" - %s\n", path) } return fmt.Errorf("%d files failed verification", result.FilesFailed) } - _, _ = fmt.Fprintf(v.Stdout, "Verified %d files (%s)\n", + v.printfStdout("Verified %d files (%s)\n", result.FilesVerified, humanize.Bytes(uint64(result.BytesVerified)), ) @@ -299,7 +303,7 @@ func (v *Vaultik) restoreFile( targetDir string, identity age.Identity, chunkToBlobMap map[string]*database.BlobChunk, - blobCache map[string][]byte, + blobCache *blobDiskCache, result *RestoreResult, ) error { // Calculate target path - use full original path under target directory @@ -383,7 +387,7 @@ func (v *Vaultik) restoreRegularFile( targetPath string, identity age.Identity, chunkToBlobMap map[string]*database.BlobChunk, - blobCache map[string][]byte, + blobCache *blobDiskCache, result *RestoreResult, ) error { // Get file chunks in order @@ -417,13 +421,15 @@ func (v *Vaultik) restoreRegularFile( // Download and decrypt blob if not cached blobHashStr := blob.Hash.String() - blobData, ok := blobCache[blobHashStr] + blobData, ok := blobCache.Get(blobHashStr) if !ok { blobData, err = v.downloadBlob(ctx, blobHashStr, blob.CompressedSize, identity) if err != nil { return fmt.Errorf("downloading blob %s: %w", blobHashStr[:16], err) } - blobCache[blobHashStr] = blobData + if putErr := 
blobCache.Put(blobHashStr, blobData); putErr != nil { + log.Debug("Failed to cache blob on disk", "hash", blobHashStr[:16], "error", putErr) + } result.BlobsDownloaded++ result.BytesDownloaded += blob.CompressedSize } @@ -473,53 +479,6 @@ func (v *Vaultik) restoreRegularFile( return nil } -// BlobFetchResult holds the result of fetching and decrypting a blob. -type BlobFetchResult struct { - Data []byte - CompressedSize int64 -} - -// FetchAndDecryptBlob downloads a blob from storage, decrypts and decompresses it. -func (v *Vaultik) FetchAndDecryptBlob(ctx context.Context, blobHash string, expectedSize int64, identity age.Identity) (*BlobFetchResult, error) { - // Construct blob path with sharding - blobPath := fmt.Sprintf("blobs/%s/%s/%s", blobHash[:2], blobHash[2:4], blobHash) - - reader, err := v.Storage.Get(ctx, blobPath) - if err != nil { - return nil, fmt.Errorf("downloading blob: %w", err) - } - defer func() { _ = reader.Close() }() - - // Read encrypted data - encryptedData, err := io.ReadAll(reader) - if err != nil { - return nil, fmt.Errorf("reading blob data: %w", err) - } - - // Decrypt and decompress - blobReader, err := blobgen.NewReader(bytes.NewReader(encryptedData), identity) - if err != nil { - return nil, fmt.Errorf("creating decryption reader: %w", err) - } - defer func() { _ = blobReader.Close() }() - - data, err := io.ReadAll(blobReader) - if err != nil { - return nil, fmt.Errorf("decrypting blob: %w", err) - } - - log.Debug("Downloaded and decrypted blob", - "hash", blobHash[:16], - "encrypted_size", humanize.Bytes(uint64(len(encryptedData))), - "decrypted_size", humanize.Bytes(uint64(len(data))), - ) - - return &BlobFetchResult{ - Data: data, - CompressedSize: int64(len(encryptedData)), - }, nil -} - // downloadBlob downloads and decrypts a blob func (v *Vaultik) downloadBlob(ctx context.Context, blobHash string, expectedSize int64, identity age.Identity) ([]byte, error) { result, err := v.FetchAndDecryptBlob(ctx, blobHash, expectedSize, 
identity) @@ -558,7 +517,7 @@ func (v *Vaultik) verifyRestoredFiles( "files", len(regularFiles), "bytes", humanize.Bytes(uint64(totalBytes)), ) - _, _ = fmt.Fprintf(v.Stdout, "\nVerifying %d files (%s)...\n", + v.printfStdout("\nVerifying %d files (%s)...\n", len(regularFiles), humanize.Bytes(uint64(totalBytes)), ) @@ -569,13 +528,13 @@ func (v *Vaultik) verifyRestoredFiles( bar = progressbar.NewOptions64( totalBytes, progressbar.OptionSetDescription("Verifying"), - progressbar.OptionSetWriter(os.Stderr), + progressbar.OptionSetWriter(v.Stderr), progressbar.OptionShowBytes(true), progressbar.OptionShowCount(), progressbar.OptionSetWidth(40), progressbar.OptionThrottle(100*time.Millisecond), progressbar.OptionOnCompletion(func() { - fmt.Fprint(os.Stderr, "\n") + v.printfStderr("\n") }), progressbar.OptionSetRenderBlankState(true), ) diff --git a/internal/vaultik/snapshot.go b/internal/vaultik/snapshot.go index a6e498b..2960389 100644 --- a/internal/vaultik/snapshot.go +++ b/internal/vaultik/snapshot.go @@ -545,7 +545,7 @@ func (v *Vaultik) PurgeSnapshots(keepLatest bool, olderThan string, force bool) if !force { v.printfStdout("\nDelete %d snapshot(s)? [y/N] ", len(toDelete)) var confirm string - if _, err := fmt.Scanln(&confirm); err != nil { + if _, err := v.scanStdin(&confirm); err != nil { // Treat EOF or error as "no" v.printlnStdout("Cancelled") return nil @@ -851,7 +851,7 @@ func (v *Vaultik) RemoveSnapshot(snapshotID string, opts *RemoveOptions) (*Remov v.printfStdout("Remove snapshot '%s' from local database? 
[y/N] ", snapshotID) } var confirm string - if err := v.scanlnStdin(&confirm); err != nil { + if _, err := v.scanStdin(&confirm); err != nil { v.printlnStdout("Cancelled") return result, nil } diff --git a/internal/vaultik/vaultik.go b/internal/vaultik/vaultik.go index 7fe5a3b..7dce62a 100644 --- a/internal/vaultik/vaultik.go +++ b/internal/vaultik/vaultik.go @@ -129,12 +129,6 @@ func (v *Vaultik) GetFilesystem() afero.Fs { return v.Fs } -// Outputf writes formatted output to stdout for user-facing messages. -// This should be used for all non-log user output. -func (v *Vaultik) Outputf(format string, args ...any) { - _, _ = fmt.Fprintf(v.Stdout, format, args...) -} - // printfStdout writes formatted output to stdout. func (v *Vaultik) printfStdout(format string, args ...any) { _, _ = fmt.Fprintf(v.Stdout, format, args...) @@ -145,22 +139,14 @@ func (v *Vaultik) printlnStdout(args ...any) { _, _ = fmt.Fprintln(v.Stdout, args...) } -// scanlnStdin reads a line from stdin into the provided string pointer. -func (v *Vaultik) scanlnStdin(s *string) error { - _, err := fmt.Fscanln(v.Stdin, s) - return err +// printfStderr writes formatted output to stderr. +func (v *Vaultik) printfStderr(format string, args ...any) { + _, _ = fmt.Fprintf(v.Stderr, format, args...) } -// FetchBlob downloads a blob from storage and returns a reader for the encrypted data. -func (v *Vaultik) FetchBlob(ctx context.Context, blobHash string, expectedSize int64) (io.ReadCloser, int64, error) { - blobPath := fmt.Sprintf("blobs/%s/%s/%s", blobHash[:2], blobHash[2:4], blobHash) - - reader, err := v.Storage.Get(ctx, blobPath) - if err != nil { - return nil, 0, fmt.Errorf("downloading blob: %w", err) - } - - return reader, expectedSize, nil +// scanStdin reads a line of input from stdin. +func (v *Vaultik) scanStdin(a ...any) (int, error) { + return fmt.Fscanln(v.Stdin, a...) 
} // TestVaultik wraps a Vaultik with captured stdout/stderr for testing diff --git a/internal/vaultik/verify.go b/internal/vaultik/verify.go index 3c793db..55213ef 100644 --- a/internal/vaultik/verify.go +++ b/internal/vaultik/verify.go @@ -58,14 +58,14 @@ func (v *Vaultik) RunDeepVerify(snapshotID string, opts *VerifyOptions) error { ) if !opts.JSON { - v.Outputf("Deep verification of snapshot: %s\n\n", snapshotID) + v.printfStdout("Deep verification of snapshot: %s\n\n", snapshotID) } // Step 1: Download manifest manifestPath := fmt.Sprintf("metadata/%s/manifest.json.zst", snapshotID) log.Info("Downloading manifest", "path", manifestPath) if !opts.JSON { - v.Outputf("Downloading manifest...\n") + v.printfStdout("Downloading manifest...\n") } manifestReader, err := v.Storage.Get(v.ctx, manifestPath) @@ -95,14 +95,14 @@ func (v *Vaultik) RunDeepVerify(snapshotID string, opts *VerifyOptions) error { "manifest_total_size", humanize.Bytes(uint64(manifest.TotalCompressedSize)), ) if !opts.JSON { - v.Outputf("Manifest loaded: %d blobs (%s)\n", manifest.BlobCount, humanize.Bytes(uint64(manifest.TotalCompressedSize))) + v.printfStdout("Manifest loaded: %d blobs (%s)\n", manifest.BlobCount, humanize.Bytes(uint64(manifest.TotalCompressedSize))) } // Step 2: Download and decrypt database (authoritative source) dbPath := fmt.Sprintf("metadata/%s/db.zst.age", snapshotID) log.Info("Downloading encrypted database", "path", dbPath) if !opts.JSON { - v.Outputf("Downloading and decrypting database...\n") + v.printfStdout("Downloading and decrypting database...\n") } dbReader, err := v.Storage.Get(v.ctx, dbPath) @@ -155,8 +155,8 @@ func (v *Vaultik) RunDeepVerify(snapshotID string, opts *VerifyOptions) error { "db_total_size", humanize.Bytes(uint64(totalSize)), ) if !opts.JSON { - v.Outputf("Database loaded: %d blobs (%s)\n", len(dbBlobs), humanize.Bytes(uint64(totalSize))) - v.Outputf("Verifying manifest against database...\n") + v.printfStdout("Database loaded: %d blobs (%s)\n", 
len(dbBlobs), humanize.Bytes(uint64(totalSize))) + v.printfStdout("Verifying manifest against database...\n") } // Step 4: Verify manifest matches database @@ -171,8 +171,8 @@ func (v *Vaultik) RunDeepVerify(snapshotID string, opts *VerifyOptions) error { // Step 5: Verify all blobs exist in S3 (using database as source) if !opts.JSON { - v.Outputf("Manifest verified.\n") - v.Outputf("Checking blob existence in remote storage...\n") + v.printfStdout("Manifest verified.\n") + v.printfStdout("Checking blob existence in remote storage...\n") } if err := v.verifyBlobExistenceFromDB(dbBlobs); err != nil { result.Status = "failed" @@ -185,8 +185,8 @@ func (v *Vaultik) RunDeepVerify(snapshotID string, opts *VerifyOptions) error { // Step 6: Deep verification - download and verify blob contents if !opts.JSON { - v.Outputf("All blobs exist.\n") - v.Outputf("Downloading and verifying blob contents (%d blobs, %s)...\n", len(dbBlobs), humanize.Bytes(uint64(totalSize))) + v.printfStdout("All blobs exist.\n") + v.printfStdout("Downloading and verifying blob contents (%d blobs, %s)...\n", len(dbBlobs), humanize.Bytes(uint64(totalSize))) } if err := v.performDeepVerificationFromDB(dbBlobs, tempDB.DB, opts); err != nil { result.Status = "failed" @@ -211,10 +211,10 @@ func (v *Vaultik) RunDeepVerify(snapshotID string, opts *VerifyOptions) error { "blobs_verified", len(dbBlobs), ) - v.Outputf("\n✓ Verification completed successfully\n") - v.Outputf(" Snapshot: %s\n", snapshotID) - v.Outputf(" Blobs verified: %d\n", len(dbBlobs)) - v.Outputf(" Total size: %s\n", humanize.Bytes(uint64(totalSize))) + v.printfStdout("\n✓ Verification completed successfully\n") + v.printfStdout(" Snapshot: %s\n", snapshotID) + v.printfStdout(" Blobs verified: %d\n", len(dbBlobs)) + v.printfStdout(" Total size: %s\n", humanize.Bytes(uint64(totalSize))) return nil } @@ -569,7 +569,7 @@ func (v *Vaultik) performDeepVerificationFromDB(blobs []snapshot.BlobInfo, db *s ) if !opts.JSON { - v.Outputf(" Verified 
%d/%d blobs (%d remaining) - %s/%s - elapsed %s, eta %s\n", + v.printfStdout(" Verified %d/%d blobs (%d remaining) - %s/%s - elapsed %s, eta %s\n", i+1, len(blobs), remaining, humanize.Bytes(uint64(bytesProcessed)), humanize.Bytes(uint64(totalBytesExpected)),