Compare commits

..

1 Commits

Author SHA1 Message Date
clawbot
3d3c13cd01 fix: bound blob cache during restore with LRU eviction to prevent OOM
The restore operation cached every downloaded blob in an unbounded map,
which could exhaust system memory when restoring large backups with many
unique blobs (each up to 10GB).

Replaced with an LRU cache bounded to 1 GiB by default, evicting
least-recently-used blobs when the limit is exceeded.
2026-02-08 12:04:50 -08:00
5 changed files with 63 additions and 418 deletions

View File

@ -51,13 +51,7 @@ func CompressStream(dst io.Writer, src io.Reader, compressionLevel int, recipien
if err != nil { if err != nil {
return 0, "", fmt.Errorf("creating writer: %w", err) return 0, "", fmt.Errorf("creating writer: %w", err)
} }
defer func() { _ = w.Close() }()
closed := false
defer func() {
if !closed {
_ = w.Close()
}
}()
// Copy data // Copy data
if _, err := io.Copy(w, src); err != nil { if _, err := io.Copy(w, src); err != nil {
@ -68,7 +62,6 @@ func CompressStream(dst io.Writer, src io.Reader, compressionLevel int, recipien
if err := w.Close(); err != nil { if err := w.Close(); err != nil {
return 0, "", fmt.Errorf("closing writer: %w", err) return 0, "", fmt.Errorf("closing writer: %w", err)
} }
closed = true
return w.BytesWritten(), hex.EncodeToString(w.Sum256()), nil return w.BytesWritten(), hex.EncodeToString(w.Sum256()), nil
} }

View File

@ -1,28 +0,0 @@
package vaultik
// TODO: These are stub implementations for methods referenced but not yet
// implemented. They allow the package to compile for testing.
// Remove once the real implementations land.
import (
"context"
"fmt"
"io"
"filippo.io/age"
)
// FetchAndDecryptBlobResult holds the result of fetching and decrypting a blob.
type FetchAndDecryptBlobResult struct {
Data []byte
}
// FetchAndDecryptBlob downloads a blob, decrypts it, and returns the plaintext data.
func (v *Vaultik) FetchAndDecryptBlob(ctx context.Context, blobHash string, expectedSize int64, identity age.Identity) (*FetchAndDecryptBlobResult, error) {
return nil, fmt.Errorf("FetchAndDecryptBlob not yet implemented")
}
// FetchBlob downloads a blob and returns a reader for the encrypted data.
func (v *Vaultik) FetchBlob(ctx context.Context, blobHash string, expectedSize int64) (io.ReadCloser, int64, error) {
return nil, 0, fmt.Errorf("FetchBlob not yet implemented")
}

View File

@ -1,210 +1,83 @@
package vaultik package vaultik
import ( import (
"fmt" "container/list"
"os"
"path/filepath"
"sync" "sync"
) )
// defaultMaxBlobCacheBytes is the default maximum size of the disk blob cache (10 GB). // defaultMaxBlobCacheBytes is the default maximum size of the blob cache (1 GB).
const defaultMaxBlobCacheBytes = 10 << 30 // 10 GiB const defaultMaxBlobCacheBytes = 1 << 30 // 1 GiB
// blobDiskCacheEntry tracks a cached blob on disk. // blobCacheEntry is an entry in the LRU blob cache.
type blobDiskCacheEntry struct { type blobCacheEntry struct {
key string key string
size int64 data []byte
prev *blobDiskCacheEntry
next *blobDiskCacheEntry
} }
// blobDiskCache is an LRU cache that stores blobs on disk instead of in memory. // blobLRUCache is a simple LRU cache for downloaded blob data, bounded by
// Blobs are written to a temp directory keyed by their hash. When total size // total byte size to prevent memory exhaustion during large restores.
// exceeds maxBytes, the least-recently-used entries are evicted (deleted from disk). type blobLRUCache struct {
type blobDiskCache struct {
mu sync.Mutex mu sync.Mutex
dir string
maxBytes int64 maxBytes int64
curBytes int64 curBytes int64
items map[string]*blobDiskCacheEntry ll *list.List
head *blobDiskCacheEntry // most recent items map[string]*list.Element
tail *blobDiskCacheEntry // least recent
} }
// newBlobDiskCache creates a new disk-based blob cache with the given max size. // newBlobLRUCache creates a new LRU blob cache with the given maximum size in bytes.
func newBlobDiskCache(maxBytes int64) (*blobDiskCache, error) { func newBlobLRUCache(maxBytes int64) *blobLRUCache {
dir, err := os.MkdirTemp("", "vaultik-blobcache-*") return &blobLRUCache{
if err != nil {
return nil, fmt.Errorf("creating blob cache dir: %w", err)
}
return &blobDiskCache{
dir: dir,
maxBytes: maxBytes, maxBytes: maxBytes,
items: make(map[string]*blobDiskCacheEntry), ll: list.New(),
}, nil items: make(map[string]*list.Element),
}
func (c *blobDiskCache) path(key string) string {
return filepath.Join(c.dir, key)
}
func (c *blobDiskCache) unlink(e *blobDiskCacheEntry) {
if e.prev != nil {
e.prev.next = e.next
} else {
c.head = e.next
}
if e.next != nil {
e.next.prev = e.prev
} else {
c.tail = e.prev
}
e.prev = nil
e.next = nil
}
func (c *blobDiskCache) pushFront(e *blobDiskCacheEntry) {
e.prev = nil
e.next = c.head
if c.head != nil {
c.head.prev = e
}
c.head = e
if c.tail == nil {
c.tail = e
} }
} }
func (c *blobDiskCache) evictLRU() { // Get returns the blob data for the given key, or nil if not cached.
if c.tail == nil { func (c *blobLRUCache) Get(key string) ([]byte, bool) {
return c.mu.Lock()
defer c.mu.Unlock()
if ele, ok := c.items[key]; ok {
c.ll.MoveToFront(ele)
return ele.Value.(*blobCacheEntry).data, true
} }
victim := c.tail return nil, false
c.unlink(victim)
delete(c.items, victim.key)
c.curBytes -= victim.size
_ = os.Remove(c.path(victim.key))
} }
// Put writes blob data to disk cache. Entries larger than maxBytes are silently skipped. // Put adds a blob to the cache, evicting least-recently-used entries if needed.
func (c *blobDiskCache) Put(key string, data []byte) error { func (c *blobLRUCache) Put(key string, data []byte) {
c.mu.Lock()
defer c.mu.Unlock()
entrySize := int64(len(data)) entrySize := int64(len(data))
c.mu.Lock() // If this single entry exceeds max, don't cache it
defer c.mu.Unlock()
if entrySize > c.maxBytes { if entrySize > c.maxBytes {
return nil return
} }
// Remove old entry if updating // If already present, update in place
if e, ok := c.items[key]; ok { if ele, ok := c.items[key]; ok {
c.unlink(e) c.ll.MoveToFront(ele)
c.curBytes -= e.size old := ele.Value.(*blobCacheEntry)
_ = os.Remove(c.path(key)) c.curBytes += entrySize - int64(len(old.data))
delete(c.items, key) old.data = data
} else {
ele := c.ll.PushFront(&blobCacheEntry{key: key, data: data})
c.items[key] = ele
c.curBytes += entrySize
} }
if err := os.WriteFile(c.path(key), data, 0600); err != nil { // Evict from back until under limit
return fmt.Errorf("writing blob to cache: %w", err) for c.curBytes > c.maxBytes && c.ll.Len() > 0 {
} oldest := c.ll.Back()
if oldest == nil {
e := &blobDiskCacheEntry{key: key, size: entrySize} break
c.pushFront(e)
c.items[key] = e
c.curBytes += entrySize
for c.curBytes > c.maxBytes && c.tail != nil {
c.evictLRU()
}
return nil
}
// Get reads a cached blob from disk. Returns data and true on hit.
func (c *blobDiskCache) Get(key string) ([]byte, bool) {
c.mu.Lock()
e, ok := c.items[key]
if !ok {
c.mu.Unlock()
return nil, false
}
c.unlink(e)
c.pushFront(e)
c.mu.Unlock()
data, err := os.ReadFile(c.path(key))
if err != nil {
c.mu.Lock()
if e2, ok2 := c.items[key]; ok2 && e2 == e {
c.unlink(e)
delete(c.items, key)
c.curBytes -= e.size
} }
c.mu.Unlock() entry := oldest.Value.(*blobCacheEntry)
return nil, false c.ll.Remove(oldest)
delete(c.items, entry.key)
c.curBytes -= int64(len(entry.data))
} }
return data, true
}
// ReadAt reads a slice of a cached blob without loading the entire blob into memory.
func (c *blobDiskCache) ReadAt(key string, offset, length int64) ([]byte, error) {
c.mu.Lock()
e, ok := c.items[key]
if !ok {
c.mu.Unlock()
return nil, fmt.Errorf("key %q not in cache", key)
}
if offset+length > e.size {
c.mu.Unlock()
return nil, fmt.Errorf("read beyond blob size: offset=%d length=%d size=%d", offset, length, e.size)
}
c.unlink(e)
c.pushFront(e)
c.mu.Unlock()
f, err := os.Open(c.path(key))
if err != nil {
return nil, err
}
defer f.Close()
buf := make([]byte, length)
if _, err := f.ReadAt(buf, offset); err != nil {
return nil, err
}
return buf, nil
}
// Has returns whether a key exists in the cache.
func (c *blobDiskCache) Has(key string) bool {
c.mu.Lock()
defer c.mu.Unlock()
_, ok := c.items[key]
return ok
}
// Size returns current total cached bytes.
func (c *blobDiskCache) Size() int64 {
c.mu.Lock()
defer c.mu.Unlock()
return c.curBytes
}
// Len returns number of cached entries.
func (c *blobDiskCache) Len() int {
c.mu.Lock()
defer c.mu.Unlock()
return len(c.items)
}
// Close removes the cache directory and all cached blobs.
func (c *blobDiskCache) Close() error {
c.mu.Lock()
defer c.mu.Unlock()
c.items = nil
c.head = nil
c.tail = nil
c.curBytes = 0
return os.RemoveAll(c.dir)
} }

View File

@ -1,189 +0,0 @@
package vaultik
import (
"bytes"
"crypto/rand"
"fmt"
"testing"
)
func TestBlobDiskCache_BasicGetPut(t *testing.T) {
cache, err := newBlobDiskCache(1 << 20)
if err != nil {
t.Fatal(err)
}
defer cache.Close()
data := []byte("hello world")
if err := cache.Put("key1", data); err != nil {
t.Fatal(err)
}
got, ok := cache.Get("key1")
if !ok {
t.Fatal("expected cache hit")
}
if !bytes.Equal(got, data) {
t.Fatalf("got %q, want %q", got, data)
}
_, ok = cache.Get("nonexistent")
if ok {
t.Fatal("expected cache miss")
}
}
func TestBlobDiskCache_EvictionUnderPressure(t *testing.T) {
maxBytes := int64(1000)
cache, err := newBlobDiskCache(maxBytes)
if err != nil {
t.Fatal(err)
}
defer cache.Close()
for i := 0; i < 5; i++ {
data := make([]byte, 300)
if err := cache.Put(fmt.Sprintf("key%d", i), data); err != nil {
t.Fatal(err)
}
}
if cache.Size() > maxBytes {
t.Fatalf("cache size %d exceeds max %d", cache.Size(), maxBytes)
}
if !cache.Has("key4") {
t.Fatal("expected key4 to be cached")
}
if cache.Has("key0") {
t.Fatal("expected key0 to be evicted")
}
}
func TestBlobDiskCache_OversizedEntryRejected(t *testing.T) {
cache, err := newBlobDiskCache(100)
if err != nil {
t.Fatal(err)
}
defer cache.Close()
data := make([]byte, 200)
if err := cache.Put("big", data); err != nil {
t.Fatal(err)
}
if cache.Has("big") {
t.Fatal("oversized entry should not be cached")
}
}
func TestBlobDiskCache_UpdateInPlace(t *testing.T) {
cache, err := newBlobDiskCache(1 << 20)
if err != nil {
t.Fatal(err)
}
defer cache.Close()
if err := cache.Put("key1", []byte("v1")); err != nil {
t.Fatal(err)
}
if err := cache.Put("key1", []byte("version2")); err != nil {
t.Fatal(err)
}
got, ok := cache.Get("key1")
if !ok {
t.Fatal("expected hit")
}
if string(got) != "version2" {
t.Fatalf("got %q, want %q", got, "version2")
}
if cache.Len() != 1 {
t.Fatalf("expected 1 entry, got %d", cache.Len())
}
if cache.Size() != int64(len("version2")) {
t.Fatalf("expected size %d, got %d", len("version2"), cache.Size())
}
}
func TestBlobDiskCache_ReadAt(t *testing.T) {
cache, err := newBlobDiskCache(1 << 20)
if err != nil {
t.Fatal(err)
}
defer cache.Close()
data := make([]byte, 1024)
if _, err := rand.Read(data); err != nil {
t.Fatal(err)
}
if err := cache.Put("blob1", data); err != nil {
t.Fatal(err)
}
chunk, err := cache.ReadAt("blob1", 100, 200)
if err != nil {
t.Fatal(err)
}
if !bytes.Equal(chunk, data[100:300]) {
t.Fatal("ReadAt returned wrong data")
}
_, err = cache.ReadAt("blob1", 900, 200)
if err == nil {
t.Fatal("expected error for out-of-bounds read")
}
_, err = cache.ReadAt("missing", 0, 10)
if err == nil {
t.Fatal("expected error for missing key")
}
}
func TestBlobDiskCache_Close(t *testing.T) {
cache, err := newBlobDiskCache(1 << 20)
if err != nil {
t.Fatal(err)
}
if err := cache.Put("key1", []byte("data")); err != nil {
t.Fatal(err)
}
if err := cache.Close(); err != nil {
t.Fatal(err)
}
}
func TestBlobDiskCache_LRUOrder(t *testing.T) {
cache, err := newBlobDiskCache(200)
if err != nil {
t.Fatal(err)
}
defer cache.Close()
d := make([]byte, 100)
if err := cache.Put("a", d); err != nil {
t.Fatal(err)
}
if err := cache.Put("b", d); err != nil {
t.Fatal(err)
}
// Access "a" to make it most recently used
cache.Get("a")
// Adding "c" should evict "b" (LRU), not "a"
if err := cache.Put("c", d); err != nil {
t.Fatal(err)
}
if !cache.Has("a") {
t.Fatal("expected 'a' to survive")
}
if !cache.Has("c") {
t.Fatal("expected 'c' to be present")
}
if cache.Has("b") {
t.Fatal("expected 'b' to be evicted")
}
}

View File

@ -109,11 +109,7 @@ func (v *Vaultik) Restore(opts *RestoreOptions) error {
// Step 5: Restore files // Step 5: Restore files
result := &RestoreResult{} result := &RestoreResult{}
blobCache, err := newBlobDiskCache(defaultMaxBlobCacheBytes) blobCache := newBlobLRUCache(defaultMaxBlobCacheBytes) // LRU cache bounded to ~1 GiB
if err != nil {
return fmt.Errorf("creating blob cache: %w", err)
}
defer blobCache.Close()
for i, file := range files { for i, file := range files {
if v.ctx.Err() != nil { if v.ctx.Err() != nil {
@ -145,7 +141,7 @@ func (v *Vaultik) Restore(opts *RestoreOptions) error {
"duration", result.Duration, "duration", result.Duration,
) )
v.printfStdout("Restored %d files (%s) in %s\n", _, _ = fmt.Fprintf(v.Stdout, "Restored %d files (%s) in %s\n",
result.FilesRestored, result.FilesRestored,
humanize.Bytes(uint64(result.BytesRestored)), humanize.Bytes(uint64(result.BytesRestored)),
result.Duration.Round(time.Second), result.Duration.Round(time.Second),
@ -158,14 +154,14 @@ func (v *Vaultik) Restore(opts *RestoreOptions) error {
} }
if result.FilesFailed > 0 { if result.FilesFailed > 0 {
v.printfStdout("\nVerification FAILED: %d files did not match expected checksums\n", result.FilesFailed) _, _ = fmt.Fprintf(v.Stdout, "\nVerification FAILED: %d files did not match expected checksums\n", result.FilesFailed)
for _, path := range result.FailedFiles { for _, path := range result.FailedFiles {
v.printfStdout(" - %s\n", path) _, _ = fmt.Fprintf(v.Stdout, " - %s\n", path)
} }
return fmt.Errorf("%d files failed verification", result.FilesFailed) return fmt.Errorf("%d files failed verification", result.FilesFailed)
} }
v.printfStdout("Verified %d files (%s)\n", _, _ = fmt.Fprintf(v.Stdout, "Verified %d files (%s)\n",
result.FilesVerified, result.FilesVerified,
humanize.Bytes(uint64(result.BytesVerified)), humanize.Bytes(uint64(result.BytesVerified)),
) )
@ -303,7 +299,7 @@ func (v *Vaultik) restoreFile(
targetDir string, targetDir string,
identity age.Identity, identity age.Identity,
chunkToBlobMap map[string]*database.BlobChunk, chunkToBlobMap map[string]*database.BlobChunk,
blobCache *blobDiskCache, blobCache *blobLRUCache,
result *RestoreResult, result *RestoreResult,
) error { ) error {
// Calculate target path - use full original path under target directory // Calculate target path - use full original path under target directory
@ -387,7 +383,7 @@ func (v *Vaultik) restoreRegularFile(
targetPath string, targetPath string,
identity age.Identity, identity age.Identity,
chunkToBlobMap map[string]*database.BlobChunk, chunkToBlobMap map[string]*database.BlobChunk,
blobCache *blobDiskCache, blobCache *blobLRUCache,
result *RestoreResult, result *RestoreResult,
) error { ) error {
// Get file chunks in order // Get file chunks in order
@ -427,7 +423,7 @@ func (v *Vaultik) restoreRegularFile(
if err != nil { if err != nil {
return fmt.Errorf("downloading blob %s: %w", blobHashStr[:16], err) return fmt.Errorf("downloading blob %s: %w", blobHashStr[:16], err)
} }
if putErr := blobCache.Put(blobHashStr, blobData); putErr != nil { log.Debug("Failed to cache blob on disk", "hash", blobHashStr[:16], "error", putErr) } blobCache.Put(blobHashStr, blobData)
result.BlobsDownloaded++ result.BlobsDownloaded++
result.BytesDownloaded += blob.CompressedSize result.BytesDownloaded += blob.CompressedSize
} }
@ -515,7 +511,7 @@ func (v *Vaultik) verifyRestoredFiles(
"files", len(regularFiles), "files", len(regularFiles),
"bytes", humanize.Bytes(uint64(totalBytes)), "bytes", humanize.Bytes(uint64(totalBytes)),
) )
v.printfStdout("\nVerifying %d files (%s)...\n", _, _ = fmt.Fprintf(v.Stdout, "\nVerifying %d files (%s)...\n",
len(regularFiles), len(regularFiles),
humanize.Bytes(uint64(totalBytes)), humanize.Bytes(uint64(totalBytes)),
) )
@ -526,13 +522,13 @@ func (v *Vaultik) verifyRestoredFiles(
bar = progressbar.NewOptions64( bar = progressbar.NewOptions64(
totalBytes, totalBytes,
progressbar.OptionSetDescription("Verifying"), progressbar.OptionSetDescription("Verifying"),
progressbar.OptionSetWriter(v.Stderr), progressbar.OptionSetWriter(os.Stderr),
progressbar.OptionShowBytes(true), progressbar.OptionShowBytes(true),
progressbar.OptionShowCount(), progressbar.OptionShowCount(),
progressbar.OptionSetWidth(40), progressbar.OptionSetWidth(40),
progressbar.OptionThrottle(100*time.Millisecond), progressbar.OptionThrottle(100*time.Millisecond),
progressbar.OptionOnCompletion(func() { progressbar.OptionOnCompletion(func() {
v.printfStderr("\n") fmt.Fprint(os.Stderr, "\n")
}), }),
progressbar.OptionSetRenderBlankState(true), progressbar.OptionSetRenderBlankState(true),
) )