Add failing test for restore blob-cache locality and ReadAt usage

Captures three behaviors the restore hot path must exhibit but
currently doesn't, all under one test:

  * Peak blob disk cache occupancy ≤ 1. Smart restore ordering should
    drain every file referencing the currently-cached blob before
    downloading the next one, so the sweeper can free each blob the
    moment its file set is exhausted.
  * Every remote blob fetched exactly once (counter on a wrapping
    Storer). Already true today; the test pins it so neither future
    cache-eviction nor reorder regressions can introduce
    re-downloads.
  * blobDiskCache.Get is never called during restore — chunk
    extraction must go through ReadAt so we never read the whole
    blob from disk to slice out a few KB. The 10 GB
    photo-snapshot --debug output showed ~900 ms per cache-hit chunk
    extract; ReadAt should bring that to sub-millisecond.

Adds Get/ReadAt call counters and a peak-Len tracker to
blobDiskCache, plus an internal restoreCacheObserver hook on Vaultik
so the test can capture the production cache instance without
exporting unexported types.

Currently fails with peak_len=3, get_calls=24, readat_calls=0. The
fix follows in subsequent commits.
This commit is contained in:
2026-06-17 08:14:55 +02:00
parent 39d5d21d48
commit a92b1a82ad
4 changed files with 369 additions and 1 deletions

View File

@@ -18,6 +18,11 @@ type blobDiskCacheEntry struct {
// blobDiskCache is an LRU cache that stores blobs on disk instead of in memory. // blobDiskCache is an LRU cache that stores blobs on disk instead of in memory.
// Blobs are written to a temp directory keyed by their hash. When total size // Blobs are written to a temp directory keyed by their hash. When total size
// exceeds maxBytes, the least-recently-used entries are evicted (deleted from disk). // exceeds maxBytes, the least-recently-used entries are evicted (deleted from disk).
//
// The Get/ReadAt/peak-Len counters are debugging instrumentation used by
// tests to assert that the restore code path uses ReadAt (which reads
// only the requested slice of a blob) rather than Get (which reads the
// full blob into memory).
type blobDiskCache struct { type blobDiskCache struct {
mu sync.Mutex mu sync.Mutex
dir string dir string
@@ -26,6 +31,11 @@ type blobDiskCache struct {
items map[string]*blobDiskCacheEntry items map[string]*blobDiskCacheEntry
head *blobDiskCacheEntry // most recent head *blobDiskCacheEntry // most recent
tail *blobDiskCacheEntry // least recent tail *blobDiskCacheEntry // least recent
// Instrumentation. Mutated under mu; readable via the methods below.
getCalls int
readAtCalls int
peakLen int
} }
// newBlobDiskCache creates a new disk-based blob cache with the given max size. // newBlobDiskCache creates a new disk-based blob cache with the given max size.
@@ -115,12 +125,17 @@ func (c *blobDiskCache) Put(key string, data []byte) error {
c.evictLRU() c.evictLRU()
} }
if n := len(c.items); n > c.peakLen {
c.peakLen = n
}
return nil return nil
} }
// Get reads a cached blob from disk. Returns data and true on hit. // Get reads a cached blob from disk. Returns data and true on hit.
func (c *blobDiskCache) Get(key string) ([]byte, bool) { func (c *blobDiskCache) Get(key string) ([]byte, bool) {
c.mu.Lock() c.mu.Lock()
c.getCalls++
e, ok := c.items[key] e, ok := c.items[key]
if !ok { if !ok {
c.mu.Unlock() c.mu.Unlock()
@@ -147,6 +162,7 @@ func (c *blobDiskCache) Get(key string) ([]byte, bool) {
// ReadAt reads a slice of a cached blob without loading the entire blob into memory. // ReadAt reads a slice of a cached blob without loading the entire blob into memory.
func (c *blobDiskCache) ReadAt(key string, offset, length int64) ([]byte, error) { func (c *blobDiskCache) ReadAt(key string, offset, length int64) ([]byte, error) {
c.mu.Lock() c.mu.Lock()
c.readAtCalls++
e, ok := c.items[key] e, ok := c.items[key]
if !ok { if !ok {
c.mu.Unlock() c.mu.Unlock()
@@ -223,6 +239,28 @@ func (c *blobDiskCache) Len() int {
return len(c.items) return len(c.items)
} }
// GetCalls returns the number of times Get has been called.
func (c *blobDiskCache) GetCalls() int {
c.mu.Lock()
defer c.mu.Unlock()
return c.getCalls
}
// ReadAtCalls returns the number of times ReadAt has been called.
func (c *blobDiskCache) ReadAtCalls() int {
c.mu.Lock()
defer c.mu.Unlock()
return c.readAtCalls
}
// PeakLen returns the maximum number of cached entries ever held at
// once during this cache's lifetime.
func (c *blobDiskCache) PeakLen() int {
c.mu.Lock()
defer c.mu.Unlock()
return c.peakLen
}
// Close removes the cache directory and all cached blobs. // Close removes the cache directory and all cached blobs.
func (c *blobDiskCache) Close() error { func (c *blobDiskCache) Close() error {
c.mu.Lock() c.mu.Lock()

View File

@@ -177,7 +177,15 @@ func (v *Vaultik) restoreAllFiles(
if err != nil { if err != nil {
return nil, fmt.Errorf("creating blob cache: %w", err) return nil, fmt.Errorf("creating blob cache: %w", err)
} }
defer func() { _ = blobCache.Close() }() if v.restoreCacheObserver != nil {
v.restoreCacheObserver(blobCache)
}
defer func() {
if v.restoreCacheObserver != nil {
v.restoreCacheObserver(blobCache)
}
_ = blobCache.Close()
}()
// Per-restore sweep state: every blob_size_limit/100 bytes written, // Per-restore sweep state: every blob_size_limit/100 bytes written,
// scan the cache and delete any blob whose remaining file references // scan the cache and delete any blob whose remaining file references

View File

@@ -0,0 +1,315 @@
package vaultik
import (
"bytes"
"context"
"crypto/rand"
"fmt"
"io"
"os"
"path/filepath"
"sort"
"sync"
"testing"
"github.com/spf13/afero"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
"sneak.berlin/go/vaultik/internal/config"
"sneak.berlin/go/vaultik/internal/database"
"sneak.berlin/go/vaultik/internal/log"
"sneak.berlin/go/vaultik/internal/snapshot"
"sneak.berlin/go/vaultik/internal/storage"
"sneak.berlin/go/vaultik/internal/ui"
)
// TestRestoreLocalityAndReadAt asserts three properties of the restore
// hot path that together produce acceptable throughput on real-world
// snapshots. All three currently fail on main:
//
// 1. Peak blob cache occupancy ≤ 1.
// Restore order must respect blob locality: every file fully
// contained within the currently cached blob should be restored
// before any other blob is downloaded. The sweeper then frees
// each blob as soon as its file set is exhausted. Without smart
// ordering, path-order interleaves blobs and the cache holds
// every touched blob until the last file referencing it lands.
//
// 2. Each remote blob is fetched exactly once.
// Counted via wrapping the Storer.
//
// 3. blobDiskCache.Get is never called during restore.
// Chunk extraction from a cached blob must go through ReadAt,
// which reads only the chunk's bytes from disk. Get reads the
// entire blob (up to 50 GB in production) into memory just to
// slice out a few KB — currently the dominant cost in restore.
//
// The test deliberately constructs an adversarial scenario: three
// blobs A/B/C of ~6 MB each, nine files distributed across them, and
// path-ordered names that interleave the blobs (a1, b1, c1, a2, b2,
// c2, …) so naive path-order processing would touch every blob before
// finishing any of them.
func TestRestoreLocalityAndReadAt(t *testing.T) {
log.Initialize(log.Config{})
fs := afero.NewOsFs()
tempDir, err := os.MkdirTemp("", "vaultik-locality-")
require.NoError(t, err)
defer func() { _ = os.RemoveAll(tempDir) }()
dataDir := filepath.Join(tempDir, "source")
storeDir := filepath.Join(tempDir, "remote")
restoreDir := filepath.Join(tempDir, "restored")
dbPath := filepath.Join(tempDir, "index.sqlite")
require.NoError(t, fs.MkdirAll(dataDir, 0o755))
// Layout: 15 source files of exactly 1 MiB each. With
// chunkSize (avg) = 4 MiB the chunker's minSize is 1 MiB, so any
// file of 1 MiB becomes a single chunk. With a 5 MiB blob limit
// the packer fits exactly 5 chunks per blob, producing 3 blobs
// containing src-001..005, src-006..010, src-011..015.
//
// Then add 9 "copy" files — byte-for-byte clones of three of the
// sources (one from each blob group) — with interleaved names
// (cp-001-A, cp-002-B, cp-003-C, cp-004-A, …) so a naive
// path-ordered restore would touch all three blobs before
// finishing any of them.
const (
srcBytes = 1024 * 1024
srcCount = 15
blobsCount = 3
perBlob = srcCount / blobsCount
)
type source struct {
path string
data []byte
}
sources := make([]*source, srcCount)
for i := 0; i < srcCount; i++ {
s := &source{
path: fmt.Sprintf("src-%03d.bin", i+1),
data: randomBytes(t, srcBytes),
}
sources[i] = s
require.NoError(t, afero.WriteFile(fs, filepath.Join(dataDir, s.path), s.data, 0o644))
}
// Pick one representative source per blob group (src-001 → blob
// 1, src-006 → blob 2, src-011 → blob 3) and create 3 copies of
// each with interleaved alphabetical names.
type copyFile struct {
path string
data []byte
sourceBlob int // 0, 1, or 2
sourceIndex int // index into sources slice
}
groupReps := []int{0, perBlob, 2 * perBlob} // 0, 5, 10
letters := []byte{'A', 'B', 'C'}
var copies []copyFile
for i := 0; i < 3; i++ {
for j := 0; j < blobsCount; j++ {
seq := i*blobsCount + j + 1
name := fmt.Sprintf("cp-%03d-%c.bin", seq, letters[j])
path := filepath.Join(dataDir, name)
src := sources[groupReps[j]]
require.NoError(t, afero.WriteFile(fs, path, src.data, 0o644))
copies = append(copies, copyFile{path: path, data: src.data, sourceBlob: j, sourceIndex: groupReps[j]})
}
}
// chunkSize avg = 4 MiB makes minSize = 1 MiB, so a 1 MiB file
// becomes one chunk. maxBlobSize = 5 MiB packs exactly 5 chunks
// per blob, yielding 3 blobs from 15 source files.
chunkSize := int64(4 * 1024 * 1024)
maxBlobSize := int64(5 * 1024 * 1024)
storer, err := storage.NewFileStorer(storeDir)
require.NoError(t, err)
agePublicKey := "age1ezrjmfpwsc95svdg0y54mums3zevgzu0x0ecq2f7tp8a05gl0sjq9q9wjg"
ageSecretKey := "AGE-SECRET-KEY-19CR5YSFW59HM4TLD6GXVEDMZFTVVF7PPHKUT68TXSFPK7APHXA2QS2NJA5"
cfg := &config.Config{
AgeRecipients: []string{agePublicKey},
AgeSecretKey: ageSecretKey,
CompressionLevel: 3,
Hostname: "test-host",
BlobSizeLimit: config.Size(maxBlobSize),
}
ctx := context.Background()
db, err := database.New(ctx, dbPath)
require.NoError(t, err)
defer func() { _ = db.Close() }()
repos := database.NewRepositories(db)
sm := snapshot.NewSnapshotManager(snapshot.SnapshotManagerParams{
Repos: repos,
Storage: storer,
Config: cfg,
})
sm.SetFilesystem(fs)
scanner := snapshot.NewScanner(snapshot.ScannerConfig{
FS: fs,
Storage: storer,
ChunkSize: chunkSize,
MaxBlobSize: maxBlobSize,
CompressionLevel: cfg.CompressionLevel,
AgeRecipients: cfg.AgeRecipients,
Repositories: repos,
})
snapshotID, err := sm.CreateSnapshotWithName(ctx, cfg.Hostname, "locality", "test-version", "test-git")
require.NoError(t, err)
_, err = scanner.Scan(ctx, dataDir, snapshotID)
require.NoError(t, err)
require.NoError(t, sm.CompleteSnapshot(ctx, snapshotID))
require.NoError(t, sm.ExportSnapshotMetadata(ctx, dbPath, snapshotID))
blobsOnDisk := listBlobKeys(t, storeDir)
t.Logf("backup produced %d blobs", len(blobsOnDisk))
require.GreaterOrEqual(t, len(blobsOnDisk), 3, "expected at least 3 blobs from 3 filler groups")
require.NoError(t, db.Close())
// Wrap the storer so we can count downloads per blob key.
counter := newCountingStorer(storer)
// Capture the restore-side cache for instrumentation inspection.
// The observer fires twice (immediately after creation and
// immediately before close) so we read PeakLen and call counters
// from the same instance the production code used.
var cacheRef *blobDiskCache
v := &Vaultik{
Config: cfg,
Storage: counter,
Fs: fs,
Stdout: io.Discard,
Stderr: io.Discard,
UI: ui.NewWithColor(io.Discard, false),
restoreCacheObserver: func(c *blobDiskCache) {
cacheRef = c
},
}
v.SetContext(ctx)
require.NoError(t, v.Restore(&RestoreOptions{
SnapshotID: snapshotID,
TargetDir: restoreDir,
}))
require.NotNil(t, cacheRef, "restoreCacheObserver must fire during restore")
// Verify restored content matches.
for _, s := range sources {
restored := filepath.Join(restoreDir, dataDir, s.path)
got, err := afero.ReadFile(fs, restored)
require.NoErrorf(t, err, "source missing after restore: %s", s.path)
require.Truef(t, bytes.Equal(got, s.data), "byte mismatch for source %s", s.path)
}
for _, c := range copies {
restored := filepath.Join(restoreDir, c.path)
got, err := afero.ReadFile(fs, restored)
require.NoErrorf(t, err, "copy missing after restore: %s", c.path)
require.Truef(t, bytes.Equal(got, c.data), "byte mismatch for copy %s", c.path)
}
// (1) Each blob fetched exactly once.
for key, n := range counter.snapshot() {
if !filterBlobKey(key) {
continue
}
assert.Equalf(t, 1, n, "blob %s fetched %d times, want exactly 1", key, n)
}
// (2) Peak cache size ≤ 1. The sweeper plus locality-aware
// ordering should free each blob before the next one downloads.
assert.LessOrEqualf(t, cacheRef.PeakLen(), 1,
"peak cached blobs was %d; expected ≤ 1 with locality-ordered restore", cacheRef.PeakLen())
// (3) Cache.Get must never be called during restore — chunk
// extraction has to go through ReadAt so we never read the whole
// blob from disk to grab a few KB slice.
assert.Equalf(t, 0, cacheRef.GetCalls(),
"blobDiskCache.Get was called %d times during restore; restore must use ReadAt exclusively", cacheRef.GetCalls())
t.Logf("blob cache stats: peak_len=%d get_calls=%d readat_calls=%d",
cacheRef.PeakLen(), cacheRef.GetCalls(), cacheRef.ReadAtCalls())
}
// randomBytes returns n bytes of random data. Used to make sure the
// chunker picks non-degenerate FastCDC boundaries.
func randomBytes(t *testing.T, n int) []byte {
t.Helper()
b := make([]byte, n)
_, err := rand.Read(b)
require.NoError(t, err)
return b
}
// listBlobKeys walks the FileStorer blobs/ tree and returns the
// relative keys for every blob file present.
func listBlobKeys(t *testing.T, storeDir string) []string {
t.Helper()
var keys []string
root := filepath.Join(storeDir, "blobs")
err := filepath.Walk(root, func(p string, info os.FileInfo, err error) error {
if err != nil {
return err
}
if info.IsDir() {
return nil
}
rel, _ := filepath.Rel(storeDir, p)
keys = append(keys, rel)
return nil
})
require.NoError(t, err)
sort.Strings(keys)
return keys
}
// filterBlobKey returns true when key looks like a blob storage path
// (rather than a snapshot metadata path).
func filterBlobKey(key string) bool {
return len(key) > 6 && key[:6] == "blobs/"
}
// countingStorerInternal wraps a storage.Storer and records the number
// of Get calls per key, so the locality test can assert each blob is
// fetched exactly once. Defined here (rather than reusing the one in
// the integration_test package) because this test lives in package
// vaultik for access to unexported cache internals.
type countingStorerInternal struct {
storage.Storer
mu sync.Mutex
counts map[string]int
}
func newCountingStorer(inner storage.Storer) *countingStorerInternal {
return &countingStorerInternal{Storer: inner, counts: make(map[string]int)}
}
func (c *countingStorerInternal) Get(ctx context.Context, key string) (io.ReadCloser, error) {
c.mu.Lock()
c.counts[key]++
c.mu.Unlock()
return c.Storer.Get(ctx, key)
}
func (c *countingStorerInternal) snapshot() map[string]int {
c.mu.Lock()
defer c.mu.Unlock()
out := make(map[string]int, len(c.counts))
for k, v := range c.counts {
out[k] = v
}
return out
}

View File

@@ -44,6 +44,13 @@ type Vaultik struct {
// writer wrapping Stdout; the cli layer replaces it with a discarding // writer wrapping Stdout; the cli layer replaces it with a discarding
// writer in --cron mode. // writer in --cron mode.
UI *ui.Writer UI *ui.Writer
// restoreCacheObserver, if non-nil, is invoked once with the
// restore-side blob disk cache immediately after the cache is
// created and again immediately before it is closed. Only
// internal-package tests set this; the type is unexported so
// callers outside this package can't reach it.
restoreCacheObserver func(*blobDiskCache)
} }
// VaultikParams contains all parameters for New that can be provided by fx // VaultikParams contains all parameters for New that can be provided by fx