Add failing test for restore blob-cache locality and ReadAt usage

Captures three behaviors the restore hot path must exhibit but currently doesn't, all under one test: * Peak blob disk cache occupancy ≤ 1. Smart restore ordering should drain every file referencing the currently-cached blob before downloading the next one, so the sweeper can free each blob the moment its file set is exhausted. * Every remote blob fetched exactly once (counter on a wrapping Storer). Already true today; the test pins it so neither future cache-eviction nor reorder regressions can introduce re-downloads. * blobDiskCache.Get is never called during restore — chunk extraction must go through ReadAt so we never read the whole blob from disk to slice out a few KB. The 10 GB photo-snapshot --debug output showed ~900 ms per cache-hit chunk extract; ReadAt should bring that to sub-millisecond. Adds Get/ReadAt call counters and a peak-Len tracker to blobDiskCache, plus an internal restoreCacheObserver hook on Vaultik so the test can capture the production cache instance without exporting unexported types. Currently fails with peak_len=3, get_calls=24, readat_calls=0. The fix follows in subsequent commits.
2026-06-17 08:14:55 +02:00
parent 39d5d21d48
commit a92b1a82ad
4 changed files with 369 additions and 1 deletions
--- a/internal/vaultik/blobcache.go
+++ b/internal/vaultik/blobcache.go
@@ -18,6 +18,11 @@ type blobDiskCacheEntry struct {
 // blobDiskCache is an LRU cache that stores blobs on disk instead of in memory.
 // Blobs are written to a temp directory keyed by their hash. When total size
 // exceeds maxBytes, the least-recently-used entries are evicted (deleted from disk).
+//
+// The Get/ReadAt/peak-Len counters are debugging instrumentation used by
+// tests to assert that the restore code path uses ReadAt (which reads
+// only the requested slice of a blob) rather than Get (which reads the
+// full blob into memory).
 type blobDiskCache struct {
 	mu       sync.Mutex
 	dir      string
@@ -26,6 +31,11 @@ type blobDiskCache struct {
 	items    map[string]*blobDiskCacheEntry
 	head     *blobDiskCacheEntry // most recent
 	tail     *blobDiskCacheEntry // least recent
+
+	// Instrumentation. Mutated under mu; readable via the methods below.
+	getCalls    int
+	readAtCalls int
+	peakLen     int
 }

 // newBlobDiskCache creates a new disk-based blob cache with the given max size.
@@ -115,12 +125,17 @@ func (c *blobDiskCache) Put(key string, data []byte) error {
 		c.evictLRU()
 	}

+	if n := len(c.items); n > c.peakLen {
+		c.peakLen = n
+	}
+
 	return nil
 }

 // Get reads a cached blob from disk. Returns data and true on hit.
 func (c *blobDiskCache) Get(key string) ([]byte, bool) {
 	c.mu.Lock()
+	c.getCalls++
 	e, ok := c.items[key]
 	if !ok {
 		c.mu.Unlock()
@@ -147,6 +162,7 @@ func (c *blobDiskCache) Get(key string) ([]byte, bool) {
 // ReadAt reads a slice of a cached blob without loading the entire blob into memory.
 func (c *blobDiskCache) ReadAt(key string, offset, length int64) ([]byte, error) {
 	c.mu.Lock()
+	c.readAtCalls++
 	e, ok := c.items[key]
 	if !ok {
 		c.mu.Unlock()
@@ -223,6 +239,28 @@ func (c *blobDiskCache) Len() int {
 	return len(c.items)
 }

+// GetCalls returns the number of times Get has been called.
+func (c *blobDiskCache) GetCalls() int {
+	c.mu.Lock()
+	defer c.mu.Unlock()
+	return c.getCalls
+}
+
+// ReadAtCalls returns the number of times ReadAt has been called.
+func (c *blobDiskCache) ReadAtCalls() int {
+	c.mu.Lock()
+	defer c.mu.Unlock()
+	return c.readAtCalls
+}
+
+// PeakLen returns the maximum number of cached entries ever held at
+// once during this cache's lifetime.
+func (c *blobDiskCache) PeakLen() int {
+	c.mu.Lock()
+	defer c.mu.Unlock()
+	return c.peakLen
+}
+
 // Close removes the cache directory and all cached blobs.
 func (c *blobDiskCache) Close() error {
 	c.mu.Lock()
--- a/internal/vaultik/restore.go
+++ b/internal/vaultik/restore.go
@@ -177,7 +177,15 @@ func (v *Vaultik) restoreAllFiles(
 	if err != nil {
 		return nil, fmt.Errorf("creating blob cache: %w", err)
 	}
-	defer func() { _ = blobCache.Close() }()
+	if v.restoreCacheObserver != nil {
+		v.restoreCacheObserver(blobCache)
+	}
+	defer func() {
+		if v.restoreCacheObserver != nil {
+			v.restoreCacheObserver(blobCache)
+		}
+		_ = blobCache.Close()
+	}()

 	// Per-restore sweep state: every blob_size_limit/100 bytes written,
 	// scan the cache and delete any blob whose remaining file references
--- a/internal/vaultik/restore_locality_test.go
+++ b/internal/vaultik/restore_locality_test.go
@@ -0,0 +1,315 @@
+package vaultik
+
+import (
+	"bytes"
+	"context"
+	"crypto/rand"
+	"fmt"
+	"io"
+	"os"
+	"path/filepath"
+	"sort"
+	"sync"
+	"testing"
+
+	"github.com/spf13/afero"
+	"github.com/stretchr/testify/assert"
+	"github.com/stretchr/testify/require"
+	"sneak.berlin/go/vaultik/internal/config"
+	"sneak.berlin/go/vaultik/internal/database"
+	"sneak.berlin/go/vaultik/internal/log"
+	"sneak.berlin/go/vaultik/internal/snapshot"
+	"sneak.berlin/go/vaultik/internal/storage"
+	"sneak.berlin/go/vaultik/internal/ui"
+)
+
+// TestRestoreLocalityAndReadAt asserts three properties of the restore
+// hot path that together produce acceptable throughput on real-world
+// snapshots. All three currently fail on main:
+//
+//  1. Peak blob cache occupancy ≤ 1.
+//     Restore order must respect blob locality: every file fully
+//     contained within the currently cached blob should be restored
+//     before any other blob is downloaded. The sweeper then frees
+//     each blob as soon as its file set is exhausted. Without smart
+//     ordering, path-order interleaves blobs and the cache holds
+//     every touched blob until the last file referencing it lands.
+//
+//  2. Each remote blob is fetched exactly once.
+//     Counted via wrapping the Storer.
+//
+//  3. blobDiskCache.Get is never called during restore.
+//     Chunk extraction from a cached blob must go through ReadAt,
+//     which reads only the chunk's bytes from disk. Get reads the
+//     entire blob (up to 50 GB in production) into memory just to
+//     slice out a few KB — currently the dominant cost in restore.
+//
+// The test deliberately constructs an adversarial scenario: three
+// blobs A/B/C of ~6 MB each, nine files distributed across them, and
+// path-ordered names that interleave the blobs (a1, b1, c1, a2, b2,
+// c2, …) so naive path-order processing would touch every blob before
+// finishing any of them.
+func TestRestoreLocalityAndReadAt(t *testing.T) {
+	log.Initialize(log.Config{})
+
+	fs := afero.NewOsFs()
+	tempDir, err := os.MkdirTemp("", "vaultik-locality-")
+	require.NoError(t, err)
+	defer func() { _ = os.RemoveAll(tempDir) }()
+
+	dataDir := filepath.Join(tempDir, "source")
+	storeDir := filepath.Join(tempDir, "remote")
+	restoreDir := filepath.Join(tempDir, "restored")
+	dbPath := filepath.Join(tempDir, "index.sqlite")
+
+	require.NoError(t, fs.MkdirAll(dataDir, 0o755))
+
+	// Layout: 15 source files of exactly 1 MiB each. With
+	// chunkSize (avg) = 4 MiB the chunker's minSize is 1 MiB, so any
+	// file of 1 MiB becomes a single chunk. With a 5 MiB blob limit
+	// the packer fits exactly 5 chunks per blob, producing 3 blobs
+	// containing src-001..005, src-006..010, src-011..015.
+	//
+	// Then add 9 "copy" files — byte-for-byte clones of three of the
+	// sources (one from each blob group) — with interleaved names
+	// (cp-001-A, cp-002-B, cp-003-C, cp-004-A, …) so a naive
+	// path-ordered restore would touch all three blobs before
+	// finishing any of them.
+	const (
+		srcBytes   = 1024 * 1024
+		srcCount   = 15
+		blobsCount = 3
+		perBlob    = srcCount / blobsCount
+	)
+
+	type source struct {
+		path string
+		data []byte
+	}
+	sources := make([]*source, srcCount)
+	for i := 0; i < srcCount; i++ {
+		s := &source{
+			path: fmt.Sprintf("src-%03d.bin", i+1),
+			data: randomBytes(t, srcBytes),
+		}
+		sources[i] = s
+		require.NoError(t, afero.WriteFile(fs, filepath.Join(dataDir, s.path), s.data, 0o644))
+	}
+
+	// Pick one representative source per blob group (src-001 → blob
+	// 1, src-006 → blob 2, src-011 → blob 3) and create 3 copies of
+	// each with interleaved alphabetical names.
+	type copyFile struct {
+		path        string
+		data        []byte
+		sourceBlob  int // 0, 1, or 2
+		sourceIndex int // index into sources slice
+	}
+	groupReps := []int{0, perBlob, 2 * perBlob} // 0, 5, 10
+	letters := []byte{'A', 'B', 'C'}
+	var copies []copyFile
+	for i := 0; i < 3; i++ {
+		for j := 0; j < blobsCount; j++ {
+			seq := i*blobsCount + j + 1
+			name := fmt.Sprintf("cp-%03d-%c.bin", seq, letters[j])
+			path := filepath.Join(dataDir, name)
+			src := sources[groupReps[j]]
+			require.NoError(t, afero.WriteFile(fs, path, src.data, 0o644))
+			copies = append(copies, copyFile{path: path, data: src.data, sourceBlob: j, sourceIndex: groupReps[j]})
+		}
+	}
+
+	// chunkSize avg = 4 MiB makes minSize = 1 MiB, so a 1 MiB file
+	// becomes one chunk. maxBlobSize = 5 MiB packs exactly 5 chunks
+	// per blob, yielding 3 blobs from 15 source files.
+	chunkSize := int64(4 * 1024 * 1024)
+	maxBlobSize := int64(5 * 1024 * 1024)
+
+	storer, err := storage.NewFileStorer(storeDir)
+	require.NoError(t, err)
+
+	agePublicKey := "age1ezrjmfpwsc95svdg0y54mums3zevgzu0x0ecq2f7tp8a05gl0sjq9q9wjg"
+	ageSecretKey := "AGE-SECRET-KEY-19CR5YSFW59HM4TLD6GXVEDMZFTVVF7PPHKUT68TXSFPK7APHXA2QS2NJA5"
+
+	cfg := &config.Config{
+		AgeRecipients:    []string{agePublicKey},
+		AgeSecretKey:     ageSecretKey,
+		CompressionLevel: 3,
+		Hostname:         "test-host",
+		BlobSizeLimit:    config.Size(maxBlobSize),
+	}
+
+	ctx := context.Background()
+
+	db, err := database.New(ctx, dbPath)
+	require.NoError(t, err)
+	defer func() { _ = db.Close() }()
+
+	repos := database.NewRepositories(db)
+
+	sm := snapshot.NewSnapshotManager(snapshot.SnapshotManagerParams{
+		Repos:   repos,
+		Storage: storer,
+		Config:  cfg,
+	})
+	sm.SetFilesystem(fs)
+
+	scanner := snapshot.NewScanner(snapshot.ScannerConfig{
+		FS:               fs,
+		Storage:          storer,
+		ChunkSize:        chunkSize,
+		MaxBlobSize:      maxBlobSize,
+		CompressionLevel: cfg.CompressionLevel,
+		AgeRecipients:    cfg.AgeRecipients,
+		Repositories:     repos,
+	})
+
+	snapshotID, err := sm.CreateSnapshotWithName(ctx, cfg.Hostname, "locality", "test-version", "test-git")
+	require.NoError(t, err)
+
+	_, err = scanner.Scan(ctx, dataDir, snapshotID)
+	require.NoError(t, err)
+
+	require.NoError(t, sm.CompleteSnapshot(ctx, snapshotID))
+	require.NoError(t, sm.ExportSnapshotMetadata(ctx, dbPath, snapshotID))
+
+	blobsOnDisk := listBlobKeys(t, storeDir)
+	t.Logf("backup produced %d blobs", len(blobsOnDisk))
+	require.GreaterOrEqual(t, len(blobsOnDisk), 3, "expected at least 3 blobs from 3 filler groups")
+
+	require.NoError(t, db.Close())
+
+	// Wrap the storer so we can count downloads per blob key.
+	counter := newCountingStorer(storer)
+
+	// Capture the restore-side cache for instrumentation inspection.
+	// The observer fires twice (immediately after creation and
+	// immediately before close) so we read PeakLen and call counters
+	// from the same instance the production code used.
+	var cacheRef *blobDiskCache
+	v := &Vaultik{
+		Config:  cfg,
+		Storage: counter,
+		Fs:      fs,
+		Stdout:  io.Discard,
+		Stderr:  io.Discard,
+		UI:      ui.NewWithColor(io.Discard, false),
+		restoreCacheObserver: func(c *blobDiskCache) {
+			cacheRef = c
+		},
+	}
+	v.SetContext(ctx)
+
+	require.NoError(t, v.Restore(&RestoreOptions{
+		SnapshotID: snapshotID,
+		TargetDir:  restoreDir,
+	}))
+
+	require.NotNil(t, cacheRef, "restoreCacheObserver must fire during restore")
+
+	// Verify restored content matches.
+	for _, s := range sources {
+		restored := filepath.Join(restoreDir, dataDir, s.path)
+		got, err := afero.ReadFile(fs, restored)
+		require.NoErrorf(t, err, "source missing after restore: %s", s.path)
+		require.Truef(t, bytes.Equal(got, s.data), "byte mismatch for source %s", s.path)
+	}
+	for _, c := range copies {
+		restored := filepath.Join(restoreDir, c.path)
+		got, err := afero.ReadFile(fs, restored)
+		require.NoErrorf(t, err, "copy missing after restore: %s", c.path)
+		require.Truef(t, bytes.Equal(got, c.data), "byte mismatch for copy %s", c.path)
+	}
+
+	// (1) Each blob fetched exactly once.
+	for key, n := range counter.snapshot() {
+		if !filterBlobKey(key) {
+			continue
+		}
+		assert.Equalf(t, 1, n, "blob %s fetched %d times, want exactly 1", key, n)
+	}
+
+	// (2) Peak cache size ≤ 1. The sweeper plus locality-aware
+	// ordering should free each blob before the next one downloads.
+	assert.LessOrEqualf(t, cacheRef.PeakLen(), 1,
+		"peak cached blobs was %d; expected ≤ 1 with locality-ordered restore", cacheRef.PeakLen())
+
+	// (3) Cache.Get must never be called during restore — chunk
+	// extraction has to go through ReadAt so we never read the whole
+	// blob from disk to grab a few KB slice.
+	assert.Equalf(t, 0, cacheRef.GetCalls(),
+		"blobDiskCache.Get was called %d times during restore; restore must use ReadAt exclusively", cacheRef.GetCalls())
+
+	t.Logf("blob cache stats: peak_len=%d get_calls=%d readat_calls=%d",
+		cacheRef.PeakLen(), cacheRef.GetCalls(), cacheRef.ReadAtCalls())
+}
+
+// randomBytes returns n bytes of random data. Used to make sure the
+// chunker picks non-degenerate FastCDC boundaries.
+func randomBytes(t *testing.T, n int) []byte {
+	t.Helper()
+	b := make([]byte, n)
+	_, err := rand.Read(b)
+	require.NoError(t, err)
+	return b
+}
+
+// listBlobKeys walks the FileStorer blobs/ tree and returns the
+// relative keys for every blob file present.
+func listBlobKeys(t *testing.T, storeDir string) []string {
+	t.Helper()
+	var keys []string
+	root := filepath.Join(storeDir, "blobs")
+	err := filepath.Walk(root, func(p string, info os.FileInfo, err error) error {
+		if err != nil {
+			return err
+		}
+		if info.IsDir() {
+			return nil
+		}
+		rel, _ := filepath.Rel(storeDir, p)
+		keys = append(keys, rel)
+		return nil
+	})
+	require.NoError(t, err)
+	sort.Strings(keys)
+	return keys
+}
+
+// filterBlobKey returns true when key looks like a blob storage path
+// (rather than a snapshot metadata path).
+func filterBlobKey(key string) bool {
+	return len(key) > 6 && key[:6] == "blobs/"
+}
+
+// countingStorerInternal wraps a storage.Storer and records the number
+// of Get calls per key, so the locality test can assert each blob is
+// fetched exactly once. Defined here (rather than reusing the one in
+// the integration_test package) because this test lives in package
+// vaultik for access to unexported cache internals.
+type countingStorerInternal struct {
+	storage.Storer
+	mu     sync.Mutex
+	counts map[string]int
+}
+
+func newCountingStorer(inner storage.Storer) *countingStorerInternal {
+	return &countingStorerInternal{Storer: inner, counts: make(map[string]int)}
+}
+
+func (c *countingStorerInternal) Get(ctx context.Context, key string) (io.ReadCloser, error) {
+	c.mu.Lock()
+	c.counts[key]++
+	c.mu.Unlock()
+	return c.Storer.Get(ctx, key)
+}
+
+func (c *countingStorerInternal) snapshot() map[string]int {
+	c.mu.Lock()
+	defer c.mu.Unlock()
+	out := make(map[string]int, len(c.counts))
+	for k, v := range c.counts {
+		out[k] = v
+	}
+	return out
+}
--- a/internal/vaultik/vaultik.go
+++ b/internal/vaultik/vaultik.go
@@ -44,6 +44,13 @@ type Vaultik struct {
 	// writer wrapping Stdout; the cli layer replaces it with a discarding
 	// writer in --cron mode.
 	UI *ui.Writer
+
+	// restoreCacheObserver, if non-nil, is invoked once with the
+	// restore-side blob disk cache immediately after the cache is
+	// created and again immediately before it is closed. Only
+	// internal-package tests set this; the type is unexported so
+	// callers outside this package can't reach it.
+	restoreCacheObserver func(*blobDiskCache)
 }

 // VaultikParams contains all parameters for New that can be provided by fx