The bug: fully-deduplicated snapshots (every chunk already in storage from a prior run) had an empty snapshot_blobs table. The metadata-export pipeline then dropped all blob/blob_chunks rows from the exported database, leaving file_chunks references to chunks whose blobs were no longer recorded. Restore fails on every file with "chunk X not found in any blob". Fix: at CompleteSnapshot time, run an INSERT OR IGNORE that links every blob holding a chunk referenced by this snapshot's files into snapshot_blobs. New blobs uploaded during the snapshot are already recorded (no-op for them); dedup-referenced blobs are added. The cleanup query in deleteOrphanedBlobs already restricts to snapshot_blobs entries for the current snapshot — so once snapshot_blobs is correctly populated, the exported database contains the full set of blob/blob_chunks rows needed for restore. Regression test: TestDedupOnlySnapshotRestores creates two identical snapshots (the second uploads zero new blobs) and restores the second. Without the fix, restore fails on every file.
832 lines
26 KiB
Go
832 lines
26 KiB
Go
package vaultik_test
|
|
|
|
import (
|
|
"bytes"
|
|
"context"
|
|
"database/sql"
|
|
"io"
|
|
"os"
|
|
"path/filepath"
|
|
"sync"
|
|
"testing"
|
|
"time"
|
|
|
|
"github.com/spf13/afero"
|
|
"github.com/stretchr/testify/assert"
|
|
"github.com/stretchr/testify/require"
|
|
"sneak.berlin/go/vaultik/internal/config"
|
|
"sneak.berlin/go/vaultik/internal/database"
|
|
"sneak.berlin/go/vaultik/internal/log"
|
|
"sneak.berlin/go/vaultik/internal/snapshot"
|
|
"sneak.berlin/go/vaultik/internal/storage"
|
|
"sneak.berlin/go/vaultik/internal/types"
|
|
"sneak.berlin/go/vaultik/internal/vaultik"
|
|
)
|
|
|
|
// MockStorer is an in-memory implementation of storage.Storer for testing.
// Objects are kept in a map keyed by object key, and most operations append
// an entry to a call log that tests can inspect afterwards.
type MockStorer struct {
	// mu guards data and calls.
	mu sync.Mutex
	// data maps an object key to the bytes stored under it.
	data map[string][]byte
	// calls is the call log: "<Operation>:<key or prefix>" entries in the
	// order they were recorded.
	calls []string
}
|
|
|
|
func NewMockStorer() *MockStorer {
|
|
return &MockStorer{
|
|
data: make(map[string][]byte),
|
|
calls: make([]string, 0),
|
|
}
|
|
}
|
|
|
|
func (m *MockStorer) Put(ctx context.Context, key string, reader io.Reader) error {
|
|
m.mu.Lock()
|
|
defer m.mu.Unlock()
|
|
|
|
m.calls = append(m.calls, "Put:"+key)
|
|
data, err := io.ReadAll(reader)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
m.data[key] = data
|
|
return nil
|
|
}
|
|
|
|
func (m *MockStorer) PutWithProgress(ctx context.Context, key string, reader io.Reader, size int64, progress storage.ProgressCallback) error {
|
|
return m.Put(ctx, key, reader)
|
|
}
|
|
|
|
func (m *MockStorer) Get(ctx context.Context, key string) (io.ReadCloser, error) {
|
|
m.mu.Lock()
|
|
defer m.mu.Unlock()
|
|
|
|
m.calls = append(m.calls, "Get:"+key)
|
|
data, exists := m.data[key]
|
|
if !exists {
|
|
return nil, storage.ErrNotFound
|
|
}
|
|
return io.NopCloser(bytes.NewReader(data)), nil
|
|
}
|
|
|
|
func (m *MockStorer) Stat(ctx context.Context, key string) (*storage.ObjectInfo, error) {
|
|
m.mu.Lock()
|
|
defer m.mu.Unlock()
|
|
|
|
m.calls = append(m.calls, "Stat:"+key)
|
|
data, exists := m.data[key]
|
|
if !exists {
|
|
return nil, storage.ErrNotFound
|
|
}
|
|
return &storage.ObjectInfo{
|
|
Key: key,
|
|
Size: int64(len(data)),
|
|
}, nil
|
|
}
|
|
|
|
func (m *MockStorer) Delete(ctx context.Context, key string) error {
|
|
m.mu.Lock()
|
|
defer m.mu.Unlock()
|
|
|
|
m.calls = append(m.calls, "Delete:"+key)
|
|
delete(m.data, key)
|
|
return nil
|
|
}
|
|
|
|
func (m *MockStorer) List(ctx context.Context, prefix string) ([]string, error) {
|
|
m.mu.Lock()
|
|
defer m.mu.Unlock()
|
|
|
|
m.calls = append(m.calls, "List:"+prefix)
|
|
var keys []string
|
|
for key := range m.data {
|
|
if len(prefix) == 0 || (len(key) >= len(prefix) && key[:len(prefix)] == prefix) {
|
|
keys = append(keys, key)
|
|
}
|
|
}
|
|
return keys, nil
|
|
}
|
|
|
|
func (m *MockStorer) ListStream(ctx context.Context, prefix string) <-chan storage.ObjectInfo {
|
|
ch := make(chan storage.ObjectInfo)
|
|
go func() {
|
|
defer close(ch)
|
|
m.mu.Lock()
|
|
defer m.mu.Unlock()
|
|
|
|
for key, data := range m.data {
|
|
if len(prefix) == 0 || (len(key) >= len(prefix) && key[:len(prefix)] == prefix) {
|
|
ch <- storage.ObjectInfo{
|
|
Key: key,
|
|
Size: int64(len(data)),
|
|
}
|
|
}
|
|
}
|
|
}()
|
|
return ch
|
|
}
|
|
|
|
func (m *MockStorer) Info() storage.StorageInfo {
|
|
return storage.StorageInfo{
|
|
Type: "mock",
|
|
Location: "memory",
|
|
}
|
|
}
|
|
|
|
// GetCalls returns the list of operations that were called
|
|
func (m *MockStorer) GetCalls() []string {
|
|
m.mu.Lock()
|
|
defer m.mu.Unlock()
|
|
|
|
calls := make([]string, len(m.calls))
|
|
copy(calls, m.calls)
|
|
return calls
|
|
}
|
|
|
|
// GetStorageSize returns the number of objects in storage
|
|
func (m *MockStorer) GetStorageSize() int {
|
|
m.mu.Lock()
|
|
defer m.mu.Unlock()
|
|
|
|
return len(m.data)
|
|
}
|
|
|
|
// TestEndToEndBackup tests the full backup workflow with mocked dependencies
|
|
func TestEndToEndBackup(t *testing.T) {
|
|
// Initialize logger
|
|
log.Initialize(log.Config{})
|
|
|
|
// Create in-memory filesystem
|
|
fs := afero.NewMemMapFs()
|
|
|
|
// Create test directory structure and files
|
|
testFiles := map[string]string{
|
|
"/home/user/documents/file1.txt": "This is file 1 content",
|
|
"/home/user/documents/file2.txt": "This is file 2 content with more data",
|
|
"/home/user/pictures/photo1.jpg": "Binary photo data here...",
|
|
"/home/user/code/main.go": "package main\n\nfunc main() {\n\tprintln(\"Hello, World!\")\n}",
|
|
}
|
|
|
|
// Create all directories first
|
|
dirs := []string{
|
|
"/home/user/documents",
|
|
"/home/user/pictures",
|
|
"/home/user/code",
|
|
}
|
|
for _, dir := range dirs {
|
|
if err := fs.MkdirAll(dir, 0755); err != nil {
|
|
t.Fatalf("failed to create directory %s: %v", dir, err)
|
|
}
|
|
}
|
|
|
|
// Create test files
|
|
for path, content := range testFiles {
|
|
if err := afero.WriteFile(fs, path, []byte(content), 0644); err != nil {
|
|
t.Fatalf("failed to create test file %s: %v", path, err)
|
|
}
|
|
}
|
|
|
|
// Create mock storage
|
|
mockStorage := NewMockStorer()
|
|
|
|
// Create test configuration
|
|
cfg := &config.Config{
|
|
Snapshots: map[string]config.SnapshotConfig{
|
|
"test": {
|
|
Paths: []string{"/home/user"},
|
|
},
|
|
},
|
|
Exclude: []string{"*.tmp", "*.log"},
|
|
ChunkSize: config.Size(16 * 1024), // 16KB chunks
|
|
BlobSizeLimit: config.Size(100 * 1024), // 100KB blobs
|
|
CompressionLevel: 3,
|
|
AgeRecipients: []string{"age1ezrjmfpwsc95svdg0y54mums3zevgzu0x0ecq2f7tp8a05gl0sjq9q9wjg"}, // Test public key
|
|
AgeSecretKey: "AGE-SECRET-KEY-19CR5YSFW59HM4TLD6GXVEDMZFTVVF7PPHKUT68TXSFPK7APHXA2QS2NJA5", // Test private key
|
|
S3: config.S3Config{
|
|
Endpoint: "http://localhost:9000", // MinIO endpoint for testing
|
|
Region: "us-east-1",
|
|
Bucket: "test-bucket",
|
|
AccessKeyID: "test-access",
|
|
SecretAccessKey: "test-secret",
|
|
},
|
|
IndexPath: ":memory:", // In-memory SQLite database
|
|
}
|
|
|
|
// For a true end-to-end test, we'll create a simpler test that focuses on
|
|
// the core backup logic using the scanner directly with our mock storage
|
|
ctx := context.Background()
|
|
|
|
// Create in-memory database
|
|
db, err := database.New(ctx, ":memory:")
|
|
require.NoError(t, err)
|
|
defer func() {
|
|
if err := db.Close(); err != nil {
|
|
t.Errorf("failed to close database: %v", err)
|
|
}
|
|
}()
|
|
|
|
repos := database.NewRepositories(db)
|
|
|
|
// Create scanner with mock storage
|
|
scanner := snapshot.NewScanner(snapshot.ScannerConfig{
|
|
FS: fs,
|
|
ChunkSize: cfg.ChunkSize.Int64(),
|
|
Repositories: repos,
|
|
Storage: mockStorage,
|
|
MaxBlobSize: cfg.BlobSizeLimit.Int64(),
|
|
CompressionLevel: cfg.CompressionLevel,
|
|
AgeRecipients: cfg.AgeRecipients,
|
|
EnableProgress: false,
|
|
})
|
|
|
|
// Create a snapshot record
|
|
snapshotID := "test-snapshot-001"
|
|
err = repos.WithTx(ctx, func(ctx context.Context, tx *sql.Tx) error {
|
|
snapshot := &database.Snapshot{
|
|
ID: types.SnapshotID(snapshotID),
|
|
Hostname: "test-host",
|
|
VaultikVersion: "test-version",
|
|
StartedAt: time.Now(),
|
|
}
|
|
return repos.Snapshots.Create(ctx, tx, snapshot)
|
|
})
|
|
require.NoError(t, err)
|
|
|
|
// Run the backup scan
|
|
result, err := scanner.Scan(ctx, "/home/user", snapshotID)
|
|
require.NoError(t, err)
|
|
|
|
// Verify scan results
|
|
// The scanner counts both files and directories, so we have:
|
|
// 4 files + 4 directories (/home, /home/user, /home/user/documents, /home/user/pictures, /home/user/code)
|
|
assert.GreaterOrEqual(t, result.FilesScanned, 4, "Should scan at least 4 files")
|
|
assert.Greater(t, result.BytesScanned, int64(0), "Should scan some bytes")
|
|
assert.Greater(t, result.ChunksCreated, 0, "Should create chunks")
|
|
assert.Greater(t, result.BlobsCreated, 0, "Should create blobs")
|
|
|
|
// Verify storage operations
|
|
calls := mockStorage.GetCalls()
|
|
t.Logf("Storage operations performed: %v", calls)
|
|
|
|
// Should have uploaded at least one blob
|
|
blobUploads := 0
|
|
for _, call := range calls {
|
|
if len(call) > 4 && call[:4] == "Put:" {
|
|
if len(call) > 10 && call[4:10] == "blobs/" {
|
|
blobUploads++
|
|
}
|
|
}
|
|
}
|
|
assert.Greater(t, blobUploads, 0, "Should upload at least one blob")
|
|
|
|
// Verify files in database
|
|
files, err := repos.Files.ListByPrefix(ctx, "/home/user")
|
|
require.NoError(t, err)
|
|
// Count only regular files (not directories)
|
|
regularFiles := 0
|
|
for _, f := range files {
|
|
if f.Mode&0x80000000 == 0 { // Check if regular file (not directory)
|
|
regularFiles++
|
|
}
|
|
}
|
|
assert.Equal(t, 4, regularFiles, "Should have 4 regular files in database")
|
|
|
|
// Verify chunks were created by checking a specific file
|
|
fileChunks, err := repos.FileChunks.GetByPath(ctx, "/home/user/documents/file1.txt")
|
|
require.NoError(t, err)
|
|
assert.Greater(t, len(fileChunks), 0, "Should have chunks for file1.txt")
|
|
|
|
// Verify blobs were uploaded to storage
|
|
assert.Greater(t, mockStorage.GetStorageSize(), 0, "Should have blobs in storage")
|
|
|
|
// Complete the snapshot - just verify we got results
|
|
// In a real integration test, we'd update the snapshot record
|
|
|
|
// Create snapshot manager to test metadata export
|
|
snapshotManager := &snapshot.SnapshotManager{}
|
|
snapshotManager.SetFilesystem(fs)
|
|
|
|
// Note: We can't fully test snapshot metadata export without a proper S3 client mock
|
|
// that implements all required methods. This would require refactoring the S3 client
|
|
// interface to be more testable.
|
|
|
|
t.Logf("Backup completed successfully:")
|
|
t.Logf(" Files scanned: %d", result.FilesScanned)
|
|
t.Logf(" Bytes scanned: %d", result.BytesScanned)
|
|
t.Logf(" Chunks created: %d", result.ChunksCreated)
|
|
t.Logf(" Blobs created: %d", result.BlobsCreated)
|
|
t.Logf(" Storage size: %d objects", mockStorage.GetStorageSize())
|
|
}
|
|
|
|
// TestBackupAndVerify tests backing up files and verifying the blobs
|
|
func TestBackupAndVerify(t *testing.T) {
|
|
// Initialize logger
|
|
log.Initialize(log.Config{})
|
|
|
|
// Create in-memory filesystem
|
|
fs := afero.NewMemMapFs()
|
|
|
|
// Create test files
|
|
testContent := "This is a test file with some content that should be backed up"
|
|
err := fs.MkdirAll("/data", 0755)
|
|
require.NoError(t, err)
|
|
err = afero.WriteFile(fs, "/data/test.txt", []byte(testContent), 0644)
|
|
require.NoError(t, err)
|
|
|
|
// Create mock storage
|
|
mockStorage := NewMockStorer()
|
|
|
|
// Create test database
|
|
ctx := context.Background()
|
|
db, err := database.New(ctx, ":memory:")
|
|
require.NoError(t, err)
|
|
defer func() {
|
|
if err := db.Close(); err != nil {
|
|
t.Errorf("failed to close database: %v", err)
|
|
}
|
|
}()
|
|
|
|
repos := database.NewRepositories(db)
|
|
|
|
// Create scanner
|
|
scanner := snapshot.NewScanner(snapshot.ScannerConfig{
|
|
FS: fs,
|
|
ChunkSize: int64(1024 * 16), // 16KB chunks
|
|
Repositories: repos,
|
|
Storage: mockStorage,
|
|
MaxBlobSize: int64(1024 * 1024), // 1MB blobs
|
|
CompressionLevel: 3,
|
|
AgeRecipients: []string{"age1ezrjmfpwsc95svdg0y54mums3zevgzu0x0ecq2f7tp8a05gl0sjq9q9wjg"}, // Test public key
|
|
})
|
|
|
|
// Create a snapshot
|
|
snapshotID := "test-snapshot-001"
|
|
err = repos.WithTx(ctx, func(ctx context.Context, tx *sql.Tx) error {
|
|
snapshot := &database.Snapshot{
|
|
ID: types.SnapshotID(snapshotID),
|
|
Hostname: "test-host",
|
|
VaultikVersion: "test-version",
|
|
StartedAt: time.Now(),
|
|
}
|
|
return repos.Snapshots.Create(ctx, tx, snapshot)
|
|
})
|
|
require.NoError(t, err)
|
|
|
|
// Run the backup
|
|
result, err := scanner.Scan(ctx, "/data", snapshotID)
|
|
require.NoError(t, err)
|
|
|
|
// Verify backup created blobs
|
|
assert.Greater(t, result.BlobsCreated, 0, "Should create at least one blob")
|
|
assert.Equal(t, mockStorage.GetStorageSize(), result.BlobsCreated, "Storage should have the blobs")
|
|
|
|
// Verify we can retrieve the blob from storage
|
|
objects, err := mockStorage.List(ctx, "blobs/")
|
|
require.NoError(t, err)
|
|
assert.Len(t, objects, result.BlobsCreated, "Should have correct number of blobs in storage")
|
|
|
|
// Get the first blob and verify it exists
|
|
if len(objects) > 0 {
|
|
blobKey := objects[0]
|
|
t.Logf("Verifying blob: %s", blobKey)
|
|
|
|
// Get blob info
|
|
blobInfo, err := mockStorage.Stat(ctx, blobKey)
|
|
require.NoError(t, err)
|
|
assert.Greater(t, blobInfo.Size, int64(0), "Blob should have content")
|
|
|
|
// Get blob content
|
|
reader, err := mockStorage.Get(ctx, blobKey)
|
|
require.NoError(t, err)
|
|
defer func() { _ = reader.Close() }()
|
|
|
|
// Verify blob data is encrypted (should not contain plaintext)
|
|
blobData, err := io.ReadAll(reader)
|
|
require.NoError(t, err)
|
|
assert.NotContains(t, string(blobData), testContent, "Blob should be encrypted")
|
|
assert.Greater(t, len(blobData), 0, "Blob should have data")
|
|
}
|
|
|
|
t.Logf("Backup and verify test completed successfully")
|
|
}
|
|
|
|
// TestBackupAndRestore tests the full backup and restore workflow
|
|
// This test verifies that the restore code correctly handles the binary SQLite
|
|
// database format that is exported by the snapshot manager.
|
|
func TestBackupAndRestore(t *testing.T) {
|
|
// Initialize logger
|
|
log.Initialize(log.Config{})
|
|
|
|
// Create real temp directory for the database (SQLite needs real filesystem)
|
|
realTempDir, err := os.MkdirTemp("", "vaultik-test-")
|
|
require.NoError(t, err)
|
|
defer func() { _ = os.RemoveAll(realTempDir) }()
|
|
|
|
// Use real OS filesystem for this test
|
|
fs := afero.NewOsFs()
|
|
|
|
// Create test directory structure and files
|
|
dataDir := filepath.Join(realTempDir, "data")
|
|
testFiles := map[string]string{
|
|
filepath.Join(dataDir, "file1.txt"): "This is file 1 content",
|
|
filepath.Join(dataDir, "file2.txt"): "This is file 2 content with more data",
|
|
filepath.Join(dataDir, "subdir", "file3.txt"): "This is file 3 in a subdirectory",
|
|
}
|
|
|
|
// Create directories and files
|
|
for path, content := range testFiles {
|
|
dir := filepath.Dir(path)
|
|
if err := fs.MkdirAll(dir, 0755); err != nil {
|
|
t.Fatalf("failed to create directory %s: %v", dir, err)
|
|
}
|
|
if err := afero.WriteFile(fs, path, []byte(content), 0644); err != nil {
|
|
t.Fatalf("failed to create test file %s: %v", path, err)
|
|
}
|
|
}
|
|
|
|
ctx := context.Background()
|
|
|
|
// Create mock storage
|
|
mockStorage := NewMockStorer()
|
|
|
|
// Test keypair
|
|
agePublicKey := "age1ezrjmfpwsc95svdg0y54mums3zevgzu0x0ecq2f7tp8a05gl0sjq9q9wjg"
|
|
ageSecretKey := "AGE-SECRET-KEY-19CR5YSFW59HM4TLD6GXVEDMZFTVVF7PPHKUT68TXSFPK7APHXA2QS2NJA5"
|
|
|
|
// Create database file
|
|
dbPath := filepath.Join(realTempDir, "test.db")
|
|
db, err := database.New(ctx, dbPath)
|
|
require.NoError(t, err)
|
|
defer func() { _ = db.Close() }()
|
|
|
|
repos := database.NewRepositories(db)
|
|
|
|
// Create config for snapshot manager
|
|
cfg := &config.Config{
|
|
AgeSecretKey: ageSecretKey,
|
|
AgeRecipients: []string{agePublicKey},
|
|
CompressionLevel: 3,
|
|
}
|
|
|
|
// Create snapshot manager
|
|
sm := snapshot.NewSnapshotManager(snapshot.SnapshotManagerParams{
|
|
Repos: repos,
|
|
Storage: mockStorage,
|
|
Config: cfg,
|
|
})
|
|
sm.SetFilesystem(fs)
|
|
|
|
// Create scanner
|
|
scanner := snapshot.NewScanner(snapshot.ScannerConfig{
|
|
FS: fs,
|
|
Storage: mockStorage,
|
|
ChunkSize: int64(16 * 1024),
|
|
MaxBlobSize: int64(100 * 1024),
|
|
CompressionLevel: 3,
|
|
AgeRecipients: []string{agePublicKey},
|
|
Repositories: repos,
|
|
})
|
|
|
|
// Create a snapshot
|
|
snapshotID, err := sm.CreateSnapshot(ctx, "test-host", "test-version", "test-git")
|
|
require.NoError(t, err)
|
|
t.Logf("Created snapshot: %s", snapshotID)
|
|
|
|
// Run the backup (scan)
|
|
result, err := scanner.Scan(ctx, dataDir, snapshotID)
|
|
require.NoError(t, err)
|
|
t.Logf("Scan complete: %d files, %d blobs", result.FilesScanned, result.BlobsCreated)
|
|
|
|
// Complete the snapshot
|
|
err = sm.CompleteSnapshot(ctx, snapshotID)
|
|
require.NoError(t, err)
|
|
|
|
// Export snapshot metadata (this uploads db.zst.age and manifest.json.zst)
|
|
err = sm.ExportSnapshotMetadata(ctx, dbPath, snapshotID)
|
|
require.NoError(t, err)
|
|
t.Logf("Exported snapshot metadata")
|
|
|
|
// Verify metadata was uploaded
|
|
keys, err := mockStorage.List(ctx, "metadata/")
|
|
require.NoError(t, err)
|
|
t.Logf("Metadata keys: %v", keys)
|
|
assert.GreaterOrEqual(t, len(keys), 2, "Should have at least db.zst.age and manifest.json.zst")
|
|
|
|
// Close the source database
|
|
err = db.Close()
|
|
require.NoError(t, err)
|
|
|
|
// Create Vaultik instance for restore
|
|
vaultikApp := &vaultik.Vaultik{
|
|
Config: cfg,
|
|
Storage: mockStorage,
|
|
Fs: fs,
|
|
Stdout: io.Discard,
|
|
Stderr: io.Discard,
|
|
}
|
|
vaultikApp.SetContext(ctx)
|
|
|
|
// Try to restore - this should work with binary SQLite format
|
|
restoreDir := filepath.Join(realTempDir, "restored")
|
|
err = vaultikApp.Restore(&vaultik.RestoreOptions{
|
|
SnapshotID: snapshotID,
|
|
TargetDir: restoreDir,
|
|
})
|
|
require.NoError(t, err, "Restore should succeed with binary SQLite database format")
|
|
|
|
// Verify restored files match originals
|
|
for origPath, expectedContent := range testFiles {
|
|
restoredPath := filepath.Join(restoreDir, origPath)
|
|
restoredContent, err := afero.ReadFile(fs, restoredPath)
|
|
require.NoError(t, err, "Should be able to read restored file: %s", restoredPath)
|
|
assert.Equal(t, expectedContent, string(restoredContent), "Restored content should match original for: %s", origPath)
|
|
}
|
|
|
|
t.Log("Backup and restore test completed successfully")
|
|
}
|
|
|
|
// TestEndToEndFileStorage exercises the full backup → restore loop against the
|
|
// real `file://` storage backend (FileStorer) on a real OS filesystem. This is
|
|
// the closest local approximation of a production backup: encrypted blobs get
|
|
// written to disk, the metadata SQLite database is exported through the same
|
|
// blobgen pipeline as a real backup, and restoration reads them back through
|
|
// the public Vaultik.Restore entrypoint. It is the canonical end-to-end smoke
|
|
// test for 1.0.
|
|
func TestEndToEndFileStorage(t *testing.T) {
|
|
log.Initialize(log.Config{})
|
|
|
|
// Real OS filesystem (SQLite + FileStorer both need it).
|
|
fs := afero.NewOsFs()
|
|
tempDir, err := os.MkdirTemp("", "vaultik-e2e-")
|
|
require.NoError(t, err)
|
|
defer func() { _ = os.RemoveAll(tempDir) }()
|
|
|
|
dataDir := filepath.Join(tempDir, "source")
|
|
storeDir := filepath.Join(tempDir, "remote")
|
|
restoreDir := filepath.Join(tempDir, "restored")
|
|
dbPath := filepath.Join(tempDir, "index.sqlite")
|
|
|
|
// Write a representative mix of file sizes:
|
|
// - empty file
|
|
// - tiny text file
|
|
// - file just under chunk boundary
|
|
// - file forcing multiple chunks
|
|
// - nested subdirectories
|
|
chunkSize := int64(64 * 1024)
|
|
maxBlobSize := int64(512 * 1024)
|
|
|
|
testFiles := map[string][]byte{
|
|
filepath.Join(dataDir, "empty.txt"): {},
|
|
filepath.Join(dataDir, "small.txt"): []byte("hello vaultik"),
|
|
filepath.Join(dataDir, "subdir", "medium.bin"): bytesPattern("medium-", int(chunkSize/2)),
|
|
filepath.Join(dataDir, "subdir", "large.bin"): bytesPattern("large-", int(chunkSize*4)),
|
|
filepath.Join(dataDir, "deep", "nest", "leaf.txt"): []byte("leaf"),
|
|
}
|
|
|
|
for path, content := range testFiles {
|
|
require.NoError(t, fs.MkdirAll(filepath.Dir(path), 0o755))
|
|
require.NoError(t, afero.WriteFile(fs, path, content, 0o644))
|
|
}
|
|
|
|
// Create a file with non-default permissions.
|
|
restrictedPath := filepath.Join(dataDir, "restricted.txt")
|
|
require.NoError(t, afero.WriteFile(fs, restrictedPath, []byte("secret"), 0o600))
|
|
testFiles[restrictedPath] = []byte("secret")
|
|
|
|
// Create an empty directory (should survive round-trip).
|
|
emptyDir := filepath.Join(dataDir, "emptydir")
|
|
require.NoError(t, fs.MkdirAll(emptyDir, 0o755))
|
|
|
|
// Create a symlink.
|
|
symlinkPath := filepath.Join(dataDir, "link-to-small")
|
|
require.NoError(t, os.Symlink("small.txt", symlinkPath))
|
|
|
|
// FileStorer is the real-world local-disk backend.
|
|
storer, err := storage.NewFileStorer(storeDir)
|
|
require.NoError(t, err)
|
|
|
|
agePublicKey := "age1ezrjmfpwsc95svdg0y54mums3zevgzu0x0ecq2f7tp8a05gl0sjq9q9wjg"
|
|
ageSecretKey := "AGE-SECRET-KEY-19CR5YSFW59HM4TLD6GXVEDMZFTVVF7PPHKUT68TXSFPK7APHXA2QS2NJA5"
|
|
|
|
cfg := &config.Config{
|
|
AgeRecipients: []string{agePublicKey},
|
|
AgeSecretKey: ageSecretKey,
|
|
CompressionLevel: 3,
|
|
Hostname: "test-host",
|
|
}
|
|
|
|
ctx := context.Background()
|
|
|
|
db, err := database.New(ctx, dbPath)
|
|
require.NoError(t, err)
|
|
defer func() { _ = db.Close() }()
|
|
|
|
repos := database.NewRepositories(db)
|
|
|
|
sm := snapshot.NewSnapshotManager(snapshot.SnapshotManagerParams{
|
|
Repos: repos,
|
|
Storage: storer,
|
|
Config: cfg,
|
|
})
|
|
sm.SetFilesystem(fs)
|
|
|
|
scanner := snapshot.NewScanner(snapshot.ScannerConfig{
|
|
FS: fs,
|
|
Storage: storer,
|
|
ChunkSize: chunkSize,
|
|
MaxBlobSize: maxBlobSize,
|
|
CompressionLevel: cfg.CompressionLevel,
|
|
AgeRecipients: cfg.AgeRecipients,
|
|
Repositories: repos,
|
|
})
|
|
|
|
snapshotID, err := sm.CreateSnapshotWithName(ctx, cfg.Hostname, "e2e", "test-version", "test-git")
|
|
require.NoError(t, err)
|
|
|
|
scanResult, err := scanner.Scan(ctx, dataDir, snapshotID)
|
|
require.NoError(t, err)
|
|
require.Greater(t, scanResult.FilesScanned, 0)
|
|
require.Greater(t, scanResult.BlobsCreated, 0)
|
|
|
|
require.NoError(t, sm.CompleteSnapshot(ctx, snapshotID))
|
|
require.NoError(t, sm.ExportSnapshotMetadata(ctx, dbPath, snapshotID))
|
|
|
|
// Verify the backup actually landed on disk under blobs/ and metadata/.
|
|
blobInfo, err := os.Stat(filepath.Join(storeDir, "blobs"))
|
|
require.NoError(t, err)
|
|
require.True(t, blobInfo.IsDir())
|
|
metaInfo, err := os.Stat(filepath.Join(storeDir, "metadata", snapshotID))
|
|
require.NoError(t, err)
|
|
require.True(t, metaInfo.IsDir())
|
|
|
|
// Tear down the source DB before restore — restore must work using only
|
|
// the remote bytes plus the secret key, with no help from the local index.
|
|
require.NoError(t, db.Close())
|
|
|
|
restoreVaultik := &vaultik.Vaultik{
|
|
Config: cfg,
|
|
Storage: storer,
|
|
Fs: fs,
|
|
Stdout: io.Discard,
|
|
Stderr: io.Discard,
|
|
}
|
|
restoreVaultik.SetContext(ctx)
|
|
|
|
require.NoError(t, restoreVaultik.Restore(&vaultik.RestoreOptions{
|
|
SnapshotID: snapshotID,
|
|
TargetDir: restoreDir,
|
|
Verify: true,
|
|
}))
|
|
|
|
// Byte-equality compare every original against its restored copy.
|
|
for origPath, expected := range testFiles {
|
|
restoredPath := filepath.Join(restoreDir, origPath)
|
|
got, err := afero.ReadFile(fs, restoredPath)
|
|
require.NoError(t, err, "restored file missing: %s", restoredPath)
|
|
require.Equalf(t, expected, got, "byte-equality failed for %s", origPath)
|
|
}
|
|
|
|
// Verify the restricted file kept its permissions.
|
|
restoredRestricted := filepath.Join(restoreDir, restrictedPath)
|
|
rInfo, err := os.Stat(restoredRestricted)
|
|
require.NoError(t, err)
|
|
assert.Equal(t, os.FileMode(0o600), rInfo.Mode().Perm(),
|
|
"restricted file should preserve 0600 permissions")
|
|
|
|
// Verify the empty directory was restored.
|
|
restoredEmptyDir := filepath.Join(restoreDir, emptyDir)
|
|
dInfo, err := os.Stat(restoredEmptyDir)
|
|
require.NoError(t, err, "empty directory should be restored")
|
|
assert.True(t, dInfo.IsDir(), "emptydir should be a directory")
|
|
|
|
// Verify the symlink was restored with the correct target.
|
|
restoredSymlink := filepath.Join(restoreDir, symlinkPath)
|
|
target, err := os.Readlink(restoredSymlink)
|
|
require.NoError(t, err, "symlink should be restored")
|
|
assert.Equal(t, "small.txt", target, "symlink target should be preserved")
|
|
}
|
|
|
|
// TestDedupOnlySnapshotRestores backs up the same directory twice without
|
|
// touching it between runs, then restores the SECOND (fully-deduplicated)
|
|
// snapshot. The second snapshot uploads no new blobs — every chunk is
|
|
// already in storage from the first run. This test guards against the
|
|
// regression where snapshot_blobs was populated only for blobs uploaded
|
|
// during the snapshot, leaving fully-deduplicated snapshots unrestorable
|
|
// with "chunk X not found in any blob" errors.
|
|
func TestDedupOnlySnapshotRestores(t *testing.T) {
|
|
log.Initialize(log.Config{})
|
|
|
|
fs := afero.NewOsFs()
|
|
tempDir, err := os.MkdirTemp("", "vaultik-dedup-")
|
|
require.NoError(t, err)
|
|
defer func() { _ = os.RemoveAll(tempDir) }()
|
|
|
|
dataDir := filepath.Join(tempDir, "source")
|
|
storeDir := filepath.Join(tempDir, "remote")
|
|
restoreDir := filepath.Join(tempDir, "restored")
|
|
dbPath := filepath.Join(tempDir, "index.sqlite")
|
|
|
|
chunkSize := int64(64 * 1024)
|
|
maxBlobSize := int64(512 * 1024)
|
|
|
|
testFiles := map[string][]byte{
|
|
filepath.Join(dataDir, "a.bin"): bytesPattern("a-", int(chunkSize*3)),
|
|
filepath.Join(dataDir, "b.bin"): bytesPattern("b-", int(chunkSize*2)),
|
|
}
|
|
for path, content := range testFiles {
|
|
require.NoError(t, fs.MkdirAll(filepath.Dir(path), 0o755))
|
|
require.NoError(t, afero.WriteFile(fs, path, content, 0o644))
|
|
}
|
|
|
|
storer, err := storage.NewFileStorer(storeDir)
|
|
require.NoError(t, err)
|
|
|
|
agePublicKey := "age1ezrjmfpwsc95svdg0y54mums3zevgzu0x0ecq2f7tp8a05gl0sjq9q9wjg"
|
|
ageSecretKey := "AGE-SECRET-KEY-19CR5YSFW59HM4TLD6GXVEDMZFTVVF7PPHKUT68TXSFPK7APHXA2QS2NJA5"
|
|
|
|
cfg := &config.Config{
|
|
AgeRecipients: []string{agePublicKey},
|
|
AgeSecretKey: ageSecretKey,
|
|
CompressionLevel: 3,
|
|
Hostname: "test-host",
|
|
}
|
|
|
|
ctx := context.Background()
|
|
db, err := database.New(ctx, dbPath)
|
|
require.NoError(t, err)
|
|
defer func() { _ = db.Close() }()
|
|
repos := database.NewRepositories(db)
|
|
|
|
makeScanner := func() *snapshot.Scanner {
|
|
return snapshot.NewScanner(snapshot.ScannerConfig{
|
|
FS: fs,
|
|
Storage: storer,
|
|
ChunkSize: chunkSize,
|
|
MaxBlobSize: maxBlobSize,
|
|
CompressionLevel: cfg.CompressionLevel,
|
|
AgeRecipients: cfg.AgeRecipients,
|
|
Repositories: repos,
|
|
})
|
|
}
|
|
sm := snapshot.NewSnapshotManager(snapshot.SnapshotManagerParams{
|
|
Repos: repos, Storage: storer, Config: cfg,
|
|
})
|
|
sm.SetFilesystem(fs)
|
|
|
|
// First snapshot — uploads all blobs.
|
|
id1, err := sm.CreateSnapshotWithName(ctx, cfg.Hostname, "dedup", "v", "g")
|
|
require.NoError(t, err)
|
|
r1, err := makeScanner().Scan(ctx, dataDir, id1)
|
|
require.NoError(t, err)
|
|
require.Greater(t, r1.BlobsCreated, 0, "first snapshot should upload at least one blob")
|
|
require.NoError(t, sm.CompleteSnapshot(ctx, id1))
|
|
require.NoError(t, sm.ExportSnapshotMetadata(ctx, dbPath, id1))
|
|
|
|
// Second snapshot — same data, every chunk dedups. Sleep past the
|
|
// second-precision timestamp so the snapshot IDs differ.
|
|
time.Sleep(1100 * time.Millisecond)
|
|
id2, err := sm.CreateSnapshotWithName(ctx, cfg.Hostname, "dedup", "v", "g")
|
|
require.NoError(t, err)
|
|
r2, err := makeScanner().Scan(ctx, dataDir, id2)
|
|
require.NoError(t, err)
|
|
require.Equal(t, 0, r2.BlobsCreated, "second snapshot should upload zero new blobs (fully dedup'd)")
|
|
require.NoError(t, sm.CompleteSnapshot(ctx, id2))
|
|
require.NoError(t, sm.ExportSnapshotMetadata(ctx, dbPath, id2))
|
|
|
|
// snapshot_blobs for id2 must be populated despite no uploads.
|
|
blobHashes, err := repos.Snapshots.GetBlobHashes(ctx, id2)
|
|
require.NoError(t, err)
|
|
require.NotEmpty(t, blobHashes, "snapshot_blobs for fully-dedup'd snapshot must reference blobs uploaded by prior snapshot")
|
|
|
|
require.NoError(t, db.Close())
|
|
|
|
restoreVaultik := &vaultik.Vaultik{
|
|
Config: cfg,
|
|
Storage: storer,
|
|
Fs: fs,
|
|
Stdout: io.Discard,
|
|
Stderr: io.Discard,
|
|
}
|
|
restoreVaultik.SetContext(ctx)
|
|
|
|
require.NoError(t, restoreVaultik.Restore(&vaultik.RestoreOptions{
|
|
SnapshotID: id2,
|
|
TargetDir: restoreDir,
|
|
Verify: true,
|
|
}))
|
|
|
|
for origPath, expected := range testFiles {
|
|
restoredPath := filepath.Join(restoreDir, origPath)
|
|
got, err := afero.ReadFile(fs, restoredPath)
|
|
require.NoError(t, err, "restored file missing: %s", restoredPath)
|
|
require.Equalf(t, expected, got, "byte-equality failed for %s", origPath)
|
|
}
|
|
}
|
|
|
|
// bytesPattern returns a deterministic byte slice of length n with a tag prefix,
// useful for forcing chunker behavior with reproducible content. Byte i is
// tag[i%len(tag)] XORed with the low 8 bits of i, so the tag repeats along the
// slice while the XOR keeps the output from being a plain repetition.
//
// An empty tag contributes nothing to the XOR: the result is then just the low
// byte of each index, instead of a divide-by-zero panic from i%len(tag).
func bytesPattern(tag string, n int) []byte {
	out := make([]byte, n)
	for i := range out {
		b := byte(i) // conversion keeps only the low 8 bits of the index
		if len(tag) > 0 {
			b ^= tag[i%len(tag)]
		}
		out[i] = b
	}
	return out
}
|