vaultik/internal/vaultik/integration_test.go
sneak 470bf648c4 Add deterministic deduplication, rclone backend, and database purge command
- Implement deterministic blob hashing using double SHA256 of uncompressed
  plaintext data, enabling deduplication even after local DB is cleared
- Add Stat() check before blob upload to skip existing blobs in storage
- Add rclone storage backend for additional remote storage options
- Add 'vaultik database purge' command to erase local state DB
- Add 'vaultik remote check' command to verify remote connectivity
- Show configured snapshots in 'vaultik snapshot list' output
- Skip macOS resource fork files (._*) when listing remote snapshots
- Use multi-threaded zstd compression (CPUs - 2 threads)
- Add writer tests for double hashing behavior
2026-01-28 15:50:17 -08:00

544 lines
16 KiB
Go

package vaultik_test
import (
"bytes"
"context"
"database/sql"
"io"
"os"
"path/filepath"
"sync"
"testing"
"time"
"git.eeqj.de/sneak/vaultik/internal/config"
"git.eeqj.de/sneak/vaultik/internal/database"
"git.eeqj.de/sneak/vaultik/internal/log"
"git.eeqj.de/sneak/vaultik/internal/snapshot"
"git.eeqj.de/sneak/vaultik/internal/storage"
"git.eeqj.de/sneak/vaultik/internal/types"
"git.eeqj.de/sneak/vaultik/internal/vaultik"
"github.com/spf13/afero"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)
// MockStorer implements storage.Storer for testing.
//
// Objects are held in an in-memory map. Put, Get, Stat, Delete and List each
// append an "Op:argument" entry to a call log that tests can read back with
// GetCalls.
type MockStorer struct {
	mu    sync.Mutex        // guards data and calls
	data  map[string][]byte // object key -> stored bytes
	calls []string          // operation log, e.g. "Put:<key>" or "List:<prefix>"
}
// NewMockStorer returns a MockStorer with an empty object map and an empty,
// non-nil call log.
func NewMockStorer() *MockStorer {
	m := new(MockStorer)
	m.data = map[string][]byte{}
	m.calls = []string{}
	return m
}
// Put reads reader to EOF and stores the bytes under key, replacing any
// existing object. The call is logged as "Put:<key>" before the read, so it
// is recorded even when the read fails; in that case nothing is stored and
// the read error is returned. ctx is not consulted.
func (m *MockStorer) Put(ctx context.Context, key string, reader io.Reader) error {
	m.mu.Lock()
	defer m.mu.Unlock()

	m.calls = append(m.calls, "Put:"+key)

	payload, readErr := io.ReadAll(reader)
	if readErr == nil {
		m.data[key] = payload
	}
	return readErr
}
// PutWithProgress stores the object exactly as Put does. The mock ignores
// size and progress, so the callback is never invoked.
func (m *MockStorer) PutWithProgress(ctx context.Context, key string, reader io.Reader, size int64, progress storage.ProgressCallback) error {
	err := m.Put(ctx, key, reader)
	return err
}
// Get logs "Get:<key>" and returns a reader over the bytes stored under key.
// It returns storage.ErrNotFound when no such object exists. Closing the
// returned reader is a no-op.
func (m *MockStorer) Get(ctx context.Context, key string) (io.ReadCloser, error) {
	m.mu.Lock()
	defer m.mu.Unlock()

	m.calls = append(m.calls, "Get:"+key)

	if stored, ok := m.data[key]; ok {
		return io.NopCloser(bytes.NewReader(stored)), nil
	}
	return nil, storage.ErrNotFound
}
// Stat logs "Stat:<key>" and reports the key and byte length of the stored
// object, or storage.ErrNotFound when the key is absent.
func (m *MockStorer) Stat(ctx context.Context, key string) (*storage.ObjectInfo, error) {
	m.mu.Lock()
	defer m.mu.Unlock()

	m.calls = append(m.calls, "Stat:"+key)

	if stored, ok := m.data[key]; ok {
		info := storage.ObjectInfo{
			Key:  key,
			Size: int64(len(stored)),
		}
		return &info, nil
	}
	return nil, storage.ErrNotFound
}
// Delete logs "Delete:<key>" and removes the object stored under key.
// Deleting a key that does not exist is not an error; the method always
// returns nil.
func (m *MockStorer) Delete(ctx context.Context, key string) error {
	m.mu.Lock()
	m.calls = append(m.calls, "Delete:"+key)
	delete(m.data, key)
	m.mu.Unlock()

	return nil
}
// List logs "List:<prefix>" and returns every stored key that begins with
// prefix; an empty prefix matches all keys. The keys come from map iteration,
// so their order is unspecified. The result is nil when nothing matches, and
// the error is always nil.
func (m *MockStorer) List(ctx context.Context, prefix string) ([]string, error) {
	m.mu.Lock()
	defer m.mu.Unlock()

	m.calls = append(m.calls, "List:"+prefix)

	n := len(prefix)
	var matched []string
	for k := range m.data {
		// Skip keys that are too short to carry the prefix or that differ in it.
		if len(k) < n || k[:n] != prefix {
			continue
		}
		matched = append(matched, k)
	}
	return matched, nil
}
// ListStream returns a channel that yields an ObjectInfo (key and byte size)
// for every stored object whose key begins with prefix; an empty prefix
// matches everything. The channel is closed once all matches have been sent
// or ctx is cancelled, whichever happens first. Order is unspecified because
// it follows map iteration. Unlike the other operations, ListStream does not
// add an entry to the call log.
//
// The matching objects are snapshotted while holding the mutex and sent only
// after it is released. Sending on the unbuffered channel with the lock held
// would deadlock any consumer that calls another MockStorer method (for
// example Get or Delete) while ranging over the stream.
func (m *MockStorer) ListStream(ctx context.Context, prefix string) <-chan storage.ObjectInfo {
	ch := make(chan storage.ObjectInfo)
	go func() {
		defer close(ch)

		// Take a consistent snapshot of the matches under the lock.
		m.mu.Lock()
		matches := make([]storage.ObjectInfo, 0, len(m.data))
		for key, data := range m.data {
			if len(key) >= len(prefix) && key[:len(prefix)] == prefix {
				matches = append(matches, storage.ObjectInfo{
					Key:  key,
					Size: int64(len(data)),
				})
			}
		}
		m.mu.Unlock()

		// Deliver without holding the lock, and stop early on cancellation so
		// the goroutine cannot leak when the consumer stops reading.
		for _, info := range matches {
			select {
			case ch <- info:
			case <-ctx.Done():
				return
			}
		}
	}()
	return ch
}
// Info describes this backend as type "mock" located in "memory".
func (m *MockStorer) Info() storage.StorageInfo {
	var info storage.StorageInfo
	info.Type = "mock"
	info.Location = "memory"
	return info
}
// GetCalls returns a copy of the operation log, in the order the operations
// were recorded. The caller may modify the returned slice without affecting
// the mock.
func (m *MockStorer) GetCalls() []string {
	m.mu.Lock()
	defer m.mu.Unlock()

	// Appending onto a fresh, exactly-sized slice keeps the result non-nil
	// even when no calls have been recorded.
	return append(make([]string, 0, len(m.calls)), m.calls...)
}
// GetStorageSize returns how many objects are currently stored (a count of
// keys, not a byte total).
func (m *MockStorer) GetStorageSize() int {
	m.mu.Lock()
	count := len(m.data)
	m.mu.Unlock()

	return count
}
// TestEndToEndBackup runs the scanner over an in-memory file tree using mock
// storage and an in-memory database, then checks the scan counters, the
// storage calls that were recorded, and the rows left in the database.
func TestEndToEndBackup(t *testing.T) {
	log.Initialize(log.Config{})

	// The whole source tree lives on an in-memory filesystem.
	memFS := afero.NewMemMapFs()

	// Directories are created up front, before any file is written.
	for _, dir := range []string{
		"/home/user/documents",
		"/home/user/pictures",
		"/home/user/code",
	} {
		if err := memFS.MkdirAll(dir, 0755); err != nil {
			t.Fatalf("failed to create directory %s: %v", dir, err)
		}
	}

	// Four small fixture files spread over the three directories.
	fixtures := map[string]string{
		"/home/user/documents/file1.txt": "This is file 1 content",
		"/home/user/documents/file2.txt": "This is file 2 content with more data",
		"/home/user/pictures/photo1.jpg": "Binary photo data here...",
		"/home/user/code/main.go":        "package main\n\nfunc main() {\n\tprintln(\"Hello, World!\")\n}",
	}
	for path, content := range fixtures {
		if err := afero.WriteFile(memFS, path, []byte(content), 0644); err != nil {
			t.Fatalf("failed to create test file %s: %v", path, err)
		}
	}

	mockStorage := NewMockStorer()

	// Only the chunk/blob sizes, compression level and recipients are fed to
	// the scanner below; the remaining fields are populated but not consumed
	// by this test.
	cfg := &config.Config{
		Snapshots: map[string]config.SnapshotConfig{
			"test": {
				Paths: []string{"/home/user"},
			},
		},
		Exclude:          []string{"*.tmp", "*.log"},
		ChunkSize:        config.Size(16 * 1024),  // 16KB chunks
		BlobSizeLimit:    config.Size(100 * 1024), // 100KB blobs
		CompressionLevel: 3,
		AgeRecipients:    []string{"age1ezrjmfpwsc95svdg0y54mums3zevgzu0x0ecq2f7tp8a05gl0sjq9q9wjg"},  // Test public key
		AgeSecretKey:     "AGE-SECRET-KEY-19CR5YSFW59HM4TLD6GXVEDMZFTVVF7PPHKUT68TXSFPK7APHXA2QS2NJA5", // Test private key
		S3: config.S3Config{
			Endpoint:        "http://localhost:9000", // MinIO endpoint for testing
			Region:          "us-east-1",
			Bucket:          "test-bucket",
			AccessKeyID:     "test-access",
			SecretAccessKey: "test-secret",
		},
		IndexPath: ":memory:", // In-memory SQLite database
	}

	// Rather than going through the full application, exercise the core
	// backup logic by driving the scanner directly against the mock storage.
	ctx := context.Background()

	db, err := database.New(ctx, ":memory:")
	require.NoError(t, err)
	defer func() {
		if closeErr := db.Close(); closeErr != nil {
			t.Errorf("failed to close database: %v", closeErr)
		}
	}()
	repos := database.NewRepositories(db)

	scanner := snapshot.NewScanner(snapshot.ScannerConfig{
		FS:               memFS,
		ChunkSize:        cfg.ChunkSize.Int64(),
		Repositories:     repos,
		Storage:          mockStorage,
		MaxBlobSize:      cfg.BlobSizeLimit.Int64(),
		CompressionLevel: cfg.CompressionLevel,
		AgeRecipients:    cfg.AgeRecipients,
		EnableProgress:   false,
	})

	// Insert the snapshot row before the scan that refers to it.
	snapshotID := "test-snapshot-001"
	err = repos.WithTx(ctx, func(ctx context.Context, tx *sql.Tx) error {
		return repos.Snapshots.Create(ctx, tx, &database.Snapshot{
			ID:             types.SnapshotID(snapshotID),
			Hostname:       "test-host",
			VaultikVersion: "test-version",
			StartedAt:      time.Now(),
		})
	})
	require.NoError(t, err)

	result, err := scanner.Scan(ctx, "/home/user", snapshotID)
	require.NoError(t, err)

	// The scanner is understood to count directories as well as files, so
	// FilesScanned is only checked against a lower bound (the four fixtures).
	assert.GreaterOrEqual(t, result.FilesScanned, 4, "Should scan at least 4 files")
	assert.Greater(t, result.BytesScanned, int64(0), "Should scan some bytes")
	assert.Greater(t, result.ChunksCreated, 0, "Should create chunks")
	assert.Greater(t, result.BlobsCreated, 0, "Should create blobs")

	calls := mockStorage.GetCalls()
	t.Logf("Storage operations performed: %v", calls)

	// A blob upload is a Put whose key starts with "blobs/" and has at least
	// one more character after that prefix.
	const blobPutPrefix = "Put:blobs/"
	blobUploads := 0
	for _, call := range calls {
		if len(call) > len(blobPutPrefix) && call[:len(blobPutPrefix)] == blobPutPrefix {
			blobUploads++
		}
	}
	assert.Greater(t, blobUploads, 0, "Should upload at least one blob")

	files, err := repos.Files.ListByPrefix(ctx, "/home/user")
	require.NoError(t, err)

	// Count entries whose top mode bit is clear. That bit presumably mirrors
	// os.ModeDir, i.e. marks directories - confirm against the scanner.
	const dirBit = 0x80000000
	regularFiles := 0
	for _, f := range files {
		if f.Mode&dirBit != 0 {
			continue
		}
		regularFiles++
	}
	assert.Equal(t, 4, regularFiles, "Should have 4 regular files in database")

	// Spot-check chunking through one specific file.
	fileChunks, err := repos.FileChunks.GetByPath(ctx, "/home/user/documents/file1.txt")
	require.NoError(t, err)
	assert.Greater(t, len(fileChunks), 0, "Should have chunks for file1.txt")

	assert.Greater(t, mockStorage.GetStorageSize(), 0, "Should have blobs in storage")

	// The snapshot record is not completed or updated here. A manager is
	// constructed and handed the filesystem, but metadata export is left
	// untested (the original note attributes this to the lack of a suitable
	// S3 client mock).
	snapshotManager := &snapshot.SnapshotManager{}
	snapshotManager.SetFilesystem(memFS)

	t.Logf("Backup completed successfully:")
	t.Logf(" Files scanned: %d", result.FilesScanned)
	t.Logf(" Bytes scanned: %d", result.BytesScanned)
	t.Logf(" Chunks created: %d", result.ChunksCreated)
	t.Logf(" Blobs created: %d", result.BlobsCreated)
	t.Logf(" Storage size: %d objects", mockStorage.GetStorageSize())
}
// TestBackupAndVerify backs up a single file to mock storage and then checks
// that the stored blob exists, is non-empty, and does not contain the file's
// plaintext.
func TestBackupAndVerify(t *testing.T) {
	log.Initialize(log.Config{})

	// One file on an in-memory filesystem is the entire backup source.
	memFS := afero.NewMemMapFs()
	testContent := "This is a test file with some content that should be backed up"

	err := memFS.MkdirAll("/data", 0755)
	require.NoError(t, err)
	err = afero.WriteFile(memFS, "/data/test.txt", []byte(testContent), 0644)
	require.NoError(t, err)

	mockStorage := NewMockStorer()

	ctx := context.Background()
	db, err := database.New(ctx, ":memory:")
	require.NoError(t, err)
	defer func() {
		if closeErr := db.Close(); closeErr != nil {
			t.Errorf("failed to close database: %v", closeErr)
		}
	}()
	repos := database.NewRepositories(db)

	scanner := snapshot.NewScanner(snapshot.ScannerConfig{
		FS:               memFS,
		ChunkSize:        int64(1024 * 16), // 16KB chunks
		Repositories:     repos,
		Storage:          mockStorage,
		MaxBlobSize:      int64(1024 * 1024), // 1MB blobs
		CompressionLevel: 3,
		AgeRecipients:    []string{"age1ezrjmfpwsc95svdg0y54mums3zevgzu0x0ecq2f7tp8a05gl0sjq9q9wjg"}, // Test public key
	})

	// Insert the snapshot row before the scan that refers to it.
	snapshotID := "test-snapshot-001"
	err = repos.WithTx(ctx, func(ctx context.Context, tx *sql.Tx) error {
		return repos.Snapshots.Create(ctx, tx, &database.Snapshot{
			ID:             types.SnapshotID(snapshotID),
			Hostname:       "test-host",
			VaultikVersion: "test-version",
			StartedAt:      time.Now(),
		})
	})
	require.NoError(t, err)

	result, err := scanner.Scan(ctx, "/data", snapshotID)
	require.NoError(t, err)

	// Every object in the mock store is expected to be a blob from this scan.
	assert.Greater(t, result.BlobsCreated, 0, "Should create at least one blob")
	assert.Equal(t, mockStorage.GetStorageSize(), result.BlobsCreated, "Storage should have the blobs")

	blobKeys, err := mockStorage.List(ctx, "blobs/")
	require.NoError(t, err)
	assert.Len(t, blobKeys, result.BlobsCreated, "Should have correct number of blobs in storage")

	// Inspect one blob; the guard avoids indexing an empty listing when the
	// non-fatal assertions above have already failed.
	if len(blobKeys) > 0 {
		firstKey := blobKeys[0]
		t.Logf("Verifying blob: %s", firstKey)

		info, err := mockStorage.Stat(ctx, firstKey)
		require.NoError(t, err)
		assert.Greater(t, info.Size, int64(0), "Blob should have content")

		blobReader, err := mockStorage.Get(ctx, firstKey)
		require.NoError(t, err)
		defer func() { _ = blobReader.Close() }()

		// The plaintext must not appear verbatim in the stored bytes.
		raw, err := io.ReadAll(blobReader)
		require.NoError(t, err)
		assert.NotContains(t, string(raw), testContent, "Blob should be encrypted")
		assert.Greater(t, len(raw), 0, "Blob should have data")
	}

	t.Logf("Backup and verify test completed successfully")
}
// TestBackupAndRestore exercises the complete cycle: scan real files into
// mock storage, complete the snapshot, export its metadata, restore into a
// fresh directory, and compare every restored file with its original.
// It verifies that restore handles the binary SQLite database format that
// the snapshot manager exports.
func TestBackupAndRestore(t *testing.T) {
	log.Initialize(log.Config{})

	// The database needs a real filesystem, so the whole test runs inside a
	// temporary directory on disk that is removed afterwards.
	realTempDir, err := os.MkdirTemp("", "vaultik-test-")
	require.NoError(t, err)
	defer func() { _ = os.RemoveAll(realTempDir) }()

	osFS := afero.NewOsFs()

	// Fixture tree: two files at the top level and one in a subdirectory.
	dataDir := filepath.Join(realTempDir, "data")
	testFiles := map[string]string{
		filepath.Join(dataDir, "file1.txt"):           "This is file 1 content",
		filepath.Join(dataDir, "file2.txt"):           "This is file 2 content with more data",
		filepath.Join(dataDir, "subdir", "file3.txt"): "This is file 3 in a subdirectory",
	}
	for path, content := range testFiles {
		parent := filepath.Dir(path)
		if err := osFS.MkdirAll(parent, 0755); err != nil {
			t.Fatalf("failed to create directory %s: %v", parent, err)
		}
		if err := afero.WriteFile(osFS, path, []byte(content), 0644); err != nil {
			t.Fatalf("failed to create test file %s: %v", path, err)
		}
	}

	ctx := context.Background()
	mockStorage := NewMockStorer()

	// Test keypair
	agePublicKey := "age1ezrjmfpwsc95svdg0y54mums3zevgzu0x0ecq2f7tp8a05gl0sjq9q9wjg"
	ageSecretKey := "AGE-SECRET-KEY-19CR5YSFW59HM4TLD6GXVEDMZFTVVF7PPHKUT68TXSFPK7APHXA2QS2NJA5"

	// On-disk source database. The deferred Close is a best-effort safety
	// net; the database is also closed explicitly before the restore.
	dbPath := filepath.Join(realTempDir, "test.db")
	db, err := database.New(ctx, dbPath)
	require.NoError(t, err)
	defer func() { _ = db.Close() }()
	repos := database.NewRepositories(db)

	// The same config is shared by the snapshot manager and the restore.
	cfg := &config.Config{
		AgeSecretKey:     ageSecretKey,
		AgeRecipients:    []string{agePublicKey},
		CompressionLevel: 3,
	}

	sm := snapshot.NewSnapshotManager(snapshot.SnapshotManagerParams{
		Repos:   repos,
		Storage: mockStorage,
		Config:  cfg,
	})
	sm.SetFilesystem(osFS)

	scanner := snapshot.NewScanner(snapshot.ScannerConfig{
		FS:               osFS,
		Storage:          mockStorage,
		ChunkSize:        int64(16 * 1024),
		MaxBlobSize:      int64(100 * 1024),
		CompressionLevel: 3,
		AgeRecipients:    []string{agePublicKey},
		Repositories:     repos,
	})

	// Backup phase: create the snapshot, scan, complete, export metadata.
	snapshotID, err := sm.CreateSnapshot(ctx, "test-host", "test-version", "test-git")
	require.NoError(t, err)
	t.Logf("Created snapshot: %s", snapshotID)

	result, err := scanner.Scan(ctx, dataDir, snapshotID)
	require.NoError(t, err)
	t.Logf("Scan complete: %d files, %d blobs", result.FilesScanned, result.BlobsCreated)

	err = sm.CompleteSnapshot(ctx, snapshotID)
	require.NoError(t, err)

	// The export is expected to upload db.zst.age and manifest.json.zst.
	err = sm.ExportSnapshotMetadata(ctx, dbPath, snapshotID)
	require.NoError(t, err)
	t.Logf("Exported snapshot metadata")

	metadataKeys, err := mockStorage.List(ctx, "metadata/")
	require.NoError(t, err)
	t.Logf("Metadata keys: %v", metadataKeys)
	assert.GreaterOrEqual(t, len(metadataKeys), 2, "Should have at least db.zst.age and manifest.json.zst")

	// Close the source database before restoring.
	err = db.Close()
	require.NoError(t, err)

	// Restore phase: a Vaultik instance wired to the same storage and config.
	vaultikApp := &vaultik.Vaultik{
		Config:  cfg,
		Storage: mockStorage,
		Fs:      osFS,
		Stdout:  io.Discard,
		Stderr:  io.Discard,
	}
	vaultikApp.SetContext(ctx)

	restoreDir := filepath.Join(realTempDir, "restored")
	restoreOpts := &vaultik.RestoreOptions{
		SnapshotID: snapshotID,
		TargetDir:  restoreDir,
	}
	err = vaultikApp.Restore(restoreOpts)
	require.NoError(t, err, "Restore should succeed with binary SQLite database format")

	// Each restored file is looked up at restoreDir joined with its original
	// path and must match the original content byte for byte.
	for origPath, expectedContent := range testFiles {
		restoredPath := filepath.Join(restoreDir, origPath)
		got, err := afero.ReadFile(osFS, restoredPath)
		require.NoError(t, err, "Should be able to read restored file: %s", restoredPath)
		assert.Equal(t, expectedContent, string(got), "Restored content should match original for: %s", origPath)
	}

	t.Log("Backup and restore test completed successfully")
}