This commit represents a significant architectural overhaul of vaultik:

Database Schema Changes:
- Switch files table to use UUID primary keys instead of path-based keys
- Add UUID primary keys to blobs table for immediate chunk association
- Update all foreign key relationships to use UUIDs
- Add comprehensive schema documentation in DATAMODEL.md
- Add SQLite busy timeout handling for concurrent operations (see the sketch below)

Streaming and Performance Improvements:
- Implement true streaming blob packing without intermediate storage (see the sketch below)
- Add streaming chunk processing to reduce memory usage
- Improve progress reporting with real-time metrics
- Add upload metrics tracking in new uploads table

CLI Refactoring:
- Restructure CLI to use subcommands: snapshot create/list/purge/verify
- Add store info command for S3 configuration display
- Add custom duration parser supporting days/weeks/months/years (see the sketch below)
- Remove old backup.go in favor of enhanced snapshot.go
- Add --cron flag for silent operation

Configuration Changes:
- Remove unused index_prefix configuration option
- Add support for snapshot pruning retention policies
- Improve configuration validation and error messages

Testing Improvements:
- Add comprehensive repository tests with edge cases
- Add cascade delete debugging tests
- Fix concurrent operation tests to use SQLite busy timeout
- Remove tolerance for SQLITE_BUSY errors in tests

Documentation:
- Add MIT LICENSE file
- Update README with new command structure
- Add comprehensive DATAMODEL.md explaining database schema
- Update DESIGN.md with UUID-based architecture

Other Changes:
- Add test-config.yml for testing
- Update Makefile with better test output formatting
- Fix various race conditions in concurrent operations
- Improve error handling throughout
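The SQLite busy timeout mentioned above is implemented inside vaultik's internal/database package, which is not shown on this page. As a rough illustration only, a busy timeout can be configured through database/sql along these lines; the driver choice (mattn/go-sqlite3) and the _busy_timeout DSN parameter are assumptions of this sketch, not details taken from vaultik:

// Sketch only: generic ways to set a SQLite busy timeout via database/sql.
// The driver and DSN parameter are assumptions; vaultik's actual handling
// lives in internal/database and may differ.
package main

import (
	"database/sql"
	"log"

	_ "github.com/mattn/go-sqlite3" // assumed driver choice
)

func main() {
	// Option 1: set the timeout in the DSN so every pooled connection gets it.
	db, err := sql.Open("sqlite3", "file:state.db?_busy_timeout=5000")
	if err != nil {
		log.Fatal(err)
	}
	defer db.Close()

	// Option 2: issue the PRAGMA explicitly. 5000 ms means a writer waits up
	// to five seconds for a lock instead of failing with SQLITE_BUSY. Note
	// that the PRAGMA applies only to the connection that runs it, so with a
	// connection pool the DSN form above is the safer choice.
	if _, err := db.Exec("PRAGMA busy_timeout = 5000"); err != nil {
		log.Fatal(err)
	}
}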
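The "true streaming blob packing without intermediate storage" item refers to vaultik's internal packing pipeline, which is also not shown here. A minimal sketch of the general technique, using io.Pipe and stdlib gzip as stand-ins (vaultik actually uses zstd plus age encryption; uploadBlob and the chunk data below are hypothetical):

// Sketch only: stream a packed blob to an uploader without staging it on
// disk, by connecting a producer goroutine to the uploader with io.Pipe.
package main

import (
	"compress/gzip"
	"fmt"
	"io"
	"os"
)

// uploadBlob stands in for an S3 PutObject call that consumes a reader.
func uploadBlob(r io.Reader) (int64, error) {
	return io.Copy(os.Stdout, r) // placeholder destination
}

func main() {
	chunks := [][]byte{[]byte("chunk one"), []byte("chunk two")}

	pr, pw := io.Pipe()

	// Producer: compress chunks into the pipe as they are produced.
	go func() {
		zw := gzip.NewWriter(pw)
		for _, c := range chunks {
			if _, err := zw.Write(c); err != nil {
				pw.CloseWithError(err)
				return
			}
		}
		if err := zw.Close(); err != nil {
			pw.CloseWithError(err)
			return
		}
		pw.Close()
	}()

	// Consumer: the uploader reads the blob while it is still being packed,
	// so the full blob never exists in memory or on disk at once.
	n, err := uploadBlob(pr)
	if err != nil {
		panic(err)
	}
	fmt.Fprintf(os.Stderr, "uploaded %d bytes\n", n)
}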
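The custom duration parser noted in the commit message (days/weeks/months/years) lives in vaultik's CLI code and is not part of the test file below. A minimal sketch of the general idea, with a hypothetical function name and an assumed 30-day month / 365-day year convention:

// Sketch only: a retention-style duration parser accepting d/w/m/y suffixes.
// The function name, suffix set, and month/year lengths are assumptions, not
// vaultik's actual implementation.
package main

import (
	"fmt"
	"strconv"
	"strings"
	"time"
)

func parseRetention(s string) (time.Duration, error) {
	units := map[string]time.Duration{
		"d": 24 * time.Hour,
		"w": 7 * 24 * time.Hour,
		"m": 30 * 24 * time.Hour,  // month approximated as 30 days
		"y": 365 * 24 * time.Hour, // year approximated as 365 days
	}
	for suffix, unit := range units {
		if !strings.HasSuffix(s, suffix) {
			continue
		}
		n, err := strconv.Atoi(strings.TrimSuffix(s, suffix))
		if err != nil {
			return 0, fmt.Errorf("invalid duration %q: %w", s, err)
		}
		return time.Duration(n) * unit, nil
	}
	// Fall back to Go's standard parser for forms like "36h".
	return time.ParseDuration(s)
}

func main() {
	d, err := parseRetention("2w")
	if err != nil {
		panic(err)
	}
	fmt.Println(d) // 336h0m0s
}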
package backup_test

import (
	"context"
	"database/sql"
	"path/filepath"
	"testing"
	"time"

	"git.eeqj.de/sneak/vaultik/internal/backup"
	"git.eeqj.de/sneak/vaultik/internal/database"
	"git.eeqj.de/sneak/vaultik/internal/log"
	"github.com/spf13/afero"
)

func TestScannerSimpleDirectory(t *testing.T) {
	// Initialize logger for tests
	log.Initialize(log.Config{})

	// Create in-memory filesystem
	fs := afero.NewMemMapFs()

	// Create test directory structure
	testFiles := map[string]string{
		"/source/file1.txt":         "Hello, world!",                // 13 bytes
		"/source/file2.txt":         "This is another file",         // 20 bytes
		"/source/subdir/file3.txt":  "File in subdirectory",         // 20 bytes
		"/source/subdir/file4.txt":  "Another file in subdirectory", // 28 bytes
		"/source/empty.txt":         "",                             // 0 bytes
		"/source/subdir2/file5.txt": "Yet another file",             // 16 bytes
	}

	// Create files with specific times
	testTime := time.Date(2024, 1, 1, 12, 0, 0, 0, time.UTC)
	for path, content := range testFiles {
		dir := filepath.Dir(path)
		if err := fs.MkdirAll(dir, 0755); err != nil {
			t.Fatalf("failed to create directory %s: %v", dir, err)
		}
		if err := afero.WriteFile(fs, path, []byte(content), 0644); err != nil {
			t.Fatalf("failed to write file %s: %v", path, err)
		}
		// Set times
		if err := fs.Chtimes(path, testTime, testTime); err != nil {
			t.Fatalf("failed to set times for %s: %v", path, err)
		}
	}

	// Create test database
	db, err := database.NewTestDB()
	if err != nil {
		t.Fatalf("failed to create test database: %v", err)
	}
	defer func() {
		if err := db.Close(); err != nil {
			t.Errorf("failed to close database: %v", err)
		}
	}()

	repos := database.NewRepositories(db)

	// Create scanner
	scanner := backup.NewScanner(backup.ScannerConfig{
		FS:               fs,
		ChunkSize:        int64(1024 * 16), // 16KB chunks for testing
		Repositories:     repos,
		MaxBlobSize:      int64(1024 * 1024), // 1MB blobs
		CompressionLevel: 3,
		AgeRecipients:    []string{"age1ezrjmfpwsc95svdg0y54mums3zevgzu0x0ecq2f7tp8a05gl0sjq9q9wjg"}, // Test public key
	})

	// Create a snapshot record for testing
	ctx := context.Background()
	snapshotID := "test-snapshot-001"
	err = repos.WithTx(ctx, func(ctx context.Context, tx *sql.Tx) error {
		snapshot := &database.Snapshot{
			ID:               snapshotID,
			Hostname:         "test-host",
			VaultikVersion:   "test",
			StartedAt:        time.Now(),
			CompletedAt:      nil,
			FileCount:        0,
			ChunkCount:       0,
			BlobCount:        0,
			TotalSize:        0,
			BlobSize:         0,
			CompressionRatio: 1.0,
		}
		return repos.Snapshots.Create(ctx, tx, snapshot)
	})
	if err != nil {
		t.Fatalf("failed to create snapshot: %v", err)
	}

	// Scan the directory
	var result *backup.ScanResult
	result, err = scanner.Scan(ctx, "/source", snapshotID)
	if err != nil {
		t.Fatalf("scan failed: %v", err)
	}

	// Verify results
	// We now scan 6 files + 3 directories (source, subdir, subdir2) = 9 entries
	if result.FilesScanned != 9 {
		t.Errorf("expected 9 entries scanned, got %d", result.FilesScanned)
	}

	// Directories have their own sizes, so the total will be more than just file content
	if result.BytesScanned < 97 { // At minimum we have 97 bytes of file content
		t.Errorf("expected at least 97 bytes scanned, got %d", result.BytesScanned)
	}

	// Verify files in database
	files, err := repos.Files.ListByPrefix(ctx, "/source")
	if err != nil {
		t.Fatalf("failed to list files: %v", err)
	}

	// We should have 6 files + 3 directories = 9 entries
	if len(files) != 9 {
		t.Errorf("expected 9 entries in database, got %d", len(files))
	}

	// Verify specific file
	file1, err := repos.Files.GetByPath(ctx, "/source/file1.txt")
	if err != nil {
		t.Fatalf("failed to get file1.txt: %v", err)
	}

	if file1.Size != 13 {
		t.Errorf("expected file1.txt size 13, got %d", file1.Size)
	}

	if file1.Mode != 0644 {
		t.Errorf("expected file1.txt mode 0644, got %o", file1.Mode)
	}

	// Verify chunks were created
	chunks, err := repos.FileChunks.GetByFile(ctx, "/source/file1.txt")
	if err != nil {
		t.Fatalf("failed to get chunks for file1.txt: %v", err)
	}

	if len(chunks) != 1 { // Small file should be one chunk
		t.Errorf("expected 1 chunk for file1.txt, got %d", len(chunks))
	}

	// Verify deduplication - file3.txt and file4.txt have different content
	// but we should still have the correct number of unique chunks
	allChunks, err := repos.Chunks.List(ctx)
	if err != nil {
		t.Fatalf("failed to list all chunks: %v", err)
	}

	// We should have at most 6 chunks (one per unique file content)
	// Empty file might not create a chunk
	if len(allChunks) > 6 {
		t.Errorf("expected at most 6 chunks, got %d", len(allChunks))
	}
}

func TestScannerWithSymlinks(t *testing.T) {
	// Initialize logger for tests
	log.Initialize(log.Config{})

	// Create in-memory filesystem
	fs := afero.NewMemMapFs()

	// Create test files
	if err := fs.MkdirAll("/source", 0755); err != nil {
		t.Fatal(err)
	}
	if err := afero.WriteFile(fs, "/source/target.txt", []byte("target content"), 0644); err != nil {
		t.Fatal(err)
	}
	if err := afero.WriteFile(fs, "/outside/file.txt", []byte("outside content"), 0644); err != nil {
		t.Fatal(err)
	}

	// Create symlinks (if supported by the filesystem)
	linker, ok := fs.(afero.Symlinker)
	if !ok {
		t.Skip("filesystem does not support symlinks")
	}

	// Symlink to file in source
	if err := linker.SymlinkIfPossible("target.txt", "/source/link1.txt"); err != nil {
		t.Fatal(err)
	}

	// Symlink to file outside source
	if err := linker.SymlinkIfPossible("/outside/file.txt", "/source/link2.txt"); err != nil {
		t.Fatal(err)
	}

	// Create test database
	db, err := database.NewTestDB()
	if err != nil {
		t.Fatalf("failed to create test database: %v", err)
	}
	defer func() {
		if err := db.Close(); err != nil {
			t.Errorf("failed to close database: %v", err)
		}
	}()

	repos := database.NewRepositories(db)

	// Create scanner
	scanner := backup.NewScanner(backup.ScannerConfig{
		FS:               fs,
		ChunkSize:        1024 * 16,
		Repositories:     repos,
		MaxBlobSize:      int64(1024 * 1024),
		CompressionLevel: 3,
		AgeRecipients:    []string{"age1ezrjmfpwsc95svdg0y54mums3zevgzu0x0ecq2f7tp8a05gl0sjq9q9wjg"}, // Test public key
	})

	// Create a snapshot record for testing
	ctx := context.Background()
	snapshotID := "test-snapshot-001"
	err = repos.WithTx(ctx, func(ctx context.Context, tx *sql.Tx) error {
		snapshot := &database.Snapshot{
			ID:               snapshotID,
			Hostname:         "test-host",
			VaultikVersion:   "test",
			StartedAt:        time.Now(),
			CompletedAt:      nil,
			FileCount:        0,
			ChunkCount:       0,
			BlobCount:        0,
			TotalSize:        0,
			BlobSize:         0,
			CompressionRatio: 1.0,
		}
		return repos.Snapshots.Create(ctx, tx, snapshot)
	})
	if err != nil {
		t.Fatalf("failed to create snapshot: %v", err)
	}

	// Scan the directory
	var result *backup.ScanResult
	result, err = scanner.Scan(ctx, "/source", snapshotID)
	if err != nil {
		t.Fatalf("scan failed: %v", err)
	}

	// Should have scanned 3 files (target + 2 symlinks)
	if result.FilesScanned != 3 {
		t.Errorf("expected 3 files scanned, got %d", result.FilesScanned)
	}

	// Check symlinks in database
	link1, err := repos.Files.GetByPath(ctx, "/source/link1.txt")
	if err != nil {
		t.Fatalf("failed to get link1.txt: %v", err)
	}

	if link1.LinkTarget != "target.txt" {
		t.Errorf("expected link1.txt target 'target.txt', got %q", link1.LinkTarget)
	}

	link2, err := repos.Files.GetByPath(ctx, "/source/link2.txt")
	if err != nil {
		t.Fatalf("failed to get link2.txt: %v", err)
	}

	if link2.LinkTarget != "/outside/file.txt" {
		t.Errorf("expected link2.txt target '/outside/file.txt', got %q", link2.LinkTarget)
	}
}

func TestScannerLargeFile(t *testing.T) {
	// Initialize logger for tests
	log.Initialize(log.Config{})

	// Create in-memory filesystem
	fs := afero.NewMemMapFs()

	// Create a large file that will require multiple chunks
	// Use random content to ensure good chunk boundaries
	largeContent := make([]byte, 1024*1024) // 1MB
	// Fill with pseudo-random data to ensure chunk boundaries
	for i := 0; i < len(largeContent); i++ {
		// Simple pseudo-random generator for deterministic tests
		largeContent[i] = byte((i * 7919) ^ (i >> 3))
	}

	if err := fs.MkdirAll("/source", 0755); err != nil {
		t.Fatal(err)
	}
	if err := afero.WriteFile(fs, "/source/large.bin", largeContent, 0644); err != nil {
		t.Fatal(err)
	}

	// Create test database
	db, err := database.NewTestDB()
	if err != nil {
		t.Fatalf("failed to create test database: %v", err)
	}
	defer func() {
		if err := db.Close(); err != nil {
			t.Errorf("failed to close database: %v", err)
		}
	}()

	repos := database.NewRepositories(db)

	// Create scanner with 64KB average chunk size
	scanner := backup.NewScanner(backup.ScannerConfig{
		FS:               fs,
		ChunkSize:        int64(1024 * 64), // 64KB average chunks
		Repositories:     repos,
		MaxBlobSize:      int64(1024 * 1024),
		CompressionLevel: 3,
		AgeRecipients:    []string{"age1ezrjmfpwsc95svdg0y54mums3zevgzu0x0ecq2f7tp8a05gl0sjq9q9wjg"}, // Test public key
	})

	// Create a snapshot record for testing
	ctx := context.Background()
	snapshotID := "test-snapshot-001"
	err = repos.WithTx(ctx, func(ctx context.Context, tx *sql.Tx) error {
		snapshot := &database.Snapshot{
			ID:               snapshotID,
			Hostname:         "test-host",
			VaultikVersion:   "test",
			StartedAt:        time.Now(),
			CompletedAt:      nil,
			FileCount:        0,
			ChunkCount:       0,
			BlobCount:        0,
			TotalSize:        0,
			BlobSize:         0,
			CompressionRatio: 1.0,
		}
		return repos.Snapshots.Create(ctx, tx, snapshot)
	})
	if err != nil {
		t.Fatalf("failed to create snapshot: %v", err)
	}

	// Scan the directory
	var result *backup.ScanResult
	result, err = scanner.Scan(ctx, "/source", snapshotID)
	if err != nil {
		t.Fatalf("scan failed: %v", err)
	}

	// We scan 1 file + 1 directory = 2 entries
	if result.FilesScanned != 2 {
		t.Errorf("expected 2 entries scanned, got %d", result.FilesScanned)
	}

	// The file size should be at least 1MB
	if result.BytesScanned < 1024*1024 {
		t.Errorf("expected at least %d bytes scanned, got %d", 1024*1024, result.BytesScanned)
	}

	// Verify chunks
	chunks, err := repos.FileChunks.GetByFile(ctx, "/source/large.bin")
	if err != nil {
		t.Fatalf("failed to get chunks: %v", err)
	}

	// With content-defined chunking, the number of chunks depends on content
	// For a 1MB file, we should get at least 1 chunk
	if len(chunks) < 1 {
		t.Errorf("expected at least 1 chunk, got %d", len(chunks))
	}

	// Log the actual number of chunks for debugging
	t.Logf("1MB file produced %d chunks with 64KB average chunk size", len(chunks))

	// Verify chunk sequence
	for i, fc := range chunks {
		if fc.Idx != i {
			t.Errorf("chunk %d has incorrect sequence %d", i, fc.Idx)
		}
	}
}