package backup_test

import (
	"context"
	"database/sql"
	"path/filepath"
	"testing"
	"time"

	"git.eeqj.de/sneak/vaultik/internal/backup"
	"git.eeqj.de/sneak/vaultik/internal/database"
	"git.eeqj.de/sneak/vaultik/internal/log"
	"github.com/spf13/afero"
)

// testAgeRecipient is a throwaway age public key used to encrypt test blobs.
// The matching private key is never needed because these tests only scan.
const testAgeRecipient = "age1ezrjmfpwsc95svdg0y54mums3zevgzu0x0ecq2f7tp8a05gl0sjq9q9wjg"

// newTestRepos creates an in-memory test database and returns its
// repositories. The database is closed automatically when the test ends.
func newTestRepos(t *testing.T) *database.Repositories {
	t.Helper()
	db, err := database.NewTestDB()
	if err != nil {
		t.Fatalf("failed to create test database: %v", err)
	}
	t.Cleanup(func() {
		if err := db.Close(); err != nil {
			t.Errorf("failed to close database: %v", err)
		}
	})
	return database.NewRepositories(db)
}

// createTestSnapshot inserts an empty snapshot row so the scanner has a
// snapshot ID to associate scanned entries with. Fails the test on error.
func createTestSnapshot(ctx context.Context, t *testing.T, repos *database.Repositories, snapshotID string) {
	t.Helper()
	err := repos.WithTx(ctx, func(ctx context.Context, tx *sql.Tx) error {
		snapshot := &database.Snapshot{
			ID:               snapshotID,
			Hostname:         "test-host",
			VaultikVersion:   "test",
			StartedAt:        time.Now(),
			CompletedAt:      nil,
			FileCount:        0,
			ChunkCount:       0,
			BlobCount:        0,
			TotalSize:        0,
			BlobSize:         0,
			CompressionRatio: 1.0,
		}
		return repos.Snapshots.Create(ctx, tx, snapshot)
	})
	if err != nil {
		t.Fatalf("failed to create snapshot: %v", err)
	}
}

// TestScannerSimpleDirectory verifies that a small directory tree is scanned
// completely: every file and directory is recorded in the database, file size
// and mode are preserved, and content is chunked with deduplication.
func TestScannerSimpleDirectory(t *testing.T) {
	// Initialize logger for tests
	log.Initialize(log.Config{})

	// Create in-memory filesystem
	fs := afero.NewMemMapFs()

	// Create test directory structure
	testFiles := map[string]string{
		"/source/file1.txt":         "Hello, world!",                // 13 bytes
		"/source/file2.txt":         "This is another file",         // 20 bytes
		"/source/subdir/file3.txt":  "File in subdirectory",         // 20 bytes
		"/source/subdir/file4.txt":  "Another file in subdirectory", // 28 bytes
		"/source/empty.txt":         "",                             // 0 bytes
		"/source/subdir2/file5.txt": "Yet another file",             // 16 bytes
	}

	// Create files with specific times
	testTime := time.Date(2024, 1, 1, 12, 0, 0, 0, time.UTC)
	for path, content := range testFiles {
		dir := filepath.Dir(path)
		if err := fs.MkdirAll(dir, 0755); err != nil {
			t.Fatalf("failed to create directory %s: %v", dir, err)
		}
		if err := afero.WriteFile(fs, path, []byte(content), 0644); err != nil {
			t.Fatalf("failed to write file %s: %v", path, err)
		}
		// Set times
		if err := fs.Chtimes(path, testTime, testTime); err != nil {
			t.Fatalf("failed to set times for %s: %v", path, err)
		}
	}

	repos := newTestRepos(t)

	// Create scanner
	scanner := backup.NewScanner(backup.ScannerConfig{
		FS:               fs,
		ChunkSize:        int64(1024 * 16), // 16KB chunks for testing
		Repositories:     repos,
		MaxBlobSize:      int64(1024 * 1024), // 1MB blobs
		CompressionLevel: 3,
		AgeRecipients:    []string{testAgeRecipient},
	})

	// Create a snapshot record for testing
	ctx := context.Background()
	snapshotID := "test-snapshot-001"
	createTestSnapshot(ctx, t, repos, snapshotID)

	// Scan the directory
	result, err := scanner.Scan(ctx, "/source", snapshotID)
	if err != nil {
		t.Fatalf("scan failed: %v", err)
	}

	// Verify results
	// We now scan 6 files + 3 directories (source, subdir, subdir2) = 9 entries
	if result.FilesScanned != 9 {
		t.Errorf("expected 9 entries scanned, got %d", result.FilesScanned)
	}
	// Directories have their own sizes, so the total will be more than just file content
	if result.BytesScanned < 97 { // At minimum we have 97 bytes of file content
		t.Errorf("expected at least 97 bytes scanned, got %d", result.BytesScanned)
	}

	// Verify files in database
	files, err := repos.Files.ListByPrefix(ctx, "/source")
	if err != nil {
		t.Fatalf("failed to list files: %v", err)
	}
	// We should have 6 files + 3 directories = 9 entries
	if len(files) != 9 {
		t.Errorf("expected 9 entries in database, got %d", len(files))
	}

	// Verify specific file
	file1, err := repos.Files.GetByPath(ctx, "/source/file1.txt")
	if err != nil {
		t.Fatalf("failed to get file1.txt: %v", err)
	}
	if file1.Size != 13 {
		t.Errorf("expected file1.txt size 13, got %d", file1.Size)
	}
	if file1.Mode != 0644 {
		t.Errorf("expected file1.txt mode 0644, got %o", file1.Mode)
	}

	// Verify chunks were created
	chunks, err := repos.FileChunks.GetByFile(ctx, "/source/file1.txt")
	if err != nil {
		t.Fatalf("failed to get chunks for file1.txt: %v", err)
	}
	if len(chunks) != 1 { // Small file should be one chunk
		t.Errorf("expected 1 chunk for file1.txt, got %d", len(chunks))
	}

	// Verify deduplication - file3.txt and file4.txt have different content
	// but we should still have the correct number of unique chunks
	allChunks, err := repos.Chunks.List(ctx)
	if err != nil {
		t.Fatalf("failed to list all chunks: %v", err)
	}
	// We should have at most 6 chunks (one per unique file content)
	// Empty file might not create a chunk
	if len(allChunks) > 6 {
		t.Errorf("expected at most 6 chunks, got %d", len(allChunks))
	}
}

// TestScannerWithSymlinks verifies that symlinks inside the scanned tree are
// recorded with their literal targets (both relative and absolute) rather
// than being followed.
func TestScannerWithSymlinks(t *testing.T) {
	// Initialize logger for tests
	log.Initialize(log.Config{})

	// Create in-memory filesystem
	fs := afero.NewMemMapFs()

	// Create test files
	if err := fs.MkdirAll("/source", 0755); err != nil {
		t.Fatal(err)
	}
	if err := afero.WriteFile(fs, "/source/target.txt", []byte("target content"), 0644); err != nil {
		t.Fatal(err)
	}
	if err := afero.WriteFile(fs, "/outside/file.txt", []byte("outside content"), 0644); err != nil {
		t.Fatal(err)
	}

	// Create symlinks (if supported by the filesystem)
	linker, ok := fs.(afero.Symlinker)
	if !ok {
		t.Skip("filesystem does not support symlinks")
	}

	// Symlink to file in source
	if err := linker.SymlinkIfPossible("target.txt", "/source/link1.txt"); err != nil {
		t.Fatal(err)
	}

	// Symlink to file outside source
	if err := linker.SymlinkIfPossible("/outside/file.txt", "/source/link2.txt"); err != nil {
		t.Fatal(err)
	}

	repos := newTestRepos(t)

	// Create scanner
	scanner := backup.NewScanner(backup.ScannerConfig{
		FS:               fs,
		ChunkSize:        int64(1024 * 16),
		Repositories:     repos,
		MaxBlobSize:      int64(1024 * 1024),
		CompressionLevel: 3,
		AgeRecipients:    []string{testAgeRecipient},
	})

	// Create a snapshot record for testing
	ctx := context.Background()
	snapshotID := "test-snapshot-001"
	createTestSnapshot(ctx, t, repos, snapshotID)

	// Scan the directory
	result, err := scanner.Scan(ctx, "/source", snapshotID)
	if err != nil {
		t.Fatalf("scan failed: %v", err)
	}

	// Should have scanned 3 files (target + 2 symlinks)
	if result.FilesScanned != 3 {
		t.Errorf("expected 3 files scanned, got %d", result.FilesScanned)
	}

	// Check symlinks in database
	link1, err := repos.Files.GetByPath(ctx, "/source/link1.txt")
	if err != nil {
		t.Fatalf("failed to get link1.txt: %v", err)
	}
	if link1.LinkTarget != "target.txt" {
		t.Errorf("expected link1.txt target 'target.txt', got %q", link1.LinkTarget)
	}

	link2, err := repos.Files.GetByPath(ctx, "/source/link2.txt")
	if err != nil {
		t.Fatalf("failed to get link2.txt: %v", err)
	}
	if link2.LinkTarget != "/outside/file.txt" {
		t.Errorf("expected link2.txt target '/outside/file.txt', got %q", link2.LinkTarget)
	}
}

// TestScannerLargeFile verifies that a 1MB file is scanned, chunked with
// content-defined chunking, and that the stored chunk sequence numbers are
// contiguous and in order.
func TestScannerLargeFile(t *testing.T) {
	// Initialize logger for tests
	log.Initialize(log.Config{})

	// Create in-memory filesystem
	fs := afero.NewMemMapFs()

	// Create a large file that will require multiple chunks
	// Use random content to ensure good chunk boundaries
	largeContent := make([]byte, 1024*1024) // 1MB
	// Fill with pseudo-random data to ensure chunk boundaries
	for i := 0; i < len(largeContent); i++ {
		// Simple pseudo-random generator for deterministic tests
		largeContent[i] = byte((i * 7919) ^ (i >> 3))
	}

	if err := fs.MkdirAll("/source", 0755); err != nil {
		t.Fatal(err)
	}
	if err := afero.WriteFile(fs, "/source/large.bin", largeContent, 0644); err != nil {
		t.Fatal(err)
	}

	repos := newTestRepos(t)

	// Create scanner with 64KB average chunk size
	scanner := backup.NewScanner(backup.ScannerConfig{
		FS:               fs,
		ChunkSize:        int64(1024 * 64), // 64KB average chunks
		Repositories:     repos,
		MaxBlobSize:      int64(1024 * 1024),
		CompressionLevel: 3,
		AgeRecipients:    []string{testAgeRecipient},
	})

	// Create a snapshot record for testing
	ctx := context.Background()
	snapshotID := "test-snapshot-001"
	createTestSnapshot(ctx, t, repos, snapshotID)

	// Scan the directory
	result, err := scanner.Scan(ctx, "/source", snapshotID)
	if err != nil {
		t.Fatalf("scan failed: %v", err)
	}

	// We scan 1 file + 1 directory = 2 entries
	if result.FilesScanned != 2 {
		t.Errorf("expected 2 entries scanned, got %d", result.FilesScanned)
	}
	// The file size should be at least 1MB
	if result.BytesScanned < 1024*1024 {
		t.Errorf("expected at least %d bytes scanned, got %d", 1024*1024, result.BytesScanned)
	}

	// Verify chunks
	chunks, err := repos.FileChunks.GetByFile(ctx, "/source/large.bin")
	if err != nil {
		t.Fatalf("failed to get chunks: %v", err)
	}
	// With content-defined chunking, the number of chunks depends on content
	// For a 1MB file, we should get at least 1 chunk
	if len(chunks) < 1 {
		t.Errorf("expected at least 1 chunk, got %d", len(chunks))
	}

	// Log the actual number of chunks for debugging
	t.Logf("1MB file produced %d chunks with 64KB average chunk size", len(chunks))

	// Verify chunk sequence
	for i, fc := range chunks {
		if fc.Idx != i {
			t.Errorf("chunk %d has incorrect sequence %d", i, fc.Idx)
		}
	}
}