package backup

import (
	"context"
	"crypto/sha256"
	"database/sql"
	"fmt"
	"io"
	"io/fs"
	"os"
	"path/filepath"
	"testing"
	"testing/fstest"
	"time"

	"git.eeqj.de/sneak/vaultik/internal/database"
)

// MockS3Client is a mock implementation of S3 operations for testing
type MockS3Client struct {
	storage map[string][]byte
}

func NewMockS3Client() *MockS3Client {
	return &MockS3Client{
		storage: make(map[string][]byte),
	}
}

func (m *MockS3Client) PutBlob(ctx context.Context, hash string, data []byte) error {
	m.storage[hash] = data
	return nil
}

func (m *MockS3Client) GetBlob(ctx context.Context, hash string) ([]byte, error) {
	data, ok := m.storage[hash]
	if !ok {
		return nil, fmt.Errorf("blob not found: %s", hash)
	}
	return data, nil
}

func (m *MockS3Client) BlobExists(ctx context.Context, hash string) (bool, error) {
	_, ok := m.storage[hash]
	return ok, nil
}

func (m *MockS3Client) CreateBucket(ctx context.Context, bucket string) error {
	return nil
}
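// Compile-time check that MockS3Client implements the S3 operations
// BackupEngine requires; the interface literal mirrors the anonymous
// interface declared on BackupEngine below, so a signature drift between
// the mock and the engine fails the build rather than the tests.
var _ interface {
	PutBlob(ctx context.Context, hash string, data []byte) error
	BlobExists(ctx context.Context, hash string) (bool, error)
} = (*MockS3Client)(nil)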
func TestBackupWithInMemoryFS(t *testing.T) {
	// Create a temporary directory for the database
	tempDir := t.TempDir()
	dbPath := filepath.Join(tempDir, "test.db")

	// Create test filesystem
	testFS := fstest.MapFS{
		"file1.txt": &fstest.MapFile{
			Data:    []byte("Hello, World!"),
			Mode:    0644,
			ModTime: time.Now(),
		},
		"dir1/file2.txt": &fstest.MapFile{
			Data:    []byte("This is a test file with some content."),
			Mode:    0755,
			ModTime: time.Now(),
		},
		"dir1/subdir/file3.txt": &fstest.MapFile{
			Data:    []byte("Another file in a subdirectory."),
			Mode:    0600,
			ModTime: time.Now(),
		},
		"largefile.bin": &fstest.MapFile{
			Data:    generateLargeFileContent(10 * 1024 * 1024), // 10MB file with varied content
			Mode:    0644,
			ModTime: time.Now(),
		},
	}

	// Initialize the database
	ctx := context.Background()
	db, err := database.New(ctx, dbPath)
	if err != nil {
		t.Fatalf("Failed to create database: %v", err)
	}
	defer func() {
		if err := db.Close(); err != nil {
			t.Logf("Failed to close database: %v", err)
		}
	}()

	repos := database.NewRepositories(db)

	// Create mock S3 client
	s3Client := NewMockS3Client()

	// Run backup
	backupEngine := &BackupEngine{
		repos:    repos,
		s3Client: s3Client,
	}

	snapshotID, err := backupEngine.Backup(ctx, testFS, ".")
	if err != nil {
		t.Fatalf("Backup failed: %v", err)
	}

	// Verify snapshot was created
	snapshot, err := repos.Snapshots.GetByID(ctx, snapshotID)
	if err != nil {
		t.Fatalf("Failed to get snapshot: %v", err)
	}
	if snapshot == nil {
		t.Fatal("Snapshot not found")
	}
	if snapshot.FileCount == 0 {
		t.Error("Expected snapshot to have files")
	}

	// Verify files in database
	files, err := repos.Files.ListByPrefix(ctx, "")
	if err != nil {
		t.Fatalf("Failed to list files: %v", err)
	}

	expectedFiles := map[string]bool{
		"file1.txt":             true,
		"dir1/file2.txt":        true,
		"dir1/subdir/file3.txt": true,
		"largefile.bin":         true,
	}

	if len(files) != len(expectedFiles) {
		t.Errorf("Expected %d files, got %d", len(expectedFiles), len(files))
	}

	for _, file := range files {
		if !expectedFiles[file.Path] {
			t.Errorf("Unexpected file in database: %s", file.Path)
		}
		delete(expectedFiles, file.Path)

		// Verify file metadata
		fsFile := testFS[file.Path]
		if fsFile == nil {
			t.Errorf("File %s not found in test filesystem", file.Path)
			continue
		}
		if file.Size != int64(len(fsFile.Data)) {
			t.Errorf("File %s: expected size %d, got %d", file.Path, len(fsFile.Data), file.Size)
		}
		if file.Mode != uint32(fsFile.Mode) {
			t.Errorf("File %s: expected mode %o, got %o", file.Path, fsFile.Mode, file.Mode)
		}
	}

	if len(expectedFiles) > 0 {
		t.Errorf("Files not found in database: %v", expectedFiles)
	}

	// Verify chunks
	chunks, err := repos.Chunks.List(ctx)
	if err != nil {
		t.Fatalf("Failed to list chunks: %v", err)
	}
	if len(chunks) == 0 {
		t.Error("No chunks found in database")
	}

	// The large file should create 10 chunks (10MB / 1MB chunk size),
	// plus one chunk for each of the three small files.
	minExpectedChunks := 10 + 3
	if len(chunks) < minExpectedChunks {
		t.Errorf("Expected at least %d chunks, got %d", minExpectedChunks, len(chunks))
	}

	// Verify at least one blob was created and uploaded.
	// We can't list blobs directly, but we can check via snapshot blobs.
	blobHashes, err := repos.Snapshots.GetBlobHashes(ctx, snapshotID)
	if err != nil {
		t.Fatalf("Failed to get blob hashes: %v", err)
	}
	if len(blobHashes) == 0 {
		t.Error("Expected at least one blob to be created")
	}

	for _, blobHash := range blobHashes {
		// Check blob exists in mock S3
		exists, err := s3Client.BlobExists(ctx, blobHash)
		if err != nil {
			t.Errorf("Failed to check blob %s: %v", blobHash, err)
		}
		if !exists {
			t.Errorf("Blob %s not found in S3", blobHash)
		}
	}
}

func TestBackupDeduplication(t *testing.T) {
	// Create a temporary directory for the database
	tempDir := t.TempDir()
	dbPath := filepath.Join(tempDir, "test.db")

	// Create test filesystem with duplicate content
	testFS := fstest.MapFS{
		"file1.txt": &fstest.MapFile{
			Data:    []byte("Duplicate content"),
			Mode:    0644,
			ModTime: time.Now(),
		},
		"file2.txt": &fstest.MapFile{
			Data:    []byte("Duplicate content"),
			Mode:    0644,
			ModTime: time.Now(),
		},
		"file3.txt": &fstest.MapFile{
			Data:    []byte("Unique content"),
			Mode:    0644,
			ModTime: time.Now(),
		},
	}

	// Initialize the database
	ctx := context.Background()
	db, err := database.New(ctx, dbPath)
	if err != nil {
		t.Fatalf("Failed to create database: %v", err)
	}
	defer func() {
		if err := db.Close(); err != nil {
			t.Logf("Failed to close database: %v", err)
		}
	}()

	repos := database.NewRepositories(db)

	// Create mock S3 client
	s3Client := NewMockS3Client()

	// Run backup
	backupEngine := &BackupEngine{
		repos:    repos,
		s3Client: s3Client,
	}

	_, err = backupEngine.Backup(ctx, testFS, ".")
	if err != nil {
		t.Fatalf("Backup failed: %v", err)
	}

	// Verify deduplication
	chunks, err := repos.Chunks.List(ctx)
	if err != nil {
		t.Fatalf("Failed to list chunks: %v", err)
	}

	// Should have only 2 unique chunks (duplicate content + unique content)
	if len(chunks) != 2 {
		t.Errorf("Expected 2 unique chunks, got %d", len(chunks))
	}

	// Verify chunk references
	for _, chunk := range chunks {
		files, err := repos.ChunkFiles.GetByChunkHash(ctx, chunk.ChunkHash)
		if err != nil {
			t.Errorf("Failed to get files for chunk %s: %v", chunk.ChunkHash, err)
		}

		// The duplicate content chunk should be referenced by 2 files
		if chunk.Size == int64(len("Duplicate content")) && len(files) != 2 {
			t.Errorf("Expected duplicate chunk to be referenced by 2 files, got %d", len(files))
		}
	}
}
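// TestMockS3ClientRoundTrip is a minimal sanity check of the mock itself:
// a blob stored via PutBlob should come back byte-identical from GetBlob,
// and an unknown hash should produce an error. It also covers the GetBlob
// path, which the backup tests above never exercise.
func TestMockS3ClientRoundTrip(t *testing.T) {
	ctx := context.Background()
	client := NewMockS3Client()

	data := []byte("round-trip payload")
	hash := calculateHash(data)

	if err := client.PutBlob(ctx, hash, data); err != nil {
		t.Fatalf("PutBlob failed: %v", err)
	}

	got, err := client.GetBlob(ctx, hash)
	if err != nil {
		t.Fatalf("GetBlob failed: %v", err)
	}
	if string(got) != string(data) {
		t.Errorf("GetBlob returned %q, want %q", got, data)
	}

	if _, err := client.GetBlob(ctx, "missing"); err == nil {
		t.Error("Expected an error for a blob that was never stored")
	}
}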
// BackupEngine performs backup operations
type BackupEngine struct {
	repos    *database.Repositories
	s3Client interface {
		PutBlob(ctx context.Context, hash string, data []byte) error
		BlobExists(ctx context.Context, hash string) (bool, error)
	}
}

// Backup performs a backup of the given filesystem
func (b *BackupEngine) Backup(ctx context.Context, fsys fs.FS, root string) (string, error) {
	// Create a new snapshot
	hostname, _ := os.Hostname()
	snapshotID := time.Now().Format(time.RFC3339)
	snapshot := &database.Snapshot{
		ID:             snapshotID,
		Hostname:       hostname,
		VaultikVersion: "test",
		StartedAt:      time.Now(),
		CompletedAt:    nil,
	}

	// Create initial snapshot record
	err := b.repos.WithTx(ctx, func(ctx context.Context, tx *sql.Tx) error {
		return b.repos.Snapshots.Create(ctx, tx, snapshot)
	})
	if err != nil {
		return "", err
	}

	// Track counters
	var fileCount, chunkCount, blobCount, totalSize, blobSize int64

	// Track which chunks we've seen to handle deduplication
	processedChunks := make(map[string]bool)

	// Scan the filesystem and process files
	err = fs.WalkDir(fsys, root, func(path string, d fs.DirEntry, err error) error {
		if err != nil {
			return err
		}

		// Skip directories
		if d.IsDir() {
			return nil
		}

		// Get file info
		info, err := d.Info()
		if err != nil {
			return err
		}

		// Handle symlinks
		if info.Mode()&fs.ModeSymlink != 0 {
			// For testing, we skip symlinks since fstest doesn't support them well
			return nil
		}

		// Create file record in a short transaction
		file := &database.File{
			Path:  path,
			Size:  info.Size(),
			Mode:  uint32(info.Mode()),
			MTime: info.ModTime(),
			CTime: info.ModTime(), // Use mtime as ctime for test
			UID:   1000,           // Default UID for test
			GID:   1000,           // Default GID for test
		}
		err = b.repos.WithTx(ctx, func(ctx context.Context, tx *sql.Tx) error {
			return b.repos.Files.Create(ctx, tx, file)
		})
		if err != nil {
			return err
		}
		fileCount++
		totalSize += info.Size()

		// Read and process file in chunks
		f, err := fsys.Open(path)
		if err != nil {
			return err
		}
		defer func() {
			if err := f.Close(); err != nil {
				// Log but don't fail; the walk may already be unwinding from an error
				fmt.Fprintf(os.Stderr, "Failed to close file: %v\n", err)
			}
		}()

		// Process file in fixed-size chunks
		chunkIndex := 0
		buffer := make([]byte, defaultChunkSize)
		for {
			n, err := f.Read(buffer)
			if err != nil && err != io.EOF {
				return err
			}
			if n == 0 {
				break
			}

			chunkData := buffer[:n]
			chunkHash := calculateHash(chunkData)

			// Check if chunk already exists (outside of transaction)
			existingChunk, _ := b.repos.Chunks.GetByHash(ctx, chunkHash)
			if existingChunk == nil {
				// Create new chunk in a short transaction
				err = b.repos.WithTx(ctx, func(ctx context.Context, tx *sql.Tx) error {
					chunk := &database.Chunk{
						ChunkHash: chunkHash,
						SHA256:    chunkHash,
						Size:      int64(n),
					}
					return b.repos.Chunks.Create(ctx, tx, chunk)
				})
				if err != nil {
					return err
				}
				processedChunks[chunkHash] = true
			}

			// Create file-chunk mapping in a short transaction
			err = b.repos.WithTx(ctx, func(ctx context.Context, tx *sql.Tx) error {
				fileChunk := &database.FileChunk{
					FileID:    file.ID,
					Idx:       chunkIndex,
					ChunkHash: chunkHash,
				}
				return b.repos.FileChunks.Create(ctx, tx, fileChunk)
			})
			if err != nil {
				return err
			}

			// Create chunk-file mapping in a short transaction
			err = b.repos.WithTx(ctx, func(ctx context.Context, tx *sql.Tx) error {
				chunkFile := &database.ChunkFile{
					ChunkHash:  chunkHash,
					FileID:     file.ID,
					FileOffset: int64(chunkIndex * defaultChunkSize),
					Length:     int64(n),
				}
				return b.repos.ChunkFiles.Create(ctx, tx, chunkFile)
			})
			if err != nil {
				return err
			}

			chunkIndex++
		}

		return nil
	})
	if err != nil {
		return "", err
	}

	// After all files are processed, create blobs for new chunks
	for chunkHash := range processedChunks {
		// Get chunk data (outside of transaction)
		chunk, err := b.repos.Chunks.GetByHash(ctx, chunkHash)
		if err != nil {
			return "", err
		}
		chunkCount++

		// In a real system, blobs would contain multiple chunks and be encrypted.
		// For testing, we create a blob with a "blob-" prefix to differentiate.
		blobHash := "blob-" + chunkHash

		// For the test, we create dummy data since we don't have the original
		dummyData := []byte(chunkHash)

		// Upload to S3 as a blob
		if err := b.s3Client.PutBlob(ctx, blobHash, dummyData); err != nil {
			return "", err
		}

		// Create blob entry in a short transaction
		err = b.repos.WithTx(ctx, func(ctx context.Context, tx *sql.Tx) error {
			blob := &database.Blob{
				ID:        "test-blob-" + blobHash[:8],
				Hash:      blobHash,
				CreatedTS: time.Now(),
			}
			return b.repos.Blobs.Create(ctx, tx, blob)
		})
		if err != nil {
			return "", err
		}
		blobCount++
		blobSize += chunk.Size

		// Create blob-chunk mapping in a short transaction
		err = b.repos.WithTx(ctx, func(ctx context.Context, tx *sql.Tx) error {
			blobChunk := &database.BlobChunk{
				BlobID:    "test-blob-" + blobHash[:8],
				ChunkHash: chunkHash,
				Offset:    0,
				Length:    chunk.Size,
			}
			return b.repos.BlobChunks.Create(ctx, tx, blobChunk)
		})
		if err != nil {
			return "", err
		}

		// Add blob to snapshot in a short transaction
		err = b.repos.WithTx(ctx, func(ctx context.Context, tx *sql.Tx) error {
			return b.repos.Snapshots.AddBlob(ctx, tx, snapshotID, "test-blob-"+blobHash[:8], blobHash)
		})
		if err != nil {
			return "", err
		}
	}

	// Update snapshot with final counts
	err = b.repos.WithTx(ctx, func(ctx context.Context, tx *sql.Tx) error {
		return b.repos.Snapshots.UpdateCounts(ctx, tx, snapshotID, fileCount, chunkCount, blobCount, totalSize, blobSize)
	})
	if err != nil {
		return "", err
	}

	return snapshotID, nil
}
func calculateHash(data []byte) string {
	h := sha256.New()
	h.Write(data)
	return fmt.Sprintf("%x", h.Sum(nil))
}

func generateLargeFileContent(size int) []byte {
	data := make([]byte, size)
	// Fill with a pattern that changes every chunk to avoid deduplication
	for i := 0; i < size; i++ {
		chunkNum := i / defaultChunkSize
		data[i] = byte((i + chunkNum) % 256)
	}
	return data
}

const defaultChunkSize = 1024 * 1024 // 1MB chunks
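// TestGenerateLargeFileContentVariesPerChunk spot-checks the property that
// generateLargeFileContent claims: the pattern shifts at every chunk
// boundary, so consecutive fixed-size chunks start with different bytes and
// will not deduplicate against each other.
func TestGenerateLargeFileContentVariesPerChunk(t *testing.T) {
	data := generateLargeFileContent(2 * defaultChunkSize)
	if data[0] == data[defaultChunkSize] {
		t.Error("Expected consecutive chunks to start with different bytes")
	}
}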