vaultik/internal/database/chunk_files_test.go
sneak 78af626759 Major refactoring: UUID-based storage, streaming architecture, and CLI improvements
This commit represents a significant architectural overhaul of vaultik:

Database Schema Changes:
- Switch files table to use UUID primary keys instead of path-based keys
- Add UUID primary keys to blobs table for immediate chunk association
- Update all foreign key relationships to use UUIDs
- Add comprehensive schema documentation in DATAMODEL.md
- Add SQLite busy timeout handling for concurrent operations (sketched below)
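
A busy timeout is what lets concurrent readers and writers wait briefly instead of failing immediately with SQLITE_BUSY. Below is a minimal sketch of how such a timeout can be applied when opening the database with database/sql; the helper name and the modernc.org/sqlite driver choice are assumptions for illustration, not vaultik's actual code.

package main

import (
	"database/sql"
	"fmt"
	"log"
	"time"

	_ "modernc.org/sqlite" // driver choice is an assumption; any SQLite driver works
)

// openWithBusyTimeout opens the SQLite file and tells SQLite to wait up to
// `wait` for a competing writer before giving up with SQLITE_BUSY.
func openWithBusyTimeout(path string, wait time.Duration) (*sql.DB, error) {
	db, err := sql.Open("sqlite", path)
	if err != nil {
		return nil, err
	}
	// PRAGMA busy_timeout is standard SQLite; the value is in milliseconds.
	if _, err := db.Exec(fmt.Sprintf("PRAGMA busy_timeout = %d", wait.Milliseconds())); err != nil {
		_ = db.Close()
		return nil, err
	}
	return db, nil
}

func main() {
	db, err := openWithBusyTimeout("index.sqlite", 5*time.Second)
	if err != nil {
		log.Fatal(err)
	}
	defer func() { _ = db.Close() }()
}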

Streaming and Performance Improvements:
- Implement true streaming blob packing without intermediate storage (see the sketch after this list)
- Add streaming chunk processing to reduce memory usage
- Improve progress reporting with real-time metrics
- Add upload metrics tracking in new uploads table
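
Streaming blob packing of this kind generally means piping compressed chunk data straight into the uploader rather than staging a finished blob in memory or on disk. The sketch below shows the general io.Pipe pattern only; the uploader interface and function names are invented for illustration and are not vaultik's API.

package main

import (
	"bytes"
	"compress/gzip"
	"fmt"
	"io"
)

// uploader is a stand-in for an object-store client that consumes a stream.
type uploader interface {
	Put(key string, body io.Reader) error
}

// packAndUpload compresses chunks and streams them into the uploader without
// ever holding the finished blob in memory or writing it to a temp file.
func packAndUpload(up uploader, key string, chunks <-chan []byte) error {
	pr, pw := io.Pipe()
	go func() {
		zw := gzip.NewWriter(pw)
		for c := range chunks {
			if _, err := zw.Write(c); err != nil {
				pw.CloseWithError(err)
				return
			}
		}
		// Flush the gzip trailer, then signal EOF (or the close error) to the reader.
		pw.CloseWithError(zw.Close())
	}()
	return up.Put(key, pr) // Put reads from pr while the goroutine produces data
}

// discardUploader just drains the stream; it exists so the example runs.
type discardUploader struct{}

func (discardUploader) Put(key string, body io.Reader) error {
	n, err := io.Copy(io.Discard, body)
	fmt.Printf("uploaded %s (%d compressed bytes)\n", key, n)
	return err
}

func main() {
	chunks := make(chan []byte, 2)
	chunks <- bytes.Repeat([]byte("a"), 1024)
	chunks <- bytes.Repeat([]byte("b"), 1024)
	close(chunks)
	if err := packAndUpload(discardUploader{}, "blobs/example", chunks); err != nil {
		panic(err)
	}
}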

CLI Refactoring:
- Restructure CLI to use subcommands: snapshot create/list/purge/verify
- Add store info command for S3 configuration display
- Add custom duration parser supporting days/weeks/months/years (see the sketch after this list)
- Remove old backup.go in favor of enhanced snapshot.go
- Add --cron flag for silent operation
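
Go's time.ParseDuration tops out at hours, so retention periods expressed in days, weeks, months, or years need a small wrapper. The following is a minimal sketch of one way to do it; the suffix set ("d", "w", "mo", "y"), the approximations, and the function name are assumptions for illustration.

package main

import (
	"fmt"
	"strconv"
	"strings"
	"time"
)

// parseRetention accepts everything time.ParseDuration accepts (e.g. "36h")
// plus day/week/month/year suffixes. Months and years are approximated.
func parseRetention(s string) (time.Duration, error) {
	units := map[string]time.Duration{
		"d":  24 * time.Hour,
		"w":  7 * 24 * time.Hour,
		"mo": 30 * 24 * time.Hour,  // month approximated as 30 days
		"y":  365 * 24 * time.Hour, // year approximated as 365 days
	}
	for suffix, unit := range units {
		if strings.HasSuffix(s, suffix) {
			n, err := strconv.Atoi(strings.TrimSuffix(s, suffix))
			if err != nil {
				return 0, fmt.Errorf("invalid duration %q: %w", s, err)
			}
			return time.Duration(n) * unit, nil
		}
	}
	return time.ParseDuration(s)
}

func main() {
	for _, s := range []string{"30d", "8w", "6mo", "2y", "36h"} {
		d, err := parseRetention(s)
		if err != nil {
			panic(err)
		}
		fmt.Printf("%-4s -> %v\n", s, d)
	}
}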

Configuration Changes:
- Remove unused index_prefix configuration option
- Add support for snapshot pruning retention policies
- Improve configuration validation and error messages

Testing Improvements:
- Add comprehensive repository tests with edge cases
- Add cascade delete debugging tests
- Fix concurrent operation tests to use SQLite busy timeout
- Remove tolerance for SQLITE_BUSY errors in tests

Documentation:
- Add MIT LICENSE file
- Update README with new command structure
- Add comprehensive DATAMODEL.md explaining database schema
- Update DESIGN.md with UUID-based architecture

Other Changes:
- Add test-config.yml for testing
- Update Makefile with better test output formatting
- Fix various race conditions in concurrent operations
- Improve error handling throughout
2025-07-22 14:56:44 +02:00

package database

import (
	"context"
	"testing"
	"time"
)

func TestChunkFileRepository(t *testing.T) {
	db, cleanup := setupTestDB(t)
	defer cleanup()

	ctx := context.Background()
	repo := NewChunkFileRepository(db)
	fileRepo := NewFileRepository(db)

	// Create test files first
	testTime := time.Now().Truncate(time.Second)
	file1 := &File{
		Path:       "/file1.txt",
		MTime:      testTime,
		CTime:      testTime,
		Size:       1024,
		Mode:       0644,
		UID:        1000,
		GID:        1000,
		LinkTarget: "",
	}
	err := fileRepo.Create(ctx, nil, file1)
	if err != nil {
		t.Fatalf("failed to create file1: %v", err)
	}

	file2 := &File{
		Path:       "/file2.txt",
		MTime:      testTime,
		CTime:      testTime,
		Size:       1024,
		Mode:       0644,
		UID:        1000,
		GID:        1000,
		LinkTarget: "",
	}
	err = fileRepo.Create(ctx, nil, file2)
	if err != nil {
		t.Fatalf("failed to create file2: %v", err)
	}

	// Test Create
	cf1 := &ChunkFile{
		ChunkHash:  "chunk1",
		FileID:     file1.ID,
		FileOffset: 0,
		Length:     1024,
	}
	err = repo.Create(ctx, nil, cf1)
	if err != nil {
		t.Fatalf("failed to create chunk file: %v", err)
	}

	// Add same chunk in different file (deduplication scenario)
	cf2 := &ChunkFile{
		ChunkHash:  "chunk1",
		FileID:     file2.ID,
		FileOffset: 2048,
		Length:     1024,
	}
	err = repo.Create(ctx, nil, cf2)
	if err != nil {
		t.Fatalf("failed to create second chunk file: %v", err)
	}

	// Test GetByChunkHash
	chunkFiles, err := repo.GetByChunkHash(ctx, "chunk1")
	if err != nil {
		t.Fatalf("failed to get chunk files: %v", err)
	}
	if len(chunkFiles) != 2 {
		t.Errorf("expected 2 files for chunk, got %d", len(chunkFiles))
	}

	// Verify both files are returned
	foundFile1 := false
	foundFile2 := false
	for _, cf := range chunkFiles {
		if cf.FileID == file1.ID && cf.FileOffset == 0 {
			foundFile1 = true
		}
		if cf.FileID == file2.ID && cf.FileOffset == 2048 {
			foundFile2 = true
		}
	}
	if !foundFile1 || !foundFile2 {
		t.Error("not all expected files found")
	}

	// Test GetByFileID
	chunkFiles, err = repo.GetByFileID(ctx, file1.ID)
	if err != nil {
		t.Fatalf("failed to get chunks by file ID: %v", err)
	}
	if len(chunkFiles) != 1 {
		t.Errorf("expected 1 chunk for file, got %d", len(chunkFiles))
	}
	if chunkFiles[0].ChunkHash != "chunk1" {
		t.Errorf("wrong chunk hash: expected chunk1, got %s", chunkFiles[0].ChunkHash)
	}

	// Test duplicate insert (should be idempotent)
	err = repo.Create(ctx, nil, cf1)
	if err != nil {
		t.Fatalf("failed to create duplicate chunk file: %v", err)
	}
}

func TestChunkFileRepositoryComplexDeduplication(t *testing.T) {
	db, cleanup := setupTestDB(t)
	defer cleanup()

	ctx := context.Background()
	repo := NewChunkFileRepository(db)
	fileRepo := NewFileRepository(db)

	// Create test files
	testTime := time.Now().Truncate(time.Second)
	file1 := &File{Path: "/file1.txt", MTime: testTime, CTime: testTime, Size: 3072, Mode: 0644, UID: 1000, GID: 1000}
	file2 := &File{Path: "/file2.txt", MTime: testTime, CTime: testTime, Size: 3072, Mode: 0644, UID: 1000, GID: 1000}
	file3 := &File{Path: "/file3.txt", MTime: testTime, CTime: testTime, Size: 2048, Mode: 0644, UID: 1000, GID: 1000}
	if err := fileRepo.Create(ctx, nil, file1); err != nil {
		t.Fatalf("failed to create file1: %v", err)
	}
	if err := fileRepo.Create(ctx, nil, file2); err != nil {
		t.Fatalf("failed to create file2: %v", err)
	}
	if err := fileRepo.Create(ctx, nil, file3); err != nil {
		t.Fatalf("failed to create file3: %v", err)
	}

	// Simulate a scenario where multiple files share chunks
	// File1: chunk1, chunk2, chunk3
	// File2: chunk2, chunk3, chunk4
	// File3: chunk1, chunk4
	chunkFiles := []ChunkFile{
		// File1
		{ChunkHash: "chunk1", FileID: file1.ID, FileOffset: 0, Length: 1024},
		{ChunkHash: "chunk2", FileID: file1.ID, FileOffset: 1024, Length: 1024},
		{ChunkHash: "chunk3", FileID: file1.ID, FileOffset: 2048, Length: 1024},
		// File2
		{ChunkHash: "chunk2", FileID: file2.ID, FileOffset: 0, Length: 1024},
		{ChunkHash: "chunk3", FileID: file2.ID, FileOffset: 1024, Length: 1024},
		{ChunkHash: "chunk4", FileID: file2.ID, FileOffset: 2048, Length: 1024},
		// File3
		{ChunkHash: "chunk1", FileID: file3.ID, FileOffset: 0, Length: 1024},
		{ChunkHash: "chunk4", FileID: file3.ID, FileOffset: 1024, Length: 1024},
	}
	for _, cf := range chunkFiles {
		err := repo.Create(ctx, nil, &cf)
		if err != nil {
			t.Fatalf("failed to create chunk file: %v", err)
		}
	}

	// Test chunk1 (used by file1 and file3)
	files, err := repo.GetByChunkHash(ctx, "chunk1")
	if err != nil {
		t.Fatalf("failed to get files for chunk1: %v", err)
	}
	if len(files) != 2 {
		t.Errorf("expected 2 files for chunk1, got %d", len(files))
	}

	// Test chunk2 (used by file1 and file2)
	files, err = repo.GetByChunkHash(ctx, "chunk2")
	if err != nil {
		t.Fatalf("failed to get files for chunk2: %v", err)
	}
	if len(files) != 2 {
		t.Errorf("expected 2 files for chunk2, got %d", len(files))
	}

	// Test file2 chunks
	chunks, err := repo.GetByFileID(ctx, file2.ID)
	if err != nil {
		t.Fatalf("failed to get chunks for file2: %v", err)
	}
	if len(chunks) != 3 {
		t.Errorf("expected 3 chunks for file2, got %d", len(chunks))
	}
}