vaultik/internal/database/repository_edge_cases_test.go
sneak 78af626759 Major refactoring: UUID-based storage, streaming architecture, and CLI improvements
This commit represents a significant architectural overhaul of vaultik:

Database Schema Changes:
- Switch files table to use UUID primary keys instead of path-based keys
- Add UUID primary keys to blobs table for immediate chunk association
- Update all foreign key relationships to use UUIDs
- Add comprehensive schema documentation in DATAMODEL.md
- Add SQLite busy timeout handling for concurrent operations

Streaming and Performance Improvements:
- Implement true streaming blob packing without intermediate storage
- Add streaming chunk processing to reduce memory usage
- Improve progress reporting with real-time metrics
- Add upload metrics tracking in new uploads table

CLI Refactoring:
- Restructure CLI to use subcommands: snapshot create/list/purge/verify
- Add store info command for S3 configuration display
- Add custom duration parser supporting days/weeks/months/years
- Remove old backup.go in favor of enhanced snapshot.go
- Add --cron flag for silent operation

Configuration Changes:
- Remove unused index_prefix configuration option
- Add support for snapshot pruning retention policies
- Improve configuration validation and error messages

Testing Improvements:
- Add comprehensive repository tests with edge cases
- Add cascade delete debugging tests
- Fix concurrent operation tests to use SQLite busy timeout
- Remove tolerance for SQLITE_BUSY errors in tests

Documentation:
- Add MIT LICENSE file
- Update README with new command structure
- Add comprehensive DATAMODEL.md explaining database schema
- Update DESIGN.md with UUID-based architecture

Other Changes:
- Add test-config.yml for testing
- Update Makefile with better test output formatting
- Fix various race conditions in concurrent operations
- Improve error handling throughout
2025-07-22 14:56:44 +02:00

544 lines
13 KiB
Go

package database
import (
"context"
"fmt"
"strings"
"testing"
"time"
)
// TestFileRepositoryEdgeCases exercises boundary inputs for the file
// repository: empty paths, very long paths, unicode/special characters,
// zero-size files, and symlinks carrying a link target.
func TestFileRepositoryEdgeCases(t *testing.T) {
	db, cleanup := setupTestDB(t)
	defer cleanup()
	ctx := context.Background()
	repo := NewFileRepository(db)

	cases := []struct {
		name    string
		file    *File
		wantErr bool
		errMsg  string
	}{
		{
			name: "empty path",
			file: &File{
				Path:  "",
				MTime: time.Now(),
				CTime: time.Now(),
				Size:  1024,
				Mode:  0644,
				UID:   1000,
				GID:   1000,
			},
			wantErr: false, // Empty strings are allowed, only NULL is not allowed
		},
		{
			name: "very long path",
			file: &File{
				Path:  "/" + strings.Repeat("a", 4096),
				MTime: time.Now(),
				CTime: time.Now(),
				Size:  1024,
				Mode:  0644,
				UID:   1000,
				GID:   1000,
			},
			wantErr: false,
		},
		{
			name: "path with special characters",
			file: &File{
				Path:  "/test/file with spaces and 特殊文字.txt",
				MTime: time.Now(),
				CTime: time.Now(),
				Size:  1024,
				Mode:  0644,
				UID:   1000,
				GID:   1000,
			},
			wantErr: false,
		},
		{
			name: "zero size file",
			file: &File{
				Path:  "/empty.txt",
				MTime: time.Now(),
				CTime: time.Now(),
				Size:  0,
				Mode:  0644,
				UID:   1000,
				GID:   1000,
			},
			wantErr: false,
		},
		{
			name: "symlink with target",
			file: &File{
				Path:       "/link",
				MTime:      time.Now(),
				CTime:      time.Now(),
				Size:       0,
				Mode:       0777 | 0120000, // symlink mode
				UID:        1000,
				GID:        1000,
				LinkTarget: "/target",
			},
			wantErr: false,
		},
	}

	for idx, tc := range cases {
		tc := tc
		t.Run(tc.name, func(t *testing.T) {
			// Uniquify non-empty paths so repeated cases cannot trip the
			// UNIQUE constraint on the path column.
			if p := tc.file.Path; p != "" {
				tc.file.Path = fmt.Sprintf("%s_%d_%d", p, idx, time.Now().UnixNano())
			}
			createErr := repo.Create(ctx, nil, tc.file)
			if gotErr := createErr != nil; gotErr != tc.wantErr {
				t.Errorf("Create() error = %v, wantErr %v", createErr, tc.wantErr)
			}
			if createErr != nil && tc.errMsg != "" && !strings.Contains(createErr.Error(), tc.errMsg) {
				t.Errorf("Create() error = %v, want error containing %q", createErr, tc.errMsg)
			}
		})
	}
}
// TestDuplicateHandling tests handling of duplicate entries across the
// three repositories: files (UPSERT on path), chunks (ON CONFLICT DO
// NOTHING on hash), and file-chunk mappings (idempotent create).
func TestDuplicateHandling(t *testing.T) {
	db, cleanup := setupTestDB(t)
	defer cleanup()
	ctx := context.Background()
	repos := NewRepositories(db)
	// Test duplicate file paths - Create uses UPSERT logic
	t.Run("duplicate file paths", func(t *testing.T) {
		file1 := &File{
			Path:  "/duplicate.txt",
			MTime: time.Now(),
			CTime: time.Now(),
			Size:  1024,
			Mode:  0644,
			UID:   1000,
			GID:   1000,
		}
		// Same path as file1 but with different metadata; after the second
		// Create the stored row should reflect file2's values.
		file2 := &File{
			Path:  "/duplicate.txt", // Same path
			MTime: time.Now().Add(time.Hour),
			CTime: time.Now().Add(time.Hour),
			Size:  2048,
			Mode:  0644,
			UID:   1000,
			GID:   1000,
		}
		err := repos.Files.Create(ctx, nil, file1)
		if err != nil {
			t.Fatalf("failed to create file1: %v", err)
		}
		// Remember the first-assigned UUID so we can report whether the
		// upsert replaced it or kept it.
		originalID := file1.ID
		// Create with same path should update the existing record (UPSERT behavior)
		err = repos.Files.Create(ctx, nil, file2)
		if err != nil {
			t.Fatalf("failed to create file2: %v", err)
		}
		// Verify the file was updated, not duplicated
		retrievedFile, err := repos.Files.GetByPath(ctx, "/duplicate.txt")
		if err != nil {
			t.Fatalf("failed to retrieve file: %v", err)
		}
		// The file should have been updated with file2's data
		if retrievedFile.Size != 2048 {
			t.Errorf("expected size 2048, got %d", retrievedFile.Size)
		}
		// ID might be different due to the UPSERT
		// NOTE(review): the condition compares against file2.ID while the
		// log shows originalID -> retrievedFile.ID; confirm which identity
		// the upsert is meant to preserve.
		if retrievedFile.ID != file2.ID {
			t.Logf("File ID changed from %s to %s during upsert", originalID, retrievedFile.ID)
		}
	})
	// Test duplicate chunk hashes
	t.Run("duplicate chunk hashes", func(t *testing.T) {
		chunk := &Chunk{
			ChunkHash: "duplicate-chunk",
			SHA256:    "duplicate-sha",
			Size:      1024,
		}
		err := repos.Chunks.Create(ctx, nil, chunk)
		if err != nil {
			t.Fatalf("failed to create chunk: %v", err)
		}
		// Creating the same chunk again should be idempotent (ON CONFLICT DO NOTHING)
		err = repos.Chunks.Create(ctx, nil, chunk)
		if err != nil {
			t.Errorf("duplicate chunk creation should be idempotent, got error: %v", err)
		}
	})
	// Test duplicate file-chunk mappings
	t.Run("duplicate file-chunk mappings", func(t *testing.T) {
		// A file and a chunk must both exist before a mapping can
		// satisfy its foreign-key constraints.
		file := &File{
			Path:  "/test-dup-fc.txt",
			MTime: time.Now(),
			CTime: time.Now(),
			Size:  1024,
			Mode:  0644,
			UID:   1000,
			GID:   1000,
		}
		err := repos.Files.Create(ctx, nil, file)
		if err != nil {
			t.Fatal(err)
		}
		chunk := &Chunk{
			ChunkHash: "test-chunk-dup",
			SHA256:    "test-sha-dup",
			Size:      1024,
		}
		err = repos.Chunks.Create(ctx, nil, chunk)
		if err != nil {
			t.Fatal(err)
		}
		fc := &FileChunk{
			FileID:    file.ID,
			Idx:       0,
			ChunkHash: chunk.ChunkHash,
		}
		err = repos.FileChunks.Create(ctx, nil, fc)
		if err != nil {
			t.Fatal(err)
		}
		// Creating the same mapping again should be idempotent
		err = repos.FileChunks.Create(ctx, nil, fc)
		if err != nil {
			t.Error("file-chunk creation should be idempotent")
		}
	})
}
// TestNullHandling verifies that optional columns round-trip correctly:
// an empty link target, an unset snapshot completion time, and an unset
// blob upload timestamp must all come back as their zero/nil values.
func TestNullHandling(t *testing.T) {
	db, cleanup := setupTestDB(t)
	defer cleanup()
	ctx := context.Background()
	repos := NewRepositories(db)

	// A regular file has no link target; it should be stored as NULL
	// and read back as the empty string.
	t.Run("file without link target", func(t *testing.T) {
		f := &File{
			Path:       "/regular.txt",
			MTime:      time.Now(),
			CTime:      time.Now(),
			Size:       1024,
			Mode:       0644,
			UID:        1000,
			GID:        1000,
			LinkTarget: "", // Should be stored as NULL
		}
		if err := repos.Files.Create(ctx, nil, f); err != nil {
			t.Fatal(err)
		}
		got, err := repos.Files.GetByID(ctx, f.ID)
		if err != nil {
			t.Fatal(err)
		}
		if got.LinkTarget != "" {
			t.Errorf("expected empty link target, got %q", got.LinkTarget)
		}
	})

	// An in-progress snapshot keeps completed_at as NULL.
	t.Run("incomplete snapshot", func(t *testing.T) {
		snap := &Snapshot{
			ID:          "incomplete-test",
			Hostname:    "test-host",
			StartedAt:   time.Now(),
			CompletedAt: nil, // Should remain NULL until completed
		}
		if err := repos.Snapshots.Create(ctx, nil, snap); err != nil {
			t.Fatal(err)
		}
		got, err := repos.Snapshots.GetByID(ctx, snap.ID)
		if err != nil {
			t.Fatal(err)
		}
		if got.CompletedAt != nil {
			t.Error("expected nil CompletedAt for incomplete snapshot")
		}
	})

	// A blob that has not been uploaded keeps uploaded_ts as NULL.
	t.Run("blob not uploaded", func(t *testing.T) {
		b := &Blob{
			ID:         "not-uploaded",
			Hash:       "test-hash",
			CreatedTS:  time.Now(),
			UploadedTS: nil, // Not uploaded yet
		}
		if err := repos.Blobs.Create(ctx, nil, b); err != nil {
			t.Fatal(err)
		}
		got, err := repos.Blobs.GetByID(ctx, b.ID)
		if err != nil {
			t.Fatal(err)
		}
		if got.UploadedTS != nil {
			t.Error("expected nil UploadedTS for non-uploaded blob")
		}
	})
}
// TestLargeDatasets tests operations with large amounts of data:
// bulk file creation, prefix listing, and orphaned-file cleanup, with
// rough timings logged for each phase. Skipped under -short.
func TestLargeDatasets(t *testing.T) {
	if testing.Short() {
		t.Skip("skipping large dataset test in short mode")
	}
	db, cleanup := setupTestDB(t)
	defer cleanup()
	ctx := context.Background()
	repos := NewRepositories(db)

	// Create a snapshot to anchor half of the files; the other half are
	// deliberately left orphaned so DeleteOrphaned has work to do.
	snapshot := &Snapshot{
		ID:        "large-dataset-test",
		Hostname:  "test-host",
		StartedAt: time.Now(),
	}
	err := repos.Snapshots.Create(ctx, nil, snapshot)
	if err != nil {
		t.Fatal(err)
	}

	// Create many files.
	// Note: the previous version collected file IDs into a slice that was
	// never read; that dead storage has been removed.
	const fileCount = 1000
	t.Run("create many files", func(t *testing.T) {
		start := time.Now()
		for i := 0; i < fileCount; i++ {
			file := &File{
				Path:  fmt.Sprintf("/large/file%05d.txt", i),
				MTime: time.Now(),
				CTime: time.Now(),
				Size:  int64(i * 1024),
				Mode:  0644,
				UID:   uint32(1000 + (i % 10)),
				GID:   uint32(1000 + (i % 10)),
			}
			err := repos.Files.Create(ctx, nil, file)
			if err != nil {
				t.Fatalf("failed to create file %d: %v", i, err)
			}
			// Add only even-numbered files to the snapshot; the odd ones
			// become orphans for the cleanup phase below.
			if i%2 == 0 {
				err = repos.Snapshots.AddFileByID(ctx, nil, snapshot.ID, file.ID)
				if err != nil {
					t.Fatal(err)
				}
			}
		}
		t.Logf("Created %d files in %v", fileCount, time.Since(start))
	})

	// Test ListByPrefix performance.
	t.Run("list by prefix performance", func(t *testing.T) {
		start := time.Now()
		files, err := repos.Files.ListByPrefix(ctx, "/large/")
		if err != nil {
			t.Fatal(err)
		}
		if len(files) != fileCount {
			t.Errorf("expected %d files, got %d", fileCount, len(files))
		}
		t.Logf("Listed %d files in %v", len(files), time.Since(start))
	})

	// Test orphaned cleanup performance.
	t.Run("orphaned cleanup performance", func(t *testing.T) {
		start := time.Now()
		err := repos.Files.DeleteOrphaned(ctx)
		if err != nil {
			t.Fatal(err)
		}
		t.Logf("Cleaned up orphaned files in %v", time.Since(start))
		// Only the even-numbered (snapshot-attached) half should survive.
		files, err := repos.Files.ListByPrefix(ctx, "/large/")
		if err != nil {
			t.Fatal(err)
		}
		if len(files) != fileCount/2 {
			t.Errorf("expected %d files after cleanup, got %d", fileCount/2, len(files))
		}
	})
}
// TestErrorPropagation tests that errors are properly propagated:
// lookups of missing rows return (nil, nil) rather than an error, while
// foreign-key violations surface a FOREIGN KEY error to the caller.
func TestErrorPropagation(t *testing.T) {
	db, cleanup := setupTestDB(t)
	defer cleanup()
	ctx := context.Background()
	repos := NewRepositories(db)

	// Test GetByID with non-existent ID.
	t.Run("GetByID non-existent", func(t *testing.T) {
		file, err := repos.Files.GetByID(ctx, "non-existent-uuid")
		if err != nil {
			t.Errorf("GetByID should not return error for non-existent ID, got: %v", err)
		}
		if file != nil {
			t.Error("expected nil file for non-existent ID")
		}
	})

	// Test GetByPath with non-existent path.
	t.Run("GetByPath non-existent", func(t *testing.T) {
		file, err := repos.Files.GetByPath(ctx, "/non/existent/path.txt")
		if err != nil {
			t.Errorf("GetByPath should not return error for non-existent path, got: %v", err)
		}
		if file != nil {
			t.Error("expected nil file for non-existent path")
		}
	})

	// Test invalid foreign key reference.
	t.Run("invalid foreign key", func(t *testing.T) {
		fc := &FileChunk{
			FileID:    "non-existent-file-id",
			Idx:       0,
			ChunkHash: "some-chunk",
		}
		err := repos.FileChunks.Create(ctx, nil, fc)
		// Must be Fatal, not Error: the previous version continued past a
		// nil error and dereferenced err.Error() below, panicking with a
		// nil-pointer dereference instead of reporting a test failure.
		if err == nil {
			t.Fatal("expected error for invalid foreign key")
		}
		if !strings.Contains(err.Error(), "FOREIGN KEY") {
			t.Errorf("expected foreign key error, got: %v", err)
		}
	})
}
// TestQueryInjection tests that the system is safe from SQL injection:
// hostile strings stored as file paths must be treated as plain data,
// never executed, and the files table must survive every attempt.
func TestQueryInjection(t *testing.T) {
	db, cleanup := setupTestDB(t)
	defer cleanup()
	ctx := context.Background()
	repos := NewRepositories(db)

	// Classic injection payloads targeting DROP/DELETE and tautologies.
	injectionTests := []string{
		"'; DROP TABLE files; --",
		"' OR '1'='1",
		"'; DELETE FROM files WHERE '1'='1'; --",
		`test'); DROP TABLE files; --`,
	}
	for _, injection := range injectionTests {
		// Name each subtest after its payload so a failure identifies
		// which injection string triggered it (previously all subtests
		// shared the name "injection attempt").
		t.Run(injection, func(t *testing.T) {
			// Try injection in file path.
			file := &File{
				Path:  injection,
				MTime: time.Now(),
				CTime: time.Now(),
				Size:  1024,
				Mode:  0644,
				UID:   1000,
				GID:   1000,
			}
			// Insert may succeed (payload stored as a normal string) or
			// fail with a constraint error — either is fine, as long as
			// the injected SQL is never executed.
			_ = repos.Files.Create(ctx, nil, file)
			// Verify the files table still exists and is queryable.
			var count int
			err := db.conn.QueryRow("SELECT COUNT(*) FROM files").Scan(&count)
			if err != nil {
				// Include the underlying error; the old message dropped it.
				t.Fatalf("files table was damaged by injection: %v", err)
			}
		})
	}
}
// TestTimezoneHandling tests that times are properly handled in UTC:
// a timestamp written in a non-UTC zone must read back as the same
// instant, normalized to UTC.
func TestTimezoneHandling(t *testing.T) {
	db, cleanup := setupTestDB(t)
	defer cleanup()
	ctx := context.Background()
	repos := NewRepositories(db)

	// Pick a non-UTC zone; skip if the host has no tzdata for it.
	nyc, err := time.LoadLocation("America/New_York")
	if err != nil {
		t.Skip("timezone not available")
	}
	// Truncate to whole seconds: the store keeps Unix timestamps and
	// would otherwise drop sub-second precision, breaking Equal below.
	stamp := time.Now().In(nyc).Truncate(time.Second)

	record := &File{
		Path:  "/timezone-test.txt",
		MTime: stamp,
		CTime: stamp,
		Size:  1024,
		Mode:  0644,
		UID:   1000,
		GID:   1000,
	}
	if err = repos.Files.Create(ctx, nil, record); err != nil {
		t.Fatal(err)
	}

	// Read the row back and confirm both instant and location.
	got, err := repos.Files.GetByID(ctx, record.ID)
	if err != nil {
		t.Fatal(err)
	}
	// Same instant in time, regardless of zone representation.
	if !got.MTime.Equal(stamp) {
		t.Error("time was not preserved correctly")
	}
	// The stored representation must come back normalized to UTC.
	if got.MTime.Location() != time.UTC {
		t.Error("retrieved time is not in UTC")
	}
}