Major refactoring: UUID-based storage, streaming architecture, and CLI improvements
This commit represents a significant architectural overhaul of vaultik.

Database Schema Changes:
- Switch files table to use UUID primary keys instead of path-based keys
- Add UUID primary keys to blobs table for immediate chunk association
- Update all foreign key relationships to use UUIDs
- Add comprehensive schema documentation in DATAMODEL.md
- Add SQLite busy timeout handling for concurrent operations (see the busy-timeout sketch after this message)

Streaming and Performance Improvements:
- Implement true streaming blob packing without intermediate storage
- Add streaming chunk processing to reduce memory usage
- Improve progress reporting with real-time metrics
- Add upload metrics tracking in new uploads table

CLI Refactoring:
- Restructure CLI to use subcommands: snapshot create/list/purge/verify
- Add store info command for S3 configuration display
- Add custom duration parser supporting days/weeks/months/years (see the parser sketch after this message)
- Remove old backup.go in favor of enhanced snapshot.go
- Add --cron flag for silent operation

Configuration Changes:
- Remove unused index_prefix configuration option
- Add support for snapshot pruning retention policies
- Improve configuration validation and error messages

Testing Improvements:
- Add comprehensive repository tests with edge cases
- Add cascade delete debugging tests
- Fix concurrent operation tests to use SQLite busy timeout
- Remove tolerance for SQLITE_BUSY errors in tests

Documentation:
- Add MIT LICENSE file
- Update README with new command structure
- Add comprehensive DATAMODEL.md explaining database schema
- Update DESIGN.md with UUID-based architecture

Other Changes:
- Add test-config.yml for testing
- Update Makefile with better test output formatting
- Fix various race conditions in concurrent operations
- Improve error handling throughout
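The SQLite busy-timeout change itself is not shown in this diff, so the following is only a minimal sketch of the general technique, assuming a database/sql setup with the pure-Go modernc.org/sqlite driver; the driver choice and the openWithBusyTimeout helper are illustrative assumptions, not vaultik's actual code.

package main

import (
	"database/sql"
	"log"

	_ "modernc.org/sqlite" // assumed driver; vaultik's actual driver is not shown in this diff
)

// openWithBusyTimeout is a hypothetical helper: it opens a SQLite database
// and sets busy_timeout so concurrent writers wait (here up to 5 seconds)
// instead of failing immediately with SQLITE_BUSY when the database is locked.
func openWithBusyTimeout(path string) (*sql.DB, error) {
	db, err := sql.Open("sqlite", path)
	if err != nil {
		return nil, err
	}
	if _, err := db.Exec("PRAGMA busy_timeout = 5000"); err != nil {
		_ = db.Close()
		return nil, err
	}
	return db, nil
}

func main() {
	db, err := openWithBusyTimeout("vaultik.db")
	if err != nil {
		log.Fatal(err)
	}
	defer db.Close()
}

Likewise, the custom duration parser is only named above; a plausible sketch of a parser accepting d/w/m/y suffixes could look like the following, where the ParseRetention name and the 30-day month / 365-day year approximations are assumptions rather than vaultik's implementation.

package main

import (
	"fmt"
	"strconv"
	"strings"
	"time"
)

// ParseRetention (hypothetical name) parses strings such as "7d", "4w",
// "6m" or "1y" into a time.Duration; time.ParseDuration stops at hours,
// which is why retention policies need a small custom parser like this.
func ParseRetention(s string) (time.Duration, error) {
	s = strings.TrimSpace(s)
	if len(s) < 2 {
		return 0, fmt.Errorf("invalid duration %q", s)
	}
	n, err := strconv.Atoi(s[:len(s)-1])
	if err != nil {
		return 0, fmt.Errorf("invalid duration %q: %w", s, err)
	}
	day := 24 * time.Hour
	switch s[len(s)-1] {
	case 'd':
		return time.Duration(n) * day, nil
	case 'w':
		return time.Duration(n) * 7 * day, nil
	case 'm':
		return time.Duration(n) * 30 * day, nil // approximate month
	case 'y':
		return time.Duration(n) * 365 * day, nil // approximate year
	default:
		return 0, fmt.Errorf("unknown unit %q in %q", s[len(s)-1], s)
	}
}

func main() {
	d, _ := ParseRetention("2w")
	fmt.Println(d) // 336h0m0s
}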
internal/database/repository_edge_cases_test.go · 543 lines added · Normal file

@@ -0,0 +1,543 @@
package database

import (
	"context"
	"fmt"
	"strings"
	"testing"
	"time"
)

// TestFileRepositoryEdgeCases tests edge cases for file repository
func TestFileRepositoryEdgeCases(t *testing.T) {
	db, cleanup := setupTestDB(t)
	defer cleanup()

	ctx := context.Background()
	repo := NewFileRepository(db)

	tests := []struct {
		name    string
		file    *File
		wantErr bool
		errMsg  string
	}{
		{
			name: "empty path",
			file: &File{
				Path:  "",
				MTime: time.Now(),
				CTime: time.Now(),
				Size:  1024,
				Mode:  0644,
				UID:   1000,
				GID:   1000,
			},
			wantErr: false, // Empty strings are allowed, only NULL is not allowed
		},
		{
			name: "very long path",
			file: &File{
				Path:  "/" + strings.Repeat("a", 4096),
				MTime: time.Now(),
				CTime: time.Now(),
				Size:  1024,
				Mode:  0644,
				UID:   1000,
				GID:   1000,
			},
			wantErr: false,
		},
		{
			name: "path with special characters",
			file: &File{
				Path:  "/test/file with spaces and 特殊文字.txt",
				MTime: time.Now(),
				CTime: time.Now(),
				Size:  1024,
				Mode:  0644,
				UID:   1000,
				GID:   1000,
			},
			wantErr: false,
		},
		{
			name: "zero size file",
			file: &File{
				Path:  "/empty.txt",
				MTime: time.Now(),
				CTime: time.Now(),
				Size:  0,
				Mode:  0644,
				UID:   1000,
				GID:   1000,
			},
			wantErr: false,
		},
		{
			name: "symlink with target",
			file: &File{
				Path:       "/link",
				MTime:      time.Now(),
				CTime:      time.Now(),
				Size:       0,
				Mode:       0777 | 0120000, // symlink mode
				UID:        1000,
				GID:        1000,
				LinkTarget: "/target",
			},
			wantErr: false,
		},
	}

	for i, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			// Add a unique suffix to paths to avoid UNIQUE constraint violations
			if tt.file.Path != "" {
				tt.file.Path = fmt.Sprintf("%s_%d_%d", tt.file.Path, i, time.Now().UnixNano())
			}

			err := repo.Create(ctx, nil, tt.file)
			if (err != nil) != tt.wantErr {
				t.Errorf("Create() error = %v, wantErr %v", err, tt.wantErr)
			}
			if err != nil && tt.errMsg != "" && !strings.Contains(err.Error(), tt.errMsg) {
				t.Errorf("Create() error = %v, want error containing %q", err, tt.errMsg)
			}
		})
	}
}

// TestDuplicateHandling tests handling of duplicate entries
func TestDuplicateHandling(t *testing.T) {
	db, cleanup := setupTestDB(t)
	defer cleanup()

	ctx := context.Background()
	repos := NewRepositories(db)

	// Test duplicate file paths - Create uses UPSERT logic
	t.Run("duplicate file paths", func(t *testing.T) {
		file1 := &File{
			Path:  "/duplicate.txt",
			MTime: time.Now(),
			CTime: time.Now(),
			Size:  1024,
			Mode:  0644,
			UID:   1000,
			GID:   1000,
		}
		file2 := &File{
			Path:  "/duplicate.txt", // Same path
			MTime: time.Now().Add(time.Hour),
			CTime: time.Now().Add(time.Hour),
			Size:  2048,
			Mode:  0644,
			UID:   1000,
			GID:   1000,
		}

		err := repos.Files.Create(ctx, nil, file1)
		if err != nil {
			t.Fatalf("failed to create file1: %v", err)
		}
		originalID := file1.ID

		// Create with same path should update the existing record (UPSERT behavior)
		err = repos.Files.Create(ctx, nil, file2)
		if err != nil {
			t.Fatalf("failed to create file2: %v", err)
		}

		// Verify the file was updated, not duplicated
		retrievedFile, err := repos.Files.GetByPath(ctx, "/duplicate.txt")
		if err != nil {
			t.Fatalf("failed to retrieve file: %v", err)
		}

		// The file should have been updated with file2's data
		if retrievedFile.Size != 2048 {
			t.Errorf("expected size 2048, got %d", retrievedFile.Size)
		}

		// ID might be different due to the UPSERT
		if retrievedFile.ID != file2.ID {
			t.Logf("File ID changed from %s to %s during upsert", originalID, retrievedFile.ID)
		}
	})

	// Test duplicate chunk hashes
	t.Run("duplicate chunk hashes", func(t *testing.T) {
		chunk := &Chunk{
			ChunkHash: "duplicate-chunk",
			SHA256:    "duplicate-sha",
			Size:      1024,
		}

		err := repos.Chunks.Create(ctx, nil, chunk)
		if err != nil {
			t.Fatalf("failed to create chunk: %v", err)
		}

		// Creating the same chunk again should be idempotent (ON CONFLICT DO NOTHING)
		err = repos.Chunks.Create(ctx, nil, chunk)
		if err != nil {
			t.Errorf("duplicate chunk creation should be idempotent, got error: %v", err)
		}
	})

	// Test duplicate file-chunk mappings
	t.Run("duplicate file-chunk mappings", func(t *testing.T) {
		file := &File{
			Path:  "/test-dup-fc.txt",
			MTime: time.Now(),
			CTime: time.Now(),
			Size:  1024,
			Mode:  0644,
			UID:   1000,
			GID:   1000,
		}
		err := repos.Files.Create(ctx, nil, file)
		if err != nil {
			t.Fatal(err)
		}

		chunk := &Chunk{
			ChunkHash: "test-chunk-dup",
			SHA256:    "test-sha-dup",
			Size:      1024,
		}
		err = repos.Chunks.Create(ctx, nil, chunk)
		if err != nil {
			t.Fatal(err)
		}

		fc := &FileChunk{
			FileID:    file.ID,
			Idx:       0,
			ChunkHash: chunk.ChunkHash,
		}

		err = repos.FileChunks.Create(ctx, nil, fc)
		if err != nil {
			t.Fatal(err)
		}

		// Creating the same mapping again should be idempotent
		err = repos.FileChunks.Create(ctx, nil, fc)
		if err != nil {
			t.Error("file-chunk creation should be idempotent")
		}
	})
}

// TestNullHandling tests handling of NULL values
func TestNullHandling(t *testing.T) {
	db, cleanup := setupTestDB(t)
	defer cleanup()

	ctx := context.Background()
	repos := NewRepositories(db)

	// Test file with no link target
	t.Run("file without link target", func(t *testing.T) {
		file := &File{
			Path:       "/regular.txt",
			MTime:      time.Now(),
			CTime:      time.Now(),
			Size:       1024,
			Mode:       0644,
			UID:        1000,
			GID:        1000,
			LinkTarget: "", // Should be stored as NULL
		}

		err := repos.Files.Create(ctx, nil, file)
		if err != nil {
			t.Fatal(err)
		}

		retrieved, err := repos.Files.GetByID(ctx, file.ID)
		if err != nil {
			t.Fatal(err)
		}

		if retrieved.LinkTarget != "" {
			t.Errorf("expected empty link target, got %q", retrieved.LinkTarget)
		}
	})

	// Test snapshot with NULL completed_at
	t.Run("incomplete snapshot", func(t *testing.T) {
		snapshot := &Snapshot{
			ID:          "incomplete-test",
			Hostname:    "test-host",
			StartedAt:   time.Now(),
			CompletedAt: nil, // Should remain NULL until completed
		}

		err := repos.Snapshots.Create(ctx, nil, snapshot)
		if err != nil {
			t.Fatal(err)
		}

		retrieved, err := repos.Snapshots.GetByID(ctx, snapshot.ID)
		if err != nil {
			t.Fatal(err)
		}

		if retrieved.CompletedAt != nil {
			t.Error("expected nil CompletedAt for incomplete snapshot")
		}
	})

	// Test blob with NULL uploaded_ts
	t.Run("blob not uploaded", func(t *testing.T) {
		blob := &Blob{
			ID:         "not-uploaded",
			Hash:       "test-hash",
			CreatedTS:  time.Now(),
			UploadedTS: nil, // Not uploaded yet
		}

		err := repos.Blobs.Create(ctx, nil, blob)
		if err != nil {
			t.Fatal(err)
		}

		retrieved, err := repos.Blobs.GetByID(ctx, blob.ID)
		if err != nil {
			t.Fatal(err)
		}

		if retrieved.UploadedTS != nil {
			t.Error("expected nil UploadedTS for non-uploaded blob")
		}
	})
}

// TestLargeDatasets tests operations with large amounts of data
func TestLargeDatasets(t *testing.T) {
	if testing.Short() {
		t.Skip("skipping large dataset test in short mode")
	}

	db, cleanup := setupTestDB(t)
	defer cleanup()

	ctx := context.Background()
	repos := NewRepositories(db)

	// Create a snapshot
	snapshot := &Snapshot{
		ID:        "large-dataset-test",
		Hostname:  "test-host",
		StartedAt: time.Now(),
	}
	err := repos.Snapshots.Create(ctx, nil, snapshot)
	if err != nil {
		t.Fatal(err)
	}

	// Create many files
	const fileCount = 1000
	fileIDs := make([]string, fileCount)

	t.Run("create many files", func(t *testing.T) {
		start := time.Now()
		for i := 0; i < fileCount; i++ {
			file := &File{
				Path:  fmt.Sprintf("/large/file%05d.txt", i),
				MTime: time.Now(),
				CTime: time.Now(),
				Size:  int64(i * 1024),
				Mode:  0644,
				UID:   uint32(1000 + (i % 10)),
				GID:   uint32(1000 + (i % 10)),
			}
			err := repos.Files.Create(ctx, nil, file)
			if err != nil {
				t.Fatalf("failed to create file %d: %v", i, err)
			}
			fileIDs[i] = file.ID

			// Add half to snapshot
			if i%2 == 0 {
				err = repos.Snapshots.AddFileByID(ctx, nil, snapshot.ID, file.ID)
				if err != nil {
					t.Fatal(err)
				}
			}
		}
		t.Logf("Created %d files in %v", fileCount, time.Since(start))
	})

	// Test ListByPrefix performance
	t.Run("list by prefix performance", func(t *testing.T) {
		start := time.Now()
		files, err := repos.Files.ListByPrefix(ctx, "/large/")
		if err != nil {
			t.Fatal(err)
		}
		if len(files) != fileCount {
			t.Errorf("expected %d files, got %d", fileCount, len(files))
		}
		t.Logf("Listed %d files in %v", len(files), time.Since(start))
	})

	// Test orphaned cleanup performance
	t.Run("orphaned cleanup performance", func(t *testing.T) {
		start := time.Now()
		err := repos.Files.DeleteOrphaned(ctx)
		if err != nil {
			t.Fatal(err)
		}
		t.Logf("Cleaned up orphaned files in %v", time.Since(start))

		// Verify correct number remain
		files, err := repos.Files.ListByPrefix(ctx, "/large/")
		if err != nil {
			t.Fatal(err)
		}
		if len(files) != fileCount/2 {
			t.Errorf("expected %d files after cleanup, got %d", fileCount/2, len(files))
		}
	})
}

// TestErrorPropagation tests that errors are properly propagated
func TestErrorPropagation(t *testing.T) {
	db, cleanup := setupTestDB(t)
	defer cleanup()

	ctx := context.Background()
	repos := NewRepositories(db)

	// Test GetByID with non-existent ID
	t.Run("GetByID non-existent", func(t *testing.T) {
		file, err := repos.Files.GetByID(ctx, "non-existent-uuid")
		if err != nil {
			t.Errorf("GetByID should not return error for non-existent ID, got: %v", err)
		}
		if file != nil {
			t.Error("expected nil file for non-existent ID")
		}
	})

	// Test GetByPath with non-existent path
	t.Run("GetByPath non-existent", func(t *testing.T) {
		file, err := repos.Files.GetByPath(ctx, "/non/existent/path.txt")
		if err != nil {
			t.Errorf("GetByPath should not return error for non-existent path, got: %v", err)
		}
		if file != nil {
			t.Error("expected nil file for non-existent path")
		}
	})

	// Test invalid foreign key reference
	t.Run("invalid foreign key", func(t *testing.T) {
		fc := &FileChunk{
			FileID:    "non-existent-file-id",
			Idx:       0,
			ChunkHash: "some-chunk",
		}
		err := repos.FileChunks.Create(ctx, nil, fc)
		if err == nil {
			// Fatal rather than Error: the err.Error() call below would panic on a nil error
			t.Fatal("expected error for invalid foreign key")
		}
		if !strings.Contains(err.Error(), "FOREIGN KEY") {
			t.Errorf("expected foreign key error, got: %v", err)
		}
	})
}

// TestQueryInjection tests that the system is safe from SQL injection
func TestQueryInjection(t *testing.T) {
	db, cleanup := setupTestDB(t)
	defer cleanup()

	ctx := context.Background()
	repos := NewRepositories(db)

	// Test various injection attempts
	injectionTests := []string{
		"'; DROP TABLE files; --",
		"' OR '1'='1",
		"'; DELETE FROM files WHERE '1'='1'; --",
		`test'); DROP TABLE files; --`,
	}

	for _, injection := range injectionTests {
		t.Run("injection attempt", func(t *testing.T) {
			// Try injection in file path
			file := &File{
				Path:  injection,
				MTime: time.Now(),
				CTime: time.Now(),
				Size:  1024,
				Mode:  0644,
				UID:   1000,
				GID:   1000,
			}
			_ = repos.Files.Create(ctx, nil, file)
			// Should either succeed (treating as normal string) or fail with constraint
			// but should NOT execute the injected SQL

			// Verify tables still exist
			var count int
			err := db.conn.QueryRow("SELECT COUNT(*) FROM files").Scan(&count)
			if err != nil {
				t.Fatal("files table was damaged by injection")
			}
		})
	}
}

// TestTimezoneHandling tests that times are properly handled in UTC
func TestTimezoneHandling(t *testing.T) {
	db, cleanup := setupTestDB(t)
	defer cleanup()

	ctx := context.Background()
	repos := NewRepositories(db)

	// Create file with specific timezone
	loc, err := time.LoadLocation("America/New_York")
	if err != nil {
		t.Skip("timezone not available")
	}

	// Use Truncate to remove sub-second precision since we store as Unix timestamps
	nyTime := time.Now().In(loc).Truncate(time.Second)
	file := &File{
		Path:  "/timezone-test.txt",
		MTime: nyTime,
		CTime: nyTime,
		Size:  1024,
		Mode:  0644,
		UID:   1000,
		GID:   1000,
	}

	err = repos.Files.Create(ctx, nil, file)
	if err != nil {
		t.Fatal(err)
	}

	// Retrieve and verify times are in UTC
	retrieved, err := repos.Files.GetByID(ctx, file.ID)
	if err != nil {
		t.Fatal(err)
	}

	// Check that times are equivalent (same instant)
	if !retrieved.MTime.Equal(nyTime) {
		t.Error("time was not preserved correctly")
	}

	// Check that retrieved time is in UTC
	if retrieved.MTime.Location() != time.UTC {
		t.Error("retrieved time is not in UTC")
	}
}
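Usage note: these tests follow standard go test conventions, so, assuming the module layout implied by the file path above, go test ./internal/database/ -v runs them all, and go test -short ./internal/database/ skips the 1000-file TestLargeDatasets case via testing.Short().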