This commit represents a significant architectural overhaul of vaultik:

Database Schema Changes:
- Switch files table to use UUID primary keys instead of path-based keys
- Add UUID primary keys to blobs table for immediate chunk association
- Update all foreign key relationships to use UUIDs
- Add comprehensive schema documentation in DATAMODEL.md
- Add SQLite busy timeout handling for concurrent operations

Streaming and Performance Improvements:
- Implement true streaming blob packing without intermediate storage
- Add streaming chunk processing to reduce memory usage
- Improve progress reporting with real-time metrics
- Add upload metrics tracking in new uploads table

CLI Refactoring:
- Restructure CLI to use subcommands: snapshot create/list/purge/verify
- Add store info command for S3 configuration display
- Add custom duration parser supporting days/weeks/months/years
- Remove old backup.go in favor of enhanced snapshot.go
- Add --cron flag for silent operation

Configuration Changes:
- Remove unused index_prefix configuration option
- Add support for snapshot pruning retention policies
- Improve configuration validation and error messages

Testing Improvements:
- Add comprehensive repository tests with edge cases
- Add cascade delete debugging tests
- Fix concurrent operation tests to use SQLite busy timeout
- Remove tolerance for SQLITE_BUSY errors in tests

Documentation:
- Add MIT LICENSE file
- Update README with new command structure
- Add comprehensive DATAMODEL.md explaining database schema
- Update DESIGN.md with UUID-based architecture

Other Changes:
- Add test-config.yml for testing
- Update Makefile with better test output formatting
- Fix various race conditions in concurrent operations
- Improve error handling throughout
544 lines
13 KiB
Go
544 lines
13 KiB
Go
package database
|
|
|
|
import (
|
|
"context"
|
|
"fmt"
|
|
"strings"
|
|
"testing"
|
|
"time"
|
|
)
|
|
|
|
// TestFileRepositoryEdgeCases tests edge cases for file repository
|
|
func TestFileRepositoryEdgeCases(t *testing.T) {
|
|
db, cleanup := setupTestDB(t)
|
|
defer cleanup()
|
|
|
|
ctx := context.Background()
|
|
repo := NewFileRepository(db)
|
|
|
|
tests := []struct {
|
|
name string
|
|
file *File
|
|
wantErr bool
|
|
errMsg string
|
|
}{
|
|
{
|
|
name: "empty path",
|
|
file: &File{
|
|
Path: "",
|
|
MTime: time.Now(),
|
|
CTime: time.Now(),
|
|
Size: 1024,
|
|
Mode: 0644,
|
|
UID: 1000,
|
|
GID: 1000,
|
|
},
|
|
wantErr: false, // Empty strings are allowed, only NULL is not allowed
|
|
},
|
|
{
|
|
name: "very long path",
|
|
file: &File{
|
|
Path: "/" + strings.Repeat("a", 4096),
|
|
MTime: time.Now(),
|
|
CTime: time.Now(),
|
|
Size: 1024,
|
|
Mode: 0644,
|
|
UID: 1000,
|
|
GID: 1000,
|
|
},
|
|
wantErr: false,
|
|
},
|
|
{
|
|
name: "path with special characters",
|
|
file: &File{
|
|
Path: "/test/file with spaces and 特殊文字.txt",
|
|
MTime: time.Now(),
|
|
CTime: time.Now(),
|
|
Size: 1024,
|
|
Mode: 0644,
|
|
UID: 1000,
|
|
GID: 1000,
|
|
},
|
|
wantErr: false,
|
|
},
|
|
{
|
|
name: "zero size file",
|
|
file: &File{
|
|
Path: "/empty.txt",
|
|
MTime: time.Now(),
|
|
CTime: time.Now(),
|
|
Size: 0,
|
|
Mode: 0644,
|
|
UID: 1000,
|
|
GID: 1000,
|
|
},
|
|
wantErr: false,
|
|
},
|
|
{
|
|
name: "symlink with target",
|
|
file: &File{
|
|
Path: "/link",
|
|
MTime: time.Now(),
|
|
CTime: time.Now(),
|
|
Size: 0,
|
|
Mode: 0777 | 0120000, // symlink mode
|
|
UID: 1000,
|
|
GID: 1000,
|
|
LinkTarget: "/target",
|
|
},
|
|
wantErr: false,
|
|
},
|
|
}
|
|
|
|
for i, tt := range tests {
|
|
t.Run(tt.name, func(t *testing.T) {
|
|
// Add a unique suffix to paths to avoid UNIQUE constraint violations
|
|
if tt.file.Path != "" {
|
|
tt.file.Path = fmt.Sprintf("%s_%d_%d", tt.file.Path, i, time.Now().UnixNano())
|
|
}
|
|
|
|
err := repo.Create(ctx, nil, tt.file)
|
|
if (err != nil) != tt.wantErr {
|
|
t.Errorf("Create() error = %v, wantErr %v", err, tt.wantErr)
|
|
}
|
|
if err != nil && tt.errMsg != "" && !strings.Contains(err.Error(), tt.errMsg) {
|
|
t.Errorf("Create() error = %v, want error containing %q", err, tt.errMsg)
|
|
}
|
|
})
|
|
}
|
|
}
|
|
|
|
// TestDuplicateHandling tests handling of duplicate entries
|
|
func TestDuplicateHandling(t *testing.T) {
|
|
db, cleanup := setupTestDB(t)
|
|
defer cleanup()
|
|
|
|
ctx := context.Background()
|
|
repos := NewRepositories(db)
|
|
|
|
// Test duplicate file paths - Create uses UPSERT logic
|
|
t.Run("duplicate file paths", func(t *testing.T) {
|
|
file1 := &File{
|
|
Path: "/duplicate.txt",
|
|
MTime: time.Now(),
|
|
CTime: time.Now(),
|
|
Size: 1024,
|
|
Mode: 0644,
|
|
UID: 1000,
|
|
GID: 1000,
|
|
}
|
|
file2 := &File{
|
|
Path: "/duplicate.txt", // Same path
|
|
MTime: time.Now().Add(time.Hour),
|
|
CTime: time.Now().Add(time.Hour),
|
|
Size: 2048,
|
|
Mode: 0644,
|
|
UID: 1000,
|
|
GID: 1000,
|
|
}
|
|
|
|
err := repos.Files.Create(ctx, nil, file1)
|
|
if err != nil {
|
|
t.Fatalf("failed to create file1: %v", err)
|
|
}
|
|
originalID := file1.ID
|
|
|
|
// Create with same path should update the existing record (UPSERT behavior)
|
|
err = repos.Files.Create(ctx, nil, file2)
|
|
if err != nil {
|
|
t.Fatalf("failed to create file2: %v", err)
|
|
}
|
|
|
|
// Verify the file was updated, not duplicated
|
|
retrievedFile, err := repos.Files.GetByPath(ctx, "/duplicate.txt")
|
|
if err != nil {
|
|
t.Fatalf("failed to retrieve file: %v", err)
|
|
}
|
|
|
|
// The file should have been updated with file2's data
|
|
if retrievedFile.Size != 2048 {
|
|
t.Errorf("expected size 2048, got %d", retrievedFile.Size)
|
|
}
|
|
|
|
// ID might be different due to the UPSERT
|
|
if retrievedFile.ID != file2.ID {
|
|
t.Logf("File ID changed from %s to %s during upsert", originalID, retrievedFile.ID)
|
|
}
|
|
})
|
|
|
|
// Test duplicate chunk hashes
|
|
t.Run("duplicate chunk hashes", func(t *testing.T) {
|
|
chunk := &Chunk{
|
|
ChunkHash: "duplicate-chunk",
|
|
SHA256: "duplicate-sha",
|
|
Size: 1024,
|
|
}
|
|
|
|
err := repos.Chunks.Create(ctx, nil, chunk)
|
|
if err != nil {
|
|
t.Fatalf("failed to create chunk: %v", err)
|
|
}
|
|
|
|
// Creating the same chunk again should be idempotent (ON CONFLICT DO NOTHING)
|
|
err = repos.Chunks.Create(ctx, nil, chunk)
|
|
if err != nil {
|
|
t.Errorf("duplicate chunk creation should be idempotent, got error: %v", err)
|
|
}
|
|
})
|
|
|
|
// Test duplicate file-chunk mappings
|
|
t.Run("duplicate file-chunk mappings", func(t *testing.T) {
|
|
file := &File{
|
|
Path: "/test-dup-fc.txt",
|
|
MTime: time.Now(),
|
|
CTime: time.Now(),
|
|
Size: 1024,
|
|
Mode: 0644,
|
|
UID: 1000,
|
|
GID: 1000,
|
|
}
|
|
err := repos.Files.Create(ctx, nil, file)
|
|
if err != nil {
|
|
t.Fatal(err)
|
|
}
|
|
|
|
chunk := &Chunk{
|
|
ChunkHash: "test-chunk-dup",
|
|
SHA256: "test-sha-dup",
|
|
Size: 1024,
|
|
}
|
|
err = repos.Chunks.Create(ctx, nil, chunk)
|
|
if err != nil {
|
|
t.Fatal(err)
|
|
}
|
|
|
|
fc := &FileChunk{
|
|
FileID: file.ID,
|
|
Idx: 0,
|
|
ChunkHash: chunk.ChunkHash,
|
|
}
|
|
|
|
err = repos.FileChunks.Create(ctx, nil, fc)
|
|
if err != nil {
|
|
t.Fatal(err)
|
|
}
|
|
|
|
// Creating the same mapping again should be idempotent
|
|
err = repos.FileChunks.Create(ctx, nil, fc)
|
|
if err != nil {
|
|
t.Error("file-chunk creation should be idempotent")
|
|
}
|
|
})
|
|
}
|
|
|
|
// TestNullHandling tests handling of NULL values
|
|
func TestNullHandling(t *testing.T) {
|
|
db, cleanup := setupTestDB(t)
|
|
defer cleanup()
|
|
|
|
ctx := context.Background()
|
|
repos := NewRepositories(db)
|
|
|
|
// Test file with no link target
|
|
t.Run("file without link target", func(t *testing.T) {
|
|
file := &File{
|
|
Path: "/regular.txt",
|
|
MTime: time.Now(),
|
|
CTime: time.Now(),
|
|
Size: 1024,
|
|
Mode: 0644,
|
|
UID: 1000,
|
|
GID: 1000,
|
|
LinkTarget: "", // Should be stored as NULL
|
|
}
|
|
|
|
err := repos.Files.Create(ctx, nil, file)
|
|
if err != nil {
|
|
t.Fatal(err)
|
|
}
|
|
|
|
retrieved, err := repos.Files.GetByID(ctx, file.ID)
|
|
if err != nil {
|
|
t.Fatal(err)
|
|
}
|
|
|
|
if retrieved.LinkTarget != "" {
|
|
t.Errorf("expected empty link target, got %q", retrieved.LinkTarget)
|
|
}
|
|
})
|
|
|
|
// Test snapshot with NULL completed_at
|
|
t.Run("incomplete snapshot", func(t *testing.T) {
|
|
snapshot := &Snapshot{
|
|
ID: "incomplete-test",
|
|
Hostname: "test-host",
|
|
StartedAt: time.Now(),
|
|
CompletedAt: nil, // Should remain NULL until completed
|
|
}
|
|
|
|
err := repos.Snapshots.Create(ctx, nil, snapshot)
|
|
if err != nil {
|
|
t.Fatal(err)
|
|
}
|
|
|
|
retrieved, err := repos.Snapshots.GetByID(ctx, snapshot.ID)
|
|
if err != nil {
|
|
t.Fatal(err)
|
|
}
|
|
|
|
if retrieved.CompletedAt != nil {
|
|
t.Error("expected nil CompletedAt for incomplete snapshot")
|
|
}
|
|
})
|
|
|
|
// Test blob with NULL uploaded_ts
|
|
t.Run("blob not uploaded", func(t *testing.T) {
|
|
blob := &Blob{
|
|
ID: "not-uploaded",
|
|
Hash: "test-hash",
|
|
CreatedTS: time.Now(),
|
|
UploadedTS: nil, // Not uploaded yet
|
|
}
|
|
|
|
err := repos.Blobs.Create(ctx, nil, blob)
|
|
if err != nil {
|
|
t.Fatal(err)
|
|
}
|
|
|
|
retrieved, err := repos.Blobs.GetByID(ctx, blob.ID)
|
|
if err != nil {
|
|
t.Fatal(err)
|
|
}
|
|
|
|
if retrieved.UploadedTS != nil {
|
|
t.Error("expected nil UploadedTS for non-uploaded blob")
|
|
}
|
|
})
|
|
}
|
|
|
|
// TestLargeDatasets tests operations with large amounts of data
|
|
func TestLargeDatasets(t *testing.T) {
|
|
if testing.Short() {
|
|
t.Skip("skipping large dataset test in short mode")
|
|
}
|
|
|
|
db, cleanup := setupTestDB(t)
|
|
defer cleanup()
|
|
|
|
ctx := context.Background()
|
|
repos := NewRepositories(db)
|
|
|
|
// Create a snapshot
|
|
snapshot := &Snapshot{
|
|
ID: "large-dataset-test",
|
|
Hostname: "test-host",
|
|
StartedAt: time.Now(),
|
|
}
|
|
err := repos.Snapshots.Create(ctx, nil, snapshot)
|
|
if err != nil {
|
|
t.Fatal(err)
|
|
}
|
|
|
|
// Create many files
|
|
const fileCount = 1000
|
|
fileIDs := make([]string, fileCount)
|
|
|
|
t.Run("create many files", func(t *testing.T) {
|
|
start := time.Now()
|
|
for i := 0; i < fileCount; i++ {
|
|
file := &File{
|
|
Path: fmt.Sprintf("/large/file%05d.txt", i),
|
|
MTime: time.Now(),
|
|
CTime: time.Now(),
|
|
Size: int64(i * 1024),
|
|
Mode: 0644,
|
|
UID: uint32(1000 + (i % 10)),
|
|
GID: uint32(1000 + (i % 10)),
|
|
}
|
|
err := repos.Files.Create(ctx, nil, file)
|
|
if err != nil {
|
|
t.Fatalf("failed to create file %d: %v", i, err)
|
|
}
|
|
fileIDs[i] = file.ID
|
|
|
|
// Add half to snapshot
|
|
if i%2 == 0 {
|
|
err = repos.Snapshots.AddFileByID(ctx, nil, snapshot.ID, file.ID)
|
|
if err != nil {
|
|
t.Fatal(err)
|
|
}
|
|
}
|
|
}
|
|
t.Logf("Created %d files in %v", fileCount, time.Since(start))
|
|
})
|
|
|
|
// Test ListByPrefix performance
|
|
t.Run("list by prefix performance", func(t *testing.T) {
|
|
start := time.Now()
|
|
files, err := repos.Files.ListByPrefix(ctx, "/large/")
|
|
if err != nil {
|
|
t.Fatal(err)
|
|
}
|
|
if len(files) != fileCount {
|
|
t.Errorf("expected %d files, got %d", fileCount, len(files))
|
|
}
|
|
t.Logf("Listed %d files in %v", len(files), time.Since(start))
|
|
})
|
|
|
|
// Test orphaned cleanup performance
|
|
t.Run("orphaned cleanup performance", func(t *testing.T) {
|
|
start := time.Now()
|
|
err := repos.Files.DeleteOrphaned(ctx)
|
|
if err != nil {
|
|
t.Fatal(err)
|
|
}
|
|
t.Logf("Cleaned up orphaned files in %v", time.Since(start))
|
|
|
|
// Verify correct number remain
|
|
files, err := repos.Files.ListByPrefix(ctx, "/large/")
|
|
if err != nil {
|
|
t.Fatal(err)
|
|
}
|
|
if len(files) != fileCount/2 {
|
|
t.Errorf("expected %d files after cleanup, got %d", fileCount/2, len(files))
|
|
}
|
|
})
|
|
}
|
|
|
|
// TestErrorPropagation tests that errors are properly propagated
|
|
func TestErrorPropagation(t *testing.T) {
|
|
db, cleanup := setupTestDB(t)
|
|
defer cleanup()
|
|
|
|
ctx := context.Background()
|
|
repos := NewRepositories(db)
|
|
|
|
// Test GetByID with non-existent ID
|
|
t.Run("GetByID non-existent", func(t *testing.T) {
|
|
file, err := repos.Files.GetByID(ctx, "non-existent-uuid")
|
|
if err != nil {
|
|
t.Errorf("GetByID should not return error for non-existent ID, got: %v", err)
|
|
}
|
|
if file != nil {
|
|
t.Error("expected nil file for non-existent ID")
|
|
}
|
|
})
|
|
|
|
// Test GetByPath with non-existent path
|
|
t.Run("GetByPath non-existent", func(t *testing.T) {
|
|
file, err := repos.Files.GetByPath(ctx, "/non/existent/path.txt")
|
|
if err != nil {
|
|
t.Errorf("GetByPath should not return error for non-existent path, got: %v", err)
|
|
}
|
|
if file != nil {
|
|
t.Error("expected nil file for non-existent path")
|
|
}
|
|
})
|
|
|
|
// Test invalid foreign key reference
|
|
t.Run("invalid foreign key", func(t *testing.T) {
|
|
fc := &FileChunk{
|
|
FileID: "non-existent-file-id",
|
|
Idx: 0,
|
|
ChunkHash: "some-chunk",
|
|
}
|
|
err := repos.FileChunks.Create(ctx, nil, fc)
|
|
if err == nil {
|
|
t.Error("expected error for invalid foreign key")
|
|
}
|
|
if !strings.Contains(err.Error(), "FOREIGN KEY") {
|
|
t.Errorf("expected foreign key error, got: %v", err)
|
|
}
|
|
})
|
|
}
|
|
|
|
// TestQueryInjection tests that the system is safe from SQL injection
|
|
func TestQueryInjection(t *testing.T) {
|
|
db, cleanup := setupTestDB(t)
|
|
defer cleanup()
|
|
|
|
ctx := context.Background()
|
|
repos := NewRepositories(db)
|
|
|
|
// Test various injection attempts
|
|
injectionTests := []string{
|
|
"'; DROP TABLE files; --",
|
|
"' OR '1'='1",
|
|
"'; DELETE FROM files WHERE '1'='1'; --",
|
|
`test'); DROP TABLE files; --`,
|
|
}
|
|
|
|
for _, injection := range injectionTests {
|
|
t.Run("injection attempt", func(t *testing.T) {
|
|
// Try injection in file path
|
|
file := &File{
|
|
Path: injection,
|
|
MTime: time.Now(),
|
|
CTime: time.Now(),
|
|
Size: 1024,
|
|
Mode: 0644,
|
|
UID: 1000,
|
|
GID: 1000,
|
|
}
|
|
_ = repos.Files.Create(ctx, nil, file)
|
|
// Should either succeed (treating as normal string) or fail with constraint
|
|
// but should NOT execute the injected SQL
|
|
|
|
// Verify tables still exist
|
|
var count int
|
|
err := db.conn.QueryRow("SELECT COUNT(*) FROM files").Scan(&count)
|
|
if err != nil {
|
|
t.Fatal("files table was damaged by injection")
|
|
}
|
|
})
|
|
}
|
|
}
|
|
|
|
// TestTimezoneHandling tests that times are properly handled in UTC
|
|
func TestTimezoneHandling(t *testing.T) {
|
|
db, cleanup := setupTestDB(t)
|
|
defer cleanup()
|
|
|
|
ctx := context.Background()
|
|
repos := NewRepositories(db)
|
|
|
|
// Create file with specific timezone
|
|
loc, err := time.LoadLocation("America/New_York")
|
|
if err != nil {
|
|
t.Skip("timezone not available")
|
|
}
|
|
|
|
// Use Truncate to remove sub-second precision since we store as Unix timestamps
|
|
nyTime := time.Now().In(loc).Truncate(time.Second)
|
|
file := &File{
|
|
Path: "/timezone-test.txt",
|
|
MTime: nyTime,
|
|
CTime: nyTime,
|
|
Size: 1024,
|
|
Mode: 0644,
|
|
UID: 1000,
|
|
GID: 1000,
|
|
}
|
|
|
|
err = repos.Files.Create(ctx, nil, file)
|
|
if err != nil {
|
|
t.Fatal(err)
|
|
}
|
|
|
|
// Retrieve and verify times are in UTC
|
|
retrieved, err := repos.Files.GetByID(ctx, file.ID)
|
|
if err != nil {
|
|
t.Fatal(err)
|
|
}
|
|
|
|
// Check that times are equivalent (same instant)
|
|
if !retrieved.MTime.Equal(nyTime) {
|
|
t.Error("time was not preserved correctly")
|
|
}
|
|
|
|
// Check that retrieved time is in UTC
|
|
if retrieved.MTime.Location() != time.UTC {
|
|
t.Error("retrieved time is not in UTC")
|
|
}
|
|
}
|