Major refactoring: UUID-based storage, streaming architecture, and CLI improvements
This commit represents a significant architectural overhaul of vaultik:

Database Schema Changes:
- Switch files table to use UUID primary keys instead of path-based keys
- Add UUID primary keys to blobs table for immediate chunk association
- Update all foreign key relationships to use UUIDs
- Add comprehensive schema documentation in DATAMODEL.md
- Add SQLite busy timeout handling for concurrent operations

Streaming and Performance Improvements:
- Implement true streaming blob packing without intermediate storage
- Add streaming chunk processing to reduce memory usage
- Improve progress reporting with real-time metrics
- Add upload metrics tracking in new uploads table

CLI Refactoring:
- Restructure CLI to use subcommands: snapshot create/list/purge/verify
- Add store info command for S3 configuration display
- Add custom duration parser supporting days/weeks/months/years
- Remove old backup.go in favor of enhanced snapshot.go
- Add --cron flag for silent operation

Configuration Changes:
- Remove unused index_prefix configuration option
- Add support for snapshot pruning retention policies
- Improve configuration validation and error messages

Testing Improvements:
- Add comprehensive repository tests with edge cases
- Add cascade delete debugging tests
- Fix concurrent operation tests to use SQLite busy timeout
- Remove tolerance for SQLITE_BUSY errors in tests

Documentation:
- Add MIT LICENSE file
- Update README with new command structure
- Add comprehensive DATAMODEL.md explaining database schema
- Update DESIGN.md with UUID-based architecture

Other Changes:
- Add test-config.yml for testing
- Update Makefile with better test output formatting
- Fix various race conditions in concurrent operations
- Improve error handling throughout
This commit is contained in:
@@ -2,7 +2,9 @@ package database
|
||||
|
||||
import (
|
||||
"context"
|
||||
"strings"
|
||||
"testing"
|
||||
"time"
|
||||
)
|
||||
|
||||
func TestBlobChunkRepository(t *testing.T) {
|
||||
@@ -10,78 +12,112 @@ func TestBlobChunkRepository(t *testing.T) {
|
||||
defer cleanup()
|
||||
|
||||
ctx := context.Background()
|
||||
repo := NewBlobChunkRepository(db)
|
||||
repos := NewRepositories(db)
|
||||
|
||||
// Create blob first
|
||||
blob := &Blob{
|
||||
ID: "blob1-uuid",
|
||||
Hash: "blob1-hash",
|
||||
CreatedTS: time.Now(),
|
||||
}
|
||||
err := repos.Blobs.Create(ctx, nil, blob)
|
||||
if err != nil {
|
||||
t.Fatalf("failed to create blob: %v", err)
|
||||
}
|
||||
|
||||
// Create chunks
|
||||
chunks := []string{"chunk1", "chunk2", "chunk3"}
|
||||
for _, chunkHash := range chunks {
|
||||
chunk := &Chunk{
|
||||
ChunkHash: chunkHash,
|
||||
SHA256: chunkHash + "-sha",
|
||||
Size: 1024,
|
||||
}
|
||||
err = repos.Chunks.Create(ctx, nil, chunk)
|
||||
if err != nil {
|
||||
t.Fatalf("failed to create chunk %s: %v", chunkHash, err)
|
||||
}
|
||||
}
|
||||
|
||||
// Test Create
|
||||
bc1 := &BlobChunk{
|
||||
BlobID: "blob1-uuid",
|
||||
BlobID: blob.ID,
|
||||
ChunkHash: "chunk1",
|
||||
Offset: 0,
|
||||
Length: 1024,
|
||||
}
|
||||
|
||||
err := repo.Create(ctx, nil, bc1)
|
||||
err = repos.BlobChunks.Create(ctx, nil, bc1)
|
||||
if err != nil {
|
||||
t.Fatalf("failed to create blob chunk: %v", err)
|
||||
}
|
||||
|
||||
// Add more chunks to the same blob
|
||||
bc2 := &BlobChunk{
|
||||
BlobID: "blob1-uuid",
|
||||
BlobID: blob.ID,
|
||||
ChunkHash: "chunk2",
|
||||
Offset: 1024,
|
||||
Length: 2048,
|
||||
}
|
||||
err = repo.Create(ctx, nil, bc2)
|
||||
err = repos.BlobChunks.Create(ctx, nil, bc2)
|
||||
if err != nil {
|
||||
t.Fatalf("failed to create second blob chunk: %v", err)
|
||||
}
|
||||
|
||||
bc3 := &BlobChunk{
|
||||
BlobID: "blob1-uuid",
|
||||
BlobID: blob.ID,
|
||||
ChunkHash: "chunk3",
|
||||
Offset: 3072,
|
||||
Length: 512,
|
||||
}
|
||||
err = repo.Create(ctx, nil, bc3)
|
||||
err = repos.BlobChunks.Create(ctx, nil, bc3)
|
||||
if err != nil {
|
||||
t.Fatalf("failed to create third blob chunk: %v", err)
|
||||
}
|
||||
|
||||
// Test GetByBlobID
|
||||
chunks, err := repo.GetByBlobID(ctx, "blob1-uuid")
|
||||
blobChunks, err := repos.BlobChunks.GetByBlobID(ctx, blob.ID)
|
||||
if err != nil {
|
||||
t.Fatalf("failed to get blob chunks: %v", err)
|
||||
}
|
||||
if len(chunks) != 3 {
|
||||
t.Errorf("expected 3 chunks, got %d", len(chunks))
|
||||
if len(blobChunks) != 3 {
|
||||
t.Errorf("expected 3 chunks, got %d", len(blobChunks))
|
||||
}
|
||||
|
||||
// Verify order by offset
|
||||
expectedOffsets := []int64{0, 1024, 3072}
|
||||
for i, chunk := range chunks {
|
||||
if chunk.Offset != expectedOffsets[i] {
|
||||
t.Errorf("wrong chunk order: expected offset %d, got %d", expectedOffsets[i], chunk.Offset)
|
||||
for i, bc := range blobChunks {
|
||||
if bc.Offset != expectedOffsets[i] {
|
||||
t.Errorf("wrong chunk order: expected offset %d, got %d", expectedOffsets[i], bc.Offset)
|
||||
}
|
||||
}
|
||||
|
||||
// Test GetByChunkHash
|
||||
bc, err := repo.GetByChunkHash(ctx, "chunk2")
|
||||
bc, err := repos.BlobChunks.GetByChunkHash(ctx, "chunk2")
|
||||
if err != nil {
|
||||
t.Fatalf("failed to get blob chunk by chunk hash: %v", err)
|
||||
}
|
||||
if bc == nil {
|
||||
t.Fatal("expected blob chunk, got nil")
|
||||
}
|
||||
if bc.BlobID != "blob1-uuid" {
|
||||
t.Errorf("wrong blob ID: expected blob1-uuid, got %s", bc.BlobID)
|
||||
if bc.BlobID != blob.ID {
|
||||
t.Errorf("wrong blob ID: expected %s, got %s", blob.ID, bc.BlobID)
|
||||
}
|
||||
if bc.Offset != 1024 {
|
||||
t.Errorf("wrong offset: expected 1024, got %d", bc.Offset)
|
||||
}
|
||||
|
||||
// Test duplicate insert (should fail due to primary key constraint)
|
||||
err = repos.BlobChunks.Create(ctx, nil, bc1)
|
||||
if err == nil {
|
||||
t.Fatal("duplicate blob_chunk insert should fail due to primary key constraint")
|
||||
}
|
||||
if !strings.Contains(err.Error(), "UNIQUE") && !strings.Contains(err.Error(), "constraint") {
|
||||
t.Fatalf("expected constraint error, got: %v", err)
|
||||
}
|
||||
|
||||
// Test non-existent chunk
|
||||
bc, err = repo.GetByChunkHash(ctx, "nonexistent")
|
||||
bc, err = repos.BlobChunks.GetByChunkHash(ctx, "nonexistent")
|
||||
if err != nil {
|
||||
t.Fatalf("unexpected error: %v", err)
|
||||
}
|
||||
@@ -95,26 +131,61 @@ func TestBlobChunkRepositoryMultipleBlobs(t *testing.T) {
|
||||
defer cleanup()
|
||||
|
||||
ctx := context.Background()
|
||||
repo := NewBlobChunkRepository(db)
|
||||
repos := NewRepositories(db)
|
||||
|
||||
// Create blobs
|
||||
blob1 := &Blob{
|
||||
ID: "blob1-uuid",
|
||||
Hash: "blob1-hash",
|
||||
CreatedTS: time.Now(),
|
||||
}
|
||||
blob2 := &Blob{
|
||||
ID: "blob2-uuid",
|
||||
Hash: "blob2-hash",
|
||||
CreatedTS: time.Now(),
|
||||
}
|
||||
|
||||
err := repos.Blobs.Create(ctx, nil, blob1)
|
||||
if err != nil {
|
||||
t.Fatalf("failed to create blob1: %v", err)
|
||||
}
|
||||
err = repos.Blobs.Create(ctx, nil, blob2)
|
||||
if err != nil {
|
||||
t.Fatalf("failed to create blob2: %v", err)
|
||||
}
|
||||
|
||||
// Create chunks
|
||||
chunkHashes := []string{"chunk1", "chunk2", "chunk3"}
|
||||
for _, chunkHash := range chunkHashes {
|
||||
chunk := &Chunk{
|
||||
ChunkHash: chunkHash,
|
||||
SHA256: chunkHash + "-sha",
|
||||
Size: 1024,
|
||||
}
|
||||
err = repos.Chunks.Create(ctx, nil, chunk)
|
||||
if err != nil {
|
||||
t.Fatalf("failed to create chunk %s: %v", chunkHash, err)
|
||||
}
|
||||
}
|
||||
|
||||
// Create chunks across multiple blobs
|
||||
// Some chunks are shared between blobs (deduplication scenario)
|
||||
blobChunks := []BlobChunk{
|
||||
{BlobID: "blob1-uuid", ChunkHash: "chunk1", Offset: 0, Length: 1024},
|
||||
{BlobID: "blob1-uuid", ChunkHash: "chunk2", Offset: 1024, Length: 1024},
|
||||
{BlobID: "blob2-uuid", ChunkHash: "chunk2", Offset: 0, Length: 1024}, // chunk2 is shared
|
||||
{BlobID: "blob2-uuid", ChunkHash: "chunk3", Offset: 1024, Length: 1024},
|
||||
{BlobID: blob1.ID, ChunkHash: "chunk1", Offset: 0, Length: 1024},
|
||||
{BlobID: blob1.ID, ChunkHash: "chunk2", Offset: 1024, Length: 1024},
|
||||
{BlobID: blob2.ID, ChunkHash: "chunk2", Offset: 0, Length: 1024}, // chunk2 is shared
|
||||
{BlobID: blob2.ID, ChunkHash: "chunk3", Offset: 1024, Length: 1024},
|
||||
}
|
||||
|
||||
for _, bc := range blobChunks {
|
||||
err := repo.Create(ctx, nil, &bc)
|
||||
err := repos.BlobChunks.Create(ctx, nil, &bc)
|
||||
if err != nil {
|
||||
t.Fatalf("failed to create blob chunk: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
// Verify blob1 chunks
|
||||
chunks, err := repo.GetByBlobID(ctx, "blob1-uuid")
|
||||
chunks, err := repos.BlobChunks.GetByBlobID(ctx, blob1.ID)
|
||||
if err != nil {
|
||||
t.Fatalf("failed to get blob1 chunks: %v", err)
|
||||
}
|
||||
@@ -123,7 +194,7 @@ func TestBlobChunkRepositoryMultipleBlobs(t *testing.T) {
|
||||
}
|
||||
|
||||
// Verify blob2 chunks
|
||||
chunks, err = repo.GetByBlobID(ctx, "blob2-uuid")
|
||||
chunks, err = repos.BlobChunks.GetByBlobID(ctx, blob2.ID)
|
||||
if err != nil {
|
||||
t.Fatalf("failed to get blob2 chunks: %v", err)
|
||||
}
|
||||
@@ -132,7 +203,7 @@ func TestBlobChunkRepositoryMultipleBlobs(t *testing.T) {
|
||||
}
|
||||
|
||||
// Verify shared chunk
|
||||
bc, err := repo.GetByChunkHash(ctx, "chunk2")
|
||||
bc, err := repos.BlobChunks.GetByChunkHash(ctx, "chunk2")
|
||||
if err != nil {
|
||||
t.Fatalf("failed to get shared chunk: %v", err)
|
||||
}
|
||||
@@ -140,7 +211,7 @@ func TestBlobChunkRepositoryMultipleBlobs(t *testing.T) {
|
||||
t.Fatal("expected shared chunk, got nil")
|
||||
}
|
||||
// GetByChunkHash returns first match, should be blob1
|
||||
if bc.BlobID != "blob1-uuid" {
|
||||
t.Errorf("expected blob1-uuid for shared chunk, got %s", bc.BlobID)
|
||||
if bc.BlobID != blob1.ID {
|
||||
t.Errorf("expected %s for shared chunk, got %s", blob1.ID, bc.BlobID)
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user