Major refactoring: UUID-based storage, streaming architecture, and CLI improvements

This commit represents a significant architectural overhaul of vaultik:

Database Schema Changes:
- Switch files table to use UUID primary keys instead of path-based keys
- Add UUID primary keys to blobs table for immediate chunk association
- Update all foreign key relationships to use UUIDs
- Add comprehensive schema documentation in DATAMODEL.md
- Add SQLite busy timeout handling for concurrent operations

Streaming and Performance Improvements:
- Implement true streaming blob packing without intermediate storage
- Add streaming chunk processing to reduce memory usage
- Improve progress reporting with real-time metrics
- Add upload metrics tracking in new uploads table

CLI Refactoring:
- Restructure CLI to use subcommands: snapshot create/list/purge/verify
- Add store info command for S3 configuration display
- Add custom duration parser supporting days/weeks/months/years
- Remove old backup.go in favor of enhanced snapshot.go
- Add --cron flag for silent operation

Configuration Changes:
- Remove unused index_prefix configuration option
- Add support for snapshot pruning retention policies
- Improve configuration validation and error messages

Testing Improvements:
- Add comprehensive repository tests with edge cases
- Add cascade delete debugging tests
- Fix concurrent operation tests to use SQLite busy timeout
- Remove tolerance for SQLITE_BUSY errors in tests

Documentation:
- Add MIT LICENSE file
- Update README with new command structure
- Add comprehensive DATAMODEL.md explaining database schema
- Update DESIGN.md with UUID-based architecture

Other Changes:
- Add test-config.yml for testing
- Update Makefile with better test output formatting
- Fix various race conditions in concurrent operations
- Improve error handling throughout
This commit is contained in:
2025-07-22 14:54:37 +02:00
parent 86b533d6ee
commit 78af626759
54 changed files with 5525 additions and 1109 deletions

View File

@@ -24,7 +24,7 @@ func (r *BlobChunkRepository) Create(ctx context.Context, tx *sql.Tx, bc *BlobCh
if tx != nil {
_, err = tx.ExecContext(ctx, query, bc.BlobID, bc.ChunkHash, bc.Offset, bc.Length)
} else {
_, err = r.db.ExecWithLock(ctx, query, bc.BlobID, bc.ChunkHash, bc.Offset, bc.Length)
_, err = r.db.ExecWithLog(ctx, query, bc.BlobID, bc.ChunkHash, bc.Offset, bc.Length)
}
if err != nil {

View File

@@ -2,7 +2,9 @@ package database
import (
"context"
"strings"
"testing"
"time"
)
func TestBlobChunkRepository(t *testing.T) {
@@ -10,78 +12,112 @@ func TestBlobChunkRepository(t *testing.T) {
defer cleanup()
ctx := context.Background()
repo := NewBlobChunkRepository(db)
repos := NewRepositories(db)
// Create blob first
blob := &Blob{
ID: "blob1-uuid",
Hash: "blob1-hash",
CreatedTS: time.Now(),
}
err := repos.Blobs.Create(ctx, nil, blob)
if err != nil {
t.Fatalf("failed to create blob: %v", err)
}
// Create chunks
chunks := []string{"chunk1", "chunk2", "chunk3"}
for _, chunkHash := range chunks {
chunk := &Chunk{
ChunkHash: chunkHash,
SHA256: chunkHash + "-sha",
Size: 1024,
}
err = repos.Chunks.Create(ctx, nil, chunk)
if err != nil {
t.Fatalf("failed to create chunk %s: %v", chunkHash, err)
}
}
// Test Create
bc1 := &BlobChunk{
BlobID: "blob1-uuid",
BlobID: blob.ID,
ChunkHash: "chunk1",
Offset: 0,
Length: 1024,
}
err := repo.Create(ctx, nil, bc1)
err = repos.BlobChunks.Create(ctx, nil, bc1)
if err != nil {
t.Fatalf("failed to create blob chunk: %v", err)
}
// Add more chunks to the same blob
bc2 := &BlobChunk{
BlobID: "blob1-uuid",
BlobID: blob.ID,
ChunkHash: "chunk2",
Offset: 1024,
Length: 2048,
}
err = repo.Create(ctx, nil, bc2)
err = repos.BlobChunks.Create(ctx, nil, bc2)
if err != nil {
t.Fatalf("failed to create second blob chunk: %v", err)
}
bc3 := &BlobChunk{
BlobID: "blob1-uuid",
BlobID: blob.ID,
ChunkHash: "chunk3",
Offset: 3072,
Length: 512,
}
err = repo.Create(ctx, nil, bc3)
err = repos.BlobChunks.Create(ctx, nil, bc3)
if err != nil {
t.Fatalf("failed to create third blob chunk: %v", err)
}
// Test GetByBlobID
chunks, err := repo.GetByBlobID(ctx, "blob1-uuid")
blobChunks, err := repos.BlobChunks.GetByBlobID(ctx, blob.ID)
if err != nil {
t.Fatalf("failed to get blob chunks: %v", err)
}
if len(chunks) != 3 {
t.Errorf("expected 3 chunks, got %d", len(chunks))
if len(blobChunks) != 3 {
t.Errorf("expected 3 chunks, got %d", len(blobChunks))
}
// Verify order by offset
expectedOffsets := []int64{0, 1024, 3072}
for i, chunk := range chunks {
if chunk.Offset != expectedOffsets[i] {
t.Errorf("wrong chunk order: expected offset %d, got %d", expectedOffsets[i], chunk.Offset)
for i, bc := range blobChunks {
if bc.Offset != expectedOffsets[i] {
t.Errorf("wrong chunk order: expected offset %d, got %d", expectedOffsets[i], bc.Offset)
}
}
// Test GetByChunkHash
bc, err := repo.GetByChunkHash(ctx, "chunk2")
bc, err := repos.BlobChunks.GetByChunkHash(ctx, "chunk2")
if err != nil {
t.Fatalf("failed to get blob chunk by chunk hash: %v", err)
}
if bc == nil {
t.Fatal("expected blob chunk, got nil")
}
if bc.BlobID != "blob1-uuid" {
t.Errorf("wrong blob ID: expected blob1-uuid, got %s", bc.BlobID)
if bc.BlobID != blob.ID {
t.Errorf("wrong blob ID: expected %s, got %s", blob.ID, bc.BlobID)
}
if bc.Offset != 1024 {
t.Errorf("wrong offset: expected 1024, got %d", bc.Offset)
}
// Test duplicate insert (should fail due to primary key constraint)
err = repos.BlobChunks.Create(ctx, nil, bc1)
if err == nil {
t.Fatal("duplicate blob_chunk insert should fail due to primary key constraint")
}
if !strings.Contains(err.Error(), "UNIQUE") && !strings.Contains(err.Error(), "constraint") {
t.Fatalf("expected constraint error, got: %v", err)
}
// Test non-existent chunk
bc, err = repo.GetByChunkHash(ctx, "nonexistent")
bc, err = repos.BlobChunks.GetByChunkHash(ctx, "nonexistent")
if err != nil {
t.Fatalf("unexpected error: %v", err)
}
@@ -95,26 +131,61 @@ func TestBlobChunkRepositoryMultipleBlobs(t *testing.T) {
defer cleanup()
ctx := context.Background()
repo := NewBlobChunkRepository(db)
repos := NewRepositories(db)
// Create blobs
blob1 := &Blob{
ID: "blob1-uuid",
Hash: "blob1-hash",
CreatedTS: time.Now(),
}
blob2 := &Blob{
ID: "blob2-uuid",
Hash: "blob2-hash",
CreatedTS: time.Now(),
}
err := repos.Blobs.Create(ctx, nil, blob1)
if err != nil {
t.Fatalf("failed to create blob1: %v", err)
}
err = repos.Blobs.Create(ctx, nil, blob2)
if err != nil {
t.Fatalf("failed to create blob2: %v", err)
}
// Create chunks
chunkHashes := []string{"chunk1", "chunk2", "chunk3"}
for _, chunkHash := range chunkHashes {
chunk := &Chunk{
ChunkHash: chunkHash,
SHA256: chunkHash + "-sha",
Size: 1024,
}
err = repos.Chunks.Create(ctx, nil, chunk)
if err != nil {
t.Fatalf("failed to create chunk %s: %v", chunkHash, err)
}
}
// Create chunks across multiple blobs
// Some chunks are shared between blobs (deduplication scenario)
blobChunks := []BlobChunk{
{BlobID: "blob1-uuid", ChunkHash: "chunk1", Offset: 0, Length: 1024},
{BlobID: "blob1-uuid", ChunkHash: "chunk2", Offset: 1024, Length: 1024},
{BlobID: "blob2-uuid", ChunkHash: "chunk2", Offset: 0, Length: 1024}, // chunk2 is shared
{BlobID: "blob2-uuid", ChunkHash: "chunk3", Offset: 1024, Length: 1024},
{BlobID: blob1.ID, ChunkHash: "chunk1", Offset: 0, Length: 1024},
{BlobID: blob1.ID, ChunkHash: "chunk2", Offset: 1024, Length: 1024},
{BlobID: blob2.ID, ChunkHash: "chunk2", Offset: 0, Length: 1024}, // chunk2 is shared
{BlobID: blob2.ID, ChunkHash: "chunk3", Offset: 1024, Length: 1024},
}
for _, bc := range blobChunks {
err := repo.Create(ctx, nil, &bc)
err := repos.BlobChunks.Create(ctx, nil, &bc)
if err != nil {
t.Fatalf("failed to create blob chunk: %v", err)
}
}
// Verify blob1 chunks
chunks, err := repo.GetByBlobID(ctx, "blob1-uuid")
chunks, err := repos.BlobChunks.GetByBlobID(ctx, blob1.ID)
if err != nil {
t.Fatalf("failed to get blob1 chunks: %v", err)
}
@@ -123,7 +194,7 @@ func TestBlobChunkRepositoryMultipleBlobs(t *testing.T) {
}
// Verify blob2 chunks
chunks, err = repo.GetByBlobID(ctx, "blob2-uuid")
chunks, err = repos.BlobChunks.GetByBlobID(ctx, blob2.ID)
if err != nil {
t.Fatalf("failed to get blob2 chunks: %v", err)
}
@@ -132,7 +203,7 @@ func TestBlobChunkRepositoryMultipleBlobs(t *testing.T) {
}
// Verify shared chunk
bc, err := repo.GetByChunkHash(ctx, "chunk2")
bc, err := repos.BlobChunks.GetByChunkHash(ctx, "chunk2")
if err != nil {
t.Fatalf("failed to get shared chunk: %v", err)
}
@@ -140,7 +211,7 @@ func TestBlobChunkRepositoryMultipleBlobs(t *testing.T) {
t.Fatal("expected shared chunk, got nil")
}
// GetByChunkHash returns first match, should be blob1
if bc.BlobID != "blob1-uuid" {
t.Errorf("expected blob1-uuid for shared chunk, got %s", bc.BlobID)
if bc.BlobID != blob1.ID {
t.Errorf("expected %s for shared chunk, got %s", blob1.ID, bc.BlobID)
}
}

View File

@@ -5,6 +5,8 @@ import (
"database/sql"
"fmt"
"time"
"git.eeqj.de/sneak/vaultik/internal/log"
)
type BlobRepository struct {
@@ -36,7 +38,7 @@ func (r *BlobRepository) Create(ctx context.Context, tx *sql.Tx, blob *Blob) err
_, err = tx.ExecContext(ctx, query, blob.ID, blob.Hash, blob.CreatedTS.Unix(),
finishedTS, blob.UncompressedSize, blob.CompressedSize, uploadedTS)
} else {
_, err = r.db.ExecWithLock(ctx, query, blob.ID, blob.Hash, blob.CreatedTS.Unix(),
_, err = r.db.ExecWithLog(ctx, query, blob.ID, blob.Hash, blob.CreatedTS.Unix(),
finishedTS, blob.UncompressedSize, blob.CompressedSize, uploadedTS)
}
@@ -75,13 +77,13 @@ func (r *BlobRepository) GetByHash(ctx context.Context, hash string) (*Blob, err
return nil, fmt.Errorf("querying blob: %w", err)
}
blob.CreatedTS = time.Unix(createdTSUnix, 0)
blob.CreatedTS = time.Unix(createdTSUnix, 0).UTC()
if finishedTSUnix.Valid {
ts := time.Unix(finishedTSUnix.Int64, 0)
ts := time.Unix(finishedTSUnix.Int64, 0).UTC()
blob.FinishedTS = &ts
}
if uploadedTSUnix.Valid {
ts := time.Unix(uploadedTSUnix.Int64, 0)
ts := time.Unix(uploadedTSUnix.Int64, 0).UTC()
blob.UploadedTS = &ts
}
return &blob, nil
@@ -116,13 +118,13 @@ func (r *BlobRepository) GetByID(ctx context.Context, id string) (*Blob, error)
return nil, fmt.Errorf("querying blob: %w", err)
}
blob.CreatedTS = time.Unix(createdTSUnix, 0)
blob.CreatedTS = time.Unix(createdTSUnix, 0).UTC()
if finishedTSUnix.Valid {
ts := time.Unix(finishedTSUnix.Int64, 0)
ts := time.Unix(finishedTSUnix.Int64, 0).UTC()
blob.FinishedTS = &ts
}
if uploadedTSUnix.Valid {
ts := time.Unix(uploadedTSUnix.Int64, 0)
ts := time.Unix(uploadedTSUnix.Int64, 0).UTC()
blob.UploadedTS = &ts
}
return &blob, nil
@@ -136,12 +138,12 @@ func (r *BlobRepository) UpdateFinished(ctx context.Context, tx *sql.Tx, id stri
WHERE id = ?
`
now := time.Now().Unix()
now := time.Now().UTC().Unix()
var err error
if tx != nil {
_, err = tx.ExecContext(ctx, query, hash, now, uncompressedSize, compressedSize, id)
} else {
_, err = r.db.ExecWithLock(ctx, query, hash, now, uncompressedSize, compressedSize, id)
_, err = r.db.ExecWithLog(ctx, query, hash, now, uncompressedSize, compressedSize, id)
}
if err != nil {
@@ -159,12 +161,12 @@ func (r *BlobRepository) UpdateUploaded(ctx context.Context, tx *sql.Tx, id stri
WHERE id = ?
`
now := time.Now().Unix()
now := time.Now().UTC().Unix()
var err error
if tx != nil {
_, err = tx.ExecContext(ctx, query, now, id)
} else {
_, err = r.db.ExecWithLock(ctx, query, now, id)
_, err = r.db.ExecWithLog(ctx, query, now, id)
}
if err != nil {
@@ -173,3 +175,26 @@ func (r *BlobRepository) UpdateUploaded(ctx context.Context, tx *sql.Tx, id stri
return nil
}
// DeleteOrphaned deletes blobs that are not referenced by any snapshot.
// Orphans appear after snapshots are deleted or pruned; this reclaims
// their index rows in a single anti-join DELETE statement so no rows
// need to be loaded into Go. Safe to call when there is nothing to delete.
func (r *BlobRepository) DeleteOrphaned(ctx context.Context) error {
query := `
DELETE FROM blobs
WHERE NOT EXISTS (
SELECT 1 FROM snapshot_blobs
WHERE snapshot_blobs.blob_id = blobs.id
)
`
result, err := r.db.ExecWithLog(ctx, query)
if err != nil {
return fmt.Errorf("deleting orphaned blobs: %w", err)
}
// The RowsAffected error is deliberately ignored: the count is only
// used for debug logging and the DELETE itself already succeeded.
rowsAffected, _ := result.RowsAffected()
if rowsAffected > 0 {
log.Debug("Deleted orphaned blobs", "count", rowsAffected)
}
return nil
}

View File

@@ -0,0 +1,124 @@
package database
import (
"context"
"fmt"
"testing"
"time"
)
// TestCascadeDeleteDebug tests cascade delete with debug output.
// It creates one file with three file_chunk rows, deletes the file, and
// checks that the file_chunks rows disappear with it, logging the
// foreign-key pragma state and table DDL along the way to aid debugging
// when the cascade does not fire.
func TestCascadeDeleteDebug(t *testing.T) {
db, cleanup := setupTestDB(t)
defer cleanup()
ctx := context.Background()
repos := NewRepositories(db)
// Check if foreign keys are enabled; cascade behavior depends on
// PRAGMA foreign_keys being 1 on this connection.
var fkEnabled int
err := db.conn.QueryRow("PRAGMA foreign_keys").Scan(&fkEnabled)
if err != nil {
t.Fatal(err)
}
t.Logf("Foreign keys enabled: %d", fkEnabled)
// Create a file
file := &File{
Path: "/cascade-test.txt",
MTime: time.Now().Truncate(time.Second),
CTime: time.Now().Truncate(time.Second),
Size: 1024,
Mode: 0644,
UID: 1000,
GID: 1000,
}
err = repos.Files.Create(ctx, nil, file)
if err != nil {
t.Fatalf("failed to create file: %v", err)
}
t.Logf("Created file with ID: %s", file.ID)
// Create chunks and file-chunk mappings referencing the file by ID.
for i := 0; i < 3; i++ {
chunk := &Chunk{
ChunkHash: fmt.Sprintf("cascade-chunk-%d", i),
SHA256: fmt.Sprintf("cascade-sha-%d", i),
Size: 1024,
}
err = repos.Chunks.Create(ctx, nil, chunk)
if err != nil {
t.Fatalf("failed to create chunk: %v", err)
}
fc := &FileChunk{
FileID: file.ID,
Idx: i,
ChunkHash: chunk.ChunkHash,
}
err = repos.FileChunks.Create(ctx, nil, fc)
if err != nil {
t.Fatalf("failed to create file chunk: %v", err)
}
t.Logf("Created file chunk mapping: file_id=%s, idx=%d, chunk=%s", fc.FileID, fc.Idx, fc.ChunkHash)
}
// Verify file chunks exist before the delete so a later zero count is
// meaningful.
fileChunks, err := repos.FileChunks.GetByFileID(ctx, file.ID)
if err != nil {
t.Fatal(err)
}
t.Logf("File chunks before delete: %d", len(fileChunks))
// Dump the file_chunks table DDL from sqlite_master so the foreign-key
// clause (and whether it declares a cascade) is visible in the test log.
var fkInfo string
err = db.conn.QueryRow(`
SELECT sql FROM sqlite_master
WHERE type='table' AND name='file_chunks'
`).Scan(&fkInfo)
if err != nil {
t.Fatal(err)
}
t.Logf("file_chunks table definition:\n%s", fkInfo)
// Delete the file
t.Log("Deleting file...")
err = repos.Files.DeleteByID(ctx, nil, file.ID)
if err != nil {
t.Fatalf("failed to delete file: %v", err)
}
// Verify file is gone
deletedFile, err := repos.Files.GetByID(ctx, file.ID)
if err != nil {
t.Fatal(err)
}
if deletedFile != nil {
t.Error("file should have been deleted")
} else {
t.Log("File was successfully deleted")
}
// Check file chunks after delete via the repository layer.
fileChunks, err = repos.FileChunks.GetByFileID(ctx, file.ID)
if err != nil {
t.Fatal(err)
}
t.Logf("File chunks after delete: %d", len(fileChunks))
// Manually check the database with a raw COUNT query to rule out any
// filtering done by the repository layer.
var count int
err = db.conn.QueryRow("SELECT COUNT(*) FROM file_chunks WHERE file_id = ?", file.ID).Scan(&count)
if err != nil {
t.Fatal(err)
}
t.Logf("Manual count of file_chunks for deleted file: %d", count)
if len(fileChunks) != 0 {
t.Errorf("expected 0 file chunks after cascade delete, got %d", len(fileChunks))
// List the remaining chunks to show exactly which rows survived.
for _, fc := range fileChunks {
t.Logf("Remaining chunk: file_id=%s, idx=%d, chunk=%s", fc.FileID, fc.Idx, fc.ChunkHash)
}
}
}

View File

@@ -16,16 +16,16 @@ func NewChunkFileRepository(db *DB) *ChunkFileRepository {
func (r *ChunkFileRepository) Create(ctx context.Context, tx *sql.Tx, cf *ChunkFile) error {
query := `
INSERT INTO chunk_files (chunk_hash, file_path, file_offset, length)
INSERT INTO chunk_files (chunk_hash, file_id, file_offset, length)
VALUES (?, ?, ?, ?)
ON CONFLICT(chunk_hash, file_path) DO NOTHING
ON CONFLICT(chunk_hash, file_id) DO NOTHING
`
var err error
if tx != nil {
_, err = tx.ExecContext(ctx, query, cf.ChunkHash, cf.FilePath, cf.FileOffset, cf.Length)
_, err = tx.ExecContext(ctx, query, cf.ChunkHash, cf.FileID, cf.FileOffset, cf.Length)
} else {
_, err = r.db.ExecWithLock(ctx, query, cf.ChunkHash, cf.FilePath, cf.FileOffset, cf.Length)
_, err = r.db.ExecWithLog(ctx, query, cf.ChunkHash, cf.FileID, cf.FileOffset, cf.Length)
}
if err != nil {
@@ -37,7 +37,7 @@ func (r *ChunkFileRepository) Create(ctx context.Context, tx *sql.Tx, cf *ChunkF
func (r *ChunkFileRepository) GetByChunkHash(ctx context.Context, chunkHash string) ([]*ChunkFile, error) {
query := `
SELECT chunk_hash, file_path, file_offset, length
SELECT chunk_hash, file_id, file_offset, length
FROM chunk_files
WHERE chunk_hash = ?
`
@@ -51,7 +51,7 @@ func (r *ChunkFileRepository) GetByChunkHash(ctx context.Context, chunkHash stri
var chunkFiles []*ChunkFile
for rows.Next() {
var cf ChunkFile
err := rows.Scan(&cf.ChunkHash, &cf.FilePath, &cf.FileOffset, &cf.Length)
err := rows.Scan(&cf.ChunkHash, &cf.FileID, &cf.FileOffset, &cf.Length)
if err != nil {
return nil, fmt.Errorf("scanning chunk file: %w", err)
}
@@ -63,9 +63,10 @@ func (r *ChunkFileRepository) GetByChunkHash(ctx context.Context, chunkHash stri
func (r *ChunkFileRepository) GetByFilePath(ctx context.Context, filePath string) ([]*ChunkFile, error) {
query := `
SELECT chunk_hash, file_path, file_offset, length
FROM chunk_files
WHERE file_path = ?
SELECT cf.chunk_hash, cf.file_id, cf.file_offset, cf.length
FROM chunk_files cf
JOIN files f ON cf.file_id = f.id
WHERE f.path = ?
`
rows, err := r.db.conn.QueryContext(ctx, query, filePath)
@@ -77,7 +78,34 @@ func (r *ChunkFileRepository) GetByFilePath(ctx context.Context, filePath string
var chunkFiles []*ChunkFile
for rows.Next() {
var cf ChunkFile
err := rows.Scan(&cf.ChunkHash, &cf.FilePath, &cf.FileOffset, &cf.Length)
err := rows.Scan(&cf.ChunkHash, &cf.FileID, &cf.FileOffset, &cf.Length)
if err != nil {
return nil, fmt.Errorf("scanning chunk file: %w", err)
}
chunkFiles = append(chunkFiles, &cf)
}
return chunkFiles, rows.Err()
}
// GetByFileID retrieves chunk files by file ID
func (r *ChunkFileRepository) GetByFileID(ctx context.Context, fileID string) ([]*ChunkFile, error) {
query := `
SELECT chunk_hash, file_id, file_offset, length
FROM chunk_files
WHERE file_id = ?
`
rows, err := r.db.conn.QueryContext(ctx, query, fileID)
if err != nil {
return nil, fmt.Errorf("querying chunk files: %w", err)
}
defer CloseRows(rows)
var chunkFiles []*ChunkFile
for rows.Next() {
var cf ChunkFile
err := rows.Scan(&cf.ChunkHash, &cf.FileID, &cf.FileOffset, &cf.Length)
if err != nil {
return nil, fmt.Errorf("scanning chunk file: %w", err)
}

View File

@@ -3,6 +3,7 @@ package database
import (
"context"
"testing"
"time"
)
func TestChunkFileRepository(t *testing.T) {
@@ -11,16 +12,49 @@ func TestChunkFileRepository(t *testing.T) {
ctx := context.Background()
repo := NewChunkFileRepository(db)
fileRepo := NewFileRepository(db)
// Create test files first
testTime := time.Now().Truncate(time.Second)
file1 := &File{
Path: "/file1.txt",
MTime: testTime,
CTime: testTime,
Size: 1024,
Mode: 0644,
UID: 1000,
GID: 1000,
LinkTarget: "",
}
err := fileRepo.Create(ctx, nil, file1)
if err != nil {
t.Fatalf("failed to create file1: %v", err)
}
file2 := &File{
Path: "/file2.txt",
MTime: testTime,
CTime: testTime,
Size: 1024,
Mode: 0644,
UID: 1000,
GID: 1000,
LinkTarget: "",
}
err = fileRepo.Create(ctx, nil, file2)
if err != nil {
t.Fatalf("failed to create file2: %v", err)
}
// Test Create
cf1 := &ChunkFile{
ChunkHash: "chunk1",
FilePath: "/file1.txt",
FileID: file1.ID,
FileOffset: 0,
Length: 1024,
}
err := repo.Create(ctx, nil, cf1)
err = repo.Create(ctx, nil, cf1)
if err != nil {
t.Fatalf("failed to create chunk file: %v", err)
}
@@ -28,7 +62,7 @@ func TestChunkFileRepository(t *testing.T) {
// Add same chunk in different file (deduplication scenario)
cf2 := &ChunkFile{
ChunkHash: "chunk1",
FilePath: "/file2.txt",
FileID: file2.ID,
FileOffset: 2048,
Length: 1024,
}
@@ -50,10 +84,10 @@ func TestChunkFileRepository(t *testing.T) {
foundFile1 := false
foundFile2 := false
for _, cf := range chunkFiles {
if cf.FilePath == "/file1.txt" && cf.FileOffset == 0 {
if cf.FileID == file1.ID && cf.FileOffset == 0 {
foundFile1 = true
}
if cf.FilePath == "/file2.txt" && cf.FileOffset == 2048 {
if cf.FileID == file2.ID && cf.FileOffset == 2048 {
foundFile2 = true
}
}
@@ -61,10 +95,10 @@ func TestChunkFileRepository(t *testing.T) {
t.Error("not all expected files found")
}
// Test GetByFilePath
chunkFiles, err = repo.GetByFilePath(ctx, "/file1.txt")
// Test GetByFileID
chunkFiles, err = repo.GetByFileID(ctx, file1.ID)
if err != nil {
t.Fatalf("failed to get chunks by file path: %v", err)
t.Fatalf("failed to get chunks by file ID: %v", err)
}
if len(chunkFiles) != 1 {
t.Errorf("expected 1 chunk for file, got %d", len(chunkFiles))
@@ -86,6 +120,23 @@ func TestChunkFileRepositoryComplexDeduplication(t *testing.T) {
ctx := context.Background()
repo := NewChunkFileRepository(db)
fileRepo := NewFileRepository(db)
// Create test files
testTime := time.Now().Truncate(time.Second)
file1 := &File{Path: "/file1.txt", MTime: testTime, CTime: testTime, Size: 3072, Mode: 0644, UID: 1000, GID: 1000}
file2 := &File{Path: "/file2.txt", MTime: testTime, CTime: testTime, Size: 3072, Mode: 0644, UID: 1000, GID: 1000}
file3 := &File{Path: "/file3.txt", MTime: testTime, CTime: testTime, Size: 2048, Mode: 0644, UID: 1000, GID: 1000}
if err := fileRepo.Create(ctx, nil, file1); err != nil {
t.Fatalf("failed to create file1: %v", err)
}
if err := fileRepo.Create(ctx, nil, file2); err != nil {
t.Fatalf("failed to create file2: %v", err)
}
if err := fileRepo.Create(ctx, nil, file3); err != nil {
t.Fatalf("failed to create file3: %v", err)
}
// Simulate a scenario where multiple files share chunks
// File1: chunk1, chunk2, chunk3
@@ -94,16 +145,16 @@ func TestChunkFileRepositoryComplexDeduplication(t *testing.T) {
chunkFiles := []ChunkFile{
// File1
{ChunkHash: "chunk1", FilePath: "/file1.txt", FileOffset: 0, Length: 1024},
{ChunkHash: "chunk2", FilePath: "/file1.txt", FileOffset: 1024, Length: 1024},
{ChunkHash: "chunk3", FilePath: "/file1.txt", FileOffset: 2048, Length: 1024},
{ChunkHash: "chunk1", FileID: file1.ID, FileOffset: 0, Length: 1024},
{ChunkHash: "chunk2", FileID: file1.ID, FileOffset: 1024, Length: 1024},
{ChunkHash: "chunk3", FileID: file1.ID, FileOffset: 2048, Length: 1024},
// File2
{ChunkHash: "chunk2", FilePath: "/file2.txt", FileOffset: 0, Length: 1024},
{ChunkHash: "chunk3", FilePath: "/file2.txt", FileOffset: 1024, Length: 1024},
{ChunkHash: "chunk4", FilePath: "/file2.txt", FileOffset: 2048, Length: 1024},
{ChunkHash: "chunk2", FileID: file2.ID, FileOffset: 0, Length: 1024},
{ChunkHash: "chunk3", FileID: file2.ID, FileOffset: 1024, Length: 1024},
{ChunkHash: "chunk4", FileID: file2.ID, FileOffset: 2048, Length: 1024},
// File3
{ChunkHash: "chunk1", FilePath: "/file3.txt", FileOffset: 0, Length: 1024},
{ChunkHash: "chunk4", FilePath: "/file3.txt", FileOffset: 1024, Length: 1024},
{ChunkHash: "chunk1", FileID: file3.ID, FileOffset: 0, Length: 1024},
{ChunkHash: "chunk4", FileID: file3.ID, FileOffset: 1024, Length: 1024},
}
for _, cf := range chunkFiles {
@@ -132,7 +183,7 @@ func TestChunkFileRepositoryComplexDeduplication(t *testing.T) {
}
// Test file2 chunks
chunks, err := repo.GetByFilePath(ctx, "/file2.txt")
chunks, err := repo.GetByFileID(ctx, file2.ID)
if err != nil {
t.Fatalf("failed to get chunks for file2: %v", err)
}

View File

@@ -4,6 +4,8 @@ import (
"context"
"database/sql"
"fmt"
"git.eeqj.de/sneak/vaultik/internal/log"
)
type ChunkRepository struct {
@@ -25,7 +27,7 @@ func (r *ChunkRepository) Create(ctx context.Context, tx *sql.Tx, chunk *Chunk)
if tx != nil {
_, err = tx.ExecContext(ctx, query, chunk.ChunkHash, chunk.SHA256, chunk.Size)
} else {
_, err = r.db.ExecWithLock(ctx, query, chunk.ChunkHash, chunk.SHA256, chunk.Size)
_, err = r.db.ExecWithLog(ctx, query, chunk.ChunkHash, chunk.SHA256, chunk.Size)
}
if err != nil {
@@ -139,3 +141,26 @@ func (r *ChunkRepository) ListUnpacked(ctx context.Context, limit int) ([]*Chunk
return chunks, rows.Err()
}
// DeleteOrphaned deletes chunks that are not referenced by any file.
// Orphans appear after files are removed from the index; this reclaims
// their rows in a single anti-join DELETE statement so no rows need to
// be loaded into Go. Safe to call when there is nothing to delete.
func (r *ChunkRepository) DeleteOrphaned(ctx context.Context) error {
query := `
DELETE FROM chunks
WHERE NOT EXISTS (
SELECT 1 FROM file_chunks
WHERE file_chunks.chunk_hash = chunks.chunk_hash
)
`
result, err := r.db.ExecWithLog(ctx, query)
if err != nil {
return fmt.Errorf("deleting orphaned chunks: %w", err)
}
// The RowsAffected error is deliberately ignored: the count is only
// used for debug logging and the DELETE itself already succeeded.
rowsAffected, _ := result.RowsAffected()
if rowsAffected > 0 {
log.Debug("Deleted orphaned chunks", "count", rowsAffected)
}
return nil
}

View File

@@ -1,84 +1,158 @@
// Package database provides the local SQLite index for Vaultik backup operations.
// The database tracks files, chunks, and their associations with blobs.
//
// Blobs in Vaultik are the final storage units uploaded to S3. Each blob is a
// large (up to 10GB) file containing many compressed and encrypted chunks from
// multiple source files. Blobs are content-addressed, meaning their filename
// is derived from their SHA256 hash after compression and encryption.
//
// The database does not support migrations. If the schema changes, delete
// the local database and perform a full backup to recreate it.
package database
import (
"context"
"database/sql"
_ "embed"
"fmt"
"os"
"strings"
"sync"
"git.eeqj.de/sneak/vaultik/internal/log"
_ "modernc.org/sqlite"
)
//go:embed schema.sql
var schemaSQL string
// DB represents the Vaultik local index database connection.
// It uses SQLite to track file metadata, content-defined chunks, and blob associations.
// The database enables incremental backups by detecting changed files and
// supports deduplication by tracking which chunks are already stored in blobs.
// Write operations are synchronized through a mutex to ensure thread safety.
type DB struct {
conn *sql.DB
writeLock sync.Mutex
conn *sql.DB
path string
}
// New creates a new database connection at the specified path.
// It automatically handles database recovery, creates the schema if needed,
// and configures SQLite with appropriate settings for performance and reliability.
// The database uses WAL mode for better concurrency and sets a busy timeout
// to handle concurrent access gracefully.
//
// If the database appears locked, it will attempt recovery by removing stale
// lock files and switching temporarily to TRUNCATE journal mode.
//
// New creates a new database connection at the specified path.
// It automatically handles recovery from stale locks, creates the schema if needed,
// and configures SQLite with WAL mode for better concurrency.
// The path parameter can be a file path for persistent storage or ":memory:"
// for an in-memory database (useful for testing).
func New(ctx context.Context, path string) (*DB, error) {
log.Debug("Opening database connection", "path", path)
// First, try to recover from any stale locks
if err := recoverDatabase(ctx, path); err != nil {
log.Warn("Failed to recover database", "error", err)
}
// First attempt with standard WAL mode
conn, err := sql.Open("sqlite", path+"?_journal_mode=WAL&_synchronous=NORMAL&_busy_timeout=10000&_locking_mode=NORMAL")
log.Debug("Attempting to open database with WAL mode", "path", path)
conn, err := sql.Open(
"sqlite",
path+"?_journal_mode=WAL&_synchronous=NORMAL&_busy_timeout=10000&_locking_mode=NORMAL&_foreign_keys=ON",
)
if err == nil {
// Set connection pool settings to ensure proper cleanup
conn.SetMaxOpenConns(1) // SQLite only supports one writer
// Set connection pool settings
// SQLite can handle multiple readers but only one writer at a time.
// Setting MaxOpenConns to 1 ensures all writes are serialized through
// a single connection, preventing SQLITE_BUSY errors.
conn.SetMaxOpenConns(1)
conn.SetMaxIdleConns(1)
if err := conn.PingContext(ctx); err == nil {
// Success on first try
db := &DB{conn: conn}
log.Debug("Database opened successfully with WAL mode", "path", path)
// Enable foreign keys explicitly
if _, err := conn.ExecContext(ctx, "PRAGMA foreign_keys = ON"); err != nil {
log.Warn("Failed to enable foreign keys", "error", err)
}
db := &DB{conn: conn, path: path}
if err := db.createSchema(ctx); err != nil {
_ = conn.Close()
return nil, fmt.Errorf("creating schema: %w", err)
}
return db, nil
}
log.Debug("Failed to ping database, closing connection", "path", path, "error", err)
_ = conn.Close()
}
// If first attempt failed, try with TRUNCATE mode to clear any locks
log.Info("Database appears locked, attempting recovery with TRUNCATE mode")
conn, err = sql.Open("sqlite", path+"?_journal_mode=TRUNCATE&_synchronous=NORMAL&_busy_timeout=10000")
log.Info(
"Database appears locked, attempting recovery with TRUNCATE mode",
"path", path,
)
conn, err = sql.Open(
"sqlite",
path+"?_journal_mode=TRUNCATE&_synchronous=NORMAL&_busy_timeout=10000&_foreign_keys=ON",
)
if err != nil {
return nil, fmt.Errorf("opening database in recovery mode: %w", err)
}
// Set connection pool settings
// SQLite can handle multiple readers but only one writer at a time.
// Setting MaxOpenConns to 1 ensures all writes are serialized through
// a single connection, preventing SQLITE_BUSY errors.
conn.SetMaxOpenConns(1)
conn.SetMaxIdleConns(1)
if err := conn.PingContext(ctx); err != nil {
log.Debug("Failed to ping database in recovery mode, closing", "path", path, "error", err)
_ = conn.Close()
return nil, fmt.Errorf("database still locked after recovery attempt: %w", err)
return nil, fmt.Errorf(
"database still locked after recovery attempt: %w",
err,
)
}
log.Debug("Database opened in TRUNCATE mode", "path", path)
// Switch back to WAL mode
log.Debug("Switching database back to WAL mode", "path", path)
if _, err := conn.ExecContext(ctx, "PRAGMA journal_mode=WAL"); err != nil {
log.Warn("Failed to switch back to WAL mode", "error", err)
log.Warn("Failed to switch back to WAL mode", "path", path, "error", err)
}
db := &DB{conn: conn}
// Ensure foreign keys are enabled
if _, err := conn.ExecContext(ctx, "PRAGMA foreign_keys=ON"); err != nil {
log.Warn("Failed to enable foreign keys", "path", path, "error", err)
}
db := &DB{conn: conn, path: path}
if err := db.createSchema(ctx); err != nil {
_ = conn.Close()
return nil, fmt.Errorf("creating schema: %w", err)
}
log.Debug("Database connection established successfully", "path", path)
return db, nil
}
// Close closes the database connection.
// It ensures all pending operations are completed before closing.
// Returns an error if the database connection cannot be closed properly.
func (db *DB) Close() error {
log.Debug("Closing database connection")
log.Debug("Closing database connection", "path", db.path)
if err := db.conn.Close(); err != nil {
log.Error("Failed to close database", "error", err)
log.Error("Failed to close database", "path", db.path, "error", err)
return fmt.Errorf("failed to close database: %w", err)
}
log.Debug("Database connection closed successfully")
log.Debug("Database connection closed successfully", "path", db.path)
return nil
}
@@ -138,148 +212,79 @@ func recoverDatabase(ctx context.Context, path string) error {
return nil
}
// Conn returns the underlying *sql.DB connection.
// This should be used sparingly and primarily for read operations.
// For write operations, prefer using the ExecWithLog method so that
// statements are captured by the SQL logging.
func (db *DB) Conn() *sql.DB {
return db.conn
}
func (db *DB) BeginTx(ctx context.Context, opts *sql.TxOptions) (*sql.Tx, error) {
// BeginTx starts a new database transaction with the given options.
// The caller is responsible for committing or rolling back the transaction.
// For write transactions, consider using the Repositories.WithTx method instead,
// which handles locking and rollback automatically.
func (db *DB) BeginTx(
ctx context.Context,
opts *sql.TxOptions,
) (*sql.Tx, error) {
// Thin pass-through to sql.DB.BeginTx; no extra bookkeeping is done here.
return db.conn.BeginTx(ctx, opts)
}
// LockForWrite acquires the write lock
func (db *DB) LockForWrite() {
log.Debug("Attempting to acquire write lock")
db.writeLock.Lock()
log.Debug("Write lock acquired")
}
// UnlockWrite releases the write lock
func (db *DB) UnlockWrite() {
log.Debug("Releasing write lock")
db.writeLock.Unlock()
log.Debug("Write lock released")
}
// ExecWithLock executes a write query with the write lock held
func (db *DB) ExecWithLock(ctx context.Context, query string, args ...interface{}) (sql.Result, error) {
db.writeLock.Lock()
defer db.writeLock.Unlock()
// Note: LockForWrite and UnlockWrite methods have been removed.
// SQLite handles its own locking internally, so explicit locking is not needed.
// ExecWithLog executes a write query with SQL logging.
// SQLite handles its own locking internally, so we just pass through to ExecContext.
// The query and args parameters follow the same format as sql.DB.ExecContext.
func (db *DB) ExecWithLog(
ctx context.Context,
query string,
args ...interface{},
) (sql.Result, error) {
// Log before executing so the statement is recorded even if it fails.
LogSQL("Execute", query, args...)
return db.conn.ExecContext(ctx, query, args...)
}
// QueryRowWithLock executes a write query that returns a row with the write lock held
func (db *DB) QueryRowWithLock(ctx context.Context, query string, args ...interface{}) *sql.Row {
db.writeLock.Lock()
defer db.writeLock.Unlock()
// QueryRowWithLog runs a query expected to return at most one row, emitting
// SQL debug logging first. It is intended for statements that both modify
// data and return values (e.g. INSERT ... RETURNING). SQLite performs its own
// locking internally; the query and args parameters follow the same format as
// sql.DB.QueryRowContext.
func (db *DB) QueryRowWithLog(ctx context.Context, query string, args ...interface{}) *sql.Row {
	LogSQL("QueryRow", query, args...)
	return db.conn.QueryRowContext(ctx, query, args...)
}
// createSchema creates all database tables, indexes, and constraints if they
// do not already exist. The DDL lives in the package-level schemaSQL constant
// so the schema is defined in exactly one place; the inline copy that was
// previously duplicated here has been removed, along with the resulting
// shadowed/dead `schema` local and the conflicting second `_, err :=`
// declaration that could not compile.
func (db *DB) createSchema(ctx context.Context) error {
	_, err := db.conn.ExecContext(ctx, schemaSQL)
	return err
}
// NewTestDB creates an in-memory SQLite database for testing purposes.
// The database is automatically initialized with the schema and is ready for use.
// Each call creates a new independent database instance.
// Callers should Close the returned DB when the test finishes.
func NewTestDB() (*DB, error) {
	return New(context.Background(), ":memory:")
}
// LogSQL logs SQL queries and their arguments when debug mode is enabled.
// Debug mode is activated by setting the GODEBUG environment variable to
// include "vaultik". This is useful for troubleshooting database operations
// and understanding query patterns.
//
// The operation parameter describes the type of SQL operation (e.g.
// "Execute", "QueryRow"). The query parameter is the SQL statement being
// executed, and args contains the bound query arguments.
//
// Fix: the body previously contained two log.Debug calls (an old single-line
// form alongside the reformatted multi-line form), causing every statement to
// be logged twice; only the single call remains.
func LogSQL(operation, query string, args ...interface{}) {
	if strings.Contains(os.Getenv("GODEBUG"), "vaultik") {
		log.Debug(
			"SQL "+operation,
			"query",
			strings.TrimSpace(query),
			"args",
			fmt.Sprintf("%v", args),
		)
	}
}

View File

@@ -67,21 +67,26 @@ func TestDatabaseConcurrentAccess(t *testing.T) {
}()
// Test concurrent writes
done := make(chan bool, 10)
type result struct {
index int
err error
}
results := make(chan result, 10)
for i := 0; i < 10; i++ {
go func(i int) {
_, err := db.ExecWithLock(ctx, "INSERT INTO chunks (chunk_hash, sha256, size) VALUES (?, ?, ?)",
_, err := db.ExecWithLog(ctx, "INSERT INTO chunks (chunk_hash, sha256, size) VALUES (?, ?, ?)",
fmt.Sprintf("hash%d", i), fmt.Sprintf("sha%d", i), i*1024)
if err != nil {
t.Errorf("concurrent insert failed: %v", err)
}
done <- true
results <- result{index: i, err: err}
}(i)
}
// Wait for all goroutines
// Wait for all goroutines and check results
for i := 0; i < 10; i++ {
<-done
r := <-results
if r.err != nil {
t.Fatalf("concurrent insert %d failed: %v", r.index, r.err)
}
}
// Verify all inserts succeeded

View File

@@ -16,16 +16,16 @@ func NewFileChunkRepository(db *DB) *FileChunkRepository {
func (r *FileChunkRepository) Create(ctx context.Context, tx *sql.Tx, fc *FileChunk) error {
query := `
INSERT INTO file_chunks (path, idx, chunk_hash)
INSERT INTO file_chunks (file_id, idx, chunk_hash)
VALUES (?, ?, ?)
ON CONFLICT(path, idx) DO NOTHING
ON CONFLICT(file_id, idx) DO NOTHING
`
var err error
if tx != nil {
_, err = tx.ExecContext(ctx, query, fc.Path, fc.Idx, fc.ChunkHash)
_, err = tx.ExecContext(ctx, query, fc.FileID, fc.Idx, fc.ChunkHash)
} else {
_, err = r.db.ExecWithLock(ctx, query, fc.Path, fc.Idx, fc.ChunkHash)
_, err = r.db.ExecWithLog(ctx, query, fc.FileID, fc.Idx, fc.ChunkHash)
}
if err != nil {
@@ -37,10 +37,11 @@ func (r *FileChunkRepository) Create(ctx context.Context, tx *sql.Tx, fc *FileCh
func (r *FileChunkRepository) GetByPath(ctx context.Context, path string) ([]*FileChunk, error) {
query := `
SELECT path, idx, chunk_hash
FROM file_chunks
WHERE path = ?
ORDER BY idx
SELECT fc.file_id, fc.idx, fc.chunk_hash
FROM file_chunks fc
JOIN files f ON fc.file_id = f.id
WHERE f.path = ?
ORDER BY fc.idx
`
rows, err := r.db.conn.QueryContext(ctx, query, path)
@@ -52,7 +53,35 @@ func (r *FileChunkRepository) GetByPath(ctx context.Context, path string) ([]*Fi
var fileChunks []*FileChunk
for rows.Next() {
var fc FileChunk
err := rows.Scan(&fc.Path, &fc.Idx, &fc.ChunkHash)
err := rows.Scan(&fc.FileID, &fc.Idx, &fc.ChunkHash)
if err != nil {
return nil, fmt.Errorf("scanning file chunk: %w", err)
}
fileChunks = append(fileChunks, &fc)
}
return fileChunks, rows.Err()
}
// GetByFileID retrieves file chunks by file ID
func (r *FileChunkRepository) GetByFileID(ctx context.Context, fileID string) ([]*FileChunk, error) {
query := `
SELECT file_id, idx, chunk_hash
FROM file_chunks
WHERE file_id = ?
ORDER BY idx
`
rows, err := r.db.conn.QueryContext(ctx, query, fileID)
if err != nil {
return nil, fmt.Errorf("querying file chunks: %w", err)
}
defer CloseRows(rows)
var fileChunks []*FileChunk
for rows.Next() {
var fc FileChunk
err := rows.Scan(&fc.FileID, &fc.Idx, &fc.ChunkHash)
if err != nil {
return nil, fmt.Errorf("scanning file chunk: %w", err)
}
@@ -65,10 +94,11 @@ func (r *FileChunkRepository) GetByPath(ctx context.Context, path string) ([]*Fi
// GetByPathTx retrieves file chunks within a transaction
func (r *FileChunkRepository) GetByPathTx(ctx context.Context, tx *sql.Tx, path string) ([]*FileChunk, error) {
query := `
SELECT path, idx, chunk_hash
FROM file_chunks
WHERE path = ?
ORDER BY idx
SELECT fc.file_id, fc.idx, fc.chunk_hash
FROM file_chunks fc
JOIN files f ON fc.file_id = f.id
WHERE f.path = ?
ORDER BY fc.idx
`
LogSQL("GetByPathTx", query, path)
@@ -81,7 +111,7 @@ func (r *FileChunkRepository) GetByPathTx(ctx context.Context, tx *sql.Tx, path
var fileChunks []*FileChunk
for rows.Next() {
var fc FileChunk
err := rows.Scan(&fc.Path, &fc.Idx, &fc.ChunkHash)
err := rows.Scan(&fc.FileID, &fc.Idx, &fc.ChunkHash)
if err != nil {
return nil, fmt.Errorf("scanning file chunk: %w", err)
}
@@ -93,13 +123,31 @@ func (r *FileChunkRepository) GetByPathTx(ctx context.Context, tx *sql.Tx, path
}
func (r *FileChunkRepository) DeleteByPath(ctx context.Context, tx *sql.Tx, path string) error {
query := `DELETE FROM file_chunks WHERE path = ?`
query := `DELETE FROM file_chunks WHERE file_id = (SELECT id FROM files WHERE path = ?)`
var err error
if tx != nil {
_, err = tx.ExecContext(ctx, query, path)
} else {
_, err = r.db.ExecWithLock(ctx, query, path)
_, err = r.db.ExecWithLog(ctx, query, path)
}
if err != nil {
return fmt.Errorf("deleting file chunks: %w", err)
}
return nil
}
// DeleteByFileID deletes all chunks for a file by its UUID
func (r *FileChunkRepository) DeleteByFileID(ctx context.Context, tx *sql.Tx, fileID string) error {
query := `DELETE FROM file_chunks WHERE file_id = ?`
var err error
if tx != nil {
_, err = tx.ExecContext(ctx, query, fileID)
} else {
_, err = r.db.ExecWithLog(ctx, query, fileID)
}
if err != nil {

View File

@@ -4,6 +4,7 @@ import (
"context"
"fmt"
"testing"
"time"
)
func TestFileChunkRepository(t *testing.T) {
@@ -12,22 +13,40 @@ func TestFileChunkRepository(t *testing.T) {
ctx := context.Background()
repo := NewFileChunkRepository(db)
fileRepo := NewFileRepository(db)
// Create test file first
testTime := time.Now().Truncate(time.Second)
file := &File{
Path: "/test/file.txt",
MTime: testTime,
CTime: testTime,
Size: 3072,
Mode: 0644,
UID: 1000,
GID: 1000,
LinkTarget: "",
}
err := fileRepo.Create(ctx, nil, file)
if err != nil {
t.Fatalf("failed to create file: %v", err)
}
// Test Create
fc1 := &FileChunk{
Path: "/test/file.txt",
FileID: file.ID,
Idx: 0,
ChunkHash: "chunk1",
}
err := repo.Create(ctx, nil, fc1)
err = repo.Create(ctx, nil, fc1)
if err != nil {
t.Fatalf("failed to create file chunk: %v", err)
}
// Add more chunks for the same file
fc2 := &FileChunk{
Path: "/test/file.txt",
FileID: file.ID,
Idx: 1,
ChunkHash: "chunk2",
}
@@ -37,7 +56,7 @@ func TestFileChunkRepository(t *testing.T) {
}
fc3 := &FileChunk{
Path: "/test/file.txt",
FileID: file.ID,
Idx: 2,
ChunkHash: "chunk3",
}
@@ -46,8 +65,8 @@ func TestFileChunkRepository(t *testing.T) {
t.Fatalf("failed to create third file chunk: %v", err)
}
// Test GetByPath
chunks, err := repo.GetByPath(ctx, "/test/file.txt")
// Test GetByFile
chunks, err := repo.GetByFile(ctx, "/test/file.txt")
if err != nil {
t.Fatalf("failed to get file chunks: %v", err)
}
@@ -68,13 +87,13 @@ func TestFileChunkRepository(t *testing.T) {
t.Fatalf("failed to create duplicate file chunk: %v", err)
}
// Test DeleteByPath
err = repo.DeleteByPath(ctx, nil, "/test/file.txt")
// Test DeleteByFileID
err = repo.DeleteByFileID(ctx, nil, file.ID)
if err != nil {
t.Fatalf("failed to delete file chunks: %v", err)
}
chunks, err = repo.GetByPath(ctx, "/test/file.txt")
chunks, err = repo.GetByFileID(ctx, file.ID)
if err != nil {
t.Fatalf("failed to get deleted file chunks: %v", err)
}
@@ -89,15 +108,38 @@ func TestFileChunkRepositoryMultipleFiles(t *testing.T) {
ctx := context.Background()
repo := NewFileChunkRepository(db)
fileRepo := NewFileRepository(db)
// Create test files
testTime := time.Now().Truncate(time.Second)
filePaths := []string{"/file1.txt", "/file2.txt", "/file3.txt"}
files := make([]*File, len(filePaths))
for i, path := range filePaths {
file := &File{
Path: path,
MTime: testTime,
CTime: testTime,
Size: 2048,
Mode: 0644,
UID: 1000,
GID: 1000,
LinkTarget: "",
}
err := fileRepo.Create(ctx, nil, file)
if err != nil {
t.Fatalf("failed to create file %s: %v", path, err)
}
files[i] = file
}
// Create chunks for multiple files
files := []string{"/file1.txt", "/file2.txt", "/file3.txt"}
for _, path := range files {
for i := 0; i < 2; i++ {
for i, file := range files {
for j := 0; j < 2; j++ {
fc := &FileChunk{
Path: path,
Idx: i,
ChunkHash: fmt.Sprintf("%s_chunk%d", path, i),
FileID: file.ID,
Idx: j,
ChunkHash: fmt.Sprintf("file%d_chunk%d", i, j),
}
err := repo.Create(ctx, nil, fc)
if err != nil {
@@ -107,13 +149,13 @@ func TestFileChunkRepositoryMultipleFiles(t *testing.T) {
}
// Verify each file has correct chunks
for _, path := range files {
chunks, err := repo.GetByPath(ctx, path)
for i, file := range files {
chunks, err := repo.GetByFileID(ctx, file.ID)
if err != nil {
t.Fatalf("failed to get chunks for %s: %v", path, err)
t.Fatalf("failed to get chunks for file %d: %v", i, err)
}
if len(chunks) != 2 {
t.Errorf("expected 2 chunks for %s, got %d", path, len(chunks))
t.Errorf("expected 2 chunks for file %d, got %d", i, len(chunks))
}
}
}

View File

@@ -5,6 +5,9 @@ import (
"database/sql"
"fmt"
"time"
"git.eeqj.de/sneak/vaultik/internal/log"
"github.com/google/uuid"
)
type FileRepository struct {
@@ -16,10 +19,16 @@ func NewFileRepository(db *DB) *FileRepository {
}
func (r *FileRepository) Create(ctx context.Context, tx *sql.Tx, file *File) error {
// Generate UUID if not provided
if file.ID == "" {
file.ID = uuid.New().String()
}
query := `
INSERT INTO files (path, mtime, ctime, size, mode, uid, gid, link_target)
VALUES (?, ?, ?, ?, ?, ?, ?, ?)
INSERT INTO files (id, path, mtime, ctime, size, mode, uid, gid, link_target)
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)
ON CONFLICT(path) DO UPDATE SET
id = excluded.id,
mtime = excluded.mtime,
ctime = excluded.ctime,
size = excluded.size,
@@ -27,14 +36,15 @@ func (r *FileRepository) Create(ctx context.Context, tx *sql.Tx, file *File) err
uid = excluded.uid,
gid = excluded.gid,
link_target = excluded.link_target
RETURNING id
`
var err error
if tx != nil {
LogSQL("Execute", query, file.Path, file.MTime.Unix(), file.CTime.Unix(), file.Size, file.Mode, file.UID, file.GID, file.LinkTarget)
_, err = tx.ExecContext(ctx, query, file.Path, file.MTime.Unix(), file.CTime.Unix(), file.Size, file.Mode, file.UID, file.GID, file.LinkTarget)
LogSQL("Execute", query, file.ID, file.Path, file.MTime.Unix(), file.CTime.Unix(), file.Size, file.Mode, file.UID, file.GID, file.LinkTarget)
err = tx.QueryRowContext(ctx, query, file.ID, file.Path, file.MTime.Unix(), file.CTime.Unix(), file.Size, file.Mode, file.UID, file.GID, file.LinkTarget).Scan(&file.ID)
} else {
_, err = r.db.ExecWithLock(ctx, query, file.Path, file.MTime.Unix(), file.CTime.Unix(), file.Size, file.Mode, file.UID, file.GID, file.LinkTarget)
err = r.db.QueryRowWithLog(ctx, query, file.ID, file.Path, file.MTime.Unix(), file.CTime.Unix(), file.Size, file.Mode, file.UID, file.GID, file.LinkTarget).Scan(&file.ID)
}
if err != nil {
@@ -46,7 +56,7 @@ func (r *FileRepository) Create(ctx context.Context, tx *sql.Tx, file *File) err
func (r *FileRepository) GetByPath(ctx context.Context, path string) (*File, error) {
query := `
SELECT path, mtime, ctime, size, mode, uid, gid, link_target
SELECT id, path, mtime, ctime, size, mode, uid, gid, link_target
FROM files
WHERE path = ?
`
@@ -56,6 +66,7 @@ func (r *FileRepository) GetByPath(ctx context.Context, path string) (*File, err
var linkTarget sql.NullString
err := r.db.conn.QueryRowContext(ctx, query, path).Scan(
&file.ID,
&file.Path,
&mtimeUnix,
&ctimeUnix,
@@ -73,8 +84,48 @@ func (r *FileRepository) GetByPath(ctx context.Context, path string) (*File, err
return nil, fmt.Errorf("querying file: %w", err)
}
file.MTime = time.Unix(mtimeUnix, 0)
file.CTime = time.Unix(ctimeUnix, 0)
file.MTime = time.Unix(mtimeUnix, 0).UTC()
file.CTime = time.Unix(ctimeUnix, 0).UTC()
if linkTarget.Valid {
file.LinkTarget = linkTarget.String
}
return &file, nil
}
// GetByID retrieves a file by its UUID
func (r *FileRepository) GetByID(ctx context.Context, id string) (*File, error) {
query := `
SELECT id, path, mtime, ctime, size, mode, uid, gid, link_target
FROM files
WHERE id = ?
`
var file File
var mtimeUnix, ctimeUnix int64
var linkTarget sql.NullString
err := r.db.conn.QueryRowContext(ctx, query, id).Scan(
&file.ID,
&file.Path,
&mtimeUnix,
&ctimeUnix,
&file.Size,
&file.Mode,
&file.UID,
&file.GID,
&linkTarget,
)
if err == sql.ErrNoRows {
return nil, nil
}
if err != nil {
return nil, fmt.Errorf("querying file: %w", err)
}
file.MTime = time.Unix(mtimeUnix, 0).UTC()
file.CTime = time.Unix(ctimeUnix, 0).UTC()
if linkTarget.Valid {
file.LinkTarget = linkTarget.String
}
@@ -84,7 +135,7 @@ func (r *FileRepository) GetByPath(ctx context.Context, path string) (*File, err
func (r *FileRepository) GetByPathTx(ctx context.Context, tx *sql.Tx, path string) (*File, error) {
query := `
SELECT path, mtime, ctime, size, mode, uid, gid, link_target
SELECT id, path, mtime, ctime, size, mode, uid, gid, link_target
FROM files
WHERE path = ?
`
@@ -95,6 +146,7 @@ func (r *FileRepository) GetByPathTx(ctx context.Context, tx *sql.Tx, path strin
LogSQL("GetByPathTx QueryRowContext", query, path)
err := tx.QueryRowContext(ctx, query, path).Scan(
&file.ID,
&file.Path,
&mtimeUnix,
&ctimeUnix,
@@ -113,8 +165,8 @@ func (r *FileRepository) GetByPathTx(ctx context.Context, tx *sql.Tx, path strin
return nil, fmt.Errorf("querying file: %w", err)
}
file.MTime = time.Unix(mtimeUnix, 0)
file.CTime = time.Unix(ctimeUnix, 0)
file.MTime = time.Unix(mtimeUnix, 0).UTC()
file.CTime = time.Unix(ctimeUnix, 0).UTC()
if linkTarget.Valid {
file.LinkTarget = linkTarget.String
}
@@ -124,7 +176,7 @@ func (r *FileRepository) GetByPathTx(ctx context.Context, tx *sql.Tx, path strin
func (r *FileRepository) ListModifiedSince(ctx context.Context, since time.Time) ([]*File, error) {
query := `
SELECT path, mtime, ctime, size, mode, uid, gid, link_target
SELECT id, path, mtime, ctime, size, mode, uid, gid, link_target
FROM files
WHERE mtime >= ?
ORDER BY path
@@ -143,6 +195,7 @@ func (r *FileRepository) ListModifiedSince(ctx context.Context, since time.Time)
var linkTarget sql.NullString
err := rows.Scan(
&file.ID,
&file.Path,
&mtimeUnix,
&ctimeUnix,
@@ -175,7 +228,25 @@ func (r *FileRepository) Delete(ctx context.Context, tx *sql.Tx, path string) er
if tx != nil {
_, err = tx.ExecContext(ctx, query, path)
} else {
_, err = r.db.ExecWithLock(ctx, query, path)
_, err = r.db.ExecWithLog(ctx, query, path)
}
if err != nil {
return fmt.Errorf("deleting file: %w", err)
}
return nil
}
// DeleteByID deletes a file by its UUID
func (r *FileRepository) DeleteByID(ctx context.Context, tx *sql.Tx, id string) error {
query := `DELETE FROM files WHERE id = ?`
var err error
if tx != nil {
_, err = tx.ExecContext(ctx, query, id)
} else {
_, err = r.db.ExecWithLog(ctx, query, id)
}
if err != nil {
@@ -187,7 +258,7 @@ func (r *FileRepository) Delete(ctx context.Context, tx *sql.Tx, path string) er
func (r *FileRepository) ListByPrefix(ctx context.Context, prefix string) ([]*File, error) {
query := `
SELECT path, mtime, ctime, size, mode, uid, gid, link_target
SELECT id, path, mtime, ctime, size, mode, uid, gid, link_target
FROM files
WHERE path LIKE ? || '%'
ORDER BY path
@@ -206,6 +277,7 @@ func (r *FileRepository) ListByPrefix(ctx context.Context, prefix string) ([]*Fi
var linkTarget sql.NullString
err := rows.Scan(
&file.ID,
&file.Path,
&mtimeUnix,
&ctimeUnix,
@@ -230,3 +302,26 @@ func (r *FileRepository) ListByPrefix(ctx context.Context, prefix string) ([]*Fi
return files, rows.Err()
}
// DeleteOrphaned deletes files that are not referenced by any snapshot.
// A file is considered orphaned when no snapshot_files row points at its id.
// The number of deleted rows is logged at debug level when non-zero.
func (r *FileRepository) DeleteOrphaned(ctx context.Context) error {
	query := `
		DELETE FROM files
		WHERE NOT EXISTS (
			SELECT 1 FROM snapshot_files
			WHERE snapshot_files.file_id = files.id
		)
	`
	result, err := r.db.ExecWithLog(ctx, query)
	if err != nil {
		return fmt.Errorf("deleting orphaned files: %w", err)
	}
	// Previously the RowsAffected error was silently discarded; the count is
	// informational only, so a failure here is logged rather than returned.
	rowsAffected, raErr := result.RowsAffected()
	if raErr != nil {
		log.Debug("could not determine orphaned file delete count", "error", raErr)
	} else if rowsAffected > 0 {
		log.Debug("Deleted orphaned files", "count", rowsAffected)
	}
	return nil
}

View File

@@ -1,9 +1,15 @@
// Package database provides data models and repository interfaces for the Vaultik backup system.
// It includes types for files, chunks, blobs, snapshots, and their relationships.
package database
import "time"
// File represents a file record in the database
// File represents a file or directory in the backup system.
// It stores metadata about files including timestamps, permissions, ownership,
// and symlink targets. This information is used to restore files with their
// original attributes.
type File struct {
ID string // UUID primary key
Path string
MTime time.Time
CTime time.Time
@@ -14,37 +20,52 @@ type File struct {
LinkTarget string // empty for regular files, target path for symlinks
}
// IsSymlink returns true if this file is a symbolic link
// IsSymlink returns true if this file is a symbolic link.
// A file is considered a symlink if it has a non-empty LinkTarget.
func (f *File) IsSymlink() bool {
return f.LinkTarget != ""
}
// FileChunk represents the mapping between files and chunks
// FileChunk represents the mapping between files and their constituent chunks.
// Large files are split into multiple chunks for efficient deduplication and storage.
// The Idx field maintains the order of chunks within a file.
type FileChunk struct {
Path string
FileID string
Idx int
ChunkHash string
}
// Chunk represents a data chunk in the deduplication system.
// Files are split into chunks which are content-addressed by their hash.
// The ChunkHash is used for deduplication, while SHA256 provides
// an additional verification hash.
type Chunk struct {
	ChunkHash string // content-addressing hash; primary key used for deduplication
	SHA256    string // secondary verification hash of the chunk contents
	Size      int64  // chunk size in bytes
}
// Blob represents a blob record in the database
// Blob represents a blob record in the database.
// A blob is Vaultik's final storage unit - a large file (up to 10GB) containing
// many compressed and encrypted chunks from multiple source files.
// Blobs are content-addressed, meaning their filename in S3 is derived from
// the SHA256 hash of their compressed and encrypted content.
// The blob creation process is: chunks are accumulated -> compressed with zstd
// -> encrypted with age -> hashed -> uploaded to S3 with the hash as filename.
type Blob struct {
ID string
Hash string // Can be empty until blob is finalized
CreatedTS time.Time
FinishedTS *time.Time // nil if not yet finalized
UncompressedSize int64
CompressedSize int64
UploadedTS *time.Time // nil if not yet uploaded
ID string // UUID assigned when blob creation starts
Hash string // SHA256 of final compressed+encrypted content (empty until finalized)
CreatedTS time.Time // When blob creation started
FinishedTS *time.Time // When blob was finalized (nil if still packing)
UncompressedSize int64 // Total size of raw chunks before compression
CompressedSize int64 // Size after compression and encryption
UploadedTS *time.Time // When blob was uploaded to S3 (nil if not uploaded)
}
// BlobChunk represents the mapping between blobs and chunks
// BlobChunk represents the mapping between blobs and the chunks they contain.
// This allows tracking which chunks are stored in which blobs, along with
// their position and size within the blob. The offset and length fields
// enable extracting specific chunks from a blob without processing the entire blob.
type BlobChunk struct {
BlobID string
ChunkHash string
@@ -52,27 +73,34 @@ type BlobChunk struct {
Length int64
}
// ChunkFile represents the reverse mapping of chunks to files
// ChunkFile represents the reverse mapping showing which files contain a specific chunk.
// This is used during deduplication to identify all files that share a chunk,
// which is important for garbage collection and integrity verification.
type ChunkFile struct {
ChunkHash string
FilePath string
FileID string
FileOffset int64
Length int64
}
// Snapshot represents a snapshot record in the database
type Snapshot struct {
ID string
Hostname string
VaultikVersion string
StartedAt time.Time
CompletedAt *time.Time // nil if still in progress
FileCount int64
ChunkCount int64
BlobCount int64
TotalSize int64 // Total size of all referenced files
BlobSize int64 // Total size of all referenced blobs (compressed and encrypted)
CompressionRatio float64 // Compression ratio (BlobSize / TotalSize)
ID string
Hostname string
VaultikVersion string
VaultikGitRevision string
StartedAt time.Time
CompletedAt *time.Time // nil if still in progress
FileCount int64
ChunkCount int64
BlobCount int64
TotalSize int64 // Total size of all referenced files
BlobSize int64 // Total size of all referenced blobs (compressed and encrypted)
BlobUncompressedSize int64 // Total uncompressed size of all referenced blobs
CompressionRatio float64 // Compression ratio (BlobSize / BlobUncompressedSize)
CompressionLevel int // Compression level used for this snapshot
UploadBytes int64 // Total bytes uploaded during this snapshot
UploadDurationMs int64 // Total milliseconds spent uploading to S3
}
// IsComplete returns true if the snapshot has completed
@@ -83,7 +111,7 @@ func (s *Snapshot) IsComplete() bool {
// SnapshotFile represents the mapping between snapshots and files
type SnapshotFile struct {
SnapshotID string
FilePath string
FileID string
}
// SnapshotBlob represents the mapping between snapshots and blobs

View File

@@ -6,6 +6,9 @@ import (
"fmt"
)
// Repositories provides access to all database repositories.
// It serves as a centralized access point for all database operations
// and manages transaction coordination across repositories.
type Repositories struct {
db *DB
Files *FileRepository
@@ -18,6 +21,8 @@ type Repositories struct {
Uploads *UploadRepository
}
// NewRepositories creates a new Repositories instance with all repository types.
// Each repository shares the same database connection for coordinated transactions.
func NewRepositories(db *DB) *Repositories {
return &Repositories{
db: db,
@@ -32,17 +37,16 @@ func NewRepositories(db *DB) *Repositories {
}
}
// TxFunc is a function that executes within a database transaction.
// The transaction is automatically committed if the function returns nil,
// or rolled back if it returns an error.
type TxFunc func(ctx context.Context, tx *sql.Tx) error
// WithTx executes a function within a write transaction.
// SQLite handles its own locking internally, so no explicit locking is needed.
// The transaction is automatically committed on success or rolled back on error.
// This method should be used for all write operations to ensure atomicity.
func (r *Repositories) WithTx(ctx context.Context, fn TxFunc) error {
// Acquire write lock for the entire transaction
LogSQL("WithTx", "Acquiring write lock", "")
r.db.LockForWrite()
defer func() {
LogSQL("WithTx", "Releasing write lock", "")
r.db.UnlockWrite()
}()
LogSQL("WithTx", "Beginning transaction", "")
tx, err := r.db.BeginTx(ctx, nil)
if err != nil {
@@ -71,6 +75,10 @@ func (r *Repositories) WithTx(ctx context.Context, fn TxFunc) error {
return tx.Commit()
}
// WithReadTx executes a function within a read-only transaction.
// Read transactions can run concurrently with other read transactions
// but will be blocked by write transactions. The transaction is
// automatically committed on success or rolled back on error.
func (r *Repositories) WithReadTx(ctx context.Context, fn TxFunc) error {
opts := &sql.TxOptions{
ReadOnly: true,

View File

@@ -52,7 +52,7 @@ func TestRepositoriesTransaction(t *testing.T) {
// Map chunks to file
fc1 := &FileChunk{
Path: file.Path,
FileID: file.ID,
Idx: 0,
ChunkHash: chunk1.ChunkHash,
}
@@ -61,7 +61,7 @@ func TestRepositoriesTransaction(t *testing.T) {
}
fc2 := &FileChunk{
Path: file.Path,
FileID: file.ID,
Idx: 1,
ChunkHash: chunk2.ChunkHash,
}
@@ -116,7 +116,7 @@ func TestRepositoriesTransaction(t *testing.T) {
t.Error("expected file after transaction")
}
chunks, err := repos.FileChunks.GetByPath(ctx, "/test/tx_file.txt")
chunks, err := repos.FileChunks.GetByFile(ctx, "/test/tx_file.txt")
if err != nil {
t.Fatalf("failed to get file chunks: %v", err)
}
@@ -218,7 +218,7 @@ func TestRepositoriesReadTransaction(t *testing.T) {
var retrievedFile *File
err = repos.WithReadTx(ctx, func(ctx context.Context, tx *sql.Tx) error {
var err error
retrievedFile, err = repos.Files.GetByPath(ctx, "/test/read_file.txt")
retrievedFile, err = repos.Files.GetByPathTx(ctx, tx, "/test/read_file.txt")
if err != nil {
return err
}

View File

@@ -0,0 +1,876 @@
package database
import (
"context"
"database/sql"
"fmt"
"testing"
"time"
)
// TestFileRepositoryUUIDGeneration tests that files get unique UUIDs
func TestFileRepositoryUUIDGeneration(t *testing.T) {
	db, cleanup := setupTestDB(t)
	defer cleanup()

	ctx := context.Background()
	repo := NewFileRepository(db)

	now := time.Now().Truncate(time.Second)
	testFiles := []*File{
		{Path: "/file1.txt", MTime: now, CTime: now, Size: 1024, Mode: 0644, UID: 1000, GID: 1000},
		{Path: "/file2.txt", MTime: now, CTime: now, Size: 2048, Mode: 0644, UID: 1000, GID: 1000},
	}

	seen := make(map[string]struct{}, len(testFiles))
	for _, f := range testFiles {
		if err := repo.Create(ctx, nil, f); err != nil {
			t.Fatalf("failed to create file: %v", err)
		}

		// Every created file must receive a generated, unique UUID.
		if f.ID == "" {
			t.Error("file ID was not generated")
		}
		if _, dup := seen[f.ID]; dup {
			t.Errorf("duplicate UUID generated: %s", f.ID)
		}
		seen[f.ID] = struct{}{}
	}
}
// TestFileRepositoryGetByID tests retrieving files by UUID.
// It covers both the found case and the not-found case, where GetByID
// returns (nil, nil).
func TestFileRepositoryGetByID(t *testing.T) {
	db, cleanup := setupTestDB(t)
	defer cleanup()

	ctx := context.Background()
	repo := NewFileRepository(db)

	// Create a file
	file := &File{
		Path:  "/test.txt",
		MTime: time.Now().Truncate(time.Second),
		CTime: time.Now().Truncate(time.Second),
		Size:  1024,
		Mode:  0644,
		UID:   1000,
		GID:   1000,
	}
	err := repo.Create(ctx, nil, file)
	if err != nil {
		t.Fatalf("failed to create file: %v", err)
	}

	// Retrieve by ID
	retrieved, err := repo.GetByID(ctx, file.ID)
	if err != nil {
		t.Fatalf("failed to get file by ID: %v", err)
	}
	// GetByID returns (nil, nil) when no row matches; guard before
	// dereferencing so a missing row fails the test instead of panicking.
	if retrieved == nil {
		t.Fatal("expected file, got nil")
	}
	if retrieved.ID != file.ID {
		t.Errorf("ID mismatch: expected %s, got %s", file.ID, retrieved.ID)
	}
	if retrieved.Path != file.Path {
		t.Errorf("Path mismatch: expected %s, got %s", file.Path, retrieved.Path)
	}

	// Test non-existent ID
	nonExistent, err := repo.GetByID(ctx, "non-existent-uuid")
	if err != nil {
		t.Fatalf("GetByID should not return error for non-existent ID: %v", err)
	}
	if nonExistent != nil {
		t.Error("expected nil for non-existent ID")
	}
}
// TestOrphanedFileCleanup tests the cleanup of orphaned files
func TestOrphanedFileCleanup(t *testing.T) {
	db, cleanup := setupTestDB(t)
	defer cleanup()

	ctx := context.Background()
	repos := NewRepositories(db)

	ts := time.Now().Truncate(time.Second)
	newFile := func(path string, size int64) *File {
		return &File{
			Path:  path,
			MTime: ts,
			CTime: ts,
			Size:  size,
			Mode:  0644,
			UID:   1000,
			GID:   1000,
		}
	}

	// One file stays unreferenced; the other gets attached to a snapshot.
	orphan := newFile("/orphaned.txt", 1024)
	kept := newFile("/referenced.txt", 2048)

	if err := repos.Files.Create(ctx, nil, orphan); err != nil {
		t.Fatalf("failed to create file1: %v", err)
	}
	if err := repos.Files.Create(ctx, nil, kept); err != nil {
		t.Fatalf("failed to create file2: %v", err)
	}

	// Create a snapshot that references only the second file.
	snapshot := &Snapshot{
		ID:        "test-snapshot",
		Hostname:  "test-host",
		StartedAt: time.Now(),
	}
	if err := repos.Snapshots.Create(ctx, nil, snapshot); err != nil {
		t.Fatalf("failed to create snapshot: %v", err)
	}
	if err := repos.Snapshots.AddFileByID(ctx, nil, snapshot.ID, kept.ID); err != nil {
		t.Fatalf("failed to add file to snapshot: %v", err)
	}

	// Run the orphaned-file cleanup.
	if err := repos.Files.DeleteOrphaned(ctx); err != nil {
		t.Fatalf("failed to delete orphaned files: %v", err)
	}

	// The unreferenced file must be gone.
	got, err := repos.Files.GetByID(ctx, orphan.ID)
	if err != nil {
		t.Fatalf("error getting file: %v", err)
	}
	if got != nil {
		t.Error("orphaned file should have been deleted")
	}

	// The snapshot-referenced file must survive.
	got, err = repos.Files.GetByID(ctx, kept.ID)
	if err != nil {
		t.Fatalf("error getting file: %v", err)
	}
	if got == nil {
		t.Error("referenced file should not have been deleted")
	}
}
// TestOrphanedChunkCleanup tests the cleanup of orphaned chunks
func TestOrphanedChunkCleanup(t *testing.T) {
	db, cleanup := setupTestDB(t)
	defer cleanup()

	ctx := context.Background()
	repos := NewRepositories(db)

	// Two chunks: one stays unreferenced, the other gets a file mapping.
	orphaned := &Chunk{
		ChunkHash: "orphaned-chunk",
		SHA256:    "orphaned-chunk-sha",
		Size:      1024,
	}
	referenced := &Chunk{
		ChunkHash: "referenced-chunk",
		SHA256:    "referenced-chunk-sha",
		Size:      1024,
	}
	if err := repos.Chunks.Create(ctx, nil, orphaned); err != nil {
		t.Fatalf("failed to create chunk1: %v", err)
	}
	if err := repos.Chunks.Create(ctx, nil, referenced); err != nil {
		t.Fatalf("failed to create chunk2: %v", err)
	}

	// A file whose single mapping points at the referenced chunk only.
	file := &File{
		Path:  "/test.txt",
		MTime: time.Now().Truncate(time.Second),
		CTime: time.Now().Truncate(time.Second),
		Size:  1024,
		Mode:  0644,
		UID:   1000,
		GID:   1000,
	}
	if err := repos.Files.Create(ctx, nil, file); err != nil {
		t.Fatalf("failed to create file: %v", err)
	}
	mapping := &FileChunk{
		FileID:    file.ID,
		Idx:       0,
		ChunkHash: referenced.ChunkHash,
	}
	if err := repos.FileChunks.Create(ctx, nil, mapping); err != nil {
		t.Fatalf("failed to create file chunk: %v", err)
	}

	// Cleanup must remove only the chunk no file references.
	if err := repos.Chunks.DeleteOrphaned(ctx); err != nil {
		t.Fatalf("failed to delete orphaned chunks: %v", err)
	}

	gone, err := repos.Chunks.GetByHash(ctx, orphaned.ChunkHash)
	if err != nil {
		t.Fatalf("error getting chunk: %v", err)
	}
	if gone != nil {
		t.Error("orphaned chunk should have been deleted")
	}

	kept, err := repos.Chunks.GetByHash(ctx, referenced.ChunkHash)
	if err != nil {
		t.Fatalf("error getting chunk: %v", err)
	}
	if kept == nil {
		t.Error("referenced chunk should not have been deleted")
	}
}
// TestOrphanedBlobCleanup tests the cleanup of orphaned blobs
func TestOrphanedBlobCleanup(t *testing.T) {
	db, cleanup := setupTestDB(t)
	defer cleanup()

	ctx := context.Background()
	repos := NewRepositories(db)

	// Two blobs: one left unreferenced, one attached to a snapshot.
	orphaned := &Blob{
		ID:        "orphaned-blob-id",
		Hash:      "orphaned-blob",
		CreatedTS: time.Now().Truncate(time.Second),
	}
	referenced := &Blob{
		ID:        "referenced-blob-id",
		Hash:      "referenced-blob",
		CreatedTS: time.Now().Truncate(time.Second),
	}
	if err := repos.Blobs.Create(ctx, nil, orphaned); err != nil {
		t.Fatalf("failed to create blob1: %v", err)
	}
	if err := repos.Blobs.Create(ctx, nil, referenced); err != nil {
		t.Fatalf("failed to create blob2: %v", err)
	}

	// Only the second blob is referenced by a snapshot.
	snap := &Snapshot{
		ID:        "test-snapshot",
		Hostname:  "test-host",
		StartedAt: time.Now(),
	}
	if err := repos.Snapshots.Create(ctx, nil, snap); err != nil {
		t.Fatalf("failed to create snapshot: %v", err)
	}
	if err := repos.Snapshots.AddBlob(ctx, nil, snap.ID, referenced.ID, referenced.Hash); err != nil {
		t.Fatalf("failed to add blob to snapshot: %v", err)
	}

	// Cleanup must remove only the unreferenced blob.
	if err := repos.Blobs.DeleteOrphaned(ctx); err != nil {
		t.Fatalf("failed to delete orphaned blobs: %v", err)
	}

	gone, err := repos.Blobs.GetByID(ctx, orphaned.ID)
	if err != nil {
		t.Fatalf("error getting blob: %v", err)
	}
	if gone != nil {
		t.Error("orphaned blob should have been deleted")
	}

	kept, err := repos.Blobs.GetByID(ctx, referenced.ID)
	if err != nil {
		t.Fatalf("error getting blob: %v", err)
	}
	if kept == nil {
		t.Error("referenced blob should not have been deleted")
	}
}
// TestFileChunkRepositoryWithUUIDs tests file-chunk relationships with UUIDs
func TestFileChunkRepositoryWithUUIDs(t *testing.T) {
	db, cleanup := setupTestDB(t)
	defer cleanup()

	ctx := context.Background()
	repos := NewRepositories(db)

	// One file split into three sequential chunks.
	file := &File{
		Path:  "/test.txt",
		MTime: time.Now().Truncate(time.Second),
		CTime: time.Now().Truncate(time.Second),
		Size:  3072,
		Mode:  0644,
		UID:   1000,
		GID:   1000,
	}
	if err := repos.Files.Create(ctx, nil, file); err != nil {
		t.Fatalf("failed to create file: %v", err)
	}

	hashes := []string{"chunk1", "chunk2", "chunk3"}
	for idx, hash := range hashes {
		if err := repos.Chunks.Create(ctx, nil, &Chunk{
			ChunkHash: hash,
			SHA256:    fmt.Sprintf("sha-%s", hash),
			Size:      1024,
		}); err != nil {
			t.Fatalf("failed to create chunk: %v", err)
		}
		if err := repos.FileChunks.Create(ctx, nil, &FileChunk{
			FileID:    file.ID,
			Idx:       idx,
			ChunkHash: hash,
		}); err != nil {
			t.Fatalf("failed to create file chunk: %v", err)
		}
	}

	// All three mappings should come back for the file's UUID.
	fileChunks, err := repos.FileChunks.GetByFileID(ctx, file.ID)
	if err != nil {
		t.Fatalf("failed to get file chunks: %v", err)
	}
	if len(fileChunks) != 3 {
		t.Errorf("expected 3 chunks, got %d", len(fileChunks))
	}

	// Deleting by file UUID should remove every mapping.
	if err := repos.FileChunks.DeleteByFileID(ctx, nil, file.ID); err != nil {
		t.Fatalf("failed to delete file chunks: %v", err)
	}
	fileChunks, err = repos.FileChunks.GetByFileID(ctx, file.ID)
	if err != nil {
		t.Fatalf("failed to get file chunks after delete: %v", err)
	}
	if len(fileChunks) != 0 {
		t.Errorf("expected 0 chunks after delete, got %d", len(fileChunks))
	}
}
// TestChunkFileRepositoryWithUUIDs tests chunk-file relationships with UUIDs
func TestChunkFileRepositoryWithUUIDs(t *testing.T) {
	db, cleanup := setupTestDB(t)
	defer cleanup()

	ctx := context.Background()
	repos := NewRepositories(db)

	// Two files that will share a single deduplicated chunk.
	fileA := &File{
		Path:  "/file1.txt",
		MTime: time.Now().Truncate(time.Second),
		CTime: time.Now().Truncate(time.Second),
		Size:  1024,
		Mode:  0644,
		UID:   1000,
		GID:   1000,
	}
	fileB := &File{
		Path:  "/file2.txt",
		MTime: time.Now().Truncate(time.Second),
		CTime: time.Now().Truncate(time.Second),
		Size:  1024,
		Mode:  0644,
		UID:   1000,
		GID:   1000,
	}
	if err := repos.Files.Create(ctx, nil, fileA); err != nil {
		t.Fatalf("failed to create file1: %v", err)
	}
	if err := repos.Files.Create(ctx, nil, fileB); err != nil {
		t.Fatalf("failed to create file2: %v", err)
	}

	// A single chunk that appears in both files (deduplication).
	shared := &Chunk{
		ChunkHash: "shared-chunk",
		SHA256:    "shared-chunk-sha",
		Size:      1024,
	}
	if err := repos.Chunks.Create(ctx, nil, shared); err != nil {
		t.Fatalf("failed to create chunk: %v", err)
	}

	// Map the chunk into both files at different offsets.
	first := &ChunkFile{
		ChunkHash:  shared.ChunkHash,
		FileID:     fileA.ID,
		FileOffset: 0,
		Length:     1024,
	}
	second := &ChunkFile{
		ChunkHash:  shared.ChunkHash,
		FileID:     fileB.ID,
		FileOffset: 512,
		Length:     1024,
	}
	if err := repos.ChunkFiles.Create(ctx, nil, first); err != nil {
		t.Fatalf("failed to create chunk file 1: %v", err)
	}
	if err := repos.ChunkFiles.Create(ctx, nil, second); err != nil {
		t.Fatalf("failed to create chunk file 2: %v", err)
	}

	// Lookup by chunk hash should report both containing files.
	byChunk, err := repos.ChunkFiles.GetByChunkHash(ctx, shared.ChunkHash)
	if err != nil {
		t.Fatalf("failed to get chunk files: %v", err)
	}
	if len(byChunk) != 2 {
		t.Errorf("expected 2 files for chunk, got %d", len(byChunk))
	}

	// Lookup by file UUID should report the single shared chunk.
	byFile, err := repos.ChunkFiles.GetByFileID(ctx, fileA.ID)
	if err != nil {
		t.Fatalf("failed to get chunks by file ID: %v", err)
	}
	if len(byFile) != 1 {
		t.Errorf("expected 1 chunk for file, got %d", len(byFile))
	}
}
// TestSnapshotRepositoryExtendedFields tests snapshot with version and git revision
func TestSnapshotRepositoryExtendedFields(t *testing.T) {
	db, cleanup := setupTestDB(t)
	defer cleanup()

	ctx := context.Background()
	repo := NewSnapshotRepository(db)

	// A snapshot populated with every extended metadata field.
	want := &Snapshot{
		ID:                   "test-20250722-120000Z",
		Hostname:             "test-host",
		VaultikVersion:       "0.0.1",
		VaultikGitRevision:   "abc123def456",
		StartedAt:            time.Now(),
		CompletedAt:          nil,
		FileCount:            100,
		ChunkCount:           200,
		BlobCount:            50,
		TotalSize:            1024 * 1024,
		BlobSize:             512 * 1024,
		BlobUncompressedSize: 1024 * 1024,
		CompressionLevel:     6,
		CompressionRatio:     2.0,
		UploadDurationMs:     5000,
	}
	if err := repo.Create(ctx, nil, want); err != nil {
		t.Fatalf("failed to create snapshot: %v", err)
	}

	// Round-trip and verify the extended fields survived storage.
	got, err := repo.GetByID(ctx, want.ID)
	if err != nil {
		t.Fatalf("failed to get snapshot: %v", err)
	}
	if got.VaultikVersion != want.VaultikVersion {
		t.Errorf("version mismatch: expected %s, got %s", want.VaultikVersion, got.VaultikVersion)
	}
	if got.VaultikGitRevision != want.VaultikGitRevision {
		t.Errorf("git revision mismatch: expected %s, got %s", want.VaultikGitRevision, got.VaultikGitRevision)
	}
	if got.CompressionLevel != want.CompressionLevel {
		t.Errorf("compression level mismatch: expected %d, got %d", want.CompressionLevel, got.CompressionLevel)
	}
	if got.BlobUncompressedSize != want.BlobUncompressedSize {
		t.Errorf("uncompressed size mismatch: expected %d, got %d", want.BlobUncompressedSize, got.BlobUncompressedSize)
	}
	if got.UploadDurationMs != want.UploadDurationMs {
		t.Errorf("upload duration mismatch: expected %d, got %d", want.UploadDurationMs, got.UploadDurationMs)
	}
}
// TestComplexOrphanedDataScenario tests a complex scenario with multiple relationships
//
// Two snapshots share one file; after deleting snapshot1, DeleteOrphaned must
// remove exactly the files that no surviving snapshot references.
func TestComplexOrphanedDataScenario(t *testing.T) {
	db, cleanup := setupTestDB(t)
	defer cleanup()
	ctx := context.Background()
	repos := NewRepositories(db)
	// Create snapshots
	snapshot1 := &Snapshot{
		ID:        "snapshot1",
		Hostname:  "host1",
		StartedAt: time.Now(),
	}
	snapshot2 := &Snapshot{
		ID:        "snapshot2",
		Hostname:  "host1",
		StartedAt: time.Now(),
	}
	err := repos.Snapshots.Create(ctx, nil, snapshot1)
	if err != nil {
		t.Fatalf("failed to create snapshot1: %v", err)
	}
	err = repos.Snapshots.Create(ctx, nil, snapshot2)
	if err != nil {
		t.Fatalf("failed to create snapshot2: %v", err)
	}
	// Create files
	// IDs are assigned by Create (UUID primary keys), so files[i].ID is
	// usable immediately after each Create call below.
	files := make([]*File, 3)
	for i := range files {
		files[i] = &File{
			Path:  fmt.Sprintf("/file%d.txt", i),
			MTime: time.Now().Truncate(time.Second),
			CTime: time.Now().Truncate(time.Second),
			Size:  1024,
			Mode:  0644,
			UID:   1000,
			GID:   1000,
		}
		err = repos.Files.Create(ctx, nil, files[i])
		if err != nil {
			t.Fatalf("failed to create file%d: %v", i, err)
		}
	}
	// Add files to snapshots
	// Snapshot1: file0, file1
	// Snapshot2: file1, file2
	// file0: only in snapshot1
	// file1: in both snapshots
	// file2: only in snapshot2
	err = repos.Snapshots.AddFileByID(ctx, nil, snapshot1.ID, files[0].ID)
	if err != nil {
		t.Fatal(err)
	}
	err = repos.Snapshots.AddFileByID(ctx, nil, snapshot1.ID, files[1].ID)
	if err != nil {
		t.Fatal(err)
	}
	err = repos.Snapshots.AddFileByID(ctx, nil, snapshot2.ID, files[1].ID)
	if err != nil {
		t.Fatal(err)
	}
	err = repos.Snapshots.AddFileByID(ctx, nil, snapshot2.ID, files[2].ID)
	if err != nil {
		t.Fatal(err)
	}
	// Delete snapshot1
	// Membership rows are removed first, then the snapshot row itself.
	err = repos.Snapshots.DeleteSnapshotFiles(ctx, snapshot1.ID)
	if err != nil {
		t.Fatal(err)
	}
	err = repos.Snapshots.Delete(ctx, snapshot1.ID)
	if err != nil {
		t.Fatal(err)
	}
	// Run orphaned cleanup
	err = repos.Files.DeleteOrphaned(ctx)
	if err != nil {
		t.Fatal(err)
	}
	// Check results
	// file0 should be deleted (only in deleted snapshot)
	file0, err := repos.Files.GetByID(ctx, files[0].ID)
	if err != nil {
		t.Fatalf("error getting file0: %v", err)
	}
	if file0 != nil {
		t.Error("file0 should have been deleted")
	}
	// file1 should exist (still in snapshot2)
	file1, err := repos.Files.GetByID(ctx, files[1].ID)
	if err != nil {
		t.Fatalf("error getting file1: %v", err)
	}
	if file1 == nil {
		t.Error("file1 should still exist")
	}
	// file2 should exist (still in snapshot2)
	file2, err := repos.Files.GetByID(ctx, files[2].ID)
	if err != nil {
		t.Fatalf("error getting file2: %v", err)
	}
	if file2 == nil {
		t.Error("file2 should still exist")
	}
}
// TestCascadeDelete tests that cascade deletes work properly
func TestCascadeDelete(t *testing.T) {
	db, cleanup := setupTestDB(t)
	defer cleanup()

	ctx := context.Background()
	repos := NewRepositories(db)

	// A file with three chunk mappings hanging off it.
	file := &File{
		Path:  "/cascade-test.txt",
		MTime: time.Now().Truncate(time.Second),
		CTime: time.Now().Truncate(time.Second),
		Size:  1024,
		Mode:  0644,
		UID:   1000,
		GID:   1000,
	}
	if err := repos.Files.Create(ctx, nil, file); err != nil {
		t.Fatalf("failed to create file: %v", err)
	}
	for idx := 0; idx < 3; idx++ {
		hash := fmt.Sprintf("cascade-chunk-%d", idx)
		if err := repos.Chunks.Create(ctx, nil, &Chunk{
			ChunkHash: hash,
			SHA256:    fmt.Sprintf("cascade-sha-%d", idx),
			Size:      1024,
		}); err != nil {
			t.Fatalf("failed to create chunk: %v", err)
		}
		if err := repos.FileChunks.Create(ctx, nil, &FileChunk{
			FileID:    file.ID,
			Idx:       idx,
			ChunkHash: hash,
		}); err != nil {
			t.Fatalf("failed to create file chunk: %v", err)
		}
	}

	// Sanity check: the mappings exist before the delete.
	fileChunks, err := repos.FileChunks.GetByFileID(ctx, file.ID)
	if err != nil {
		t.Fatal(err)
	}
	if len(fileChunks) != 3 {
		t.Errorf("expected 3 file chunks, got %d", len(fileChunks))
	}

	// Deleting the file must cascade to its file_chunks rows.
	if err := repos.Files.DeleteByID(ctx, nil, file.ID); err != nil {
		t.Fatalf("failed to delete file: %v", err)
	}
	fileChunks, err = repos.FileChunks.GetByFileID(ctx, file.ID)
	if err != nil {
		t.Fatal(err)
	}
	if len(fileChunks) != 0 {
		t.Errorf("expected 0 file chunks after cascade delete, got %d", len(fileChunks))
	}
}
// TestTransactionIsolation tests that transactions properly isolate changes
func TestTransactionIsolation(t *testing.T) {
	db, cleanup := setupTestDB(t)
	defer cleanup()

	ctx := context.Background()
	repos := NewRepositories(db)

	// Create a file inside a transaction, then force a rollback by
	// returning an error from the transaction body.
	txErr := repos.WithTx(ctx, func(ctx context.Context, tx *sql.Tx) error {
		f := &File{
			Path:  "/tx-test.txt",
			MTime: time.Now().Truncate(time.Second),
			CTime: time.Now().Truncate(time.Second),
			Size:  1024,
			Mode:  0644,
			UID:   1000,
			GID:   1000,
		}
		if err := repos.Files.Create(ctx, tx, f); err != nil {
			return err
		}
		// Querying inside the tx would require a tx-aware GetByPath; for
		// now we only verify that rollback discards the insert.
		return fmt.Errorf("intentional rollback")
	})
	if txErr == nil {
		t.Fatal("expected error from transaction")
	}

	// After rollback the file must not be visible outside the transaction.
	files, err := repos.Files.ListByPrefix(ctx, "/tx-test")
	if err != nil {
		t.Fatal(err)
	}
	if len(files) != 0 {
		t.Error("file should not exist after rollback")
	}
}
// TestConcurrentOrphanedCleanup tests that concurrent cleanup operations don't interfere
//
// Creates 20 files, references only the even-numbered ones from a snapshot,
// then runs DeleteOrphaned from three goroutines at once. With the SQLite
// busy timeout set, all three calls must succeed, and exactly the
// odd-numbered (unreferenced) files must be gone afterwards.
func TestConcurrentOrphanedCleanup(t *testing.T) {
	db, cleanup := setupTestDB(t)
	defer cleanup()
	ctx := context.Background()
	repos := NewRepositories(db)
	// Set a 5-second busy timeout so concurrent writers wait for the lock
	// instead of failing immediately with SQLITE_BUSY.
	if _, err := db.conn.Exec("PRAGMA busy_timeout = 5000"); err != nil {
		t.Fatalf("failed to set busy timeout: %v", err)
	}
	// Create a snapshot
	snapshot := &Snapshot{
		ID:        "concurrent-test",
		Hostname:  "test-host",
		StartedAt: time.Now(),
	}
	err := repos.Snapshots.Create(ctx, nil, snapshot)
	if err != nil {
		t.Fatal(err)
	}
	// Create many files; only even-numbered ones are referenced by the
	// snapshot, so the odd-numbered half is orphaned.
	for i := 0; i < 20; i++ {
		file := &File{
			Path:  fmt.Sprintf("/concurrent-%d.txt", i),
			MTime: time.Now().Truncate(time.Second),
			CTime: time.Now().Truncate(time.Second),
			Size:  1024,
			Mode:  0644,
			UID:   1000,
			GID:   1000,
		}
		err = repos.Files.Create(ctx, nil, file)
		if err != nil {
			t.Fatal(err)
		}
		if i%2 == 0 {
			err = repos.Snapshots.AddFileByID(ctx, nil, snapshot.ID, file.ID)
			if err != nil {
				t.Fatal(err)
			}
		}
	}
	// Run multiple cleanup operations concurrently. With the busy timeout
	// in place every call is expected to succeed.
	done := make(chan error, 3)
	for i := 0; i < 3; i++ {
		go func() {
			done <- repos.Files.DeleteOrphaned(ctx)
		}()
	}
	// Wait for all to complete
	for i := 0; i < 3; i++ {
		if err := <-done; err != nil {
			t.Errorf("cleanup %d failed: %v", i, err)
		}
	}
	// Verify correct files were deleted: 10 even-numbered files remain.
	files, err := repos.Files.ListByPrefix(ctx, "/concurrent-")
	if err != nil {
		t.Fatal(err)
	}
	if len(files) != 10 {
		t.Errorf("expected 10 files remaining, got %d", len(files))
	}
	// Verify all remaining files are even-numbered.
	for _, file := range files {
		var num int
		// BUG FIX: a parse failure was previously only logged, leaving num
		// at its zero value (0, which is even) so a malformed path silently
		// passed the parity check. Fail the test and skip the check instead.
		if _, err := fmt.Sscanf(file.Path, "/concurrent-%d.txt", &num); err != nil {
			t.Errorf("failed to parse file number from %s: %v", file.Path, err)
			continue
		}
		if num%2 != 0 {
			t.Errorf("odd-numbered file %s should have been deleted", file.Path)
		}
	}
}

View File

@@ -0,0 +1,165 @@
package database
import (
"context"
"testing"
"time"
)
// TestOrphanedFileCleanupDebug tests orphaned file cleanup with debug output
//
// Same scenario as TestOrphanedFileCleanup, but instrumented with t.Logf and
// direct SQL queries against db.conn so the snapshot_files/files state can be
// inspected at every step when the cleanup misbehaves.
func TestOrphanedFileCleanupDebug(t *testing.T) {
	db, cleanup := setupTestDB(t)
	defer cleanup()
	ctx := context.Background()
	repos := NewRepositories(db)
	// Create files
	file1 := &File{
		Path:  "/orphaned.txt",
		MTime: time.Now().Truncate(time.Second),
		CTime: time.Now().Truncate(time.Second),
		Size:  1024,
		Mode:  0644,
		UID:   1000,
		GID:   1000,
	}
	file2 := &File{
		Path:  "/referenced.txt",
		MTime: time.Now().Truncate(time.Second),
		CTime: time.Now().Truncate(time.Second),
		Size:  2048,
		Mode:  0644,
		UID:   1000,
		GID:   1000,
	}
	err := repos.Files.Create(ctx, nil, file1)
	if err != nil {
		t.Fatalf("failed to create file1: %v", err)
	}
	t.Logf("Created file1 with ID: %s", file1.ID)
	err = repos.Files.Create(ctx, nil, file2)
	if err != nil {
		t.Fatalf("failed to create file2: %v", err)
	}
	t.Logf("Created file2 with ID: %s", file2.ID)
	// Create a snapshot and reference only file2
	snapshot := &Snapshot{
		ID:        "test-snapshot",
		Hostname:  "test-host",
		StartedAt: time.Now(),
	}
	err = repos.Snapshots.Create(ctx, nil, snapshot)
	if err != nil {
		t.Fatalf("failed to create snapshot: %v", err)
	}
	t.Logf("Created snapshot: %s", snapshot.ID)
	// Check snapshot_files before adding (expected: 0 rows)
	var count int
	err = db.conn.QueryRow("SELECT COUNT(*) FROM snapshot_files").Scan(&count)
	if err != nil {
		t.Fatal(err)
	}
	t.Logf("snapshot_files count before add: %d", count)
	// Add file2 to snapshot
	err = repos.Snapshots.AddFileByID(ctx, nil, snapshot.ID, file2.ID)
	if err != nil {
		t.Fatalf("failed to add file to snapshot: %v", err)
	}
	t.Logf("Added file2 to snapshot")
	// Check snapshot_files after adding (expected: 1 row)
	err = db.conn.QueryRow("SELECT COUNT(*) FROM snapshot_files").Scan(&count)
	if err != nil {
		t.Fatal(err)
	}
	t.Logf("snapshot_files count after add: %d", count)
	// Check which files are referenced
	rows, err := db.conn.Query("SELECT file_id FROM snapshot_files")
	if err != nil {
		t.Fatal(err)
	}
	defer func() {
		if err := rows.Close(); err != nil {
			t.Logf("failed to close rows: %v", err)
		}
	}()
	t.Log("Files in snapshot_files:")
	for rows.Next() {
		var fileID string
		if err := rows.Scan(&fileID); err != nil {
			t.Fatal(err)
		}
		t.Logf(" - %s", fileID)
	}
	// Check files before cleanup (expected: 2)
	err = db.conn.QueryRow("SELECT COUNT(*) FROM files").Scan(&count)
	if err != nil {
		t.Fatal(err)
	}
	t.Logf("Files count before cleanup: %d", count)
	// Run orphaned cleanup
	err = repos.Files.DeleteOrphaned(ctx)
	if err != nil {
		t.Fatalf("failed to delete orphaned files: %v", err)
	}
	t.Log("Ran orphaned cleanup")
	// Check files after cleanup (expected: 1)
	err = db.conn.QueryRow("SELECT COUNT(*) FROM files").Scan(&count)
	if err != nil {
		t.Fatal(err)
	}
	t.Logf("Files count after cleanup: %d", count)
	// List remaining files
	files, err := repos.Files.ListByPrefix(ctx, "/")
	if err != nil {
		t.Fatal(err)
	}
	t.Log("Remaining files:")
	for _, f := range files {
		t.Logf(" - ID: %s, Path: %s", f.ID, f.Path)
	}
	// Check that orphaned file is gone
	orphanedFile, err := repos.Files.GetByID(ctx, file1.ID)
	if err != nil {
		t.Fatalf("error getting file: %v", err)
	}
	if orphanedFile != nil {
		t.Error("orphaned file should have been deleted")
		// Let's check why it wasn't deleted: if file1 shows up in
		// snapshot_files, the AddFileByID/cleanup query is at fault.
		var exists bool
		err = db.conn.QueryRow(`
			SELECT EXISTS(
				SELECT 1 FROM snapshot_files
				WHERE file_id = ?
			)`, file1.ID).Scan(&exists)
		if err != nil {
			t.Fatal(err)
		}
		t.Logf("File1 exists in snapshot_files: %v", exists)
	} else {
		t.Log("Orphaned file was correctly deleted")
	}
	// Check that referenced file still exists
	referencedFile, err := repos.Files.GetByID(ctx, file2.ID)
	if err != nil {
		t.Fatalf("error getting file: %v", err)
	}
	if referencedFile == nil {
		t.Error("referenced file should not have been deleted")
	} else {
		t.Log("Referenced file correctly remains")
	}
}

View File

@@ -0,0 +1,543 @@
package database
import (
"context"
"fmt"
"strings"
"testing"
"time"
)
// TestFileRepositoryEdgeCases tests edge cases for file repository
//
// Table-driven creation test covering unusual but valid File values: empty
// path, very long path, non-ASCII path, zero-size file, and a symlink with a
// link target. None are expected to error; errMsg is reserved for future
// cases that should fail with a specific message (currently unused).
func TestFileRepositoryEdgeCases(t *testing.T) {
	db, cleanup := setupTestDB(t)
	defer cleanup()
	ctx := context.Background()
	repo := NewFileRepository(db)
	tests := []struct {
		name    string
		file    *File
		wantErr bool
		errMsg  string
	}{
		{
			name: "empty path",
			file: &File{
				Path:  "",
				MTime: time.Now(),
				CTime: time.Now(),
				Size:  1024,
				Mode:  0644,
				UID:   1000,
				GID:   1000,
			},
			wantErr: false, // Empty strings are allowed, only NULL is not allowed
		},
		{
			name: "very long path",
			file: &File{
				Path:  "/" + strings.Repeat("a", 4096),
				MTime: time.Now(),
				CTime: time.Now(),
				Size:  1024,
				Mode:  0644,
				UID:   1000,
				GID:   1000,
			},
			wantErr: false,
		},
		{
			name: "path with special characters",
			file: &File{
				Path:  "/test/file with spaces and 特殊文字.txt",
				MTime: time.Now(),
				CTime: time.Now(),
				Size:  1024,
				Mode:  0644,
				UID:   1000,
				GID:   1000,
			},
			wantErr: false,
		},
		{
			name: "zero size file",
			file: &File{
				Path:  "/empty.txt",
				MTime: time.Now(),
				CTime: time.Now(),
				Size:  0,
				Mode:  0644,
				UID:   1000,
				GID:   1000,
			},
			wantErr: false,
		},
		{
			name: "symlink with target",
			file: &File{
				Path:  "/link",
				MTime: time.Now(),
				CTime: time.Now(),
				Size:  0,
				Mode:  0777 | 0120000, // symlink mode
				UID:   1000,
				GID:   1000,
				LinkTarget: "/target",
			},
			wantErr: false,
		},
	}
	for i, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			// Add a unique suffix to paths to avoid UNIQUE constraint violations
			// (this mutates the shared test-case struct, which is fine because
			// each case is only run once).
			if tt.file.Path != "" {
				tt.file.Path = fmt.Sprintf("%s_%d_%d", tt.file.Path, i, time.Now().UnixNano())
			}
			err := repo.Create(ctx, nil, tt.file)
			if (err != nil) != tt.wantErr {
				t.Errorf("Create() error = %v, wantErr %v", err, tt.wantErr)
			}
			if err != nil && tt.errMsg != "" && !strings.Contains(err.Error(), tt.errMsg) {
				t.Errorf("Create() error = %v, want error containing %q", err, tt.errMsg)
			}
		})
	}
}
// TestDuplicateHandling tests handling of duplicate entries
//
// Verifies the repositories' conflict semantics: files upsert on duplicate
// path, while chunks and file-chunk mappings are idempotent on duplicate
// create (ON CONFLICT DO NOTHING per the comments below).
func TestDuplicateHandling(t *testing.T) {
	db, cleanup := setupTestDB(t)
	defer cleanup()
	ctx := context.Background()
	repos := NewRepositories(db)
	// Test duplicate file paths - Create uses UPSERT logic
	t.Run("duplicate file paths", func(t *testing.T) {
		file1 := &File{
			Path:  "/duplicate.txt",
			MTime: time.Now(),
			CTime: time.Now(),
			Size:  1024,
			Mode:  0644,
			UID:   1000,
			GID:   1000,
		}
		file2 := &File{
			Path:  "/duplicate.txt", // Same path
			MTime: time.Now().Add(time.Hour),
			CTime: time.Now().Add(time.Hour),
			Size:  2048,
			Mode:  0644,
			UID:   1000,
			GID:   1000,
		}
		err := repos.Files.Create(ctx, nil, file1)
		if err != nil {
			t.Fatalf("failed to create file1: %v", err)
		}
		originalID := file1.ID
		// Create with same path should update the existing record (UPSERT behavior)
		err = repos.Files.Create(ctx, nil, file2)
		if err != nil {
			t.Fatalf("failed to create file2: %v", err)
		}
		// Verify the file was updated, not duplicated
		retrievedFile, err := repos.Files.GetByPath(ctx, "/duplicate.txt")
		if err != nil {
			t.Fatalf("failed to retrieve file: %v", err)
		}
		// The file should have been updated with file2's data
		if retrievedFile.Size != 2048 {
			t.Errorf("expected size 2048, got %d", retrievedFile.Size)
		}
		// ID might be different due to the UPSERT
		// NOTE(review): this compares against file2.ID, assuming Create set
		// file2.ID during the upsert — confirm against the repository impl.
		if retrievedFile.ID != file2.ID {
			t.Logf("File ID changed from %s to %s during upsert", originalID, retrievedFile.ID)
		}
	})
	// Test duplicate chunk hashes
	t.Run("duplicate chunk hashes", func(t *testing.T) {
		chunk := &Chunk{
			ChunkHash: "duplicate-chunk",
			SHA256:    "duplicate-sha",
			Size:      1024,
		}
		err := repos.Chunks.Create(ctx, nil, chunk)
		if err != nil {
			t.Fatalf("failed to create chunk: %v", err)
		}
		// Creating the same chunk again should be idempotent (ON CONFLICT DO NOTHING)
		err = repos.Chunks.Create(ctx, nil, chunk)
		if err != nil {
			t.Errorf("duplicate chunk creation should be idempotent, got error: %v", err)
		}
	})
	// Test duplicate file-chunk mappings
	t.Run("duplicate file-chunk mappings", func(t *testing.T) {
		file := &File{
			Path:  "/test-dup-fc.txt",
			MTime: time.Now(),
			CTime: time.Now(),
			Size:  1024,
			Mode:  0644,
			UID:   1000,
			GID:   1000,
		}
		err := repos.Files.Create(ctx, nil, file)
		if err != nil {
			t.Fatal(err)
		}
		chunk := &Chunk{
			ChunkHash: "test-chunk-dup",
			SHA256:    "test-sha-dup",
			Size:      1024,
		}
		err = repos.Chunks.Create(ctx, nil, chunk)
		if err != nil {
			t.Fatal(err)
		}
		fc := &FileChunk{
			FileID:    file.ID,
			Idx:       0,
			ChunkHash: chunk.ChunkHash,
		}
		err = repos.FileChunks.Create(ctx, nil, fc)
		if err != nil {
			t.Fatal(err)
		}
		// Creating the same mapping again should be idempotent
		err = repos.FileChunks.Create(ctx, nil, fc)
		if err != nil {
			t.Error("file-chunk creation should be idempotent")
		}
	})
}
// TestNullHandling tests handling of NULL values
func TestNullHandling(t *testing.T) {
	db, cleanup := setupTestDB(t)
	defer cleanup()

	ctx := context.Background()
	repos := NewRepositories(db)

	// A regular file's empty link target round-trips as the empty string.
	t.Run("file without link target", func(t *testing.T) {
		f := &File{
			Path:       "/regular.txt",
			MTime:      time.Now(),
			CTime:      time.Now(),
			Size:       1024,
			Mode:       0644,
			UID:        1000,
			GID:        1000,
			LinkTarget: "", // Should be stored as NULL
		}
		if err := repos.Files.Create(ctx, nil, f); err != nil {
			t.Fatal(err)
		}
		got, err := repos.Files.GetByID(ctx, f.ID)
		if err != nil {
			t.Fatal(err)
		}
		if got.LinkTarget != "" {
			t.Errorf("expected empty link target, got %q", got.LinkTarget)
		}
	})

	// A snapshot that has not finished keeps a nil CompletedAt.
	t.Run("incomplete snapshot", func(t *testing.T) {
		snap := &Snapshot{
			ID:          "incomplete-test",
			Hostname:    "test-host",
			StartedAt:   time.Now(),
			CompletedAt: nil, // Should remain NULL until completed
		}
		if err := repos.Snapshots.Create(ctx, nil, snap); err != nil {
			t.Fatal(err)
		}
		got, err := repos.Snapshots.GetByID(ctx, snap.ID)
		if err != nil {
			t.Fatal(err)
		}
		if got.CompletedAt != nil {
			t.Error("expected nil CompletedAt for incomplete snapshot")
		}
	})

	// A blob that has not been uploaded keeps a nil UploadedTS.
	t.Run("blob not uploaded", func(t *testing.T) {
		b := &Blob{
			ID:         "not-uploaded",
			Hash:       "test-hash",
			CreatedTS:  time.Now(),
			UploadedTS: nil, // Not uploaded yet
		}
		if err := repos.Blobs.Create(ctx, nil, b); err != nil {
			t.Fatal(err)
		}
		got, err := repos.Blobs.GetByID(ctx, b.ID)
		if err != nil {
			t.Fatal(err)
		}
		if got.UploadedTS != nil {
			t.Error("expected nil UploadedTS for non-uploaded blob")
		}
	})
}
// TestLargeDatasets tests operations with large amounts of data
//
// Creates 1000 files (half referenced by a snapshot), then measures bulk
// creation, prefix listing, and orphaned-cleanup timings via t.Logf. Skipped
// under -short because of the row volume.
func TestLargeDatasets(t *testing.T) {
	if testing.Short() {
		t.Skip("skipping large dataset test in short mode")
	}
	db, cleanup := setupTestDB(t)
	defer cleanup()
	ctx := context.Background()
	repos := NewRepositories(db)
	// Create a snapshot
	snapshot := &Snapshot{
		ID:        "large-dataset-test",
		Hostname:  "test-host",
		StartedAt: time.Now(),
	}
	err := repos.Snapshots.Create(ctx, nil, snapshot)
	if err != nil {
		t.Fatal(err)
	}
	// Create many files
	const fileCount = 1000
	fileIDs := make([]string, fileCount)
	t.Run("create many files", func(t *testing.T) {
		start := time.Now()
		for i := 0; i < fileCount; i++ {
			file := &File{
				Path:  fmt.Sprintf("/large/file%05d.txt", i),
				MTime: time.Now(),
				CTime: time.Now(),
				Size:  int64(i * 1024),
				Mode:  0644,
				UID:   uint32(1000 + (i % 10)),
				GID:   uint32(1000 + (i % 10)),
			}
			err := repos.Files.Create(ctx, nil, file)
			if err != nil {
				t.Fatalf("failed to create file %d: %v", i, err)
			}
			fileIDs[i] = file.ID
			// Add half to snapshot (even indices only), so the other half
			// becomes orphaned for the cleanup subtest below.
			if i%2 == 0 {
				err = repos.Snapshots.AddFileByID(ctx, nil, snapshot.ID, file.ID)
				if err != nil {
					t.Fatal(err)
				}
			}
		}
		t.Logf("Created %d files in %v", fileCount, time.Since(start))
	})
	// Test ListByPrefix performance
	t.Run("list by prefix performance", func(t *testing.T) {
		start := time.Now()
		files, err := repos.Files.ListByPrefix(ctx, "/large/")
		if err != nil {
			t.Fatal(err)
		}
		if len(files) != fileCount {
			t.Errorf("expected %d files, got %d", fileCount, len(files))
		}
		t.Logf("Listed %d files in %v", len(files), time.Since(start))
	})
	// Test orphaned cleanup performance
	t.Run("orphaned cleanup performance", func(t *testing.T) {
		start := time.Now()
		err := repos.Files.DeleteOrphaned(ctx)
		if err != nil {
			t.Fatal(err)
		}
		t.Logf("Cleaned up orphaned files in %v", time.Since(start))
		// Verify correct number remain (only the snapshot-referenced half)
		files, err := repos.Files.ListByPrefix(ctx, "/large/")
		if err != nil {
			t.Fatal(err)
		}
		if len(files) != fileCount/2 {
			t.Errorf("expected %d files after cleanup, got %d", fileCount/2, len(files))
		}
	})
}
// TestErrorPropagation tests that errors are properly propagated
//
// Lookups for missing rows must return (nil, nil) rather than an error,
// while constraint violations must surface as inspectable errors.
func TestErrorPropagation(t *testing.T) {
	db, cleanup := setupTestDB(t)
	defer cleanup()
	ctx := context.Background()
	repos := NewRepositories(db)
	// Test GetByID with non-existent ID
	t.Run("GetByID non-existent", func(t *testing.T) {
		file, err := repos.Files.GetByID(ctx, "non-existent-uuid")
		if err != nil {
			t.Errorf("GetByID should not return error for non-existent ID, got: %v", err)
		}
		if file != nil {
			t.Error("expected nil file for non-existent ID")
		}
	})
	// Test GetByPath with non-existent path
	t.Run("GetByPath non-existent", func(t *testing.T) {
		file, err := repos.Files.GetByPath(ctx, "/non/existent/path.txt")
		if err != nil {
			t.Errorf("GetByPath should not return error for non-existent path, got: %v", err)
		}
		if file != nil {
			t.Error("expected nil file for non-existent path")
		}
	})
	// Test invalid foreign key reference
	t.Run("invalid foreign key", func(t *testing.T) {
		fc := &FileChunk{
			FileID:    "non-existent-file-id",
			Idx:       0,
			ChunkHash: "some-chunk",
		}
		err := repos.FileChunks.Create(ctx, nil, fc)
		if err == nil {
			// BUG FIX: this was t.Error, which does not stop the subtest,
			// so err.Error() below would panic with a nil dereference.
			t.Fatal("expected error for invalid foreign key")
		}
		if !strings.Contains(err.Error(), "FOREIGN KEY") {
			t.Errorf("expected foreign key error, got: %v", err)
		}
	})
}
// TestQueryInjection tests that the system is safe from SQL injection
//
// Inserts hostile strings as file paths and verifies the files table
// survives, proving the repository uses parameterized queries rather than
// string-built SQL.
func TestQueryInjection(t *testing.T) {
	db, cleanup := setupTestDB(t)
	defer cleanup()
	ctx := context.Background()
	repos := NewRepositories(db)
	// Test various injection attempts
	injectionTests := []string{
		"'; DROP TABLE files; --",
		"' OR '1'='1",
		"'; DELETE FROM files WHERE '1'='1'; --",
		`test'); DROP TABLE files; --`,
	}
	for i, injection := range injectionTests {
		// BUG FIX: all subtests previously shared the name "injection attempt",
		// which the testing package deduplicates (#01, #02, ...), making -run
		// targeting and failure attribution unreliable. Use unique names.
		t.Run(fmt.Sprintf("injection_%d", i), func(t *testing.T) {
			// Try injection in file path
			file := &File{
				Path:  injection,
				MTime: time.Now(),
				CTime: time.Now(),
				Size:  1024,
				Mode:  0644,
				UID:   1000,
				GID:   1000,
			}
			_ = repos.Files.Create(ctx, nil, file)
			// Should either succeed (treating as normal string) or fail with constraint
			// but should NOT execute the injected SQL
			// Verify tables still exist
			var count int
			err := db.conn.QueryRow("SELECT COUNT(*) FROM files").Scan(&count)
			if err != nil {
				t.Fatal("files table was damaged by injection")
			}
		})
	}
}
// TestTimezoneHandling tests that times are properly handled in UTC
func TestTimezoneHandling(t *testing.T) {
	db, cleanup := setupTestDB(t)
	defer cleanup()

	ctx := context.Background()
	repos := NewRepositories(db)

	loc, err := time.LoadLocation("America/New_York")
	if err != nil {
		t.Skip("timezone not available")
	}

	// Truncate sub-second precision: timestamps are persisted as Unix seconds.
	nyTime := time.Now().In(loc).Truncate(time.Second)
	f := &File{
		Path:  "/timezone-test.txt",
		MTime: nyTime,
		CTime: nyTime,
		Size:  1024,
		Mode:  0644,
		UID:   1000,
		GID:   1000,
	}
	if err := repos.Files.Create(ctx, nil, f); err != nil {
		t.Fatal(err)
	}

	got, err := repos.Files.GetByID(ctx, f.ID)
	if err != nil {
		t.Fatal(err)
	}
	// The stored value must denote the same instant...
	if !got.MTime.Equal(nyTime) {
		t.Error("time was not preserved correctly")
	}
	// ...and come back normalized to UTC.
	if got.MTime.Location() != time.UTC {
		t.Error("retrieved time is not in UTC")
	}
}

View File

@@ -0,0 +1,113 @@
-- Vaultik Database Schema
-- Note: This database does not support migrations. If the schema changes,
-- delete the local database and perform a full backup to recreate it.
-- All timestamp columns store Unix epoch seconds as INTEGER (the Go
-- repository layer writes time.Unix values and reads them back as UTC).

-- Files table: stores metadata about files in the filesystem
CREATE TABLE IF NOT EXISTS files (
    id TEXT PRIMARY KEY, -- UUID
    path TEXT NOT NULL UNIQUE, -- filesystem path; unique so a path maps to one row
    mtime INTEGER NOT NULL, -- modification time, Unix seconds
    ctime INTEGER NOT NULL, -- change time, Unix seconds
    size INTEGER NOT NULL, -- size in bytes
    mode INTEGER NOT NULL, -- POSIX mode bits
    uid INTEGER NOT NULL, -- owning user ID
    gid INTEGER NOT NULL, -- owning group ID
    link_target TEXT -- presumably the symlink target, NULL otherwise -- TODO confirm
);

-- Create index on path for efficient lookups
-- NOTE(review): path already has a UNIQUE constraint, which creates an
-- implicit index in SQLite; this explicit index may be redundant — confirm.
CREATE INDEX IF NOT EXISTS idx_files_path ON files(path);

-- File chunks table: maps files to their constituent chunks
CREATE TABLE IF NOT EXISTS file_chunks (
    file_id TEXT NOT NULL, -- UUID of the owning file
    idx INTEGER NOT NULL, -- ordinal position of the chunk within the file
    chunk_hash TEXT NOT NULL, -- references chunks.chunk_hash (no FK declared)
    PRIMARY KEY (file_id, idx),
    FOREIGN KEY (file_id) REFERENCES files(id) ON DELETE CASCADE
);

-- Chunks table: stores unique content-defined chunks
-- NOTE(review): chunk_hash and sha256 look redundant — confirm whether
-- chunk_hash uses a different algorithm than SHA-256.
CREATE TABLE IF NOT EXISTS chunks (
    chunk_hash TEXT PRIMARY KEY, -- content hash identifying the chunk
    sha256 TEXT NOT NULL, -- SHA-256 of the chunk content
    size INTEGER NOT NULL -- uncompressed chunk size in bytes
);
-- Blobs table: stores packed, compressed, and encrypted blob information.
-- A blob gets a UUID at creation time so chunks can be associated with it
-- while it is still being packed; blob_hash is filled in when finished.
CREATE TABLE IF NOT EXISTS blobs (
    id TEXT PRIMARY KEY, -- UUID, assigned at creation
    blob_hash TEXT UNIQUE, -- content hash; NULL until the blob is finalized
    created_ts INTEGER NOT NULL, -- creation time, Unix seconds
    finished_ts INTEGER, -- packing-finished time; NULL while in progress
    uncompressed_size INTEGER NOT NULL DEFAULT 0, -- total bytes before compression
    compressed_size INTEGER NOT NULL DEFAULT 0, -- total bytes after compression
    uploaded_ts INTEGER -- upload-completed time; NULL until uploaded
);

-- Blob chunks table: maps chunks to the blobs that contain them
-- NOTE(review): unlike the other link tables, this FK has no ON DELETE
-- CASCADE — confirm whether blob deletion is expected to fail while
-- blob_chunks rows exist, or whether cascade was omitted by mistake.
CREATE TABLE IF NOT EXISTS blob_chunks (
    blob_id TEXT NOT NULL, -- UUID of the containing blob
    chunk_hash TEXT NOT NULL, -- chunk stored in the blob
    offset INTEGER NOT NULL, -- byte offset of the chunk within the blob
    length INTEGER NOT NULL, -- byte length of the chunk within the blob
    PRIMARY KEY (blob_id, chunk_hash),
    FOREIGN KEY (blob_id) REFERENCES blobs(id)
);

-- Chunk files table: reverse mapping of chunks to files
CREATE TABLE IF NOT EXISTS chunk_files (
    chunk_hash TEXT NOT NULL, -- the shared chunk
    file_id TEXT NOT NULL, -- UUID of a file containing this chunk
    file_offset INTEGER NOT NULL, -- byte offset of the chunk within the file
    length INTEGER NOT NULL, -- byte length of the chunk within the file
    PRIMARY KEY (chunk_hash, file_id),
    FOREIGN KEY (file_id) REFERENCES files(id) ON DELETE CASCADE
);
-- Snapshots table: tracks backup snapshots.
-- A row with completed_at IS NULL is an incomplete/in-progress snapshot
-- (see GetIncompleteSnapshots in the repository layer).
CREATE TABLE IF NOT EXISTS snapshots (
    id TEXT PRIMARY KEY, -- snapshot identifier
    hostname TEXT NOT NULL, -- host that produced the snapshot
    vaultik_version TEXT NOT NULL, -- vaultik release version at snapshot time
    vaultik_git_revision TEXT NOT NULL, -- git revision of the vaultik build
    started_at INTEGER NOT NULL, -- start time, Unix seconds
    completed_at INTEGER, -- completion time; NULL while in progress
    file_count INTEGER NOT NULL DEFAULT 0, -- number of files in the snapshot
    chunk_count INTEGER NOT NULL DEFAULT 0, -- number of chunks referenced
    blob_count INTEGER NOT NULL DEFAULT 0, -- number of blobs referenced
    total_size INTEGER NOT NULL DEFAULT 0, -- total logical size of files, bytes
    blob_size INTEGER NOT NULL DEFAULT 0, -- total compressed blob size, bytes
    blob_uncompressed_size INTEGER NOT NULL DEFAULT 0, -- total blob size before compression
    compression_ratio REAL NOT NULL DEFAULT 1.0, -- blob_size / blob_uncompressed_size (see UpdateExtendedStats)
    compression_level INTEGER NOT NULL DEFAULT 3, -- compression level used
    upload_bytes INTEGER NOT NULL DEFAULT 0, -- bytes uploaded for this snapshot
    upload_duration_ms INTEGER NOT NULL DEFAULT 0 -- total upload wall time, milliseconds
);

-- Snapshot files table: maps snapshots to files
CREATE TABLE IF NOT EXISTS snapshot_files (
    snapshot_id TEXT NOT NULL, -- owning snapshot
    file_id TEXT NOT NULL, -- UUID of a file included in the snapshot
    PRIMARY KEY (snapshot_id, file_id),
    FOREIGN KEY (snapshot_id) REFERENCES snapshots(id) ON DELETE CASCADE,
    FOREIGN KEY (file_id) REFERENCES files(id) ON DELETE CASCADE
);

-- Snapshot blobs table: maps snapshots to blobs.
-- blob_hash is denormalized here alongside blob_id (it also lives in
-- blobs.blob_hash), presumably so snapshot manifests can be built without
-- joining blobs — confirm.
CREATE TABLE IF NOT EXISTS snapshot_blobs (
    snapshot_id TEXT NOT NULL, -- owning snapshot
    blob_id TEXT NOT NULL, -- UUID of a blob referenced by the snapshot
    blob_hash TEXT NOT NULL, -- content hash of that blob (denormalized)
    PRIMARY KEY (snapshot_id, blob_id),
    FOREIGN KEY (snapshot_id) REFERENCES snapshots(id) ON DELETE CASCADE,
    FOREIGN KEY (blob_id) REFERENCES blobs(id) ON DELETE CASCADE
);

-- Uploads table: tracks blob upload metrics
CREATE TABLE IF NOT EXISTS uploads (
    blob_hash TEXT PRIMARY KEY, -- uploaded blob, keyed by content hash
    uploaded_at INTEGER NOT NULL, -- upload time, Unix seconds
    size INTEGER NOT NULL, -- bytes transferred
    duration_ms INTEGER NOT NULL -- upload duration, milliseconds
);

View File

@@ -17,8 +17,10 @@ func NewSnapshotRepository(db *DB) *SnapshotRepository {
func (r *SnapshotRepository) Create(ctx context.Context, tx *sql.Tx, snapshot *Snapshot) error {
query := `
INSERT INTO snapshots (id, hostname, vaultik_version, started_at, completed_at, file_count, chunk_count, blob_count, total_size, blob_size, compression_ratio)
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
INSERT INTO snapshots (id, hostname, vaultik_version, vaultik_git_revision, started_at, completed_at,
file_count, chunk_count, blob_count, total_size, blob_size, blob_uncompressed_size,
compression_ratio, compression_level, upload_bytes, upload_duration_ms)
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
`
var completedAt *int64
@@ -29,11 +31,13 @@ func (r *SnapshotRepository) Create(ctx context.Context, tx *sql.Tx, snapshot *S
var err error
if tx != nil {
_, err = tx.ExecContext(ctx, query, snapshot.ID, snapshot.Hostname, snapshot.VaultikVersion, snapshot.StartedAt.Unix(),
completedAt, snapshot.FileCount, snapshot.ChunkCount, snapshot.BlobCount, snapshot.TotalSize, snapshot.BlobSize, snapshot.CompressionRatio)
_, err = tx.ExecContext(ctx, query, snapshot.ID, snapshot.Hostname, snapshot.VaultikVersion, snapshot.VaultikGitRevision, snapshot.StartedAt.Unix(),
completedAt, snapshot.FileCount, snapshot.ChunkCount, snapshot.BlobCount, snapshot.TotalSize, snapshot.BlobSize, snapshot.BlobUncompressedSize,
snapshot.CompressionRatio, snapshot.CompressionLevel, snapshot.UploadBytes, snapshot.UploadDurationMs)
} else {
_, err = r.db.ExecWithLock(ctx, query, snapshot.ID, snapshot.Hostname, snapshot.VaultikVersion, snapshot.StartedAt.Unix(),
completedAt, snapshot.FileCount, snapshot.ChunkCount, snapshot.BlobCount, snapshot.TotalSize, snapshot.BlobSize, snapshot.CompressionRatio)
_, err = r.db.ExecWithLog(ctx, query, snapshot.ID, snapshot.Hostname, snapshot.VaultikVersion, snapshot.VaultikGitRevision, snapshot.StartedAt.Unix(),
completedAt, snapshot.FileCount, snapshot.ChunkCount, snapshot.BlobCount, snapshot.TotalSize, snapshot.BlobSize, snapshot.BlobUncompressedSize,
snapshot.CompressionRatio, snapshot.CompressionLevel, snapshot.UploadBytes, snapshot.UploadDurationMs)
}
if err != nil {
@@ -64,7 +68,7 @@ func (r *SnapshotRepository) UpdateCounts(ctx context.Context, tx *sql.Tx, snaps
if tx != nil {
_, err = tx.ExecContext(ctx, query, fileCount, chunkCount, blobCount, totalSize, blobSize, compressionRatio, snapshotID)
} else {
_, err = r.db.ExecWithLock(ctx, query, fileCount, chunkCount, blobCount, totalSize, blobSize, compressionRatio, snapshotID)
_, err = r.db.ExecWithLog(ctx, query, fileCount, chunkCount, blobCount, totalSize, blobSize, compressionRatio, snapshotID)
}
if err != nil {
@@ -74,9 +78,58 @@ func (r *SnapshotRepository) UpdateCounts(ctx context.Context, tx *sql.Tx, snaps
return nil
}
// UpdateExtendedStats updates extended statistics for a snapshot.
//
// It recomputes compression_ratio as the row's current blob_size divided by
// blobUncompressedSize (so values below 1.0 indicate effective compression),
// then writes the uncompressed size, ratio, compression level, and upload
// duration onto the snapshot row. upload_bytes is set from the row's current
// blob_size inside the UPDATE itself. When tx is non-nil both the read and
// the write run inside that transaction; otherwise they run on the shared
// connection (the write going through ExecWithLog).
func (r *SnapshotRepository) UpdateExtendedStats(ctx context.Context, tx *sql.Tx, snapshotID string, blobUncompressedSize int64, compressionLevel int, uploadDurationMs int64) error {
	// Calculate compression ratio based on uncompressed vs compressed sizes
	var compressionRatio float64
	if blobUncompressedSize > 0 {
		// Get current blob_size from DB to calculate ratio
		var blobSize int64
		queryGet := `SELECT blob_size FROM snapshots WHERE id = ?`
		if tx != nil {
			err := tx.QueryRowContext(ctx, queryGet, snapshotID).Scan(&blobSize)
			if err != nil {
				return fmt.Errorf("getting blob size: %w", err)
			}
		} else {
			err := r.db.conn.QueryRowContext(ctx, queryGet, snapshotID).Scan(&blobSize)
			if err != nil {
				return fmt.Errorf("getting blob size: %w", err)
			}
		}
		// ratio = compressed / uncompressed, e.g. 0.5 means 2:1 compression.
		compressionRatio = float64(blobSize) / float64(blobUncompressedSize)
	} else {
		// No uncompressed bytes recorded: report a neutral ratio of 1.0
		// rather than dividing by zero.
		compressionRatio = 1.0
	}

	query := `
		UPDATE snapshots
		SET blob_uncompressed_size = ?,
		    compression_ratio = ?,
		    compression_level = ?,
		    upload_bytes = blob_size,
		    upload_duration_ms = ?
		WHERE id = ?
	`

	var err error
	if tx != nil {
		_, err = tx.ExecContext(ctx, query, blobUncompressedSize, compressionRatio, compressionLevel, uploadDurationMs, snapshotID)
	} else {
		_, err = r.db.ExecWithLog(ctx, query, blobUncompressedSize, compressionRatio, compressionLevel, uploadDurationMs, snapshotID)
	}
	if err != nil {
		return fmt.Errorf("updating extended stats: %w", err)
	}
	return nil
}
func (r *SnapshotRepository) GetByID(ctx context.Context, snapshotID string) (*Snapshot, error) {
query := `
SELECT id, hostname, vaultik_version, started_at, completed_at, file_count, chunk_count, blob_count, total_size, blob_size, compression_ratio
SELECT id, hostname, vaultik_version, vaultik_git_revision, started_at, completed_at,
file_count, chunk_count, blob_count, total_size, blob_size, blob_uncompressed_size,
compression_ratio, compression_level, upload_bytes, upload_duration_ms
FROM snapshots
WHERE id = ?
`
@@ -89,6 +142,7 @@ func (r *SnapshotRepository) GetByID(ctx context.Context, snapshotID string) (*S
&snapshot.ID,
&snapshot.Hostname,
&snapshot.VaultikVersion,
&snapshot.VaultikGitRevision,
&startedAtUnix,
&completedAtUnix,
&snapshot.FileCount,
@@ -96,7 +150,11 @@ func (r *SnapshotRepository) GetByID(ctx context.Context, snapshotID string) (*S
&snapshot.BlobCount,
&snapshot.TotalSize,
&snapshot.BlobSize,
&snapshot.BlobUncompressedSize,
&snapshot.CompressionRatio,
&snapshot.CompressionLevel,
&snapshot.UploadBytes,
&snapshot.UploadDurationMs,
)
if err == sql.ErrNoRows {
@@ -106,9 +164,9 @@ func (r *SnapshotRepository) GetByID(ctx context.Context, snapshotID string) (*S
return nil, fmt.Errorf("querying snapshot: %w", err)
}
snapshot.StartedAt = time.Unix(startedAtUnix, 0)
snapshot.StartedAt = time.Unix(startedAtUnix, 0).UTC()
if completedAtUnix != nil {
t := time.Unix(*completedAtUnix, 0)
t := time.Unix(*completedAtUnix, 0).UTC()
snapshot.CompletedAt = &t
}
@@ -117,7 +175,7 @@ func (r *SnapshotRepository) GetByID(ctx context.Context, snapshotID string) (*S
func (r *SnapshotRepository) ListRecent(ctx context.Context, limit int) ([]*Snapshot, error) {
query := `
SELECT id, hostname, vaultik_version, started_at, completed_at, file_count, chunk_count, blob_count, total_size, blob_size, compression_ratio
SELECT id, hostname, vaultik_version, vaultik_git_revision, started_at, completed_at, file_count, chunk_count, blob_count, total_size, blob_size, compression_ratio
FROM snapshots
ORDER BY started_at DESC
LIMIT ?
@@ -139,6 +197,7 @@ func (r *SnapshotRepository) ListRecent(ctx context.Context, limit int) ([]*Snap
&snapshot.ID,
&snapshot.Hostname,
&snapshot.VaultikVersion,
&snapshot.VaultikGitRevision,
&startedAtUnix,
&completedAtUnix,
&snapshot.FileCount,
@@ -172,13 +231,13 @@ func (r *SnapshotRepository) MarkComplete(ctx context.Context, tx *sql.Tx, snaps
WHERE id = ?
`
completedAt := time.Now().Unix()
completedAt := time.Now().UTC().Unix()
var err error
if tx != nil {
_, err = tx.ExecContext(ctx, query, completedAt, snapshotID)
} else {
_, err = r.db.ExecWithLock(ctx, query, completedAt, snapshotID)
_, err = r.db.ExecWithLog(ctx, query, completedAt, snapshotID)
}
if err != nil {
@@ -191,15 +250,36 @@ func (r *SnapshotRepository) MarkComplete(ctx context.Context, tx *sql.Tx, snaps
// AddFile adds a file to a snapshot
func (r *SnapshotRepository) AddFile(ctx context.Context, tx *sql.Tx, snapshotID string, filePath string) error {
query := `
INSERT OR IGNORE INTO snapshot_files (snapshot_id, file_path)
VALUES (?, ?)
INSERT OR IGNORE INTO snapshot_files (snapshot_id, file_id)
SELECT ?, id FROM files WHERE path = ?
`
var err error
if tx != nil {
_, err = tx.ExecContext(ctx, query, snapshotID, filePath)
} else {
_, err = r.db.ExecWithLock(ctx, query, snapshotID, filePath)
_, err = r.db.ExecWithLog(ctx, query, snapshotID, filePath)
}
if err != nil {
return fmt.Errorf("adding file to snapshot: %w", err)
}
return nil
}
// AddFileByID adds a file to a snapshot by file ID
func (r *SnapshotRepository) AddFileByID(ctx context.Context, tx *sql.Tx, snapshotID string, fileID string) error {
query := `
INSERT OR IGNORE INTO snapshot_files (snapshot_id, file_id)
VALUES (?, ?)
`
var err error
if tx != nil {
_, err = tx.ExecContext(ctx, query, snapshotID, fileID)
} else {
_, err = r.db.ExecWithLog(ctx, query, snapshotID, fileID)
}
if err != nil {
@@ -220,7 +300,7 @@ func (r *SnapshotRepository) AddBlob(ctx context.Context, tx *sql.Tx, snapshotID
if tx != nil {
_, err = tx.ExecContext(ctx, query, snapshotID, blobID, blobHash)
} else {
_, err = r.db.ExecWithLock(ctx, query, snapshotID, blobID, blobHash)
_, err = r.db.ExecWithLog(ctx, query, snapshotID, blobID, blobHash)
}
if err != nil {
@@ -260,7 +340,7 @@ func (r *SnapshotRepository) GetBlobHashes(ctx context.Context, snapshotID strin
// GetIncompleteSnapshots returns all snapshots that haven't been completed
func (r *SnapshotRepository) GetIncompleteSnapshots(ctx context.Context) ([]*Snapshot, error) {
query := `
SELECT id, hostname, vaultik_version, started_at, completed_at, file_count, chunk_count, blob_count, total_size, blob_size, compression_ratio
SELECT id, hostname, vaultik_version, vaultik_git_revision, started_at, completed_at, file_count, chunk_count, blob_count, total_size, blob_size, compression_ratio
FROM snapshots
WHERE completed_at IS NULL
ORDER BY started_at DESC
@@ -282,6 +362,7 @@ func (r *SnapshotRepository) GetIncompleteSnapshots(ctx context.Context) ([]*Sna
&snapshot.ID,
&snapshot.Hostname,
&snapshot.VaultikVersion,
&snapshot.VaultikGitRevision,
&startedAtUnix,
&completedAtUnix,
&snapshot.FileCount,
@@ -306,3 +387,90 @@ func (r *SnapshotRepository) GetIncompleteSnapshots(ctx context.Context) ([]*Sna
return snapshots, rows.Err()
}
// GetIncompleteByHostname returns every snapshot belonging to the given
// hostname whose completed_at is still NULL, ordered newest-first.
// Timestamps are stored as Unix seconds and normalized to UTC on read.
func (r *SnapshotRepository) GetIncompleteByHostname(ctx context.Context, hostname string) ([]*Snapshot, error) {
	query := `
		SELECT id, hostname, vaultik_version, vaultik_git_revision, started_at, completed_at, file_count, chunk_count, blob_count, total_size, blob_size, compression_ratio
		FROM snapshots
		WHERE completed_at IS NULL AND hostname = ?
		ORDER BY started_at DESC
	`

	rows, err := r.db.conn.QueryContext(ctx, query, hostname)
	if err != nil {
		return nil, fmt.Errorf("querying incomplete snapshots: %w", err)
	}
	defer CloseRows(rows)

	var result []*Snapshot
	for rows.Next() {
		s := &Snapshot{}
		var started int64
		var completed *int64
		if scanErr := rows.Scan(
			&s.ID,
			&s.Hostname,
			&s.VaultikVersion,
			&s.VaultikGitRevision,
			&started,
			&completed,
			&s.FileCount,
			&s.ChunkCount,
			&s.BlobCount,
			&s.TotalSize,
			&s.BlobSize,
			&s.CompressionRatio,
		); scanErr != nil {
			return nil, fmt.Errorf("scanning snapshot: %w", scanErr)
		}

		// Convert stored Unix seconds to UTC time values.
		s.StartedAt = time.Unix(started, 0).UTC()
		if completed != nil {
			done := time.Unix(*completed, 0).UTC()
			s.CompletedAt = &done
		}
		result = append(result, s)
	}
	return result, rows.Err()
}
// Delete removes a snapshot record. Related snapshot_files and
// snapshot_blobs rows cascade via the schema's ON DELETE CASCADE.
func (r *SnapshotRepository) Delete(ctx context.Context, snapshotID string) error {
	if _, err := r.db.ExecWithLog(ctx, `DELETE FROM snapshots WHERE id = ?`, snapshotID); err != nil {
		return fmt.Errorf("deleting snapshot: %w", err)
	}
	return nil
}
// DeleteSnapshotFiles removes every snapshot_files row that belongs to the
// given snapshot, leaving the files rows themselves untouched.
func (r *SnapshotRepository) DeleteSnapshotFiles(ctx context.Context, snapshotID string) error {
	if _, err := r.db.ExecWithLog(ctx, `DELETE FROM snapshot_files WHERE snapshot_id = ?`, snapshotID); err != nil {
		return fmt.Errorf("deleting snapshot files: %w", err)
	}
	return nil
}
// DeleteSnapshotBlobs removes every snapshot_blobs row that belongs to the
// given snapshot, leaving the blobs rows themselves untouched.
func (r *SnapshotRepository) DeleteSnapshotBlobs(ctx context.Context, snapshotID string) error {
	if _, err := r.db.ExecWithLog(ctx, `DELETE FROM snapshot_blobs WHERE snapshot_id = ?`, snapshotID); err != nil {
		return fmt.Errorf("deleting snapshot blobs: %w", err)
	}
	return nil
}