Major refactoring: UUID-based storage, streaming architecture, and CLI improvements

This commit represents a significant architectural overhaul of vaultik:

Database Schema Changes:
- Switch files table to use UUID primary keys instead of path-based keys
- Add UUID primary keys to blobs table for immediate chunk association
- Update all foreign key relationships to use UUIDs
- Add comprehensive schema documentation in DATAMODEL.md
- Add SQLite busy timeout handling for concurrent operations
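
The busy timeout makes concurrent writers wait for the database lock instead of failing immediately with SQLITE_BUSY. A minimal sketch of the idea (the driver and the 5-second value are assumptions, not the exact code in this commit):

package main

import (
    "database/sql"

    _ "github.com/mattn/go-sqlite3" // assumed driver; any SQLite driver works
)

// openStateDB opens the local state database and sets a busy timeout so
// that concurrent transactions block briefly rather than erroring out.
func openStateDB(path string) (*sql.DB, error) {
    db, err := sql.Open("sqlite3", path)
    if err != nil {
        return nil, err
    }
    // 5000 ms is illustrative; the timeout vaultik actually uses is not shown here.
    if _, err := db.Exec("PRAGMA busy_timeout = 5000"); err != nil {
        _ = db.Close()
        return nil, err
    }
    return db, nil
}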

Streaming and Performance Improvements:
- Implement true streaming blob packing without intermediate storage (see the sketch after this list)
- Add streaming chunk processing to reduce memory usage
- Improve progress reporting with real-time metrics
- Add upload metrics tracking in new uploads table
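
Streaming blob packing means a finished blob never touches local disk: chunk bytes are compressed (and, in the real pipeline, encrypted with age) into a pipe whose read side is handed straight to the uploader. A rough sketch of the pattern, assuming an uploader that takes an io.Reader like the S3Client interface in this repo; the names are illustrative:

package main

import (
    "context"
    "io"

    "github.com/klauspost/compress/zstd"
)

// putFunc stands in for S3Client.PutObject.
type putFunc func(ctx context.Context, key string, data io.Reader) error

// streamBlob compresses incoming chunks directly into a pipe and uploads
// the read side, so no intermediate blob file is ever written.
func streamBlob(ctx context.Context, key string, chunks <-chan []byte, put putFunc) error {
    pr, pw := io.Pipe()
    go func() {
        zw, err := zstd.NewWriter(pw)
        if err != nil {
            pw.CloseWithError(err)
            return
        }
        for c := range chunks {
            if _, err := zw.Write(c); err != nil {
                pw.CloseWithError(err)
                return
            }
        }
        // Flush the zstd frame, then close the pipe (the reader sees EOF).
        pw.CloseWithError(zw.Close())
    }()
    if err := put(ctx, key, pr); err != nil {
        _ = pr.CloseWithError(err) // unblock the compressor goroutine
        return err
    }
    return nil
}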

CLI Refactoring:
- Restructure CLI to use subcommands: snapshot create/list/purge/verify
- Add store info command for S3 configuration display
- Add custom duration parser supporting days/weeks/months/years (sketch below)
- Remove old backup.go in favor of enhanced snapshot.go
- Add --cron flag for silent operation
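
The duration parser accepts human-friendly units beyond what Go's time.ParseDuration understands. The exact syntax lives in the CLI package; the sketch below is an assumption about its shape (function name, suffix spellings, and the month/year approximations are all illustrative):

package main

import (
    "fmt"
    "strconv"
    "strings"
    "time"
)

// parseRetention parses values like "7d", "4w", "6mo", "1y", falling back
// to time.ParseDuration for standard units such as "36h".
func parseRetention(s string) (time.Duration, error) {
    if d, err := time.ParseDuration(s); err == nil {
        return d, nil
    }
    units := []struct {
        suffix string
        unit   time.Duration
    }{
        {"d", 24 * time.Hour},
        {"w", 7 * 24 * time.Hour},
        {"mo", 30 * 24 * time.Hour},  // month approximated as 30 days
        {"y", 365 * 24 * time.Hour},  // year approximated as 365 days
    }
    for _, u := range units {
        if strings.HasSuffix(s, u.suffix) {
            n, err := strconv.Atoi(strings.TrimSuffix(s, u.suffix))
            if err != nil {
                break
            }
            return time.Duration(n) * u.unit, nil
        }
    }
    return 0, fmt.Errorf("invalid duration %q", s)
}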

Configuration Changes:
- Remove unused index_prefix configuration option
- Add support for snapshot pruning retention policies (illustrated below)
- Improve configuration validation and error messages
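
A retention policy built on the duration parser above might look like the following; this is a hypothetical shape for illustration, not the actual vaultik config schema:

package main

import (
    "fmt"
    "time"
)

// RetentionPolicy controls which snapshots "snapshot purge" keeps.
type RetentionPolicy struct {
    KeepLast int           // always keep the N most recent snapshots
    KeepFor  time.Duration // keep snapshots newer than this, e.g. parsed from "90d"
}

// Validate rejects policies that would silently delete every snapshot.
func (p RetentionPolicy) Validate() error {
    if p.KeepLast < 0 || p.KeepFor < 0 {
        return fmt.Errorf("retention values must not be negative")
    }
    if p.KeepLast == 0 && p.KeepFor == 0 {
        return fmt.Errorf("retention policy would remove every snapshot")
    }
    return nil
}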

Testing Improvements:
- Add comprehensive repository tests with edge cases
- Add cascade delete debugging tests
- Fix concurrent operation tests to use SQLite busy timeout
- Remove tolerance for SQLITE_BUSY errors in tests

Documentation:
- Add MIT LICENSE file
- Update README with new command structure
- Add comprehensive DATAMODEL.md explaining database schema
- Update DESIGN.md with UUID-based architecture

Other Changes:
- Add test-config.yml for testing
- Update Makefile with better test output formatting
- Fix various race conditions in concurrent operations
- Improve error handling throughout

Commit 78af626759 (parent 86b533d6ee)
2025-07-22 14:54:37 +02:00
54 changed files with 5525 additions and 1109 deletions


@@ -338,97 +338,103 @@ func (b *BackupEngine) Backup(ctx context.Context, fsys fs.FS, root string) (str
return nil
}
// Process this file in a transaction
// Create file record in a short transaction
file := &database.File{
Path: path,
Size: info.Size(),
Mode: uint32(info.Mode()),
MTime: info.ModTime(),
CTime: info.ModTime(), // Use mtime as ctime for test
UID: 1000, // Default UID for test
GID: 1000, // Default GID for test
}
err = b.repos.WithTx(ctx, func(ctx context.Context, tx *sql.Tx) error {
// Create file record
file := &database.File{
Path: path,
Size: info.Size(),
Mode: uint32(info.Mode()),
MTime: info.ModTime(),
CTime: info.ModTime(), // Use mtime as ctime for test
UID: 1000, // Default UID for test
GID: 1000, // Default GID for test
}
return b.repos.Files.Create(ctx, tx, file)
})
if err != nil {
return err
}
if err := b.repos.Files.Create(ctx, tx, file); err != nil {
fileCount++
totalSize += info.Size()
// Read and process file in chunks
f, err := fsys.Open(path)
if err != nil {
return err
}
defer func() {
if err := f.Close(); err != nil {
// Log but don't fail since we're already in an error path potentially
fmt.Fprintf(os.Stderr, "Failed to close file: %v\n", err)
}
}()
// Process file in chunks
chunkIndex := 0
buffer := make([]byte, defaultChunkSize)
for {
n, err := f.Read(buffer)
if err != nil && err != io.EOF {
return err
}
fileCount++
totalSize += info.Size()
// Read and process file in chunks
f, err := fsys.Open(path)
if err != nil {
return err
if n == 0 {
break
}
defer func() {
if err := f.Close(); err != nil {
// Log but don't fail since we're already in an error path potentially
fmt.Fprintf(os.Stderr, "Failed to close file: %v\n", err)
}
}()
// Process file in chunks
chunkIndex := 0
buffer := make([]byte, defaultChunkSize)
chunkData := buffer[:n]
chunkHash := calculateHash(chunkData)
for {
n, err := f.Read(buffer)
if err != nil && err != io.EOF {
return err
}
if n == 0 {
break
}
chunkData := buffer[:n]
chunkHash := calculateHash(chunkData)
// Check if chunk already exists
existingChunk, _ := b.repos.Chunks.GetByHash(ctx, chunkHash)
if existingChunk == nil {
// Create new chunk
// Check if chunk already exists (outside of transaction)
existingChunk, _ := b.repos.Chunks.GetByHash(ctx, chunkHash)
if existingChunk == nil {
// Create new chunk in a short transaction
err = b.repos.WithTx(ctx, func(ctx context.Context, tx *sql.Tx) error {
chunk := &database.Chunk{
ChunkHash: chunkHash,
SHA256: chunkHash,
Size: int64(n),
}
if err := b.repos.Chunks.Create(ctx, tx, chunk); err != nil {
return err
}
processedChunks[chunkHash] = true
return b.repos.Chunks.Create(ctx, tx, chunk)
})
if err != nil {
return err
}
processedChunks[chunkHash] = true
}
// Create file-chunk mapping
// Create file-chunk mapping in a short transaction
err = b.repos.WithTx(ctx, func(ctx context.Context, tx *sql.Tx) error {
fileChunk := &database.FileChunk{
Path: path,
FileID: file.ID,
Idx: chunkIndex,
ChunkHash: chunkHash,
}
if err := b.repos.FileChunks.Create(ctx, tx, fileChunk); err != nil {
return err
}
return b.repos.FileChunks.Create(ctx, tx, fileChunk)
})
if err != nil {
return err
}
// Create chunk-file mapping
// Create chunk-file mapping in a short transaction
err = b.repos.WithTx(ctx, func(ctx context.Context, tx *sql.Tx) error {
chunkFile := &database.ChunkFile{
ChunkHash: chunkHash,
FilePath: path,
FileID: file.ID,
FileOffset: int64(chunkIndex * defaultChunkSize),
Length: int64(n),
}
if err := b.repos.ChunkFiles.Create(ctx, tx, chunkFile); err != nil {
return err
}
chunkIndex++
return b.repos.ChunkFiles.Create(ctx, tx, chunkFile)
})
if err != nil {
return err
}
return nil
})
chunkIndex++
}
return err
return nil
})
if err != nil {
@@ -436,61 +442,64 @@ func (b *BackupEngine) Backup(ctx context.Context, fsys fs.FS, root string) (str
}
// After all files are processed, create blobs for new chunks
err = b.repos.WithTx(ctx, func(ctx context.Context, tx *sql.Tx) error {
for chunkHash := range processedChunks {
// Get chunk data
chunk, err := b.repos.Chunks.GetByHash(ctx, chunkHash)
if err != nil {
return err
}
for chunkHash := range processedChunks {
// Get chunk data (outside of transaction)
chunk, err := b.repos.Chunks.GetByHash(ctx, chunkHash)
if err != nil {
return "", err
}
chunkCount++
chunkCount++
// In a real system, blobs would contain multiple chunks and be encrypted
// For testing, we'll create a blob with a "blob-" prefix to differentiate
blobHash := "blob-" + chunkHash
// In a real system, blobs would contain multiple chunks and be encrypted
// For testing, we'll create a blob with a "blob-" prefix to differentiate
blobHash := "blob-" + chunkHash
// For the test, we'll create dummy data since we don't have the original
dummyData := []byte(chunkHash)
// For the test, we'll create dummy data since we don't have the original
dummyData := []byte(chunkHash)
// Upload to S3 as a blob
if err := b.s3Client.PutBlob(ctx, blobHash, dummyData); err != nil {
return err
}
// Upload to S3 as a blob
if err := b.s3Client.PutBlob(ctx, blobHash, dummyData); err != nil {
return "", err
}
// Create blob entry
// Create blob entry in a short transaction
err = b.repos.WithTx(ctx, func(ctx context.Context, tx *sql.Tx) error {
blob := &database.Blob{
ID: "test-blob-" + blobHash[:8],
Hash: blobHash,
CreatedTS: time.Now(),
}
if err := b.repos.Blobs.Create(ctx, tx, blob); err != nil {
return err
}
blobCount++
blobSize += chunk.Size
return b.repos.Blobs.Create(ctx, tx, blob)
})
if err != nil {
return "", err
}
// Create blob-chunk mapping
blobCount++
blobSize += chunk.Size
// Create blob-chunk mapping in a short transaction
err = b.repos.WithTx(ctx, func(ctx context.Context, tx *sql.Tx) error {
blobChunk := &database.BlobChunk{
BlobID: blob.ID,
BlobID: "test-blob-" + blobHash[:8],
ChunkHash: chunkHash,
Offset: 0,
Length: chunk.Size,
}
if err := b.repos.BlobChunks.Create(ctx, tx, blobChunk); err != nil {
return err
}
// Add blob to snapshot
if err := b.repos.Snapshots.AddBlob(ctx, tx, snapshotID, blob.ID, blob.Hash); err != nil {
return err
}
return b.repos.BlobChunks.Create(ctx, tx, blobChunk)
})
if err != nil {
return "", err
}
return nil
})
if err != nil {
return "", err
// Add blob to snapshot in a short transaction
err = b.repos.WithTx(ctx, func(ctx context.Context, tx *sql.Tx) error {
return b.repos.Snapshots.AddBlob(ctx, tx, snapshotID, "test-blob-"+blobHash[:8], blobHash)
})
if err != nil {
return "", err
}
}
// Update snapshot with final counts


@@ -13,7 +13,9 @@ type ScannerParams struct {
EnableProgress bool
}
// Module exports backup functionality
// Module exports backup functionality as an fx module.
// It provides a ScannerFactory that can create Scanner instances
// with custom parameters while sharing common dependencies.
var Module = fx.Module("backup",
fx.Provide(
provideScannerFactory,


@@ -15,9 +15,13 @@ import (
)
const (
// Progress reporting intervals
SummaryInterval = 10 * time.Second // One-line status updates
DetailInterval = 60 * time.Second // Multi-line detailed status
// SummaryInterval defines how often one-line status updates are printed.
// These updates show current progress, ETA, and the file being processed.
SummaryInterval = 10 * time.Second
// DetailInterval defines how often multi-line detailed status reports are printed.
// These reports include comprehensive statistics about files, chunks, blobs, and uploads.
DetailInterval = 60 * time.Second
)
// ProgressStats holds atomic counters for progress tracking
@@ -32,6 +36,7 @@ type ProgressStats struct {
BlobsCreated atomic.Int64
BlobsUploaded atomic.Int64
BytesUploaded atomic.Int64
UploadDurationMs atomic.Int64 // Total milliseconds spent uploading to S3
CurrentFile atomic.Value // stores string
TotalSize atomic.Int64 // Total size to process (set after scan phase)
TotalFiles atomic.Int64 // Total files to process in phase 2
@@ -66,8 +71,8 @@ type ProgressReporter struct {
// NewProgressReporter creates a new progress reporter
func NewProgressReporter() *ProgressReporter {
stats := &ProgressStats{
StartTime: time.Now(),
lastDetailTime: time.Now(),
StartTime: time.Now().UTC(),
lastDetailTime: time.Now().UTC(),
}
stats.CurrentFile.Store("")
@@ -115,7 +120,7 @@ func (pr *ProgressReporter) GetStats() *ProgressStats {
// SetTotalSize sets the total size to process (after scan phase)
func (pr *ProgressReporter) SetTotalSize(size int64) {
pr.stats.TotalSize.Store(size)
pr.stats.ProcessStartTime.Store(time.Now())
pr.stats.ProcessStartTime.Store(time.Now().UTC())
}
// run is the main progress reporting loop
@@ -186,7 +191,7 @@ func (pr *ProgressReporter) printSummaryStatus() {
filesProcessed := pr.stats.FilesProcessed.Load()
totalFiles := pr.stats.TotalFiles.Load()
status := fmt.Sprintf("Progress: %d/%d files, %s/%s (%.1f%%), %s/s%s",
status := fmt.Sprintf("Snapshot progress: %d/%d files, %s/%s (%.1f%%), %s/s%s",
filesProcessed,
totalFiles,
humanize.Bytes(uint64(bytesProcessed)),
@@ -206,7 +211,7 @@ func (pr *ProgressReporter) printSummaryStatus() {
// printDetailedStatus prints a multi-line detailed status
func (pr *ProgressReporter) printDetailedStatus() {
pr.stats.mu.Lock()
pr.stats.lastDetailTime = time.Now()
pr.stats.lastDetailTime = time.Now().UTC()
pr.stats.mu.Unlock()
elapsed := time.Since(pr.stats.StartTime)
@@ -225,7 +230,7 @@ func (pr *ProgressReporter) printDetailedStatus() {
totalBytes := bytesScanned + bytesSkipped
rate := float64(totalBytes) / elapsed.Seconds()
log.Notice("=== Backup Progress Report ===")
log.Notice("=== Snapshot Progress Report ===")
log.Info("Elapsed time", "duration", formatDuration(elapsed))
// Calculate and show ETA if we have data
@@ -264,7 +269,7 @@ func (pr *ProgressReporter) printDetailedStatus() {
"created", blobsCreated,
"uploaded", blobsUploaded,
"pending", blobsCreated-blobsUploaded)
log.Info("Upload progress",
log.Info("Total uploaded to S3",
"uploaded", humanize.Bytes(uint64(bytesUploaded)),
"compression_ratio", formatRatio(bytesUploaded, bytesScanned))
if currentFile != "" {
@@ -313,31 +318,8 @@ func truncatePath(path string, maxLen int) string {
// printUploadProgress prints upload progress
func (pr *ProgressReporter) printUploadProgress(info *UploadInfo) {
elapsed := time.Since(info.StartTime)
if elapsed < time.Millisecond {
elapsed = time.Millisecond // Avoid division by zero
}
bytesPerSec := float64(info.Size) / elapsed.Seconds()
bitsPerSec := bytesPerSec * 8
// Format speed in bits/second
var speedStr string
if bitsPerSec >= 1e9 {
speedStr = fmt.Sprintf("%.1fGbit/sec", bitsPerSec/1e9)
} else if bitsPerSec >= 1e6 {
speedStr = fmt.Sprintf("%.0fMbit/sec", bitsPerSec/1e6)
} else if bitsPerSec >= 1e3 {
speedStr = fmt.Sprintf("%.0fKbit/sec", bitsPerSec/1e3)
} else {
speedStr = fmt.Sprintf("%.0fbit/sec", bitsPerSec)
}
log.Info("Uploading blob",
"hash", info.BlobHash[:8]+"...",
"size", humanize.Bytes(uint64(info.Size)),
"elapsed", formatDuration(elapsed),
"speed", speedStr)
// This function is called repeatedly during upload, not just at start
// Don't print anything here - the actual progress is shown by ReportUploadProgress
}
// ReportUploadStart marks the beginning of a blob upload
@@ -345,7 +327,7 @@ func (pr *ProgressReporter) ReportUploadStart(blobHash string, size int64) {
info := &UploadInfo{
BlobHash: blobHash,
Size: size,
StartTime: time.Now(),
StartTime: time.Now().UTC(),
}
pr.stats.CurrentUpload.Store(info)
}
@@ -355,6 +337,9 @@ func (pr *ProgressReporter) ReportUploadComplete(blobHash string, size int64, du
// Clear current upload
pr.stats.CurrentUpload.Store((*UploadInfo)(nil))
// Add to total upload duration
pr.stats.UploadDurationMs.Add(duration.Milliseconds())
// Calculate speed
if duration < time.Millisecond {
duration = time.Millisecond
@@ -374,7 +359,7 @@ func (pr *ProgressReporter) ReportUploadComplete(blobHash string, size int64, du
speedStr = fmt.Sprintf("%.0fbit/sec", bitsPerSec)
}
log.Info("Blob uploaded",
log.Info("Blob upload completed",
"hash", blobHash[:8]+"...",
"size", humanize.Bytes(uint64(size)),
"duration", formatDuration(duration),
@@ -384,6 +369,44 @@ func (pr *ProgressReporter) ReportUploadComplete(blobHash string, size int64, du
// UpdateChunkingActivity updates the last chunking time
func (pr *ProgressReporter) UpdateChunkingActivity() {
pr.stats.mu.Lock()
pr.stats.lastChunkingTime = time.Now()
pr.stats.lastChunkingTime = time.Now().UTC()
pr.stats.mu.Unlock()
}
// ReportUploadProgress reports current upload progress with instantaneous speed
func (pr *ProgressReporter) ReportUploadProgress(blobHash string, bytesUploaded, totalSize int64, instantSpeed float64) {
// Update the current upload info with progress
if uploadInfo, ok := pr.stats.CurrentUpload.Load().(*UploadInfo); ok && uploadInfo != nil {
// Format speed in bits/second
bitsPerSec := instantSpeed * 8
var speedStr string
if bitsPerSec >= 1e9 {
speedStr = fmt.Sprintf("%.1fGbit/sec", bitsPerSec/1e9)
} else if bitsPerSec >= 1e6 {
speedStr = fmt.Sprintf("%.0fMbit/sec", bitsPerSec/1e6)
} else if bitsPerSec >= 1e3 {
speedStr = fmt.Sprintf("%.0fKbit/sec", bitsPerSec/1e3)
} else {
speedStr = fmt.Sprintf("%.0fbit/sec", bitsPerSec)
}
percent := float64(bytesUploaded) / float64(totalSize) * 100
// Calculate ETA based on current speed
etaStr := "unknown"
if instantSpeed > 0 && bytesUploaded < totalSize {
remainingBytes := totalSize - bytesUploaded
remainingSeconds := float64(remainingBytes) / instantSpeed
eta := time.Duration(remainingSeconds * float64(time.Second))
etaStr = formatDuration(eta)
}
log.Info("Blob upload progress",
"hash", blobHash[:8]+"...",
"progress", fmt.Sprintf("%.1f%%", percent),
"uploaded", humanize.Bytes(uint64(bytesUploaded)),
"total", humanize.Bytes(uint64(totalSize)),
"speed", speedStr,
"eta", etaStr)
}
}


@@ -15,6 +15,7 @@ import (
"git.eeqj.de/sneak/vaultik/internal/crypto"
"git.eeqj.de/sneak/vaultik/internal/database"
"git.eeqj.de/sneak/vaultik/internal/log"
"git.eeqj.de/sneak/vaultik/internal/s3"
"github.com/dustin/go-humanize"
"github.com/spf13/afero"
)
@@ -49,6 +50,8 @@ type Scanner struct {
// S3Client interface for blob storage operations
type S3Client interface {
PutObject(ctx context.Context, key string, data io.Reader) error
PutObjectWithProgress(ctx context.Context, key string, data io.Reader, size int64, progress s3.ProgressCallback) error
StatObject(ctx context.Context, key string) (*s3.ObjectInfo, error)
}
// ScannerConfig contains configuration for the scanner
@@ -125,7 +128,7 @@ func (s *Scanner) Scan(ctx context.Context, path string, snapshotID string) (*Sc
s.snapshotID = snapshotID
s.scanCtx = ctx
result := &ScanResult{
StartTime: time.Now(),
StartTime: time.Now().UTC(),
}
// Set blob handler for concurrent upload
@@ -143,7 +146,7 @@ func (s *Scanner) Scan(ctx context.Context, path string, snapshotID string) (*Sc
}
// Phase 1: Scan directory and collect files to process
log.Info("Phase 1: Scanning directory structure")
log.Info("Phase 1/3: Scanning directory structure")
filesToProcess, err := s.scanPhase(ctx, path, result)
if err != nil {
return nil, fmt.Errorf("scan phase failed: %w", err)
@@ -169,7 +172,7 @@ func (s *Scanner) Scan(ctx context.Context, path string, snapshotID string) (*Sc
// Phase 2: Process files and create chunks
if len(filesToProcess) > 0 {
log.Info("Phase 2: Processing files and creating chunks")
log.Info("Phase 2/3: Creating snapshot (chunking, compressing, encrypting, and uploading blobs)")
if err := s.processPhase(ctx, filesToProcess, result); err != nil {
return nil, fmt.Errorf("process phase failed: %w", err)
}
@@ -179,7 +182,7 @@ func (s *Scanner) Scan(ctx context.Context, path string, snapshotID string) (*Sc
blobs := s.packer.GetFinishedBlobs()
result.BlobsCreated += len(blobs)
result.EndTime = time.Now()
result.EndTime = time.Now().UTC()
return result, nil
}
@@ -290,21 +293,12 @@ func (s *Scanner) checkFileAndUpdateMetadata(ctx context.Context, path string, i
default:
}
var file *database.File
var needsProcessing bool
// Use a short transaction just for the database operations
err := s.repos.WithTx(ctx, func(txCtx context.Context, tx *sql.Tx) error {
var err error
file, needsProcessing, err = s.checkFile(txCtx, tx, path, info, result)
return err
})
return file, needsProcessing, err
// Process file without holding a long transaction
return s.checkFile(ctx, path, info, result)
}
// checkFile checks if a file needs processing and updates metadata within a transaction
func (s *Scanner) checkFile(ctx context.Context, tx *sql.Tx, path string, info os.FileInfo, result *ScanResult) (*database.File, bool, error) {
// checkFile checks if a file needs processing and updates metadata
func (s *Scanner) checkFile(ctx context.Context, path string, info os.FileInfo, result *ScanResult) (*database.File, bool, error) {
// Get file stats
stat, ok := info.Sys().(interface {
Uid() uint32
@@ -338,25 +332,31 @@ func (s *Scanner) checkFile(ctx context.Context, tx *sql.Tx, path string, info o
LinkTarget: linkTarget,
}
// Check if file has changed since last backup
// Check if file has changed since last backup (no transaction needed for read)
log.Debug("Checking if file exists in database", "path", path)
existingFile, err := s.repos.Files.GetByPathTx(ctx, tx, path)
existingFile, err := s.repos.Files.GetByPath(ctx, path)
if err != nil {
return nil, false, fmt.Errorf("checking existing file: %w", err)
}
fileChanged := existingFile == nil || s.hasFileChanged(existingFile, file)
// Always update file metadata
// Update file metadata in a short transaction
log.Debug("Updating file metadata", "path", path, "changed", fileChanged)
if err := s.repos.Files.Create(ctx, tx, file); err != nil {
err = s.repos.WithTx(ctx, func(ctx context.Context, tx *sql.Tx) error {
return s.repos.Files.Create(ctx, tx, file)
})
if err != nil {
return nil, false, err
}
log.Debug("File metadata updated", "path", path)
// Add file to snapshot
// Add file to snapshot in a short transaction
log.Debug("Adding file to snapshot", "path", path, "snapshot", s.snapshotID)
if err := s.repos.Snapshots.AddFile(ctx, tx, s.snapshotID, path); err != nil {
err = s.repos.WithTx(ctx, func(ctx context.Context, tx *sql.Tx) error {
return s.repos.Snapshots.AddFile(ctx, tx, s.snapshotID, path)
})
if err != nil {
return nil, false, fmt.Errorf("adding file to snapshot: %w", err)
}
log.Debug("File added to snapshot", "path", path)
@@ -381,7 +381,7 @@ func (s *Scanner) checkFile(ctx context.Context, tx *sql.Tx, path string, info o
}
// File hasn't changed, but we still need to associate existing chunks with this snapshot
log.Debug("File hasn't changed, associating existing chunks", "path", path)
if err := s.associateExistingChunks(ctx, tx, path); err != nil {
if err := s.associateExistingChunks(ctx, path); err != nil {
return nil, false, fmt.Errorf("associating existing chunks: %w", err)
}
log.Debug("Existing chunks associated", "path", path)
@@ -421,25 +421,25 @@ func (s *Scanner) hasFileChanged(existingFile, newFile *database.File) bool {
}
// associateExistingChunks links existing chunks from an unchanged file to the current snapshot
func (s *Scanner) associateExistingChunks(ctx context.Context, tx *sql.Tx, path string) error {
func (s *Scanner) associateExistingChunks(ctx context.Context, path string) error {
log.Debug("associateExistingChunks start", "path", path)
// Get existing file chunks
// Get existing file chunks (no transaction needed for read)
log.Debug("Getting existing file chunks", "path", path)
fileChunks, err := s.repos.FileChunks.GetByFileTx(ctx, tx, path)
fileChunks, err := s.repos.FileChunks.GetByFile(ctx, path)
if err != nil {
return fmt.Errorf("getting existing file chunks: %w", err)
}
log.Debug("Got file chunks", "path", path, "count", len(fileChunks))
// For each chunk, find its blob and associate with current snapshot
processedBlobs := make(map[string]bool)
// Collect unique blob IDs that need to be added to snapshot
blobsToAdd := make(map[string]string) // blob ID -> blob hash
for i, fc := range fileChunks {
log.Debug("Processing chunk", "path", path, "chunk_index", i, "chunk_hash", fc.ChunkHash)
// Find which blob contains this chunk
// Find which blob contains this chunk (no transaction needed for read)
log.Debug("Finding blob for chunk", "chunk_hash", fc.ChunkHash)
blobChunk, err := s.repos.BlobChunks.GetByChunkHashTx(ctx, tx, fc.ChunkHash)
blobChunk, err := s.repos.BlobChunks.GetByChunkHash(ctx, fc.ChunkHash)
if err != nil {
return fmt.Errorf("finding blob for chunk %s: %w", fc.ChunkHash, err)
}
@@ -449,28 +449,39 @@ func (s *Scanner) associateExistingChunks(ctx context.Context, tx *sql.Tx, path
}
log.Debug("Found blob for chunk", "chunk_hash", fc.ChunkHash, "blob_id", blobChunk.BlobID)
// Get blob to find its hash
blob, err := s.repos.Blobs.GetByID(ctx, blobChunk.BlobID)
if err != nil {
return fmt.Errorf("getting blob %s: %w", blobChunk.BlobID, err)
}
if blob == nil {
log.Warn("Blob record not found", "blob_id", blobChunk.BlobID)
continue
}
// Add blob to snapshot if not already processed
if !processedBlobs[blobChunk.BlobID] {
log.Debug("Adding blob to snapshot", "blob_id", blobChunk.BlobID, "blob_hash", blob.Hash, "snapshot", s.snapshotID)
if err := s.repos.Snapshots.AddBlob(ctx, tx, s.snapshotID, blobChunk.BlobID, blob.Hash); err != nil {
return fmt.Errorf("adding existing blob to snapshot: %w", err)
}
log.Debug("Added blob to snapshot", "blob_id", blobChunk.BlobID)
processedBlobs[blobChunk.BlobID] = true
// Track blob ID for later processing
if _, exists := blobsToAdd[blobChunk.BlobID]; !exists {
blobsToAdd[blobChunk.BlobID] = "" // We'll get the hash later
}
}
log.Debug("associateExistingChunks complete", "path", path, "blobs_processed", len(processedBlobs))
// Now get blob hashes outside of transaction operations
for blobID := range blobsToAdd {
blob, err := s.repos.Blobs.GetByID(ctx, blobID)
if err != nil {
return fmt.Errorf("getting blob %s: %w", blobID, err)
}
if blob == nil {
log.Warn("Blob record not found", "blob_id", blobID)
delete(blobsToAdd, blobID)
continue
}
blobsToAdd[blobID] = blob.Hash
}
// Add blobs to snapshot using short transactions
for blobID, blobHash := range blobsToAdd {
log.Debug("Adding blob to snapshot", "blob_id", blobID, "blob_hash", blobHash, "snapshot", s.snapshotID)
err := s.repos.WithTx(ctx, func(ctx context.Context, tx *sql.Tx) error {
return s.repos.Snapshots.AddBlob(ctx, tx, s.snapshotID, blobID, blobHash)
})
if err != nil {
return fmt.Errorf("adding existing blob to snapshot: %w", err)
}
log.Debug("Added blob to snapshot", "blob_id", blobID)
}
log.Debug("associateExistingChunks complete", "path", path, "blobs_processed", len(blobsToAdd))
return nil
}
@@ -478,7 +489,7 @@ func (s *Scanner) associateExistingChunks(ctx context.Context, tx *sql.Tx, path
func (s *Scanner) handleBlobReady(blobWithReader *blob.BlobWithReader) error {
log.Debug("Blob handler called", "blob_hash", blobWithReader.Hash[:8]+"...")
startTime := time.Now()
startTime := time.Now().UTC()
finishedBlob := blobWithReader.FinishedBlob
// Report upload start
@@ -492,7 +503,40 @@ func (s *Scanner) handleBlobReady(blobWithReader *blob.BlobWithReader) error {
if ctx == nil {
ctx = context.Background()
}
if err := s.s3Client.PutObject(ctx, "blobs/"+finishedBlob.Hash, blobWithReader.Reader); err != nil {
// Track bytes uploaded for accurate speed calculation
lastProgressTime := time.Now()
lastProgressBytes := int64(0)
progressCallback := func(uploaded int64) error {
// Calculate instantaneous speed
now := time.Now()
elapsed := now.Sub(lastProgressTime).Seconds()
if elapsed > 0.5 { // Update speed every 0.5 seconds
bytesSinceLastUpdate := uploaded - lastProgressBytes
speed := float64(bytesSinceLastUpdate) / elapsed
if s.progress != nil {
s.progress.ReportUploadProgress(finishedBlob.Hash, uploaded, finishedBlob.Compressed, speed)
}
lastProgressTime = now
lastProgressBytes = uploaded
}
// Check for cancellation
select {
case <-ctx.Done():
return ctx.Err()
default:
return nil
}
}
// Create sharded path: blobs/ca/fe/cafebabe...
blobPath := fmt.Sprintf("blobs/%s/%s/%s", finishedBlob.Hash[:2], finishedBlob.Hash[2:4], finishedBlob.Hash)
if err := s.s3Client.PutObjectWithProgress(ctx, blobPath, blobWithReader.Reader, finishedBlob.Compressed, progressCallback); err != nil {
return fmt.Errorf("uploading blob %s to S3: %w", finishedBlob.Hash, err)
}
@@ -574,8 +618,8 @@ func (s *Scanner) processFileStreaming(ctx context.Context, fileToProcess *FileT
var chunks []chunkInfo
chunkIndex := 0
// Process chunks in streaming fashion
err = s.chunker.ChunkReaderStreaming(file, func(chunk chunker.Chunk) error {
// Process chunks in streaming fashion and get full file hash
fileHash, err := s.chunker.ChunkReaderStreaming(file, func(chunk chunker.Chunk) error {
// Check for cancellation
select {
case <-ctx.Done():
@@ -589,17 +633,16 @@ func (s *Scanner) processFileStreaming(ctx context.Context, fileToProcess *FileT
"hash", chunk.Hash,
"size", chunk.Size)
// Check if chunk already exists
chunkExists := false
err := s.repos.WithTx(ctx, func(txCtx context.Context, tx *sql.Tx) error {
existing, err := s.repos.Chunks.GetByHash(txCtx, chunk.Hash)
if err != nil {
return err
}
chunkExists = (existing != nil)
// Check if chunk already exists (outside of transaction)
existing, err := s.repos.Chunks.GetByHash(ctx, chunk.Hash)
if err != nil {
return fmt.Errorf("checking chunk existence: %w", err)
}
chunkExists := (existing != nil)
// Store chunk if new
if !chunkExists {
// Store chunk if new
if !chunkExists {
err := s.repos.WithTx(ctx, func(txCtx context.Context, tx *sql.Tx) error {
dbChunk := &database.Chunk{
ChunkHash: chunk.Hash,
SHA256: chunk.Hash,
@@ -608,17 +651,17 @@ func (s *Scanner) processFileStreaming(ctx context.Context, fileToProcess *FileT
if err := s.repos.Chunks.Create(txCtx, tx, dbChunk); err != nil {
return fmt.Errorf("creating chunk: %w", err)
}
return nil
})
if err != nil {
return fmt.Errorf("storing chunk: %w", err)
}
return nil
})
if err != nil {
return fmt.Errorf("checking/storing chunk: %w", err)
}
// Track file chunk association for later storage
chunks = append(chunks, chunkInfo{
fileChunk: database.FileChunk{
Path: fileToProcess.Path,
FileID: fileToProcess.File.ID,
Idx: chunkIndex,
ChunkHash: chunk.Hash,
},
@@ -683,6 +726,11 @@ func (s *Scanner) processFileStreaming(ctx context.Context, fileToProcess *FileT
return fmt.Errorf("chunking file: %w", err)
}
log.Debug("Completed chunking file",
"path", fileToProcess.Path,
"file_hash", fileHash,
"chunks", len(chunks))
// Store file-chunk associations and chunk-file mappings in database
err = s.repos.WithTx(ctx, func(txCtx context.Context, tx *sql.Tx) error {
for _, ci := range chunks {
@@ -694,7 +742,7 @@ func (s *Scanner) processFileStreaming(ctx context.Context, fileToProcess *FileT
// Create chunk-file mapping
chunkFile := &database.ChunkFile{
ChunkHash: ci.fileChunk.ChunkHash,
FilePath: fileToProcess.Path,
FileID: fileToProcess.File.ID,
FileOffset: ci.offset,
Length: ci.size,
}
@@ -704,7 +752,7 @@ func (s *Scanner) processFileStreaming(ctx context.Context, fileToProcess *FileT
}
// Add file to snapshot
if err := s.repos.Snapshots.AddFile(txCtx, tx, s.snapshotID, fileToProcess.Path); err != nil {
if err := s.repos.Snapshots.AddFileByID(txCtx, tx, s.snapshotID, fileToProcess.File.ID); err != nil {
return fmt.Errorf("adding file to snapshot: %w", err)
}
@@ -713,3 +761,8 @@ func (s *Scanner) processFileStreaming(ctx context.Context, fileToProcess *FileT
return err
}
// GetProgress returns the progress reporter for this scanner
func (s *Scanner) GetProgress() *ProgressReporter {
return s.progress
}


@@ -213,7 +213,7 @@ func TestScannerWithSymlinks(t *testing.T) {
Repositories: repos,
MaxBlobSize: int64(1024 * 1024),
CompressionLevel: 3,
AgeRecipients: []string{},
AgeRecipients: []string{"age1ezrjmfpwsc95svdg0y54mums3zevgzu0x0ecq2f7tp8a05gl0sjq9q9wjg"}, // Test public key
})
// Create a snapshot record for testing
@@ -314,7 +314,7 @@ func TestScannerLargeFile(t *testing.T) {
Repositories: repos,
MaxBlobSize: int64(1024 * 1024),
CompressionLevel: 3,
AgeRecipients: []string{},
AgeRecipients: []string{"age1ezrjmfpwsc95svdg0y54mums3zevgzu0x0ecq2f7tp8a05gl0sjq9q9wjg"}, // Test public key
})
// Create a snapshot record for testing


@@ -78,21 +78,22 @@ func NewSnapshotManager(repos *database.Repositories, s3Client S3Client, encrypt
}
// CreateSnapshot creates a new snapshot record in the database at the start of a backup
func (sm *SnapshotManager) CreateSnapshot(ctx context.Context, hostname, version string) (string, error) {
snapshotID := fmt.Sprintf("%s-%s", hostname, time.Now().Format("20060102-150405"))
func (sm *SnapshotManager) CreateSnapshot(ctx context.Context, hostname, version, gitRevision string) (string, error) {
snapshotID := fmt.Sprintf("%s-%s", hostname, time.Now().UTC().Format("20060102-150405Z"))
snapshot := &database.Snapshot{
ID: snapshotID,
Hostname: hostname,
VaultikVersion: version,
StartedAt: time.Now(),
CompletedAt: nil, // Not completed yet
FileCount: 0,
ChunkCount: 0,
BlobCount: 0,
TotalSize: 0,
BlobSize: 0,
CompressionRatio: 1.0,
ID: snapshotID,
Hostname: hostname,
VaultikVersion: version,
VaultikGitRevision: gitRevision,
StartedAt: time.Now().UTC(),
CompletedAt: nil, // Not completed yet
FileCount: 0,
ChunkCount: 0,
BlobCount: 0,
TotalSize: 0,
BlobSize: 0,
CompressionRatio: 1.0,
}
err := sm.repos.WithTx(ctx, func(ctx context.Context, tx *sql.Tx) error {
@@ -126,6 +127,30 @@ func (sm *SnapshotManager) UpdateSnapshotStats(ctx context.Context, snapshotID s
return nil
}
// UpdateSnapshotStatsExtended updates snapshot statistics with extended metrics.
// This includes compression level, uncompressed blob size, and upload duration.
func (sm *SnapshotManager) UpdateSnapshotStatsExtended(ctx context.Context, snapshotID string, stats ExtendedBackupStats) error {
return sm.repos.WithTx(ctx, func(ctx context.Context, tx *sql.Tx) error {
// First update basic stats
if err := sm.repos.Snapshots.UpdateCounts(ctx, tx, snapshotID,
int64(stats.FilesScanned),
int64(stats.ChunksCreated),
int64(stats.BlobsCreated),
stats.BytesScanned,
stats.BytesUploaded,
); err != nil {
return err
}
// Then update extended stats
return sm.repos.Snapshots.UpdateExtendedStats(ctx, tx, snapshotID,
stats.BlobUncompressedSize,
stats.CompressionLevel,
stats.UploadDurationMs,
)
})
}
// CompleteSnapshot marks a snapshot as completed and exports its metadata
func (sm *SnapshotManager) CompleteSnapshot(ctx context.Context, snapshotID string) error {
// Mark the snapshot as completed
@@ -158,14 +183,16 @@ func (sm *SnapshotManager) CompleteSnapshot(ctx context.Context, snapshotID stri
//
// This ensures database consistency during the copy operation.
func (sm *SnapshotManager) ExportSnapshotMetadata(ctx context.Context, dbPath string, snapshotID string) error {
log.Info("Exporting snapshot metadata", "snapshot_id", snapshotID)
log.Info("Phase 3/3: Exporting snapshot metadata", "snapshot_id", snapshotID, "source_db", dbPath)
// Create temp directory for all temporary files
tempDir, err := os.MkdirTemp("", "vaultik-snapshot-*")
if err != nil {
return fmt.Errorf("creating temp dir: %w", err)
}
log.Debug("Created temporary directory", "path", tempDir)
defer func() {
log.Debug("Cleaning up temporary directory", "path", tempDir)
if err := os.RemoveAll(tempDir); err != nil {
log.Debug("Failed to remove temp dir", "path", tempDir, "error", err)
}
@@ -174,28 +201,37 @@ func (sm *SnapshotManager) ExportSnapshotMetadata(ctx context.Context, dbPath st
// Step 1: Copy database to temp file
// The main database should be closed at this point
tempDBPath := filepath.Join(tempDir, "snapshot.db")
log.Debug("Copying database to temporary location", "source", dbPath, "destination", tempDBPath)
if err := copyFile(dbPath, tempDBPath); err != nil {
return fmt.Errorf("copying database: %w", err)
}
log.Debug("Database copy complete", "size", getFileSize(tempDBPath))
// Step 2: Clean the temp database to only contain current snapshot data
log.Debug("Cleaning snapshot database to contain only current snapshot", "snapshot_id", snapshotID)
if err := sm.cleanSnapshotDB(ctx, tempDBPath, snapshotID); err != nil {
return fmt.Errorf("cleaning snapshot database: %w", err)
}
log.Debug("Database cleaning complete", "size_after_clean", getFileSize(tempDBPath))
// Step 3: Dump the cleaned database to SQL
dumpPath := filepath.Join(tempDir, "snapshot.sql")
log.Debug("Dumping database to SQL", "source", tempDBPath, "destination", dumpPath)
if err := sm.dumpDatabase(tempDBPath, dumpPath); err != nil {
return fmt.Errorf("dumping database: %w", err)
}
log.Debug("SQL dump complete", "size", getFileSize(dumpPath))
// Step 4: Compress the SQL dump
compressedPath := filepath.Join(tempDir, "snapshot.sql.zst")
log.Debug("Compressing SQL dump", "source", dumpPath, "destination", compressedPath)
if err := sm.compressDump(dumpPath, compressedPath); err != nil {
return fmt.Errorf("compressing dump: %w", err)
}
log.Debug("Compression complete", "original_size", getFileSize(dumpPath), "compressed_size", getFileSize(compressedPath))
// Step 5: Read compressed data for encryption/upload
log.Debug("Reading compressed data for upload", "path", compressedPath)
compressedData, err := os.ReadFile(compressedPath)
if err != nil {
return fmt.Errorf("reading compressed dump: %w", err)
@@ -204,14 +240,19 @@ func (sm *SnapshotManager) ExportSnapshotMetadata(ctx context.Context, dbPath st
// Step 6: Encrypt if encryptor is available
finalData := compressedData
if sm.encryptor != nil {
log.Debug("Encrypting snapshot data", "size_before", len(compressedData))
encrypted, err := sm.encryptor.Encrypt(compressedData)
if err != nil {
return fmt.Errorf("encrypting snapshot: %w", err)
}
finalData = encrypted
log.Debug("Encryption complete", "size_after", len(encrypted))
} else {
log.Debug("No encryption configured, using compressed data as-is")
}
// Step 7: Generate blob manifest (before closing temp DB)
log.Debug("Generating blob manifest from temporary database", "db_path", tempDBPath)
blobManifest, err := sm.generateBlobManifest(ctx, tempDBPath, snapshotID)
if err != nil {
return fmt.Errorf("generating blob manifest: %w", err)
@@ -224,15 +265,19 @@ func (sm *SnapshotManager) ExportSnapshotMetadata(ctx context.Context, dbPath st
dbKey += ".age"
}
log.Debug("Uploading snapshot database to S3", "key", dbKey, "size", len(finalData))
if err := sm.s3Client.PutObject(ctx, dbKey, bytes.NewReader(finalData)); err != nil {
return fmt.Errorf("uploading snapshot database: %w", err)
}
log.Debug("Database upload complete", "key", dbKey)
// Upload blob manifest (unencrypted, compressed)
manifestKey := fmt.Sprintf("metadata/%s/manifest.json.zst", snapshotID)
log.Debug("Uploading blob manifest to S3", "key", manifestKey, "size", len(blobManifest))
if err := sm.s3Client.PutObject(ctx, manifestKey, bytes.NewReader(blobManifest)); err != nil {
return fmt.Errorf("uploading blob manifest: %w", err)
}
log.Debug("Manifest upload complete", "key", manifestKey)
log.Info("Uploaded snapshot metadata",
"snapshot_id", snapshotID,
@@ -260,14 +305,18 @@ func (sm *SnapshotManager) ExportSnapshotMetadata(ctx context.Context, dbPath st
// Future implementation when we have snapshot_files table:
//
// DELETE FROM snapshots WHERE id != ?;
// DELETE FROM files WHERE path NOT IN (
// SELECT file_path FROM snapshot_files WHERE snapshot_id = ?
// DELETE FROM files WHERE NOT EXISTS (
// SELECT 1 FROM snapshot_files
// WHERE snapshot_files.file_id = files.id
// AND snapshot_files.snapshot_id = ?
// );
// DELETE FROM chunks WHERE chunk_hash NOT IN (
// SELECT DISTINCT chunk_hash FROM file_chunks
// DELETE FROM chunks WHERE NOT EXISTS (
// SELECT 1 FROM file_chunks
// WHERE file_chunks.chunk_hash = chunks.chunk_hash
// );
// DELETE FROM blobs WHERE blob_hash NOT IN (
// SELECT DISTINCT blob_hash FROM blob_chunks
// DELETE FROM blobs WHERE NOT EXISTS (
// SELECT 1 FROM blob_chunks
// WHERE blob_chunks.blob_hash = blobs.blob_hash
// );
func (sm *SnapshotManager) cleanSnapshotDB(ctx context.Context, dbPath string, snapshotID string) error {
// Open the temp database
@@ -293,84 +342,127 @@ func (sm *SnapshotManager) cleanSnapshotDB(ctx context.Context, dbPath string, s
}()
// Step 1: Delete all other snapshots
_, err = tx.ExecContext(ctx, "DELETE FROM snapshots WHERE id != ?", snapshotID)
log.Debug("Deleting other snapshots", "keeping", snapshotID)
database.LogSQL("Execute", "DELETE FROM snapshots WHERE id != ?", snapshotID)
result, err := tx.ExecContext(ctx, "DELETE FROM snapshots WHERE id != ?", snapshotID)
if err != nil {
return fmt.Errorf("deleting other snapshots: %w", err)
}
rowsAffected, _ := result.RowsAffected()
log.Debug("Deleted snapshots", "count", rowsAffected)
// Step 2: Delete files not in this snapshot
_, err = tx.ExecContext(ctx, `
log.Debug("Deleting files not in current snapshot")
database.LogSQL("Execute", `DELETE FROM files WHERE NOT EXISTS (SELECT 1 FROM snapshot_files WHERE snapshot_files.file_id = files.id AND snapshot_files.snapshot_id = ?)`, snapshotID)
result, err = tx.ExecContext(ctx, `
DELETE FROM files
WHERE path NOT IN (
SELECT file_path FROM snapshot_files WHERE snapshot_id = ?
WHERE NOT EXISTS (
SELECT 1 FROM snapshot_files
WHERE snapshot_files.file_id = files.id
AND snapshot_files.snapshot_id = ?
)`, snapshotID)
if err != nil {
return fmt.Errorf("deleting orphaned files: %w", err)
}
rowsAffected, _ = result.RowsAffected()
log.Debug("Deleted files", "count", rowsAffected)
// Step 3: file_chunks will be deleted via CASCADE from files
log.Debug("file_chunks will be deleted via CASCADE")
// Step 4: Delete chunk_files for deleted files
_, err = tx.ExecContext(ctx, `
log.Debug("Deleting orphaned chunk_files")
database.LogSQL("Execute", `DELETE FROM chunk_files WHERE NOT EXISTS (SELECT 1 FROM files WHERE files.id = chunk_files.file_id)`)
result, err = tx.ExecContext(ctx, `
DELETE FROM chunk_files
WHERE file_path NOT IN (
SELECT path FROM files
WHERE NOT EXISTS (
SELECT 1 FROM files
WHERE files.id = chunk_files.file_id
)`)
if err != nil {
return fmt.Errorf("deleting orphaned chunk_files: %w", err)
}
rowsAffected, _ = result.RowsAffected()
log.Debug("Deleted chunk_files", "count", rowsAffected)
// Step 5: Delete chunks with no remaining file references
_, err = tx.ExecContext(ctx, `
log.Debug("Deleting orphaned chunks")
database.LogSQL("Execute", `DELETE FROM chunks WHERE NOT EXISTS (SELECT 1 FROM file_chunks WHERE file_chunks.chunk_hash = chunks.chunk_hash)`)
result, err = tx.ExecContext(ctx, `
DELETE FROM chunks
WHERE chunk_hash NOT IN (
SELECT DISTINCT chunk_hash FROM file_chunks
WHERE NOT EXISTS (
SELECT 1 FROM file_chunks
WHERE file_chunks.chunk_hash = chunks.chunk_hash
)`)
if err != nil {
return fmt.Errorf("deleting orphaned chunks: %w", err)
}
rowsAffected, _ = result.RowsAffected()
log.Debug("Deleted chunks", "count", rowsAffected)
// Step 6: Delete blob_chunks for deleted chunks
_, err = tx.ExecContext(ctx, `
log.Debug("Deleting orphaned blob_chunks")
database.LogSQL("Execute", `DELETE FROM blob_chunks WHERE NOT EXISTS (SELECT 1 FROM chunks WHERE chunks.chunk_hash = blob_chunks.chunk_hash)`)
result, err = tx.ExecContext(ctx, `
DELETE FROM blob_chunks
WHERE chunk_hash NOT IN (
SELECT chunk_hash FROM chunks
WHERE NOT EXISTS (
SELECT 1 FROM chunks
WHERE chunks.chunk_hash = blob_chunks.chunk_hash
)`)
if err != nil {
return fmt.Errorf("deleting orphaned blob_chunks: %w", err)
}
rowsAffected, _ = result.RowsAffected()
log.Debug("Deleted blob_chunks", "count", rowsAffected)
// Step 7: Delete blobs not in this snapshot
_, err = tx.ExecContext(ctx, `
log.Debug("Deleting blobs not in current snapshot")
database.LogSQL("Execute", `DELETE FROM blobs WHERE NOT EXISTS (SELECT 1 FROM snapshot_blobs WHERE snapshot_blobs.blob_hash = blobs.blob_hash AND snapshot_blobs.snapshot_id = ?)`, snapshotID)
result, err = tx.ExecContext(ctx, `
DELETE FROM blobs
WHERE blob_hash NOT IN (
SELECT blob_hash FROM snapshot_blobs WHERE snapshot_id = ?
WHERE NOT EXISTS (
SELECT 1 FROM snapshot_blobs
WHERE snapshot_blobs.blob_hash = blobs.blob_hash
AND snapshot_blobs.snapshot_id = ?
)`, snapshotID)
if err != nil {
return fmt.Errorf("deleting orphaned blobs: %w", err)
}
rowsAffected, _ = result.RowsAffected()
log.Debug("Deleted blobs not in snapshot", "count", rowsAffected)
// Step 8: Delete orphaned snapshot_files and snapshot_blobs
_, err = tx.ExecContext(ctx, "DELETE FROM snapshot_files WHERE snapshot_id != ?", snapshotID)
log.Debug("Deleting orphaned snapshot_files")
database.LogSQL("Execute", "DELETE FROM snapshot_files WHERE snapshot_id != ?", snapshotID)
result, err = tx.ExecContext(ctx, "DELETE FROM snapshot_files WHERE snapshot_id != ?", snapshotID)
if err != nil {
return fmt.Errorf("deleting orphaned snapshot_files: %w", err)
}
rowsAffected, _ = result.RowsAffected()
log.Debug("Deleted snapshot_files", "count", rowsAffected)
_, err = tx.ExecContext(ctx, "DELETE FROM snapshot_blobs WHERE snapshot_id != ?", snapshotID)
log.Debug("Deleting orphaned snapshot_blobs")
database.LogSQL("Execute", "DELETE FROM snapshot_blobs WHERE snapshot_id != ?", snapshotID)
result, err = tx.ExecContext(ctx, "DELETE FROM snapshot_blobs WHERE snapshot_id != ?", snapshotID)
if err != nil {
return fmt.Errorf("deleting orphaned snapshot_blobs: %w", err)
}
rowsAffected, _ = result.RowsAffected()
log.Debug("Deleted snapshot_blobs", "count", rowsAffected)
// Commit transaction
log.Debug("Committing cleanup transaction")
if err := tx.Commit(); err != nil {
return fmt.Errorf("committing transaction: %w", err)
}
log.Debug("Database cleanup complete")
return nil
}
// dumpDatabase creates a SQL dump of the database
func (sm *SnapshotManager) dumpDatabase(dbPath, dumpPath string) error {
log.Debug("Running sqlite3 dump command", "source", dbPath, "destination", dumpPath)
cmd := exec.Command("sqlite3", dbPath, ".dump")
output, err := cmd.Output()
@@ -378,6 +470,7 @@ func (sm *SnapshotManager) dumpDatabase(dbPath, dumpPath string) error {
return fmt.Errorf("running sqlite3 dump: %w", err)
}
log.Debug("SQL dump generated", "size", len(output))
if err := os.WriteFile(dumpPath, output, 0644); err != nil {
return fmt.Errorf("writing dump file: %w", err)
}
@@ -387,27 +480,32 @@ func (sm *SnapshotManager) dumpDatabase(dbPath, dumpPath string) error {
// compressDump compresses the SQL dump using zstd
func (sm *SnapshotManager) compressDump(inputPath, outputPath string) error {
log.Debug("Opening SQL dump for compression", "path", inputPath)
input, err := os.Open(inputPath)
if err != nil {
return fmt.Errorf("opening input file: %w", err)
}
defer func() {
log.Debug("Closing input file", "path", inputPath)
if err := input.Close(); err != nil {
log.Debug("Failed to close input file", "error", err)
log.Debug("Failed to close input file", "path", inputPath, "error", err)
}
}()
log.Debug("Creating output file for compressed data", "path", outputPath)
output, err := os.Create(outputPath)
if err != nil {
return fmt.Errorf("creating output file: %w", err)
}
defer func() {
log.Debug("Closing output file", "path", outputPath)
if err := output.Close(); err != nil {
log.Debug("Failed to close output file", "error", err)
log.Debug("Failed to close output file", "path", outputPath, "error", err)
}
}()
// Create zstd encoder with good compression and multithreading
log.Debug("Creating zstd compressor", "level", "SpeedBetterCompression", "concurrency", runtime.NumCPU())
zstdWriter, err := zstd.NewWriter(output,
zstd.WithEncoderLevel(zstd.SpeedBetterCompression),
zstd.WithEncoderConcurrency(runtime.NumCPU()),
@@ -422,6 +520,7 @@ func (sm *SnapshotManager) compressDump(inputPath, outputPath string) error {
}
}()
log.Debug("Compressing data")
if _, err := io.Copy(zstdWriter, input); err != nil {
return fmt.Errorf("compressing data: %w", err)
}
@@ -431,35 +530,44 @@ func (sm *SnapshotManager) compressDump(inputPath, outputPath string) error {
// copyFile copies a file from src to dst
func copyFile(src, dst string) error {
log.Debug("Opening source file for copy", "path", src)
sourceFile, err := os.Open(src)
if err != nil {
return err
}
defer func() {
log.Debug("Closing source file", "path", src)
if err := sourceFile.Close(); err != nil {
log.Debug("Failed to close source file", "error", err)
log.Debug("Failed to close source file", "path", src, "error", err)
}
}()
log.Debug("Creating destination file", "path", dst)
destFile, err := os.Create(dst)
if err != nil {
return err
}
defer func() {
log.Debug("Closing destination file", "path", dst)
if err := destFile.Close(); err != nil {
log.Debug("Failed to close destination file", "error", err)
log.Debug("Failed to close destination file", "path", dst, "error", err)
}
}()
if _, err := io.Copy(destFile, sourceFile); err != nil {
log.Debug("Copying file data")
n, err := io.Copy(destFile, sourceFile)
if err != nil {
return err
}
log.Debug("File copy complete", "bytes_copied", n)
return nil
}
// generateBlobManifest creates a compressed JSON list of all blobs in the snapshot
func (sm *SnapshotManager) generateBlobManifest(ctx context.Context, dbPath string, snapshotID string) ([]byte, error) {
log.Debug("Generating blob manifest", "db_path", dbPath, "snapshot_id", snapshotID)
// Open the cleaned database using the database package
db, err := database.New(ctx, dbPath)
if err != nil {
@@ -471,10 +579,12 @@ func (sm *SnapshotManager) generateBlobManifest(ctx context.Context, dbPath stri
repos := database.NewRepositories(db)
// Get all blobs for this snapshot
log.Debug("Querying blobs for snapshot", "snapshot_id", snapshotID)
blobs, err := repos.Snapshots.GetBlobHashes(ctx, snapshotID)
if err != nil {
return nil, fmt.Errorf("getting snapshot blobs: %w", err)
}
log.Debug("Found blobs", "count", len(blobs))
// Create manifest structure
manifest := struct {
@@ -490,16 +600,20 @@ func (sm *SnapshotManager) generateBlobManifest(ctx context.Context, dbPath stri
}
// Marshal to JSON
log.Debug("Marshaling manifest to JSON")
jsonData, err := json.MarshalIndent(manifest, "", " ")
if err != nil {
return nil, fmt.Errorf("marshaling manifest: %w", err)
}
log.Debug("JSON manifest created", "size", len(jsonData))
// Compress with zstd
log.Debug("Compressing manifest with zstd")
compressed, err := compressData(jsonData)
if err != nil {
return nil, fmt.Errorf("compressing manifest: %w", err)
}
log.Debug("Manifest compressed", "original_size", len(jsonData), "compressed_size", len(compressed))
log.Info("Generated blob manifest",
"snapshot_id", snapshotID,
@@ -532,6 +646,15 @@ func compressData(data []byte) ([]byte, error) {
return buf.Bytes(), nil
}
// getFileSize returns the size of a file in bytes, or -1 if error
func getFileSize(path string) int64 {
info, err := os.Stat(path)
if err != nil {
return -1
}
return info.Size()
}
// BackupStats contains statistics from a backup operation
type BackupStats struct {
FilesScanned int
@@ -540,3 +663,108 @@ type BackupStats struct {
BlobsCreated int
BytesUploaded int64
}
// ExtendedBackupStats contains additional statistics for comprehensive tracking
type ExtendedBackupStats struct {
BackupStats
BlobUncompressedSize int64 // Total uncompressed size of all referenced blobs
CompressionLevel int // Compression level used for this snapshot
UploadDurationMs int64 // Total milliseconds spent uploading to S3
}
// CleanupIncompleteSnapshots removes incomplete snapshots that don't have metadata in S3.
// This is critical for data safety: incomplete snapshots can cause deduplication to skip
// files that were never successfully backed up, resulting in data loss.
func (sm *SnapshotManager) CleanupIncompleteSnapshots(ctx context.Context, hostname string) error {
log.Info("Checking for incomplete snapshots", "hostname", hostname)
// Get all incomplete snapshots for this hostname
incompleteSnapshots, err := sm.repos.Snapshots.GetIncompleteByHostname(ctx, hostname)
if err != nil {
return fmt.Errorf("getting incomplete snapshots: %w", err)
}
if len(incompleteSnapshots) == 0 {
log.Debug("No incomplete snapshots found")
return nil
}
log.Info("Found incomplete snapshots", "count", len(incompleteSnapshots))
// Check each incomplete snapshot for metadata in S3
for _, snapshot := range incompleteSnapshots {
// Check if metadata exists in S3
metadataKey := fmt.Sprintf("metadata/%s/db.zst", snapshot.ID)
_, err := sm.s3Client.StatObject(ctx, metadataKey)
if err != nil {
// Metadata doesn't exist in S3 - this is an incomplete snapshot
log.Info("Cleaning up incomplete snapshot", "snapshot_id", snapshot.ID, "started_at", snapshot.StartedAt)
// Delete the snapshot and all its associations
if err := sm.deleteSnapshot(ctx, snapshot.ID); err != nil {
return fmt.Errorf("deleting incomplete snapshot %s: %w", snapshot.ID, err)
}
log.Info("Deleted incomplete snapshot", "snapshot_id", snapshot.ID)
} else {
// Metadata exists - this snapshot was completed but database wasn't updated
// This shouldn't happen in normal operation, but mark it complete
log.Warn("Found snapshot with metadata but incomplete in DB", "snapshot_id", snapshot.ID)
if err := sm.repos.Snapshots.MarkComplete(ctx, nil, snapshot.ID); err != nil {
log.Error("Failed to mark snapshot complete", "snapshot_id", snapshot.ID, "error", err)
}
}
}
return nil
}
// deleteSnapshot removes a snapshot and all its associations from the database
func (sm *SnapshotManager) deleteSnapshot(ctx context.Context, snapshotID string) error {
// Delete snapshot_files entries
if err := sm.repos.Snapshots.DeleteSnapshotFiles(ctx, snapshotID); err != nil {
return fmt.Errorf("deleting snapshot files: %w", err)
}
// Delete snapshot_blobs entries
if err := sm.repos.Snapshots.DeleteSnapshotBlobs(ctx, snapshotID); err != nil {
return fmt.Errorf("deleting snapshot blobs: %w", err)
}
// Delete the snapshot itself
if err := sm.repos.Snapshots.Delete(ctx, snapshotID); err != nil {
return fmt.Errorf("deleting snapshot: %w", err)
}
// Clean up orphaned data
log.Debug("Cleaning up orphaned data")
if err := sm.cleanupOrphanedData(ctx); err != nil {
return fmt.Errorf("cleaning up orphaned data: %w", err)
}
return nil
}
// cleanupOrphanedData removes files, chunks, and blobs that are no longer referenced by any snapshot
func (sm *SnapshotManager) cleanupOrphanedData(ctx context.Context) error {
// Delete orphaned files (files not in any snapshot)
log.Debug("Deleting orphaned files")
if err := sm.repos.Files.DeleteOrphaned(ctx); err != nil {
return fmt.Errorf("deleting orphaned files: %w", err)
}
// Delete orphaned chunks (chunks not referenced by any file)
log.Debug("Deleting orphaned chunks")
if err := sm.repos.Chunks.DeleteOrphaned(ctx); err != nil {
return fmt.Errorf("deleting orphaned chunks: %w", err)
}
// Delete orphaned blobs (blobs not in any snapshot)
log.Debug("Deleting orphaned blobs")
if err := sm.repos.Blobs.DeleteOrphaned(ctx); err != nil {
return fmt.Errorf("deleting orphaned blobs: %w", err)
}
return nil
}