Major refactoring: Updated manifest format and renamed backup to snapshot

- Created manifest.go with proper Manifest structure including blob sizes - Updated manifest generation to include compressed size for each blob - Added TotalCompressedSize field to manifest for quick access - Renamed backup package to snapshot for clarity - Updated snapshot list to show all remote snapshots - Remote snapshots not in local DB fetch manifest to get size - Local snapshots not in remote are automatically deleted - Removed backwards compatibility code (pre-1.0, no users) - Fixed prune command to use new manifest format - Updated all imports and references from backup to snapshot
2025-07-26 03:27:47 +02:00
parent c07d8eec0a
commit a544fa80f2
11 changed files with 254 additions and 168 deletions
--- a/internal/snapshot/backup_test.go
+++ b/internal/snapshot/backup_test.go
@@ -0,0 +1,532 @@
+package snapshot
+
+import (
+	"context"
+	"crypto/sha256"
+	"database/sql"
+	"fmt"
+	"io"
+	"io/fs"
+	"os"
+	"path/filepath"
+	"testing"
+	"testing/fstest"
+	"time"
+
+	"git.eeqj.de/sneak/vaultik/internal/database"
+)
+
+// MockS3Client is a mock implementation of S3 operations for testing
+type MockS3Client struct {
+	storage map[string][]byte
+}
+
+func NewMockS3Client() *MockS3Client {
+	return &MockS3Client{
+		storage: make(map[string][]byte),
+	}
+}
+
+func (m *MockS3Client) PutBlob(ctx context.Context, hash string, data []byte) error {
+	m.storage[hash] = data
+	return nil
+}
+
+func (m *MockS3Client) GetBlob(ctx context.Context, hash string) ([]byte, error) {
+	data, ok := m.storage[hash]
+	if !ok {
+		return nil, fmt.Errorf("blob not found: %s", hash)
+	}
+	return data, nil
+}
+
+func (m *MockS3Client) BlobExists(ctx context.Context, hash string) (bool, error) {
+	_, ok := m.storage[hash]
+	return ok, nil
+}
+
+func (m *MockS3Client) CreateBucket(ctx context.Context, bucket string) error {
+	return nil
+}
+
+func TestBackupWithInMemoryFS(t *testing.T) {
+	// Create a temporary directory for the database
+	tempDir := t.TempDir()
+	dbPath := filepath.Join(tempDir, "test.db")
+
+	// Create test filesystem
+	testFS := fstest.MapFS{
+		"file1.txt": &fstest.MapFile{
+			Data:    []byte("Hello, World!"),
+			Mode:    0644,
+			ModTime: time.Now(),
+		},
+		"dir1/file2.txt": &fstest.MapFile{
+			Data:    []byte("This is a test file with some content."),
+			Mode:    0755,
+			ModTime: time.Now(),
+		},
+		"dir1/subdir/file3.txt": &fstest.MapFile{
+			Data:    []byte("Another file in a subdirectory."),
+			Mode:    0600,
+			ModTime: time.Now(),
+		},
+		"largefile.bin": &fstest.MapFile{
+			Data:    generateLargeFileContent(10 * 1024 * 1024), // 10MB file with varied content
+			Mode:    0644,
+			ModTime: time.Now(),
+		},
+	}
+
+	// Initialize the database
+	ctx := context.Background()
+	db, err := database.New(ctx, dbPath)
+	if err != nil {
+		t.Fatalf("Failed to create database: %v", err)
+	}
+	defer func() {
+		if err := db.Close(); err != nil {
+			t.Logf("Failed to close database: %v", err)
+		}
+	}()
+
+	repos := database.NewRepositories(db)
+
+	// Create mock S3 client
+	s3Client := NewMockS3Client()
+
+	// Run backup
+	backupEngine := &BackupEngine{
+		repos:    repos,
+		s3Client: s3Client,
+	}
+
+	snapshotID, err := backupEngine.Backup(ctx, testFS, ".")
+	if err != nil {
+		t.Fatalf("Backup failed: %v", err)
+	}
+
+	// Verify snapshot was created
+	snapshot, err := repos.Snapshots.GetByID(ctx, snapshotID)
+	if err != nil {
+		t.Fatalf("Failed to get snapshot: %v", err)
+	}
+
+	if snapshot == nil {
+		t.Fatal("Snapshot not found")
+	}
+
+	if snapshot.FileCount == 0 {
+		t.Error("Expected snapshot to have files")
+	}
+
+	// Verify files in database
+	files, err := repos.Files.ListByPrefix(ctx, "")
+	if err != nil {
+		t.Fatalf("Failed to list files: %v", err)
+	}
+
+	expectedFiles := map[string]bool{
+		"file1.txt":             true,
+		"dir1/file2.txt":        true,
+		"dir1/subdir/file3.txt": true,
+		"largefile.bin":         true,
+	}
+
+	if len(files) != len(expectedFiles) {
+		t.Errorf("Expected %d files, got %d", len(expectedFiles), len(files))
+	}
+
+	for _, file := range files {
+		if !expectedFiles[file.Path] {
+			t.Errorf("Unexpected file in database: %s", file.Path)
+		}
+		delete(expectedFiles, file.Path)
+
+		// Verify file metadata
+		fsFile := testFS[file.Path]
+		if fsFile == nil {
+			t.Errorf("File %s not found in test filesystem", file.Path)
+			continue
+		}
+
+		if file.Size != int64(len(fsFile.Data)) {
+			t.Errorf("File %s: expected size %d, got %d", file.Path, len(fsFile.Data), file.Size)
+		}
+
+		if file.Mode != uint32(fsFile.Mode) {
+			t.Errorf("File %s: expected mode %o, got %o", file.Path, fsFile.Mode, file.Mode)
+		}
+	}
+
+	if len(expectedFiles) > 0 {
+		t.Errorf("Files not found in database: %v", expectedFiles)
+	}
+
+	// Verify chunks
+	chunks, err := repos.Chunks.List(ctx)
+	if err != nil {
+		t.Fatalf("Failed to list chunks: %v", err)
+	}
+
+	if len(chunks) == 0 {
+		t.Error("No chunks found in database")
+	}
+
+	// The large file should create 10 chunks (10MB / 1MB chunk size)
+	// Plus the small files
+	minExpectedChunks := 10 + 3
+	if len(chunks) < minExpectedChunks {
+		t.Errorf("Expected at least %d chunks, got %d", minExpectedChunks, len(chunks))
+	}
+
+	// Verify at least one blob was created and uploaded
+	// We can't list blobs directly, but we can check via snapshot blobs
+	blobHashes, err := repos.Snapshots.GetBlobHashes(ctx, snapshotID)
+	if err != nil {
+		t.Fatalf("Failed to get blob hashes: %v", err)
+	}
+	if len(blobHashes) == 0 {
+		t.Error("Expected at least one blob to be created")
+	}
+
+	for _, blobHash := range blobHashes {
+		// Check blob exists in mock S3
+		exists, err := s3Client.BlobExists(ctx, blobHash)
+		if err != nil {
+			t.Errorf("Failed to check blob %s: %v", blobHash, err)
+		}
+		if !exists {
+			t.Errorf("Blob %s not found in S3", blobHash)
+		}
+	}
+}
+
+func TestBackupDeduplication(t *testing.T) {
+	// Create a temporary directory for the database
+	tempDir := t.TempDir()
+	dbPath := filepath.Join(tempDir, "test.db")
+
+	// Create test filesystem with duplicate content
+	testFS := fstest.MapFS{
+		"file1.txt": &fstest.MapFile{
+			Data:    []byte("Duplicate content"),
+			Mode:    0644,
+			ModTime: time.Now(),
+		},
+		"file2.txt": &fstest.MapFile{
+			Data:    []byte("Duplicate content"),
+			Mode:    0644,
+			ModTime: time.Now(),
+		},
+		"file3.txt": &fstest.MapFile{
+			Data:    []byte("Unique content"),
+			Mode:    0644,
+			ModTime: time.Now(),
+		},
+	}
+
+	// Initialize the database
+	ctx := context.Background()
+	db, err := database.New(ctx, dbPath)
+	if err != nil {
+		t.Fatalf("Failed to create database: %v", err)
+	}
+	defer func() {
+		if err := db.Close(); err != nil {
+			t.Logf("Failed to close database: %v", err)
+		}
+	}()
+
+	repos := database.NewRepositories(db)
+
+	// Create mock S3 client
+	s3Client := NewMockS3Client()
+
+	// Run backup
+	backupEngine := &BackupEngine{
+		repos:    repos,
+		s3Client: s3Client,
+	}
+
+	_, err = backupEngine.Backup(ctx, testFS, ".")
+	if err != nil {
+		t.Fatalf("Backup failed: %v", err)
+	}
+
+	// Verify deduplication
+	chunks, err := repos.Chunks.List(ctx)
+	if err != nil {
+		t.Fatalf("Failed to list chunks: %v", err)
+	}
+
+	// Should have only 2 unique chunks (duplicate content + unique content)
+	if len(chunks) != 2 {
+		t.Errorf("Expected 2 unique chunks, got %d", len(chunks))
+	}
+
+	// Verify chunk references
+	for _, chunk := range chunks {
+		files, err := repos.ChunkFiles.GetByChunkHash(ctx, chunk.ChunkHash)
+		if err != nil {
+			t.Errorf("Failed to get files for chunk %s: %v", chunk.ChunkHash, err)
+		}
+
+		// The duplicate content chunk should be referenced by 2 files
+		if chunk.Size == int64(len("Duplicate content")) && len(files) != 2 {
+			t.Errorf("Expected duplicate chunk to be referenced by 2 files, got %d", len(files))
+		}
+	}
+}
+
+// BackupEngine performs backup operations
+type BackupEngine struct {
+	repos    *database.Repositories
+	s3Client interface {
+		PutBlob(ctx context.Context, hash string, data []byte) error
+		BlobExists(ctx context.Context, hash string) (bool, error)
+	}
+}
+
+// Backup performs a backup of the given filesystem
+func (b *BackupEngine) Backup(ctx context.Context, fsys fs.FS, root string) (string, error) {
+	// Create a new snapshot
+	hostname, _ := os.Hostname()
+	snapshotID := time.Now().Format(time.RFC3339)
+	snapshot := &database.Snapshot{
+		ID:             snapshotID,
+		Hostname:       hostname,
+		VaultikVersion: "test",
+		StartedAt:      time.Now(),
+		CompletedAt:    nil,
+	}
+
+	// Create initial snapshot record
+	err := b.repos.WithTx(ctx, func(ctx context.Context, tx *sql.Tx) error {
+		return b.repos.Snapshots.Create(ctx, tx, snapshot)
+	})
+	if err != nil {
+		return "", err
+	}
+
+	// Track counters
+	var fileCount, chunkCount, blobCount, totalSize, blobSize int64
+
+	// Track which chunks we've seen to handle deduplication
+	processedChunks := make(map[string]bool)
+
+	// Scan the filesystem and process files
+	err = fs.WalkDir(fsys, root, func(path string, d fs.DirEntry, err error) error {
+		if err != nil {
+			return err
+		}
+
+		// Skip directories
+		if d.IsDir() {
+			return nil
+		}
+
+		// Get file info
+		info, err := d.Info()
+		if err != nil {
+			return err
+		}
+
+		// Handle symlinks
+		if info.Mode()&fs.ModeSymlink != 0 {
+			// For testing, we'll skip symlinks since fstest doesn't support them well
+			return nil
+		}
+
+		// Create file record in a short transaction
+		file := &database.File{
+			Path:  path,
+			Size:  info.Size(),
+			Mode:  uint32(info.Mode()),
+			MTime: info.ModTime(),
+			CTime: info.ModTime(), // Use mtime as ctime for test
+			UID:   1000,           // Default UID for test
+			GID:   1000,           // Default GID for test
+		}
+		err = b.repos.WithTx(ctx, func(ctx context.Context, tx *sql.Tx) error {
+			return b.repos.Files.Create(ctx, tx, file)
+		})
+		if err != nil {
+			return err
+		}
+
+		fileCount++
+		totalSize += info.Size()
+
+		// Read and process file in chunks
+		f, err := fsys.Open(path)
+		if err != nil {
+			return err
+		}
+		defer func() {
+			if err := f.Close(); err != nil {
+				// Log but don't fail since we're already in an error path potentially
+				fmt.Fprintf(os.Stderr, "Failed to close file: %v\n", err)
+			}
+		}()
+
+		// Process file in chunks
+		chunkIndex := 0
+		buffer := make([]byte, defaultChunkSize)
+
+		for {
+			n, err := f.Read(buffer)
+			if err != nil && err != io.EOF {
+				return err
+			}
+			if n == 0 {
+				break
+			}
+
+			chunkData := buffer[:n]
+			chunkHash := calculateHash(chunkData)
+
+			// Check if chunk already exists (outside of transaction)
+			existingChunk, _ := b.repos.Chunks.GetByHash(ctx, chunkHash)
+			if existingChunk == nil {
+				// Create new chunk in a short transaction
+				err = b.repos.WithTx(ctx, func(ctx context.Context, tx *sql.Tx) error {
+					chunk := &database.Chunk{
+						ChunkHash: chunkHash,
+						Size:      int64(n),
+					}
+					return b.repos.Chunks.Create(ctx, tx, chunk)
+				})
+				if err != nil {
+					return err
+				}
+				processedChunks[chunkHash] = true
+			}
+
+			// Create file-chunk mapping in a short transaction
+			err = b.repos.WithTx(ctx, func(ctx context.Context, tx *sql.Tx) error {
+				fileChunk := &database.FileChunk{
+					FileID:    file.ID,
+					Idx:       chunkIndex,
+					ChunkHash: chunkHash,
+				}
+				return b.repos.FileChunks.Create(ctx, tx, fileChunk)
+			})
+			if err != nil {
+				return err
+			}
+
+			// Create chunk-file mapping in a short transaction
+			err = b.repos.WithTx(ctx, func(ctx context.Context, tx *sql.Tx) error {
+				chunkFile := &database.ChunkFile{
+					ChunkHash:  chunkHash,
+					FileID:     file.ID,
+					FileOffset: int64(chunkIndex * defaultChunkSize),
+					Length:     int64(n),
+				}
+				return b.repos.ChunkFiles.Create(ctx, tx, chunkFile)
+			})
+			if err != nil {
+				return err
+			}
+
+			chunkIndex++
+		}
+
+		return nil
+	})
+
+	if err != nil {
+		return "", err
+	}
+
+	// After all files are processed, create blobs for new chunks
+	for chunkHash := range processedChunks {
+		// Get chunk data (outside of transaction)
+		chunk, err := b.repos.Chunks.GetByHash(ctx, chunkHash)
+		if err != nil {
+			return "", err
+		}
+
+		chunkCount++
+
+		// In a real system, blobs would contain multiple chunks and be encrypted
+		// For testing, we'll create a blob with a "blob-" prefix to differentiate
+		blobHash := "blob-" + chunkHash
+
+		// For the test, we'll create dummy data since we don't have the original
+		dummyData := []byte(chunkHash)
+
+		// Upload to S3 as a blob
+		if err := b.s3Client.PutBlob(ctx, blobHash, dummyData); err != nil {
+			return "", err
+		}
+
+		// Create blob entry in a short transaction
+		err = b.repos.WithTx(ctx, func(ctx context.Context, tx *sql.Tx) error {
+			blob := &database.Blob{
+				ID:        "test-blob-" + blobHash[:8],
+				Hash:      blobHash,
+				CreatedTS: time.Now(),
+			}
+			return b.repos.Blobs.Create(ctx, tx, blob)
+		})
+		if err != nil {
+			return "", err
+		}
+
+		blobCount++
+		blobSize += chunk.Size
+
+		// Create blob-chunk mapping in a short transaction
+		err = b.repos.WithTx(ctx, func(ctx context.Context, tx *sql.Tx) error {
+			blobChunk := &database.BlobChunk{
+				BlobID:    "test-blob-" + blobHash[:8],
+				ChunkHash: chunkHash,
+				Offset:    0,
+				Length:    chunk.Size,
+			}
+			return b.repos.BlobChunks.Create(ctx, tx, blobChunk)
+		})
+		if err != nil {
+			return "", err
+		}
+
+		// Add blob to snapshot in a short transaction
+		err = b.repos.WithTx(ctx, func(ctx context.Context, tx *sql.Tx) error {
+			return b.repos.Snapshots.AddBlob(ctx, tx, snapshotID, "test-blob-"+blobHash[:8], blobHash)
+		})
+		if err != nil {
+			return "", err
+		}
+	}
+
+	// Update snapshot with final counts
+	err = b.repos.WithTx(ctx, func(ctx context.Context, tx *sql.Tx) error {
+		return b.repos.Snapshots.UpdateCounts(ctx, tx, snapshotID, fileCount, chunkCount, blobCount, totalSize, blobSize)
+	})
+
+	if err != nil {
+		return "", err
+	}
+
+	return snapshotID, nil
+}
+
+func calculateHash(data []byte) string {
+	h := sha256.New()
+	h.Write(data)
+	return fmt.Sprintf("%x", h.Sum(nil))
+}
+
+func generateLargeFileContent(size int) []byte {
+	data := make([]byte, size)
+	// Fill with pattern that changes every chunk to avoid deduplication
+	for i := 0; i < size; i++ {
+		chunkNum := i / defaultChunkSize
+		data[i] = byte((i + chunkNum) % 256)
+	}
+	return data
+}
+
+const defaultChunkSize = 1024 * 1024 // 1MB chunks
--- a/internal/snapshot/file_change_test.go
+++ b/internal/snapshot/file_change_test.go
@@ -0,0 +1,236 @@
+package snapshot_test
+
+import (
+	"context"
+	"database/sql"
+	"testing"
+	"time"
+
+	"git.eeqj.de/sneak/vaultik/internal/database"
+	"git.eeqj.de/sneak/vaultik/internal/log"
+	"git.eeqj.de/sneak/vaultik/internal/snapshot"
+	"github.com/spf13/afero"
+	"github.com/stretchr/testify/assert"
+	"github.com/stretchr/testify/require"
+)
+
+// TestFileContentChange verifies that when a file's content changes,
+// the old chunks are properly disassociated
+func TestFileContentChange(t *testing.T) {
+	// Initialize logger for tests
+	log.Initialize(log.Config{})
+
+	// Create in-memory filesystem
+	fs := afero.NewMemMapFs()
+
+	// Create initial file
+	err := afero.WriteFile(fs, "/test.txt", []byte("Initial content"), 0644)
+	require.NoError(t, err)
+
+	// Create test database
+	db, err := database.NewTestDB()
+	require.NoError(t, err)
+	defer func() {
+		if err := db.Close(); err != nil {
+			t.Errorf("failed to close database: %v", err)
+		}
+	}()
+
+	repos := database.NewRepositories(db)
+
+	// Create scanner
+	scanner := snapshot.NewScanner(snapshot.ScannerConfig{
+		FS:               fs,
+		ChunkSize:        int64(1024 * 16), // 16KB chunks for testing
+		Repositories:     repos,
+		MaxBlobSize:      int64(1024 * 1024), // 1MB blobs
+		CompressionLevel: 3,
+		AgeRecipients:    []string{"age1ezrjmfpwsc95svdg0y54mums3zevgzu0x0ecq2f7tp8a05gl0sjq9q9wjg"}, // Test public key
+	})
+
+	// Create first snapshot
+	ctx := context.Background()
+	snapshotID1 := "snapshot1"
+	err = repos.WithTx(ctx, func(ctx context.Context, tx *sql.Tx) error {
+		snapshot := &database.Snapshot{
+			ID:             snapshotID1,
+			Hostname:       "test-host",
+			VaultikVersion: "test",
+			StartedAt:      time.Now(),
+		}
+		return repos.Snapshots.Create(ctx, tx, snapshot)
+	})
+	require.NoError(t, err)
+
+	// First scan - should create chunks for initial content
+	result1, err := scanner.Scan(ctx, "/", snapshotID1)
+	require.NoError(t, err)
+	t.Logf("First scan: %d files scanned", result1.FilesScanned)
+
+	// Get file chunks from first scan
+	fileChunks1, err := repos.FileChunks.GetByPath(ctx, "/test.txt")
+	require.NoError(t, err)
+	assert.Len(t, fileChunks1, 1) // Small file = 1 chunk
+	oldChunkHash := fileChunks1[0].ChunkHash
+
+	// Get chunk files from first scan
+	chunkFiles1, err := repos.ChunkFiles.GetByFilePath(ctx, "/test.txt")
+	require.NoError(t, err)
+	assert.Len(t, chunkFiles1, 1)
+
+	// Modify the file
+	time.Sleep(10 * time.Millisecond) // Ensure mtime changes
+	err = afero.WriteFile(fs, "/test.txt", []byte("Modified content with different data"), 0644)
+	require.NoError(t, err)
+
+	// Create second snapshot
+	snapshotID2 := "snapshot2"
+	err = repos.WithTx(ctx, func(ctx context.Context, tx *sql.Tx) error {
+		snapshot := &database.Snapshot{
+			ID:             snapshotID2,
+			Hostname:       "test-host",
+			VaultikVersion: "test",
+			StartedAt:      time.Now(),
+		}
+		return repos.Snapshots.Create(ctx, tx, snapshot)
+	})
+	require.NoError(t, err)
+
+	// Second scan - should create new chunks and remove old associations
+	result2, err := scanner.Scan(ctx, "/", snapshotID2)
+	require.NoError(t, err)
+	t.Logf("Second scan: %d files scanned", result2.FilesScanned)
+
+	// Get file chunks from second scan
+	fileChunks2, err := repos.FileChunks.GetByPath(ctx, "/test.txt")
+	require.NoError(t, err)
+	assert.Len(t, fileChunks2, 1) // Still 1 chunk but different hash
+	newChunkHash := fileChunks2[0].ChunkHash
+
+	// Verify the chunk hashes are different
+	assert.NotEqual(t, oldChunkHash, newChunkHash, "Chunk hash should change when content changes")
+
+	// Get chunk files from second scan
+	chunkFiles2, err := repos.ChunkFiles.GetByFilePath(ctx, "/test.txt")
+	require.NoError(t, err)
+	assert.Len(t, chunkFiles2, 1)
+	assert.Equal(t, newChunkHash, chunkFiles2[0].ChunkHash)
+
+	// Verify old chunk still exists (it's still valid data)
+	oldChunk, err := repos.Chunks.GetByHash(ctx, oldChunkHash)
+	require.NoError(t, err)
+	assert.NotNil(t, oldChunk)
+
+	// Verify new chunk exists
+	newChunk, err := repos.Chunks.GetByHash(ctx, newChunkHash)
+	require.NoError(t, err)
+	assert.NotNil(t, newChunk)
+
+	// Verify that chunk_files for old chunk no longer references this file
+	oldChunkFiles, err := repos.ChunkFiles.GetByChunkHash(ctx, oldChunkHash)
+	require.NoError(t, err)
+	for _, cf := range oldChunkFiles {
+		file, err := repos.Files.GetByID(ctx, cf.FileID)
+		require.NoError(t, err)
+		assert.NotEqual(t, "/data/test.txt", file.Path, "Old chunk should not be associated with the modified file")
+	}
+}
+
+// TestMultipleFileChanges verifies handling of multiple file changes in one scan
+func TestMultipleFileChanges(t *testing.T) {
+	// Initialize logger for tests
+	log.Initialize(log.Config{})
+
+	// Create in-memory filesystem
+	fs := afero.NewMemMapFs()
+
+	// Create initial files
+	files := map[string]string{
+		"/file1.txt": "Content 1",
+		"/file2.txt": "Content 2",
+		"/file3.txt": "Content 3",
+	}
+
+	for path, content := range files {
+		err := afero.WriteFile(fs, path, []byte(content), 0644)
+		require.NoError(t, err)
+	}
+
+	// Create test database
+	db, err := database.NewTestDB()
+	require.NoError(t, err)
+	defer func() {
+		if err := db.Close(); err != nil {
+			t.Errorf("failed to close database: %v", err)
+		}
+	}()
+
+	repos := database.NewRepositories(db)
+
+	// Create scanner
+	scanner := snapshot.NewScanner(snapshot.ScannerConfig{
+		FS:               fs,
+		ChunkSize:        int64(1024 * 16), // 16KB chunks for testing
+		Repositories:     repos,
+		MaxBlobSize:      int64(1024 * 1024), // 1MB blobs
+		CompressionLevel: 3,
+		AgeRecipients:    []string{"age1ezrjmfpwsc95svdg0y54mums3zevgzu0x0ecq2f7tp8a05gl0sjq9q9wjg"}, // Test public key
+	})
+
+	// Create first snapshot
+	ctx := context.Background()
+	snapshotID1 := "snapshot1"
+	err = repos.WithTx(ctx, func(ctx context.Context, tx *sql.Tx) error {
+		snapshot := &database.Snapshot{
+			ID:             snapshotID1,
+			Hostname:       "test-host",
+			VaultikVersion: "test",
+			StartedAt:      time.Now(),
+		}
+		return repos.Snapshots.Create(ctx, tx, snapshot)
+	})
+	require.NoError(t, err)
+
+	// First scan
+	result1, err := scanner.Scan(ctx, "/", snapshotID1)
+	require.NoError(t, err)
+	// 4 files because root directory is also counted
+	assert.Equal(t, 4, result1.FilesScanned)
+
+	// Modify two files
+	time.Sleep(10 * time.Millisecond) // Ensure mtime changes
+	err = afero.WriteFile(fs, "/file1.txt", []byte("Modified content 1"), 0644)
+	require.NoError(t, err)
+	err = afero.WriteFile(fs, "/file3.txt", []byte("Modified content 3"), 0644)
+	require.NoError(t, err)
+
+	// Create second snapshot
+	snapshotID2 := "snapshot2"
+	err = repos.WithTx(ctx, func(ctx context.Context, tx *sql.Tx) error {
+		snapshot := &database.Snapshot{
+			ID:             snapshotID2,
+			Hostname:       "test-host",
+			VaultikVersion: "test",
+			StartedAt:      time.Now(),
+		}
+		return repos.Snapshots.Create(ctx, tx, snapshot)
+	})
+	require.NoError(t, err)
+
+	// Second scan
+	result2, err := scanner.Scan(ctx, "/", snapshotID2)
+	require.NoError(t, err)
+	// 4 files because root directory is also counted
+	assert.Equal(t, 4, result2.FilesScanned)
+
+	// Verify each file has exactly one set of chunks
+	for path := range files {
+		fileChunks, err := repos.FileChunks.GetByPath(ctx, path)
+		require.NoError(t, err)
+		assert.Len(t, fileChunks, 1, "File %s should have exactly 1 chunk association", path)
+
+		chunkFiles, err := repos.ChunkFiles.GetByFilePath(ctx, path)
+		require.NoError(t, err)
+		assert.Len(t, chunkFiles, 1, "File %s should have exactly 1 chunk-file association", path)
+	}
+}
--- a/internal/snapshot/manifest.go
+++ b/internal/snapshot/manifest.go
@@ -0,0 +1,70 @@
+package snapshot
+
+import (
+	"bytes"
+	"encoding/json"
+	"fmt"
+	"io"
+
+	"github.com/klauspost/compress/zstd"
+)
+
+// Manifest represents the structure of a snapshot's blob manifest
+type Manifest struct {
+	SnapshotID          string     `json:"snapshot_id"`
+	Timestamp           string     `json:"timestamp"`
+	BlobCount           int        `json:"blob_count"`
+	TotalCompressedSize int64      `json:"total_compressed_size"`
+	Blobs               []BlobInfo `json:"blobs"`
+}
+
+// BlobInfo represents information about a single blob in the manifest
+type BlobInfo struct {
+	Hash           string `json:"hash"`
+	CompressedSize int64  `json:"compressed_size"`
+}
+
+// DecodeManifest decodes a manifest from a reader containing compressed JSON
+func DecodeManifest(r io.Reader) (*Manifest, error) {
+	// Decompress using zstd
+	zr, err := zstd.NewReader(r)
+	if err != nil {
+		return nil, fmt.Errorf("creating zstd reader: %w", err)
+	}
+	defer zr.Close()
+
+	// Decode JSON manifest
+	var manifest Manifest
+	if err := json.NewDecoder(zr).Decode(&manifest); err != nil {
+		return nil, fmt.Errorf("decoding manifest: %w", err)
+	}
+
+	return &manifest, nil
+}
+
+// EncodeManifest encodes a manifest to compressed JSON
+func EncodeManifest(manifest *Manifest, compressionLevel int) ([]byte, error) {
+	// Marshal to JSON
+	jsonData, err := json.MarshalIndent(manifest, "", "  ")
+	if err != nil {
+		return nil, fmt.Errorf("marshaling manifest: %w", err)
+	}
+
+	// Compress using zstd
+	var compressedBuf bytes.Buffer
+	writer, err := zstd.NewWriter(&compressedBuf, zstd.WithEncoderLevel(zstd.EncoderLevelFromZstd(compressionLevel)))
+	if err != nil {
+		return nil, fmt.Errorf("creating zstd writer: %w", err)
+	}
+
+	if _, err := writer.Write(jsonData); err != nil {
+		_ = writer.Close()
+		return nil, fmt.Errorf("writing compressed data: %w", err)
+	}
+
+	if err := writer.Close(); err != nil {
+		return nil, fmt.Errorf("closing zstd writer: %w", err)
+	}
+
+	return compressedBuf.Bytes(), nil
+}
--- a/internal/snapshot/module.go
+++ b/internal/snapshot/module.go
@@ -0,0 +1,42 @@
+package snapshot
+
+import (
+	"git.eeqj.de/sneak/vaultik/internal/config"
+	"git.eeqj.de/sneak/vaultik/internal/database"
+	"git.eeqj.de/sneak/vaultik/internal/s3"
+	"github.com/spf13/afero"
+	"go.uber.org/fx"
+)
+
+// ScannerParams holds parameters for scanner creation
+type ScannerParams struct {
+	EnableProgress bool
+}
+
+// Module exports backup functionality as an fx module.
+// It provides a ScannerFactory that can create Scanner instances
+// with custom parameters while sharing common dependencies.
+var Module = fx.Module("backup",
+	fx.Provide(
+		provideScannerFactory,
+		NewSnapshotManager,
+	),
+)
+
+// ScannerFactory creates scanners with custom parameters
+type ScannerFactory func(params ScannerParams) *Scanner
+
+func provideScannerFactory(cfg *config.Config, repos *database.Repositories, s3Client *s3.Client) ScannerFactory {
+	return func(params ScannerParams) *Scanner {
+		return NewScanner(ScannerConfig{
+			FS:               afero.NewOsFs(),
+			ChunkSize:        cfg.ChunkSize.Int64(),
+			Repositories:     repos,
+			S3Client:         s3Client,
+			MaxBlobSize:      cfg.BlobSizeLimit.Int64(),
+			CompressionLevel: cfg.CompressionLevel,
+			AgeRecipients:    cfg.AgeRecipients,
+			EnableProgress:   params.EnableProgress,
+		})
+	}
+}
--- a/internal/snapshot/progress.go
+++ b/internal/snapshot/progress.go
@@ -0,0 +1,412 @@
+package snapshot
+
+import (
+	"context"
+	"fmt"
+	"os"
+	"os/signal"
+	"sync"
+	"sync/atomic"
+	"syscall"
+	"time"
+
+	"git.eeqj.de/sneak/vaultik/internal/log"
+	"github.com/dustin/go-humanize"
+)
+
+const (
+	// SummaryInterval defines how often one-line status updates are printed.
+	// These updates show current progress, ETA, and the file being processed.
+	SummaryInterval = 10 * time.Second
+
+	// DetailInterval defines how often multi-line detailed status reports are printed.
+	// These reports include comprehensive statistics about files, chunks, blobs, and uploads.
+	DetailInterval = 60 * time.Second
+)
+
+// ProgressStats holds atomic counters for progress tracking
+type ProgressStats struct {
+	FilesScanned     atomic.Int64 // Total files seen during scan (includes skipped)
+	FilesProcessed   atomic.Int64 // Files actually processed in phase 2
+	FilesSkipped     atomic.Int64 // Files skipped due to no changes
+	BytesScanned     atomic.Int64 // Bytes from new/changed files only
+	BytesSkipped     atomic.Int64 // Bytes from unchanged files
+	BytesProcessed   atomic.Int64 // Actual bytes processed (for ETA calculation)
+	ChunksCreated    atomic.Int64
+	BlobsCreated     atomic.Int64
+	BlobsUploaded    atomic.Int64
+	BytesUploaded    atomic.Int64
+	UploadDurationMs atomic.Int64 // Total milliseconds spent uploading to S3
+	CurrentFile      atomic.Value // stores string
+	TotalSize        atomic.Int64 // Total size to process (set after scan phase)
+	TotalFiles       atomic.Int64 // Total files to process in phase 2
+	ProcessStartTime atomic.Value // stores time.Time when processing starts
+	StartTime        time.Time
+	mu               sync.RWMutex
+	lastDetailTime   time.Time
+
+	// Upload tracking
+	CurrentUpload    atomic.Value // stores *UploadInfo
+	lastChunkingTime time.Time    // Track when we last showed chunking progress
+}
+
+// UploadInfo tracks current upload progress
+type UploadInfo struct {
+	BlobHash  string
+	Size      int64
+	StartTime time.Time
+}
+
+// ProgressReporter handles periodic progress reporting
+type ProgressReporter struct {
+	stats         *ProgressStats
+	ctx           context.Context
+	cancel        context.CancelFunc
+	wg            sync.WaitGroup
+	detailTicker  *time.Ticker
+	summaryTicker *time.Ticker
+	sigChan       chan os.Signal
+}
+
+// NewProgressReporter creates a new progress reporter
+func NewProgressReporter() *ProgressReporter {
+	stats := &ProgressStats{
+		StartTime:      time.Now().UTC(),
+		lastDetailTime: time.Now().UTC(),
+	}
+	stats.CurrentFile.Store("")
+
+	ctx, cancel := context.WithCancel(context.Background())
+
+	pr := &ProgressReporter{
+		stats:         stats,
+		ctx:           ctx,
+		cancel:        cancel,
+		summaryTicker: time.NewTicker(SummaryInterval),
+		detailTicker:  time.NewTicker(DetailInterval),
+		sigChan:       make(chan os.Signal, 1),
+	}
+
+	// Register for SIGUSR1
+	signal.Notify(pr.sigChan, syscall.SIGUSR1)
+
+	return pr
+}
+
+// Start begins the progress reporting
+func (pr *ProgressReporter) Start() {
+	pr.wg.Add(1)
+	go pr.run()
+
+	// Print initial multi-line status
+	pr.printDetailedStatus()
+}
+
+// Stop stops the progress reporting
+func (pr *ProgressReporter) Stop() {
+	pr.cancel()
+	pr.summaryTicker.Stop()
+	pr.detailTicker.Stop()
+	signal.Stop(pr.sigChan)
+	close(pr.sigChan)
+	pr.wg.Wait()
+}
+
+// GetStats returns the progress stats for updating
+func (pr *ProgressReporter) GetStats() *ProgressStats {
+	return pr.stats
+}
+
+// SetTotalSize sets the total size to process (after scan phase)
+func (pr *ProgressReporter) SetTotalSize(size int64) {
+	pr.stats.TotalSize.Store(size)
+	pr.stats.ProcessStartTime.Store(time.Now().UTC())
+}
+
+// run is the main progress reporting loop
+func (pr *ProgressReporter) run() {
+	defer pr.wg.Done()
+
+	for {
+		select {
+		case <-pr.ctx.Done():
+			return
+		case <-pr.summaryTicker.C:
+			pr.printSummaryStatus()
+		case <-pr.detailTicker.C:
+			pr.printDetailedStatus()
+		case <-pr.sigChan:
+			// SIGUSR1 received, print detailed status
+			log.Info("SIGUSR1 received, printing detailed status")
+			pr.printDetailedStatus()
+		}
+	}
+}
+
+// printSummaryStatus prints a one-line status update
+func (pr *ProgressReporter) printSummaryStatus() {
+	// Check if we're currently uploading
+	if uploadInfo, ok := pr.stats.CurrentUpload.Load().(*UploadInfo); ok && uploadInfo != nil {
+		// Show upload progress instead
+		pr.printUploadProgress(uploadInfo)
+		return
+	}
+
+	// Only show chunking progress if we've done chunking recently
+	pr.stats.mu.RLock()
+	timeSinceLastChunk := time.Since(pr.stats.lastChunkingTime)
+	pr.stats.mu.RUnlock()
+
+	if timeSinceLastChunk > SummaryInterval*2 {
+		// No recent chunking activity, don't show progress
+		return
+	}
+
+	elapsed := time.Since(pr.stats.StartTime)
+	bytesScanned := pr.stats.BytesScanned.Load()
+	bytesSkipped := pr.stats.BytesSkipped.Load()
+	bytesProcessed := pr.stats.BytesProcessed.Load()
+	totalSize := pr.stats.TotalSize.Load()
+	currentFile := pr.stats.CurrentFile.Load().(string)
+
+	// Calculate ETA if we have total size and are processing
+	etaStr := ""
+	if totalSize > 0 && bytesProcessed > 0 {
+		processStart, ok := pr.stats.ProcessStartTime.Load().(time.Time)
+		if ok && !processStart.IsZero() {
+			processElapsed := time.Since(processStart)
+			rate := float64(bytesProcessed) / processElapsed.Seconds()
+			if rate > 0 {
+				remainingBytes := totalSize - bytesProcessed
+				remainingSeconds := float64(remainingBytes) / rate
+				eta := time.Duration(remainingSeconds * float64(time.Second))
+				etaStr = fmt.Sprintf(" | ETA: %s", formatDuration(eta))
+			}
+		}
+	}
+
+	rate := float64(bytesScanned+bytesSkipped) / elapsed.Seconds()
+
+	// Show files processed / total files to process
+	filesProcessed := pr.stats.FilesProcessed.Load()
+	totalFiles := pr.stats.TotalFiles.Load()
+
+	status := fmt.Sprintf("Snapshot progress: %d/%d files, %s/%s (%.1f%%), %s/s%s",
+		filesProcessed,
+		totalFiles,
+		humanize.Bytes(uint64(bytesProcessed)),
+		humanize.Bytes(uint64(totalSize)),
+		float64(bytesProcessed)/float64(totalSize)*100,
+		humanize.Bytes(uint64(rate)),
+		etaStr,
+	)
+
+	if currentFile != "" {
+		status += fmt.Sprintf(" | Current: %s", truncatePath(currentFile, 40))
+	}
+
+	log.Info(status)
+}
+
+// printDetailedStatus prints a multi-line detailed status
+func (pr *ProgressReporter) printDetailedStatus() {
+	pr.stats.mu.Lock()
+	pr.stats.lastDetailTime = time.Now().UTC()
+	pr.stats.mu.Unlock()
+
+	elapsed := time.Since(pr.stats.StartTime)
+	filesScanned := pr.stats.FilesScanned.Load()
+	filesSkipped := pr.stats.FilesSkipped.Load()
+	bytesScanned := pr.stats.BytesScanned.Load()
+	bytesSkipped := pr.stats.BytesSkipped.Load()
+	bytesProcessed := pr.stats.BytesProcessed.Load()
+	totalSize := pr.stats.TotalSize.Load()
+	chunksCreated := pr.stats.ChunksCreated.Load()
+	blobsCreated := pr.stats.BlobsCreated.Load()
+	blobsUploaded := pr.stats.BlobsUploaded.Load()
+	bytesUploaded := pr.stats.BytesUploaded.Load()
+	currentFile := pr.stats.CurrentFile.Load().(string)
+
+	totalBytes := bytesScanned + bytesSkipped
+	rate := float64(totalBytes) / elapsed.Seconds()
+
+	log.Notice("=== Snapshot Progress Report ===")
+	log.Info("Elapsed time", "duration", formatDuration(elapsed))
+
+	// Calculate and show ETA if we have data
+	if totalSize > 0 && bytesProcessed > 0 {
+		processStart, ok := pr.stats.ProcessStartTime.Load().(time.Time)
+		if ok && !processStart.IsZero() {
+			processElapsed := time.Since(processStart)
+			processRate := float64(bytesProcessed) / processElapsed.Seconds()
+			if processRate > 0 {
+				remainingBytes := totalSize - bytesProcessed
+				remainingSeconds := float64(remainingBytes) / processRate
+				eta := time.Duration(remainingSeconds * float64(time.Second))
+				percentComplete := float64(bytesProcessed) / float64(totalSize) * 100
+				log.Info("Overall progress",
+					"percent", fmt.Sprintf("%.1f%%", percentComplete),
+					"processed", humanize.Bytes(uint64(bytesProcessed)),
+					"total", humanize.Bytes(uint64(totalSize)),
+					"rate", humanize.Bytes(uint64(processRate))+"/s",
+					"eta", formatDuration(eta))
+			}
+		}
+	}
+
+	log.Info("Files processed",
+		"scanned", filesScanned,
+		"skipped", filesSkipped,
+		"total", filesScanned,
+		"skip_rate", formatPercent(filesSkipped, filesScanned))
+	log.Info("Data scanned",
+		"new", humanize.Bytes(uint64(bytesScanned)),
+		"skipped", humanize.Bytes(uint64(bytesSkipped)),
+		"total", humanize.Bytes(uint64(totalBytes)),
+		"scan_rate", humanize.Bytes(uint64(rate))+"/s")
+	log.Info("Chunks created", "count", chunksCreated)
+	log.Info("Blobs status",
+		"created", blobsCreated,
+		"uploaded", blobsUploaded,
+		"pending", blobsCreated-blobsUploaded)
+	log.Info("Total uploaded to S3",
+		"uploaded", humanize.Bytes(uint64(bytesUploaded)),
+		"compression_ratio", formatRatio(bytesUploaded, bytesScanned))
+	if currentFile != "" {
+		log.Info("Current file", "path", currentFile)
+	}
+	log.Notice("=============================")
+}
+
+// Helper functions
+
+func formatDuration(d time.Duration) string {
+	if d < 0 {
+		return "unknown"
+	}
+	if d < time.Minute {
+		return fmt.Sprintf("%ds", int(d.Seconds()))
+	}
+	if d < time.Hour {
+		return fmt.Sprintf("%dm%ds", int(d.Minutes()), int(d.Seconds())%60)
+	}
+	return fmt.Sprintf("%dh%dm", int(d.Hours()), int(d.Minutes())%60)
+}
+
+func formatPercent(numerator, denominator int64) string {
+	if denominator == 0 {
+		return "0.0%"
+	}
+	return fmt.Sprintf("%.1f%%", float64(numerator)/float64(denominator)*100)
+}
+
+func formatRatio(compressed, uncompressed int64) string {
+	if uncompressed == 0 {
+		return "1.00"
+	}
+	ratio := float64(compressed) / float64(uncompressed)
+	return fmt.Sprintf("%.2f", ratio)
+}
+
+func truncatePath(path string, maxLen int) string {
+	if len(path) <= maxLen {
+		return path
+	}
+	// Keep the last maxLen-3 characters and prepend "..."
+	return "..." + path[len(path)-(maxLen-3):]
+}
+
+// printUploadProgress prints upload progress
+func (pr *ProgressReporter) printUploadProgress(info *UploadInfo) {
+	// This function is called repeatedly during upload, not just at start
+	// Don't print anything here - the actual progress is shown by ReportUploadProgress
+}
+
+// ReportUploadStart marks the beginning of a blob upload
+func (pr *ProgressReporter) ReportUploadStart(blobHash string, size int64) {
+	info := &UploadInfo{
+		BlobHash:  blobHash,
+		Size:      size,
+		StartTime: time.Now().UTC(),
+	}
+	pr.stats.CurrentUpload.Store(info)
+}
+
+// ReportUploadComplete marks the completion of a blob upload
+func (pr *ProgressReporter) ReportUploadComplete(blobHash string, size int64, duration time.Duration) {
+	// Clear current upload
+	pr.stats.CurrentUpload.Store((*UploadInfo)(nil))
+
+	// Add to total upload duration
+	pr.stats.UploadDurationMs.Add(duration.Milliseconds())
+
+	// Calculate speed
+	if duration < time.Millisecond {
+		duration = time.Millisecond
+	}
+	bytesPerSec := float64(size) / duration.Seconds()
+	bitsPerSec := bytesPerSec * 8
+
+	// Format speed
+	var speedStr string
+	if bitsPerSec >= 1e9 {
+		speedStr = fmt.Sprintf("%.1fGbit/sec", bitsPerSec/1e9)
+	} else if bitsPerSec >= 1e6 {
+		speedStr = fmt.Sprintf("%.0fMbit/sec", bitsPerSec/1e6)
+	} else if bitsPerSec >= 1e3 {
+		speedStr = fmt.Sprintf("%.0fKbit/sec", bitsPerSec/1e3)
+	} else {
+		speedStr = fmt.Sprintf("%.0fbit/sec", bitsPerSec)
+	}
+
+	log.Info("Blob upload completed",
+		"hash", blobHash[:8]+"...",
+		"size", humanize.Bytes(uint64(size)),
+		"duration", formatDuration(duration),
+		"speed", speedStr)
+}
+
+// UpdateChunkingActivity updates the last chunking time
+func (pr *ProgressReporter) UpdateChunkingActivity() {
+	pr.stats.mu.Lock()
+	pr.stats.lastChunkingTime = time.Now().UTC()
+	pr.stats.mu.Unlock()
+}
+
+// ReportUploadProgress reports current upload progress with instantaneous speed
+func (pr *ProgressReporter) ReportUploadProgress(blobHash string, bytesUploaded, totalSize int64, instantSpeed float64) {
+	// Update the current upload info with progress
+	if uploadInfo, ok := pr.stats.CurrentUpload.Load().(*UploadInfo); ok && uploadInfo != nil {
+		// Format speed in bits/second
+		bitsPerSec := instantSpeed * 8
+		var speedStr string
+		if bitsPerSec >= 1e9 {
+			speedStr = fmt.Sprintf("%.1fGbit/sec", bitsPerSec/1e9)
+		} else if bitsPerSec >= 1e6 {
+			speedStr = fmt.Sprintf("%.0fMbit/sec", bitsPerSec/1e6)
+		} else if bitsPerSec >= 1e3 {
+			speedStr = fmt.Sprintf("%.0fKbit/sec", bitsPerSec/1e3)
+		} else {
+			speedStr = fmt.Sprintf("%.0fbit/sec", bitsPerSec)
+		}
+
+		percent := float64(bytesUploaded) / float64(totalSize) * 100
+
+		// Calculate ETA based on current speed
+		etaStr := "unknown"
+		if instantSpeed > 0 && bytesUploaded < totalSize {
+			remainingBytes := totalSize - bytesUploaded
+			remainingSeconds := float64(remainingBytes) / instantSpeed
+			eta := time.Duration(remainingSeconds * float64(time.Second))
+			etaStr = formatDuration(eta)
+		}
+
+		log.Info("Blob upload progress",
+			"hash", blobHash[:8]+"...",
+			"progress", fmt.Sprintf("%.1f%%", percent),
+			"uploaded", humanize.Bytes(uint64(bytesUploaded)),
+			"total", humanize.Bytes(uint64(totalSize)),
+			"speed", speedStr,
+			"eta", etaStr)
+	}
+}
--- a/internal/snapshot/scanner.go
+++ b/internal/snapshot/scanner.go
@@ -0,0 +1,856 @@
+package snapshot
+
+import (
+	"context"
+	"database/sql"
+	"fmt"
+	"io"
+	"os"
+	"strings"
+	"sync"
+	"time"
+
+	"git.eeqj.de/sneak/vaultik/internal/blob"
+	"git.eeqj.de/sneak/vaultik/internal/chunker"
+	"git.eeqj.de/sneak/vaultik/internal/database"
+	"git.eeqj.de/sneak/vaultik/internal/log"
+	"git.eeqj.de/sneak/vaultik/internal/s3"
+	"github.com/dustin/go-humanize"
+	"github.com/spf13/afero"
+)
+
+// FileToProcess holds information about a file that needs processing
+type FileToProcess struct {
+	Path     string
+	FileInfo os.FileInfo
+	File     *database.File
+}
+
+// Scanner scans directories and populates the database with file and chunk information
+type Scanner struct {
+	fs               afero.Fs
+	chunker          *chunker.Chunker
+	packer           *blob.Packer
+	repos            *database.Repositories
+	s3Client         S3Client
+	maxBlobSize      int64
+	compressionLevel int
+	ageRecipient     string
+	snapshotID       string // Current snapshot being processed
+	progress         *ProgressReporter
+
+	// Mutex for coordinating blob creation
+	packerMu sync.Mutex // Blocks chunk production during blob creation
+
+	// Context for cancellation
+	scanCtx context.Context
+}
+
+// S3Client interface for blob storage operations
+type S3Client interface {
+	PutObject(ctx context.Context, key string, data io.Reader) error
+	PutObjectWithProgress(ctx context.Context, key string, data io.Reader, size int64, progress s3.ProgressCallback) error
+	StatObject(ctx context.Context, key string) (*s3.ObjectInfo, error)
+}
+
+// ScannerConfig contains configuration for the scanner
+type ScannerConfig struct {
+	FS               afero.Fs
+	ChunkSize        int64
+	Repositories     *database.Repositories
+	S3Client         S3Client
+	MaxBlobSize      int64
+	CompressionLevel int
+	AgeRecipients    []string // Optional, empty means no encryption
+	EnableProgress   bool     // Enable progress reporting
+}
+
+// ScanResult contains the results of a scan operation
+type ScanResult struct {
+	FilesScanned  int
+	FilesSkipped  int
+	BytesScanned  int64
+	BytesSkipped  int64
+	ChunksCreated int
+	BlobsCreated  int
+	StartTime     time.Time
+	EndTime       time.Time
+}
+
+// NewScanner creates a new scanner instance
+func NewScanner(cfg ScannerConfig) *Scanner {
+	// Create encryptor (required for blob packing)
+	if len(cfg.AgeRecipients) == 0 {
+		log.Error("No age recipients configured - encryption is required")
+		return nil
+	}
+
+	// Create blob packer with encryption
+	packerCfg := blob.PackerConfig{
+		MaxBlobSize:      cfg.MaxBlobSize,
+		CompressionLevel: cfg.CompressionLevel,
+		Recipients:       cfg.AgeRecipients,
+		Repositories:     cfg.Repositories,
+	}
+	packer, err := blob.NewPacker(packerCfg)
+	if err != nil {
+		log.Error("Failed to create packer", "error", err)
+		return nil
+	}
+
+	var progress *ProgressReporter
+	if cfg.EnableProgress {
+		progress = NewProgressReporter()
+	}
+
+	return &Scanner{
+		fs:               cfg.FS,
+		chunker:          chunker.NewChunker(cfg.ChunkSize),
+		packer:           packer,
+		repos:            cfg.Repositories,
+		s3Client:         cfg.S3Client,
+		maxBlobSize:      cfg.MaxBlobSize,
+		compressionLevel: cfg.CompressionLevel,
+		ageRecipient:     strings.Join(cfg.AgeRecipients, ","),
+		progress:         progress,
+	}
+}
+
+// Scan scans a directory and populates the database
+func (s *Scanner) Scan(ctx context.Context, path string, snapshotID string) (*ScanResult, error) {
+	s.snapshotID = snapshotID
+	s.scanCtx = ctx
+	result := &ScanResult{
+		StartTime: time.Now().UTC(),
+	}
+
+	// Set blob handler for concurrent upload
+	if s.s3Client != nil {
+		log.Debug("Setting blob handler for S3 uploads")
+		s.packer.SetBlobHandler(s.handleBlobReady)
+	} else {
+		log.Debug("No S3 client configured, blobs will not be uploaded")
+	}
+
+	// Start progress reporting if enabled
+	if s.progress != nil {
+		s.progress.Start()
+		defer s.progress.Stop()
+	}
+
+	// Phase 1: Scan directory and collect files to process
+	log.Info("Phase 1/3: Scanning directory structure")
+	filesToProcess, err := s.scanPhase(ctx, path, result)
+	if err != nil {
+		return nil, fmt.Errorf("scan phase failed: %w", err)
+	}
+
+	// Calculate total size to process
+	var totalSizeToProcess int64
+	for _, file := range filesToProcess {
+		totalSizeToProcess += file.FileInfo.Size()
+	}
+
+	// Update progress with total size and file count
+	if s.progress != nil {
+		s.progress.SetTotalSize(totalSizeToProcess)
+		s.progress.GetStats().TotalFiles.Store(int64(len(filesToProcess)))
+	}
+
+	log.Info("Phase 1 complete",
+		"total_files", len(filesToProcess),
+		"total_size", humanize.Bytes(uint64(totalSizeToProcess)),
+		"files_skipped", result.FilesSkipped,
+		"bytes_skipped", humanize.Bytes(uint64(result.BytesSkipped)))
+
+	// Print detailed scan summary
+	fmt.Printf("\n=== Scan Summary ===\n")
+	fmt.Printf("Total files examined: %d\n", result.FilesScanned)
+	fmt.Printf("Files with content changes: %d\n", len(filesToProcess))
+	fmt.Printf("Files with unchanged content: %d\n", result.FilesSkipped)
+	fmt.Printf("Total size of changed files: %s\n", humanize.Bytes(uint64(totalSizeToProcess)))
+	fmt.Printf("Total size of unchanged files: %s\n", humanize.Bytes(uint64(result.BytesSkipped)))
+	if len(filesToProcess) > 0 {
+		fmt.Printf("\nStarting snapshot of %d changed files...\n\n", len(filesToProcess))
+	} else {
+		fmt.Printf("\nNo file contents have changed.\n")
+		fmt.Printf("Creating metadata-only snapshot to capture current state...\n\n")
+	}
+
+	// Phase 2: Process files and create chunks
+	if len(filesToProcess) > 0 {
+		log.Info("Phase 2/3: Creating snapshot (chunking, compressing, encrypting, and uploading blobs)")
+		if err := s.processPhase(ctx, filesToProcess, result); err != nil {
+			return nil, fmt.Errorf("process phase failed: %w", err)
+		}
+	} else {
+		log.Info("Phase 2/3: Skipping (no file contents changed, metadata-only snapshot)")
+	}
+
+	// Get final stats from packer
+	blobs := s.packer.GetFinishedBlobs()
+	result.BlobsCreated += len(blobs)
+
+	// Query database for actual blob count created during this snapshot
+	// The database is authoritative, especially for concurrent blob uploads
+	// We count uploads rather than all snapshot_blobs to get only NEW blobs
+	if s.snapshotID != "" {
+		uploadCount, err := s.repos.Uploads.GetCountBySnapshot(ctx, s.snapshotID)
+		if err != nil {
+			log.Warn("Failed to query upload count from database", "error", err)
+		} else {
+			result.BlobsCreated = int(uploadCount)
+		}
+	}
+
+	result.EndTime = time.Now().UTC()
+	return result, nil
+}
+
+// scanPhase performs the initial directory scan to identify files to process
+func (s *Scanner) scanPhase(ctx context.Context, path string, result *ScanResult) ([]*FileToProcess, error) {
+	var filesToProcess []*FileToProcess
+	var mu sync.Mutex
+
+	// Set up periodic status output
+	lastStatusTime := time.Now()
+	statusInterval := 15 * time.Second
+	var filesScanned int64
+	var bytesScanned int64
+
+	log.Debug("Starting directory walk", "path", path)
+	err := afero.Walk(s.fs, path, func(path string, info os.FileInfo, err error) error {
+		log.Debug("Scanning filesystem entry", "path", path)
+		if err != nil {
+			log.Debug("Error accessing filesystem entry", "path", path, "error", err)
+			return err
+		}
+
+		// Check context cancellation
+		select {
+		case <-ctx.Done():
+			return ctx.Err()
+		default:
+		}
+
+		// Check file and update metadata
+		file, needsProcessing, err := s.checkFileAndUpdateMetadata(ctx, path, info, result)
+		if err != nil {
+			// Don't log context cancellation as an error
+			if err == context.Canceled {
+				return err
+			}
+			return fmt.Errorf("failed to check %s: %w", path, err)
+		}
+
+		// If file needs processing, add to list
+		if needsProcessing && info.Mode().IsRegular() && info.Size() > 0 {
+			mu.Lock()
+			filesToProcess = append(filesToProcess, &FileToProcess{
+				Path:     path,
+				FileInfo: info,
+				File:     file,
+			})
+			mu.Unlock()
+		}
+
+		// Update scan statistics
+		if info.Mode().IsRegular() {
+			filesScanned++
+			bytesScanned += info.Size()
+		}
+
+		// Output periodic status
+		if time.Since(lastStatusTime) >= statusInterval {
+			mu.Lock()
+			changedCount := len(filesToProcess)
+			mu.Unlock()
+
+			fmt.Printf("Scan progress: %d files examined, %s total size, %d files changed\n",
+				filesScanned,
+				humanize.Bytes(uint64(bytesScanned)),
+				changedCount)
+			lastStatusTime = time.Now()
+		}
+
+		return nil
+	})
+
+	if err != nil {
+		return nil, err
+	}
+
+	return filesToProcess, nil
+}
+
+// processPhase processes the files that need backing up
+func (s *Scanner) processPhase(ctx context.Context, filesToProcess []*FileToProcess, result *ScanResult) error {
+	// Set up periodic status output
+	lastStatusTime := time.Now()
+	statusInterval := 15 * time.Second
+	startTime := time.Now()
+	filesProcessed := 0
+	totalFiles := len(filesToProcess)
+
+	// Process each file
+	for _, fileToProcess := range filesToProcess {
+		// Update progress
+		if s.progress != nil {
+			s.progress.GetStats().CurrentFile.Store(fileToProcess.Path)
+		}
+
+		// Process file in streaming fashion
+		if err := s.processFileStreaming(ctx, fileToProcess, result); err != nil {
+			return fmt.Errorf("processing file %s: %w", fileToProcess.Path, err)
+		}
+
+		// Update files processed counter
+		if s.progress != nil {
+			s.progress.GetStats().FilesProcessed.Add(1)
+		}
+
+		filesProcessed++
+
+		// Output periodic status
+		if time.Since(lastStatusTime) >= statusInterval {
+			elapsed := time.Since(startTime)
+			remaining := totalFiles - filesProcessed
+			var eta time.Duration
+			if filesProcessed > 0 {
+				eta = elapsed / time.Duration(filesProcessed) * time.Duration(remaining)
+			}
+
+			fmt.Printf("Snapshot progress: %d/%d files processed, %d chunks created, %d blobs uploaded",
+				filesProcessed, totalFiles, result.ChunksCreated, result.BlobsCreated)
+			if remaining > 0 && eta > 0 {
+				fmt.Printf(", ETA: %s", eta.Round(time.Second))
+			}
+			fmt.Println()
+			lastStatusTime = time.Now()
+		}
+	}
+
+	// Final flush (outside any transaction)
+	s.packerMu.Lock()
+	if err := s.packer.Flush(); err != nil {
+		s.packerMu.Unlock()
+		return fmt.Errorf("flushing packer: %w", err)
+	}
+	s.packerMu.Unlock()
+
+	// If no S3 client, store any remaining blobs
+	if s.s3Client == nil {
+		blobs := s.packer.GetFinishedBlobs()
+		for _, b := range blobs {
+			// Blob metadata is already stored incrementally during packing
+			// Just add the blob to the snapshot
+			err := s.repos.WithTx(ctx, func(ctx context.Context, tx *sql.Tx) error {
+				return s.repos.Snapshots.AddBlob(ctx, tx, s.snapshotID, b.ID, b.Hash)
+			})
+			if err != nil {
+				return fmt.Errorf("storing blob metadata: %w", err)
+			}
+		}
+		result.BlobsCreated += len(blobs)
+	}
+
+	return nil
+}
+
+// checkFileAndUpdateMetadata checks if a file needs processing and updates metadata
+func (s *Scanner) checkFileAndUpdateMetadata(ctx context.Context, path string, info os.FileInfo, result *ScanResult) (*database.File, bool, error) {
+	// Check context cancellation
+	select {
+	case <-ctx.Done():
+		return nil, false, ctx.Err()
+	default:
+	}
+
+	// Process file without holding a long transaction
+	return s.checkFile(ctx, path, info, result)
+}
+
+// checkFile checks if a file needs processing and updates metadata
+func (s *Scanner) checkFile(ctx context.Context, path string, info os.FileInfo, result *ScanResult) (*database.File, bool, error) {
+	// Get file stats
+	stat, ok := info.Sys().(interface {
+		Uid() uint32
+		Gid() uint32
+	})
+
+	var uid, gid uint32
+	if ok {
+		uid = stat.Uid()
+		gid = stat.Gid()
+	}
+
+	// Check if it's a symlink
+	var linkTarget string
+	if info.Mode()&os.ModeSymlink != 0 {
+		// Read the symlink target
+		if linker, ok := s.fs.(afero.LinkReader); ok {
+			linkTarget, _ = linker.ReadlinkIfPossible(path)
+		}
+	}
+
+	// Create file record
+	file := &database.File{
+		Path:       path,
+		MTime:      info.ModTime(),
+		CTime:      info.ModTime(), // afero doesn't provide ctime
+		Size:       info.Size(),
+		Mode:       uint32(info.Mode()),
+		UID:        uid,
+		GID:        gid,
+		LinkTarget: linkTarget,
+	}
+
+	// Check if file has changed since last backup (no transaction needed for read)
+	log.Debug("Querying database for existing file record", "path", path)
+	existingFile, err := s.repos.Files.GetByPath(ctx, path)
+	if err != nil {
+		return nil, false, fmt.Errorf("checking existing file: %w", err)
+	}
+
+	fileChanged := existingFile == nil || s.hasFileChanged(existingFile, file)
+
+	// Update file metadata and add to snapshot in a single transaction
+	log.Debug("Updating file record in database and adding to snapshot", "path", path, "changed", fileChanged, "snapshot", s.snapshotID)
+	err = s.repos.WithTx(ctx, func(ctx context.Context, tx *sql.Tx) error {
+		// First create/update the file
+		if err := s.repos.Files.Create(ctx, tx, file); err != nil {
+			return fmt.Errorf("creating file: %w", err)
+		}
+		// Then add it to the snapshot using the file ID
+		if err := s.repos.Snapshots.AddFileByID(ctx, tx, s.snapshotID, file.ID); err != nil {
+			return fmt.Errorf("adding file to snapshot: %w", err)
+		}
+		return nil
+	})
+	if err != nil {
+		return nil, false, err
+	}
+	log.Debug("File record added to snapshot association", "path", path)
+
+	result.FilesScanned++
+
+	// Update progress
+	if s.progress != nil {
+		stats := s.progress.GetStats()
+		stats.FilesScanned.Add(1)
+		stats.CurrentFile.Store(path)
+	}
+
+	// Track skipped files
+	if info.Mode().IsRegular() && info.Size() > 0 && !fileChanged {
+		result.FilesSkipped++
+		result.BytesSkipped += info.Size()
+		if s.progress != nil {
+			stats := s.progress.GetStats()
+			stats.FilesSkipped.Add(1)
+			stats.BytesSkipped.Add(info.Size())
+		}
+		// File hasn't changed, but we still need to associate existing chunks with this snapshot
+		log.Debug("File content unchanged, reusing existing chunks and blobs", "path", path)
+		if err := s.associateExistingChunks(ctx, path); err != nil {
+			return nil, false, fmt.Errorf("associating existing chunks: %w", err)
+		}
+		log.Debug("Existing chunks and blobs associated with snapshot", "path", path)
+	} else {
+		// File changed or is not a regular file
+		result.BytesScanned += info.Size()
+		if s.progress != nil {
+			s.progress.GetStats().BytesScanned.Add(info.Size())
+		}
+	}
+
+	return file, fileChanged, nil
+}
+
+// hasFileChanged determines if a file has changed since last backup
+func (s *Scanner) hasFileChanged(existingFile, newFile *database.File) bool {
+	// Check if any metadata has changed
+	if existingFile.Size != newFile.Size {
+		return true
+	}
+	if existingFile.MTime.Unix() != newFile.MTime.Unix() {
+		return true
+	}
+	if existingFile.Mode != newFile.Mode {
+		return true
+	}
+	if existingFile.UID != newFile.UID {
+		return true
+	}
+	if existingFile.GID != newFile.GID {
+		return true
+	}
+	if existingFile.LinkTarget != newFile.LinkTarget {
+		return true
+	}
+	return false
+}
+
+// associateExistingChunks links existing chunks from an unchanged file to the current snapshot
+func (s *Scanner) associateExistingChunks(ctx context.Context, path string) error {
+	log.Debug("associateExistingChunks start", "path", path)
+
+	// Get existing file chunks (no transaction needed for read)
+	log.Debug("Querying database for file's chunk associations", "path", path)
+	fileChunks, err := s.repos.FileChunks.GetByFile(ctx, path)
+	if err != nil {
+		return fmt.Errorf("getting existing file chunks: %w", err)
+	}
+	log.Debug("Retrieved file chunk associations from database", "path", path, "count", len(fileChunks))
+
+	// Collect unique blob IDs that need to be added to snapshot
+	blobsToAdd := make(map[string]string) // blob ID -> blob hash
+	for i, fc := range fileChunks {
+		log.Debug("Looking up blob containing chunk", "path", path, "chunk_index", i, "chunk_hash", fc.ChunkHash)
+
+		// Find which blob contains this chunk (no transaction needed for read)
+		log.Debug("Querying database for blob containing chunk", "chunk_hash", fc.ChunkHash)
+		blobChunk, err := s.repos.BlobChunks.GetByChunkHash(ctx, fc.ChunkHash)
+		if err != nil {
+			return fmt.Errorf("finding blob for chunk %s: %w", fc.ChunkHash, err)
+		}
+		if blobChunk == nil {
+			log.Warn("Chunk record exists in database but not associated with any blob", "chunk", fc.ChunkHash, "file", path)
+			continue
+		}
+		log.Debug("Found blob record containing chunk", "chunk_hash", fc.ChunkHash, "blob_id", blobChunk.BlobID)
+
+		// Track blob ID for later processing
+		if _, exists := blobsToAdd[blobChunk.BlobID]; !exists {
+			blobsToAdd[blobChunk.BlobID] = "" // We'll get the hash later
+		}
+	}
+
+	// Now get blob hashes outside of transaction operations
+	for blobID := range blobsToAdd {
+		blob, err := s.repos.Blobs.GetByID(ctx, blobID)
+		if err != nil {
+			return fmt.Errorf("getting blob %s: %w", blobID, err)
+		}
+		if blob == nil {
+			log.Warn("Blob record missing from database", "blob_id", blobID)
+			delete(blobsToAdd, blobID)
+			continue
+		}
+		blobsToAdd[blobID] = blob.Hash
+	}
+
+	// Add blobs to snapshot using short transactions
+	for blobID, blobHash := range blobsToAdd {
+		log.Debug("Adding blob reference to snapshot association", "blob_id", blobID, "blob_hash", blobHash, "snapshot", s.snapshotID)
+		err := s.repos.WithTx(ctx, func(ctx context.Context, tx *sql.Tx) error {
+			return s.repos.Snapshots.AddBlob(ctx, tx, s.snapshotID, blobID, blobHash)
+		})
+		if err != nil {
+			return fmt.Errorf("adding existing blob to snapshot: %w", err)
+		}
+		log.Debug("Created snapshot-blob association in database", "blob_id", blobID)
+	}
+
+	log.Debug("associateExistingChunks complete", "path", path, "blobs_processed", len(blobsToAdd))
+	return nil
+}
+
+// handleBlobReady is called by the packer when a blob is finalized
+func (s *Scanner) handleBlobReady(blobWithReader *blob.BlobWithReader) error {
+	log.Debug("Invoking blob upload handler", "blob_hash", blobWithReader.Hash[:8]+"...")
+
+	startTime := time.Now().UTC()
+	finishedBlob := blobWithReader.FinishedBlob
+
+	// Report upload start
+	if s.progress != nil {
+		s.progress.ReportUploadStart(finishedBlob.Hash, finishedBlob.Compressed)
+	}
+
+	// Upload to S3 first (without holding any locks)
+	// Use scan context for cancellation support
+	ctx := s.scanCtx
+	if ctx == nil {
+		ctx = context.Background()
+	}
+
+	// Track bytes uploaded for accurate speed calculation
+	lastProgressTime := time.Now()
+	lastProgressBytes := int64(0)
+
+	progressCallback := func(uploaded int64) error {
+
+		// Calculate instantaneous speed
+		now := time.Now()
+		elapsed := now.Sub(lastProgressTime).Seconds()
+		if elapsed > 0.5 { // Update speed every 0.5 seconds
+			bytesSinceLastUpdate := uploaded - lastProgressBytes
+			speed := float64(bytesSinceLastUpdate) / elapsed
+
+			if s.progress != nil {
+				s.progress.ReportUploadProgress(finishedBlob.Hash, uploaded, finishedBlob.Compressed, speed)
+			}
+
+			lastProgressTime = now
+			lastProgressBytes = uploaded
+		}
+
+		// Check for cancellation
+		select {
+		case <-ctx.Done():
+			return ctx.Err()
+		default:
+			return nil
+		}
+	}
+
+	// Create sharded path: blobs/ca/fe/cafebabe...
+	blobPath := fmt.Sprintf("blobs/%s/%s/%s", finishedBlob.Hash[:2], finishedBlob.Hash[2:4], finishedBlob.Hash)
+	if err := s.s3Client.PutObjectWithProgress(ctx, blobPath, blobWithReader.Reader, finishedBlob.Compressed, progressCallback); err != nil {
+		return fmt.Errorf("uploading blob %s to S3: %w", finishedBlob.Hash, err)
+	}
+
+	uploadDuration := time.Since(startTime)
+
+	// Log upload stats
+	uploadSpeed := float64(finishedBlob.Compressed) * 8 / uploadDuration.Seconds() // bits per second
+	log.Info("Successfully uploaded blob to S3 storage",
+		"path", blobPath,
+		"size", humanize.Bytes(uint64(finishedBlob.Compressed)),
+		"duration", uploadDuration,
+		"speed", humanize.SI(uploadSpeed, "bps"))
+
+	// Report upload complete
+	if s.progress != nil {
+		s.progress.ReportUploadComplete(finishedBlob.Hash, finishedBlob.Compressed, uploadDuration)
+	}
+
+	// Update progress
+	if s.progress != nil {
+		stats := s.progress.GetStats()
+		stats.BlobsUploaded.Add(1)
+		stats.BytesUploaded.Add(finishedBlob.Compressed)
+		stats.BlobsCreated.Add(1)
+	}
+
+	// Store metadata in database (after upload is complete)
+	dbCtx := s.scanCtx
+	if dbCtx == nil {
+		dbCtx = context.Background()
+	}
+	err := s.repos.WithTx(dbCtx, func(ctx context.Context, tx *sql.Tx) error {
+		// Update blob upload timestamp
+		if err := s.repos.Blobs.UpdateUploaded(ctx, tx, finishedBlob.ID); err != nil {
+			return fmt.Errorf("updating blob upload timestamp: %w", err)
+		}
+
+		// Add the blob to the snapshot
+		if err := s.repos.Snapshots.AddBlob(ctx, tx, s.snapshotID, finishedBlob.ID, finishedBlob.Hash); err != nil {
+			return fmt.Errorf("adding blob to snapshot: %w", err)
+		}
+
+		// Record upload metrics
+		upload := &database.Upload{
+			BlobHash:   finishedBlob.Hash,
+			SnapshotID: s.snapshotID,
+			UploadedAt: startTime,
+			Size:       finishedBlob.Compressed,
+			DurationMs: uploadDuration.Milliseconds(),
+		}
+		if err := s.repos.Uploads.Create(ctx, tx, upload); err != nil {
+			return fmt.Errorf("recording upload metrics: %w", err)
+		}
+
+		return nil
+	})
+
+	// Cleanup temp file if needed
+	if blobWithReader.TempFile != nil {
+		tempName := blobWithReader.TempFile.Name()
+		if err := blobWithReader.TempFile.Close(); err != nil {
+			log.Fatal("Failed to close temp file", "file", tempName, "error", err)
+		}
+		if err := os.Remove(tempName); err != nil {
+			log.Fatal("Failed to remove temp file", "file", tempName, "error", err)
+		}
+	}
+
+	return err
+}
+
+// processFileStreaming processes a file by streaming chunks directly to the packer
+func (s *Scanner) processFileStreaming(ctx context.Context, fileToProcess *FileToProcess, result *ScanResult) error {
+	// Open the file
+	file, err := s.fs.Open(fileToProcess.Path)
+	if err != nil {
+		return fmt.Errorf("opening file: %w", err)
+	}
+	defer func() { _ = file.Close() }()
+
+	// We'll collect file chunks for database storage
+	// but process them for packing as we go
+	type chunkInfo struct {
+		fileChunk database.FileChunk
+		offset    int64
+		size      int64
+	}
+	var chunks []chunkInfo
+	chunkIndex := 0
+
+	// Process chunks in streaming fashion and get full file hash
+	fileHash, err := s.chunker.ChunkReaderStreaming(file, func(chunk chunker.Chunk) error {
+		// Check for cancellation
+		select {
+		case <-ctx.Done():
+			return ctx.Err()
+		default:
+		}
+
+		log.Debug("Processing content-defined chunk from file",
+			"file", fileToProcess.Path,
+			"chunk_index", chunkIndex,
+			"hash", chunk.Hash,
+			"size", chunk.Size)
+
+		// Check if chunk already exists (outside of transaction)
+		existing, err := s.repos.Chunks.GetByHash(ctx, chunk.Hash)
+		if err != nil {
+			return fmt.Errorf("checking chunk existence: %w", err)
+		}
+		chunkExists := (existing != nil)
+
+		// Store chunk if new
+		if !chunkExists {
+			err := s.repos.WithTx(ctx, func(txCtx context.Context, tx *sql.Tx) error {
+				dbChunk := &database.Chunk{
+					ChunkHash: chunk.Hash,
+					Size:      chunk.Size,
+				}
+				if err := s.repos.Chunks.Create(txCtx, tx, dbChunk); err != nil {
+					return fmt.Errorf("creating chunk: %w", err)
+				}
+				return nil
+			})
+			if err != nil {
+				return fmt.Errorf("storing chunk: %w", err)
+			}
+		}
+
+		// Track file chunk association for later storage
+		chunks = append(chunks, chunkInfo{
+			fileChunk: database.FileChunk{
+				FileID:    fileToProcess.File.ID,
+				Idx:       chunkIndex,
+				ChunkHash: chunk.Hash,
+			},
+			offset: chunk.Offset,
+			size:   chunk.Size,
+		})
+
+		// Update stats
+		if chunkExists {
+			result.FilesSkipped++ // Track as skipped for now
+			result.BytesSkipped += chunk.Size
+			if s.progress != nil {
+				s.progress.GetStats().BytesSkipped.Add(chunk.Size)
+			}
+		} else {
+			result.ChunksCreated++
+			result.BytesScanned += chunk.Size
+			if s.progress != nil {
+				s.progress.GetStats().ChunksCreated.Add(1)
+				s.progress.GetStats().BytesProcessed.Add(chunk.Size)
+				s.progress.UpdateChunkingActivity()
+			}
+		}
+
+		// Add chunk to packer immediately (streaming)
+		// This happens outside the database transaction
+		if !chunkExists {
+			s.packerMu.Lock()
+			err := s.packer.AddChunk(&blob.ChunkRef{
+				Hash: chunk.Hash,
+				Data: chunk.Data,
+			})
+			if err == blob.ErrBlobSizeLimitExceeded {
+				// Finalize current blob and retry
+				if err := s.packer.FinalizeBlob(); err != nil {
+					s.packerMu.Unlock()
+					return fmt.Errorf("finalizing blob: %w", err)
+				}
+				// Retry adding the chunk
+				if err := s.packer.AddChunk(&blob.ChunkRef{
+					Hash: chunk.Hash,
+					Data: chunk.Data,
+				}); err != nil {
+					s.packerMu.Unlock()
+					return fmt.Errorf("adding chunk after finalize: %w", err)
+				}
+			} else if err != nil {
+				s.packerMu.Unlock()
+				return fmt.Errorf("adding chunk to packer: %w", err)
+			}
+			s.packerMu.Unlock()
+		}
+
+		// Clear chunk data from memory immediately after use
+		chunk.Data = nil
+
+		chunkIndex++
+		return nil
+	})
+
+	if err != nil {
+		return fmt.Errorf("chunking file: %w", err)
+	}
+
+	log.Debug("Completed snapshotting file",
+		"path", fileToProcess.Path,
+		"file_hash", fileHash,
+		"chunks", len(chunks))
+
+	// Store file-chunk associations and chunk-file mappings in database
+	err = s.repos.WithTx(ctx, func(txCtx context.Context, tx *sql.Tx) error {
+		// First, delete all existing file_chunks and chunk_files for this file
+		// This ensures old chunks are no longer associated when file content changes
+		if err := s.repos.FileChunks.DeleteByFileID(txCtx, tx, fileToProcess.File.ID); err != nil {
+			return fmt.Errorf("deleting old file chunks: %w", err)
+		}
+		if err := s.repos.ChunkFiles.DeleteByFileID(txCtx, tx, fileToProcess.File.ID); err != nil {
+			return fmt.Errorf("deleting old chunk files: %w", err)
+		}
+
+		for _, ci := range chunks {
+			// Create file-chunk mapping
+			if err := s.repos.FileChunks.Create(txCtx, tx, &ci.fileChunk); err != nil {
+				return fmt.Errorf("creating file chunk: %w", err)
+			}
+
+			// Create chunk-file mapping
+			chunkFile := &database.ChunkFile{
+				ChunkHash:  ci.fileChunk.ChunkHash,
+				FileID:     fileToProcess.File.ID,
+				FileOffset: ci.offset,
+				Length:     ci.size,
+			}
+			if err := s.repos.ChunkFiles.Create(txCtx, tx, chunkFile); err != nil {
+				return fmt.Errorf("creating chunk file: %w", err)
+			}
+		}
+
+		// Add file to snapshot
+		if err := s.repos.Snapshots.AddFileByID(txCtx, tx, s.snapshotID, fileToProcess.File.ID); err != nil {
+			return fmt.Errorf("adding file to snapshot: %w", err)
+		}
+
+		return nil
+	})
+
+	return err
+}
+
+// GetProgress returns the progress reporter for this scanner
+func (s *Scanner) GetProgress() *ProgressReporter {
+	return s.progress
+}
--- a/internal/snapshot/scanner_test.go
+++ b/internal/snapshot/scanner_test.go
@@ -0,0 +1,381 @@
+package snapshot_test
+
+import (
+	"context"
+	"database/sql"
+	"path/filepath"
+	"testing"
+	"time"
+
+	"git.eeqj.de/sneak/vaultik/internal/database"
+	"git.eeqj.de/sneak/vaultik/internal/log"
+	"git.eeqj.de/sneak/vaultik/internal/snapshot"
+	"github.com/spf13/afero"
+)
+
+func TestScannerSimpleDirectory(t *testing.T) {
+	// Initialize logger for tests
+	log.Initialize(log.Config{})
+
+	// Create in-memory filesystem
+	fs := afero.NewMemMapFs()
+
+	// Create test directory structure
+	testFiles := map[string]string{
+		"/source/file1.txt":         "Hello, world!",                // 13 bytes
+		"/source/file2.txt":         "This is another file",         // 20 bytes
+		"/source/subdir/file3.txt":  "File in subdirectory",         // 20 bytes
+		"/source/subdir/file4.txt":  "Another file in subdirectory", // 28 bytes
+		"/source/empty.txt":         "",                             // 0 bytes
+		"/source/subdir2/file5.txt": "Yet another file",             // 16 bytes
+	}
+
+	// Create files with specific times
+	testTime := time.Date(2024, 1, 1, 12, 0, 0, 0, time.UTC)
+	for path, content := range testFiles {
+		dir := filepath.Dir(path)
+		if err := fs.MkdirAll(dir, 0755); err != nil {
+			t.Fatalf("failed to create directory %s: %v", dir, err)
+		}
+		if err := afero.WriteFile(fs, path, []byte(content), 0644); err != nil {
+			t.Fatalf("failed to write file %s: %v", path, err)
+		}
+		// Set times
+		if err := fs.Chtimes(path, testTime, testTime); err != nil {
+			t.Fatalf("failed to set times for %s: %v", path, err)
+		}
+	}
+
+	// Create test database
+	db, err := database.NewTestDB()
+	if err != nil {
+		t.Fatalf("failed to create test database: %v", err)
+	}
+	defer func() {
+		if err := db.Close(); err != nil {
+			t.Errorf("failed to close database: %v", err)
+		}
+	}()
+
+	repos := database.NewRepositories(db)
+
+	// Create scanner
+	scanner := snapshot.NewScanner(snapshot.ScannerConfig{
+		FS:               fs,
+		ChunkSize:        int64(1024 * 16), // 16KB chunks for testing
+		Repositories:     repos,
+		MaxBlobSize:      int64(1024 * 1024), // 1MB blobs
+		CompressionLevel: 3,
+		AgeRecipients:    []string{"age1ezrjmfpwsc95svdg0y54mums3zevgzu0x0ecq2f7tp8a05gl0sjq9q9wjg"}, // Test public key
+	})
+
+	// Create a snapshot record for testing
+	ctx := context.Background()
+	snapshotID := "test-snapshot-001"
+	err = repos.WithTx(ctx, func(ctx context.Context, tx *sql.Tx) error {
+		snapshot := &database.Snapshot{
+			ID:               snapshotID,
+			Hostname:         "test-host",
+			VaultikVersion:   "test",
+			StartedAt:        time.Now(),
+			CompletedAt:      nil,
+			FileCount:        0,
+			ChunkCount:       0,
+			BlobCount:        0,
+			TotalSize:        0,
+			BlobSize:         0,
+			CompressionRatio: 1.0,
+		}
+		return repos.Snapshots.Create(ctx, tx, snapshot)
+	})
+	if err != nil {
+		t.Fatalf("failed to create snapshot: %v", err)
+	}
+
+	// Scan the directory
+	var result *snapshot.ScanResult
+	result, err = scanner.Scan(ctx, "/source", snapshotID)
+	if err != nil {
+		t.Fatalf("scan failed: %v", err)
+	}
+
+	// Verify results
+	// We now scan 6 files + 3 directories (source, subdir, subdir2) = 9 entries
+	if result.FilesScanned != 9 {
+		t.Errorf("expected 9 entries scanned, got %d", result.FilesScanned)
+	}
+
+	// Directories have their own sizes, so the total will be more than just file content
+	if result.BytesScanned < 97 { // At minimum we have 97 bytes of file content
+		t.Errorf("expected at least 97 bytes scanned, got %d", result.BytesScanned)
+	}
+
+	// Verify files in database
+	files, err := repos.Files.ListByPrefix(ctx, "/source")
+	if err != nil {
+		t.Fatalf("failed to list files: %v", err)
+	}
+
+	// We should have 6 files + 3 directories = 9 entries
+	if len(files) != 9 {
+		t.Errorf("expected 9 entries in database, got %d", len(files))
+	}
+
+	// Verify specific file
+	file1, err := repos.Files.GetByPath(ctx, "/source/file1.txt")
+	if err != nil {
+		t.Fatalf("failed to get file1.txt: %v", err)
+	}
+
+	if file1.Size != 13 {
+		t.Errorf("expected file1.txt size 13, got %d", file1.Size)
+	}
+
+	if file1.Mode != 0644 {
+		t.Errorf("expected file1.txt mode 0644, got %o", file1.Mode)
+	}
+
+	// Verify chunks were created
+	chunks, err := repos.FileChunks.GetByFile(ctx, "/source/file1.txt")
+	if err != nil {
+		t.Fatalf("failed to get chunks for file1.txt: %v", err)
+	}
+
+	if len(chunks) != 1 { // Small file should be one chunk
+		t.Errorf("expected 1 chunk for file1.txt, got %d", len(chunks))
+	}
+
+	// Verify deduplication - file3.txt and file4.txt have different content
+	// but we should still have the correct number of unique chunks
+	allChunks, err := repos.Chunks.List(ctx)
+	if err != nil {
+		t.Fatalf("failed to list all chunks: %v", err)
+	}
+
+	// We should have at most 6 chunks (one per unique file content)
+	// Empty file might not create a chunk
+	if len(allChunks) > 6 {
+		t.Errorf("expected at most 6 chunks, got %d", len(allChunks))
+	}
+}
+
+func TestScannerWithSymlinks(t *testing.T) {
+	// Initialize logger for tests
+	log.Initialize(log.Config{})
+
+	// Create in-memory filesystem
+	fs := afero.NewMemMapFs()
+
+	// Create test files
+	if err := fs.MkdirAll("/source", 0755); err != nil {
+		t.Fatal(err)
+	}
+	if err := afero.WriteFile(fs, "/source/target.txt", []byte("target content"), 0644); err != nil {
+		t.Fatal(err)
+	}
+	if err := afero.WriteFile(fs, "/outside/file.txt", []byte("outside content"), 0644); err != nil {
+		t.Fatal(err)
+	}
+
+	// Create symlinks (if supported by the filesystem)
+	linker, ok := fs.(afero.Symlinker)
+	if !ok {
+		t.Skip("filesystem does not support symlinks")
+	}
+
+	// Symlink to file in source
+	if err := linker.SymlinkIfPossible("target.txt", "/source/link1.txt"); err != nil {
+		t.Fatal(err)
+	}
+
+	// Symlink to file outside source
+	if err := linker.SymlinkIfPossible("/outside/file.txt", "/source/link2.txt"); err != nil {
+		t.Fatal(err)
+	}
+
+	// Create test database
+	db, err := database.NewTestDB()
+	if err != nil {
+		t.Fatalf("failed to create test database: %v", err)
+	}
+	defer func() {
+		if err := db.Close(); err != nil {
+			t.Errorf("failed to close database: %v", err)
+		}
+	}()
+
+	repos := database.NewRepositories(db)
+
+	// Create scanner
+	scanner := snapshot.NewScanner(snapshot.ScannerConfig{
+		FS:               fs,
+		ChunkSize:        1024 * 16,
+		Repositories:     repos,
+		MaxBlobSize:      int64(1024 * 1024),
+		CompressionLevel: 3,
+		AgeRecipients:    []string{"age1ezrjmfpwsc95svdg0y54mums3zevgzu0x0ecq2f7tp8a05gl0sjq9q9wjg"}, // Test public key
+	})
+
+	// Create a snapshot record for testing
+	ctx := context.Background()
+	snapshotID := "test-snapshot-001"
+	err = repos.WithTx(ctx, func(ctx context.Context, tx *sql.Tx) error {
+		snapshot := &database.Snapshot{
+			ID:               snapshotID,
+			Hostname:         "test-host",
+			VaultikVersion:   "test",
+			StartedAt:        time.Now(),
+			CompletedAt:      nil,
+			FileCount:        0,
+			ChunkCount:       0,
+			BlobCount:        0,
+			TotalSize:        0,
+			BlobSize:         0,
+			CompressionRatio: 1.0,
+		}
+		return repos.Snapshots.Create(ctx, tx, snapshot)
+	})
+	if err != nil {
+		t.Fatalf("failed to create snapshot: %v", err)
+	}
+
+	// Scan the directory
+	var result *snapshot.ScanResult
+	result, err = scanner.Scan(ctx, "/source", snapshotID)
+	if err != nil {
+		t.Fatalf("scan failed: %v", err)
+	}
+
+	// Should have scanned 3 files (target + 2 symlinks)
+	if result.FilesScanned != 3 {
+		t.Errorf("expected 3 files scanned, got %d", result.FilesScanned)
+	}
+
+	// Check symlinks in database
+	link1, err := repos.Files.GetByPath(ctx, "/source/link1.txt")
+	if err != nil {
+		t.Fatalf("failed to get link1.txt: %v", err)
+	}
+
+	if link1.LinkTarget != "target.txt" {
+		t.Errorf("expected link1.txt target 'target.txt', got %q", link1.LinkTarget)
+	}
+
+	link2, err := repos.Files.GetByPath(ctx, "/source/link2.txt")
+	if err != nil {
+		t.Fatalf("failed to get link2.txt: %v", err)
+	}
+
+	if link2.LinkTarget != "/outside/file.txt" {
+		t.Errorf("expected link2.txt target '/outside/file.txt', got %q", link2.LinkTarget)
+	}
+}
+
+func TestScannerLargeFile(t *testing.T) {
+	// Initialize logger for tests
+	log.Initialize(log.Config{})
+
+	// Create in-memory filesystem
+	fs := afero.NewMemMapFs()
+
+	// Create a large file that will require multiple chunks
+	// Use random content to ensure good chunk boundaries
+	largeContent := make([]byte, 1024*1024) // 1MB
+	// Fill with pseudo-random data to ensure chunk boundaries
+	for i := 0; i < len(largeContent); i++ {
+		// Simple pseudo-random generator for deterministic tests
+		largeContent[i] = byte((i * 7919) ^ (i >> 3))
+	}
+
+	if err := fs.MkdirAll("/source", 0755); err != nil {
+		t.Fatal(err)
+	}
+	if err := afero.WriteFile(fs, "/source/large.bin", largeContent, 0644); err != nil {
+		t.Fatal(err)
+	}
+
+	// Create test database
+	db, err := database.NewTestDB()
+	if err != nil {
+		t.Fatalf("failed to create test database: %v", err)
+	}
+	defer func() {
+		if err := db.Close(); err != nil {
+			t.Errorf("failed to close database: %v", err)
+		}
+	}()
+
+	repos := database.NewRepositories(db)
+
+	// Create scanner with 64KB average chunk size
+	scanner := snapshot.NewScanner(snapshot.ScannerConfig{
+		FS:               fs,
+		ChunkSize:        int64(1024 * 64), // 64KB average chunks
+		Repositories:     repos,
+		MaxBlobSize:      int64(1024 * 1024),
+		CompressionLevel: 3,
+		AgeRecipients:    []string{"age1ezrjmfpwsc95svdg0y54mums3zevgzu0x0ecq2f7tp8a05gl0sjq9q9wjg"}, // Test public key
+	})
+
+	// Create a snapshot record for testing
+	ctx := context.Background()
+	snapshotID := "test-snapshot-001"
+	err = repos.WithTx(ctx, func(ctx context.Context, tx *sql.Tx) error {
+		snapshot := &database.Snapshot{
+			ID:               snapshotID,
+			Hostname:         "test-host",
+			VaultikVersion:   "test",
+			StartedAt:        time.Now(),
+			CompletedAt:      nil,
+			FileCount:        0,
+			ChunkCount:       0,
+			BlobCount:        0,
+			TotalSize:        0,
+			BlobSize:         0,
+			CompressionRatio: 1.0,
+		}
+		return repos.Snapshots.Create(ctx, tx, snapshot)
+	})
+	if err != nil {
+		t.Fatalf("failed to create snapshot: %v", err)
+	}
+
+	// Scan the directory
+	var result *snapshot.ScanResult
+	result, err = scanner.Scan(ctx, "/source", snapshotID)
+	if err != nil {
+		t.Fatalf("scan failed: %v", err)
+	}
+
+	// We scan 1 file + 1 directory = 2 entries
+	if result.FilesScanned != 2 {
+		t.Errorf("expected 2 entries scanned, got %d", result.FilesScanned)
+	}
+
+	// The file size should be at least 1MB
+	if result.BytesScanned < 1024*1024 {
+		t.Errorf("expected at least %d bytes scanned, got %d", 1024*1024, result.BytesScanned)
+	}
+
+	// Verify chunks
+	chunks, err := repos.FileChunks.GetByFile(ctx, "/source/large.bin")
+	if err != nil {
+		t.Fatalf("failed to get chunks: %v", err)
+	}
+
+	// With content-defined chunking, the number of chunks depends on content
+	// For a 1MB file, we should get at least 1 chunk
+	if len(chunks) < 1 {
+		t.Errorf("expected at least 1 chunk, got %d", len(chunks))
+	}
+
+	// Log the actual number of chunks for debugging
+	t.Logf("1MB file produced %d chunks with 64KB average chunk size", len(chunks))
+
+	// Verify chunk sequence
+	for i, fc := range chunks {
+		if fc.Idx != i {
+			t.Errorf("chunk %d has incorrect sequence %d", i, fc.Idx)
+		}
+	}
+}
--- a/internal/snapshot/snapshot.go
+++ b/internal/snapshot/snapshot.go
@@ -0,0 +1,861 @@
+package snapshot
+
+// Snapshot Metadata Export Process
+// ================================
+//
+// The snapshot metadata contains all information needed to restore a snapshot.
+// Instead of creating a custom format, we use a trimmed copy of the SQLite
+// database containing only data relevant to the current snapshot.
+//
+// Process Overview:
+// 1. After all files/chunks/blobs are backed up, create a snapshot record
+// 2. Close the main database to ensure consistency
+// 3. Copy the entire database to a temporary file
+// 4. Open the temporary database
+// 5. Delete all snapshots except the current one
+// 6. Delete all orphaned records:
+//    - Files not referenced by any remaining snapshot
+//    - Chunks not referenced by any remaining files
+//    - Blobs not containing any remaining chunks
+//    - All related mapping tables (file_chunks, chunk_files, blob_chunks)
+// 7. Close the temporary database
+// 8. Use sqlite3 to dump the cleaned database to SQL
+// 9. Delete the temporary database file
+// 10. Compress the SQL dump with zstd
+// 11. Encrypt the compressed dump with age (if encryption is enabled)
+// 12. Upload to S3 as: snapshots/{snapshot-id}.sql.zst[.age]
+// 13. Reopen the main database
+//
+// Advantages of this approach:
+// - No custom metadata format needed
+// - Reuses existing database schema and relationships
+// - SQL dumps are portable and compress well
+// - Restore process can simply execute the SQL
+// - Atomic and consistent snapshot of all metadata
+//
+// TODO: Future improvements:
+// - Add snapshot-file relationships to track which files belong to which snapshot
+// - Implement incremental snapshots that reference previous snapshots
+// - Add snapshot manifest with additional metadata (size, chunk count, etc.)
+
+import (
+	"bytes"
+	"context"
+	"database/sql"
+	"fmt"
+	"io"
+	"os"
+	"os/exec"
+	"path/filepath"
+	"time"
+
+	"git.eeqj.de/sneak/vaultik/internal/blobgen"
+	"git.eeqj.de/sneak/vaultik/internal/config"
+	"git.eeqj.de/sneak/vaultik/internal/database"
+	"git.eeqj.de/sneak/vaultik/internal/log"
+	"git.eeqj.de/sneak/vaultik/internal/s3"
+	"github.com/dustin/go-humanize"
+	"go.uber.org/fx"
+)
+
+// SnapshotManager handles snapshot creation and metadata export
+type SnapshotManager struct {
+	repos    *database.Repositories
+	s3Client S3Client
+	config   *config.Config
+}
+
+// SnapshotManagerParams holds dependencies for NewSnapshotManager
+type SnapshotManagerParams struct {
+	fx.In
+
+	Repos    *database.Repositories
+	S3Client *s3.Client
+	Config   *config.Config
+}
+
+// NewSnapshotManager creates a new snapshot manager for dependency injection
+func NewSnapshotManager(params SnapshotManagerParams) *SnapshotManager {
+	return &SnapshotManager{
+		repos:    params.Repos,
+		s3Client: params.S3Client,
+		config:   params.Config,
+	}
+}
+
+// CreateSnapshot creates a new snapshot record in the database at the start of a backup
+func (sm *SnapshotManager) CreateSnapshot(ctx context.Context, hostname, version, gitRevision string) (string, error) {
+	snapshotID := fmt.Sprintf("%s-%s", hostname, time.Now().UTC().Format("20060102-150405Z"))
+
+	snapshot := &database.Snapshot{
+		ID:                 snapshotID,
+		Hostname:           hostname,
+		VaultikVersion:     version,
+		VaultikGitRevision: gitRevision,
+		StartedAt:          time.Now().UTC(),
+		CompletedAt:        nil, // Not completed yet
+		FileCount:          0,
+		ChunkCount:         0,
+		BlobCount:          0,
+		TotalSize:          0,
+		BlobSize:           0,
+		CompressionRatio:   1.0,
+	}
+
+	err := sm.repos.WithTx(ctx, func(ctx context.Context, tx *sql.Tx) error {
+		return sm.repos.Snapshots.Create(ctx, tx, snapshot)
+	})
+
+	if err != nil {
+		return "", fmt.Errorf("creating snapshot: %w", err)
+	}
+
+	log.Info("Created snapshot", "snapshot_id", snapshotID)
+	return snapshotID, nil
+}
+
+// UpdateSnapshotStats updates the statistics for a snapshot during backup
+func (sm *SnapshotManager) UpdateSnapshotStats(ctx context.Context, snapshotID string, stats BackupStats) error {
+	err := sm.repos.WithTx(ctx, func(ctx context.Context, tx *sql.Tx) error {
+		return sm.repos.Snapshots.UpdateCounts(ctx, tx, snapshotID,
+			int64(stats.FilesScanned),
+			int64(stats.ChunksCreated),
+			int64(stats.BlobsCreated),
+			stats.BytesScanned,
+			stats.BytesUploaded,
+		)
+	})
+
+	if err != nil {
+		return fmt.Errorf("updating snapshot stats: %w", err)
+	}
+
+	return nil
+}
+
+// UpdateSnapshotStatsExtended updates snapshot statistics with extended metrics.
+// This includes compression level, uncompressed blob size, and upload duration.
+func (sm *SnapshotManager) UpdateSnapshotStatsExtended(ctx context.Context, snapshotID string, stats ExtendedBackupStats) error {
+	return sm.repos.WithTx(ctx, func(ctx context.Context, tx *sql.Tx) error {
+		// First update basic stats
+		if err := sm.repos.Snapshots.UpdateCounts(ctx, tx, snapshotID,
+			int64(stats.FilesScanned),
+			int64(stats.ChunksCreated),
+			int64(stats.BlobsCreated),
+			stats.BytesScanned,
+			stats.BytesUploaded,
+		); err != nil {
+			return err
+		}
+
+		// Then update extended stats
+		return sm.repos.Snapshots.UpdateExtendedStats(ctx, tx, snapshotID,
+			stats.BlobUncompressedSize,
+			stats.CompressionLevel,
+			stats.UploadDurationMs,
+		)
+	})
+}
+
+// CompleteSnapshot marks a snapshot as completed and exports its metadata
+func (sm *SnapshotManager) CompleteSnapshot(ctx context.Context, snapshotID string) error {
+	// Mark the snapshot as completed
+	err := sm.repos.WithTx(ctx, func(ctx context.Context, tx *sql.Tx) error {
+		return sm.repos.Snapshots.MarkComplete(ctx, tx, snapshotID)
+	})
+
+	if err != nil {
+		return fmt.Errorf("marking snapshot complete: %w", err)
+	}
+
+	log.Info("Completed snapshot", "snapshot_id", snapshotID)
+	return nil
+}
+
+// ExportSnapshotMetadata exports snapshot metadata to S3
+//
+// This method executes the complete snapshot metadata export process:
+// 1. Creates a temporary directory for working files
+// 2. Copies the main database to preserve its state
+// 3. Cleans the copy to contain only current snapshot data
+// 4. Dumps the cleaned database to SQL
+// 5. Compresses the SQL dump with zstd
+// 6. Encrypts the compressed data (if encryption is enabled)
+// 7. Uploads to S3 at: snapshots/{snapshot-id}.sql.zst[.age]
+//
+// The caller is responsible for:
+// - Ensuring the main database is closed before calling this method
+// - Reopening the main database after this method returns
+//
+// This ensures database consistency during the copy operation.
+func (sm *SnapshotManager) ExportSnapshotMetadata(ctx context.Context, dbPath string, snapshotID string) error {
+	log.Info("Phase 3/3: Exporting snapshot metadata", "snapshot_id", snapshotID, "source_db", dbPath)
+
+	// Create temp directory for all temporary files
+	tempDir, err := os.MkdirTemp("", "vaultik-snapshot-*")
+	if err != nil {
+		return fmt.Errorf("creating temp dir: %w", err)
+	}
+	log.Debug("Created temporary directory", "path", tempDir)
+	defer func() {
+		log.Debug("Cleaning up temporary directory", "path", tempDir)
+		if err := os.RemoveAll(tempDir); err != nil {
+			log.Debug("Failed to remove temp dir", "path", tempDir, "error", err)
+		}
+	}()
+
+	// Step 1: Copy database to temp file
+	// The main database should be closed at this point
+	tempDBPath := filepath.Join(tempDir, "snapshot.db")
+	log.Debug("Copying database to temporary location", "source", dbPath, "destination", tempDBPath)
+	if err := copyFile(dbPath, tempDBPath); err != nil {
+		return fmt.Errorf("copying database: %w", err)
+	}
+	log.Debug("Database copy complete", "size", getFileSize(tempDBPath))
+
+	// Step 2: Clean the temp database to only contain current snapshot data
+	log.Debug("Cleaning temporary database to contain only current snapshot data", "snapshot_id", snapshotID, "db_path", tempDBPath)
+	stats, err := sm.cleanSnapshotDB(ctx, tempDBPath, snapshotID)
+	if err != nil {
+		return fmt.Errorf("cleaning snapshot database: %w", err)
+	}
+	log.Info("Temporary database cleanup complete",
+		"db_path", tempDBPath,
+		"size_after_clean", humanize.Bytes(uint64(getFileSize(tempDBPath))),
+		"files", stats.FileCount,
+		"chunks", stats.ChunkCount,
+		"blobs", stats.BlobCount,
+		"total_compressed_size", humanize.Bytes(uint64(stats.CompressedSize)),
+		"total_uncompressed_size", humanize.Bytes(uint64(stats.UncompressedSize)),
+		"compression_ratio", fmt.Sprintf("%.2fx", float64(stats.UncompressedSize)/float64(stats.CompressedSize)))
+
+	// Step 3: Dump the cleaned database to SQL
+	dumpPath := filepath.Join(tempDir, "snapshot.sql")
+	log.Debug("Dumping database to SQL", "source", tempDBPath, "destination", dumpPath)
+	if err := sm.dumpDatabase(tempDBPath, dumpPath); err != nil {
+		return fmt.Errorf("dumping database: %w", err)
+	}
+	log.Debug("SQL dump complete", "size", getFileSize(dumpPath))
+
+	// Step 4: Compress and encrypt the SQL dump
+	compressedPath := filepath.Join(tempDir, "snapshot.sql.zst.age")
+	log.Debug("Compressing and encrypting SQL dump", "source", dumpPath, "destination", compressedPath)
+	if err := sm.compressDump(dumpPath, compressedPath); err != nil {
+		return fmt.Errorf("compressing dump: %w", err)
+	}
+	log.Debug("Compression complete", "original_size", getFileSize(dumpPath), "compressed_size", getFileSize(compressedPath))
+
+	// Step 5: Read compressed and encrypted data for upload
+	log.Debug("Reading compressed and encrypted data for upload", "path", compressedPath)
+	finalData, err := os.ReadFile(compressedPath)
+	if err != nil {
+		return fmt.Errorf("reading compressed dump: %w", err)
+	}
+
+	// Step 6: Generate blob manifest (before closing temp DB)
+	log.Debug("Generating blob manifest from temporary database", "db_path", tempDBPath)
+	blobManifest, err := sm.generateBlobManifest(ctx, tempDBPath, snapshotID)
+	if err != nil {
+		return fmt.Errorf("generating blob manifest: %w", err)
+	}
+
+	// Step 7: Upload to S3 in snapshot subdirectory
+	// Upload database backup (compressed and encrypted)
+	dbKey := fmt.Sprintf("metadata/%s/db.zst.age", snapshotID)
+
+	log.Debug("Uploading snapshot database to S3", "key", dbKey, "size", len(finalData))
+	dbUploadStart := time.Now()
+	if err := sm.s3Client.PutObject(ctx, dbKey, bytes.NewReader(finalData)); err != nil {
+		return fmt.Errorf("uploading snapshot database: %w", err)
+	}
+	dbUploadDuration := time.Since(dbUploadStart)
+	dbUploadSpeed := float64(len(finalData)) * 8 / dbUploadDuration.Seconds() // bits per second
+	log.Info("Uploaded snapshot database to S3",
+		"path", dbKey,
+		"size", humanize.Bytes(uint64(len(finalData))),
+		"duration", dbUploadDuration,
+		"speed", humanize.SI(dbUploadSpeed, "bps"))
+
+	// Upload blob manifest (compressed only, not encrypted)
+	manifestKey := fmt.Sprintf("metadata/%s/manifest.json.zst", snapshotID)
+	log.Debug("Uploading blob manifest to S3", "key", manifestKey, "size", len(blobManifest))
+	manifestUploadStart := time.Now()
+	if err := sm.s3Client.PutObject(ctx, manifestKey, bytes.NewReader(blobManifest)); err != nil {
+		return fmt.Errorf("uploading blob manifest: %w", err)
+	}
+	manifestUploadDuration := time.Since(manifestUploadStart)
+	manifestUploadSpeed := float64(len(blobManifest)) * 8 / manifestUploadDuration.Seconds() // bits per second
+	log.Info("Uploaded blob manifest to S3",
+		"path", manifestKey,
+		"size", humanize.Bytes(uint64(len(blobManifest))),
+		"duration", manifestUploadDuration,
+		"speed", humanize.SI(manifestUploadSpeed, "bps"))
+
+	log.Info("Uploaded snapshot metadata",
+		"snapshot_id", snapshotID,
+		"db_size", len(finalData),
+		"manifest_size", len(blobManifest))
+	return nil
+}
+
+// CleanupStats contains statistics about cleaned snapshot database
+type CleanupStats struct {
+	FileCount        int
+	ChunkCount       int
+	BlobCount        int
+	CompressedSize   int64
+	UncompressedSize int64
+}
+
+// cleanSnapshotDB removes all data except for the specified snapshot
+//
+// The cleanup is performed in a specific order to maintain referential integrity:
+// 1. Delete other snapshots
+// 2. Delete orphaned snapshot associations (snapshot_files, snapshot_blobs) for deleted snapshots
+// 3. Delete orphaned files (not in the current snapshot)
+// 4. Delete orphaned chunk-to-file mappings (references to deleted files)
+// 5. Delete orphaned blobs (not in the current snapshot)
+// 6. Delete orphaned blob-to-chunk mappings (references to deleted chunks)
+// 7. Delete orphaned chunks (not referenced by any file)
+//
+// Each step is implemented as a separate method for clarity and maintainability.
+func (sm *SnapshotManager) cleanSnapshotDB(ctx context.Context, dbPath string, snapshotID string) (*CleanupStats, error) {
+	// Open the temp database
+	db, err := database.New(ctx, dbPath)
+	if err != nil {
+		return nil, fmt.Errorf("opening temp database: %w", err)
+	}
+	defer func() {
+		if err := db.Close(); err != nil {
+			log.Debug("Failed to close temp database", "error", err)
+		}
+	}()
+
+	// Start a transaction
+	tx, err := db.BeginTx(ctx, nil)
+	if err != nil {
+		return nil, fmt.Errorf("beginning transaction: %w", err)
+	}
+	defer func() {
+		if rbErr := tx.Rollback(); rbErr != nil && rbErr != sql.ErrTxDone {
+			log.Debug("Failed to rollback transaction", "error", rbErr)
+		}
+	}()
+
+	// Execute cleanup steps in order
+	if err := sm.deleteOtherSnapshots(ctx, tx, snapshotID); err != nil {
+		return nil, fmt.Errorf("step 1 - delete other snapshots: %w", err)
+	}
+
+	if err := sm.deleteOrphanedSnapshotAssociations(ctx, tx, snapshotID); err != nil {
+		return nil, fmt.Errorf("step 2 - delete orphaned snapshot associations: %w", err)
+	}
+
+	if err := sm.deleteOrphanedFiles(ctx, tx, snapshotID); err != nil {
+		return nil, fmt.Errorf("step 3 - delete orphaned files: %w", err)
+	}
+
+	if err := sm.deleteOrphanedChunkToFileMappings(ctx, tx); err != nil {
+		return nil, fmt.Errorf("step 4 - delete orphaned chunk-to-file mappings: %w", err)
+	}
+
+	if err := sm.deleteOrphanedBlobs(ctx, tx, snapshotID); err != nil {
+		return nil, fmt.Errorf("step 5 - delete orphaned blobs: %w", err)
+	}
+
+	if err := sm.deleteOrphanedBlobToChunkMappings(ctx, tx); err != nil {
+		return nil, fmt.Errorf("step 6 - delete orphaned blob-to-chunk mappings: %w", err)
+	}
+
+	if err := sm.deleteOrphanedChunks(ctx, tx); err != nil {
+		return nil, fmt.Errorf("step 7 - delete orphaned chunks: %w", err)
+	}
+
+	// Commit transaction
+	log.Debug("[Temp DB Cleanup] Committing cleanup transaction")
+	if err := tx.Commit(); err != nil {
+		return nil, fmt.Errorf("committing transaction: %w", err)
+	}
+
+	// Collect statistics about the cleaned database
+	stats := &CleanupStats{}
+
+	// Count files
+	var fileCount int
+	err = db.QueryRowWithLog(ctx, "SELECT COUNT(*) FROM files").Scan(&fileCount)
+	if err != nil {
+		return nil, fmt.Errorf("counting files: %w", err)
+	}
+	stats.FileCount = fileCount
+
+	// Count chunks
+	var chunkCount int
+	err = db.QueryRowWithLog(ctx, "SELECT COUNT(*) FROM chunks").Scan(&chunkCount)
+	if err != nil {
+		return nil, fmt.Errorf("counting chunks: %w", err)
+	}
+	stats.ChunkCount = chunkCount
+
+	// Count blobs and get sizes
+	var blobCount int
+	var compressedSize, uncompressedSize sql.NullInt64
+	err = db.QueryRowWithLog(ctx, `
+		SELECT COUNT(*), COALESCE(SUM(compressed_size), 0), COALESCE(SUM(uncompressed_size), 0) 
+		FROM blobs 
+		WHERE blob_hash IN (SELECT blob_hash FROM snapshot_blobs WHERE snapshot_id = ?)
+	`, snapshotID).Scan(&blobCount, &compressedSize, &uncompressedSize)
+	if err != nil {
+		return nil, fmt.Errorf("counting blobs and sizes: %w", err)
+	}
+	stats.BlobCount = blobCount
+	stats.CompressedSize = compressedSize.Int64
+	stats.UncompressedSize = uncompressedSize.Int64
+
+	log.Debug("[Temp DB Cleanup] Database cleanup complete", "stats", stats)
+	return stats, nil
+}
+
+// dumpDatabase creates a SQL dump of the database
+func (sm *SnapshotManager) dumpDatabase(dbPath, dumpPath string) error {
+	log.Debug("Running sqlite3 dump command", "source", dbPath, "destination", dumpPath)
+	cmd := exec.Command("sqlite3", dbPath, ".dump")
+
+	output, err := cmd.Output()
+	if err != nil {
+		return fmt.Errorf("running sqlite3 dump: %w", err)
+	}
+
+	log.Debug("SQL dump generated", "size", len(output))
+	if err := os.WriteFile(dumpPath, output, 0644); err != nil {
+		return fmt.Errorf("writing dump file: %w", err)
+	}
+
+	return nil
+}
+
+// compressDump compresses the SQL dump using zstd
+func (sm *SnapshotManager) compressDump(inputPath, outputPath string) error {
+	log.Debug("Opening SQL dump for compression", "path", inputPath)
+	input, err := os.Open(inputPath)
+	if err != nil {
+		return fmt.Errorf("opening input file: %w", err)
+	}
+	defer func() {
+		log.Debug("Closing input file", "path", inputPath)
+		if err := input.Close(); err != nil {
+			log.Debug("Failed to close input file", "path", inputPath, "error", err)
+		}
+	}()
+
+	log.Debug("Creating output file for compressed and encrypted data", "path", outputPath)
+	output, err := os.Create(outputPath)
+	if err != nil {
+		return fmt.Errorf("creating output file: %w", err)
+	}
+	defer func() {
+		log.Debug("Closing output file", "path", outputPath)
+		if err := output.Close(); err != nil {
+			log.Debug("Failed to close output file", "path", outputPath, "error", err)
+		}
+	}()
+
+	// Use blobgen for compression and encryption
+	log.Debug("Creating compressor/encryptor", "level", sm.config.CompressionLevel)
+	writer, err := blobgen.NewWriter(output, sm.config.CompressionLevel, sm.config.AgeRecipients)
+	if err != nil {
+		return fmt.Errorf("creating blobgen writer: %w", err)
+	}
+	defer func() {
+		if err := writer.Close(); err != nil {
+			log.Debug("Failed to close writer", "error", err)
+		}
+	}()
+
+	log.Debug("Compressing and encrypting data")
+	if _, err := io.Copy(writer, input); err != nil {
+		return fmt.Errorf("compressing data: %w", err)
+	}
+
+	// Close writer to flush all data
+	if err := writer.Close(); err != nil {
+		return fmt.Errorf("closing writer: %w", err)
+	}
+
+	log.Debug("Compression complete", "hash", fmt.Sprintf("%x", writer.Sum256()))
+
+	return nil
+}
+
+// copyFile copies a file from src to dst
+func copyFile(src, dst string) error {
+	log.Debug("Opening source file for copy", "path", src)
+	sourceFile, err := os.Open(src)
+	if err != nil {
+		return err
+	}
+	defer func() {
+		log.Debug("Closing source file", "path", src)
+		if err := sourceFile.Close(); err != nil {
+			log.Debug("Failed to close source file", "path", src, "error", err)
+		}
+	}()
+
+	log.Debug("Creating destination file", "path", dst)
+	destFile, err := os.Create(dst)
+	if err != nil {
+		return err
+	}
+	defer func() {
+		log.Debug("Closing destination file", "path", dst)
+		if err := destFile.Close(); err != nil {
+			log.Debug("Failed to close destination file", "path", dst, "error", err)
+		}
+	}()
+
+	log.Debug("Copying file data")
+	n, err := io.Copy(destFile, sourceFile)
+	if err != nil {
+		return err
+	}
+	log.Debug("File copy complete", "bytes_copied", n)
+
+	return nil
+}
+
+// generateBlobManifest creates a compressed JSON list of all blobs in the snapshot
+func (sm *SnapshotManager) generateBlobManifest(ctx context.Context, dbPath string, snapshotID string) ([]byte, error) {
+	log.Debug("Generating blob manifest", "db_path", dbPath, "snapshot_id", snapshotID)
+
+	// Open the cleaned database using the database package
+	db, err := database.New(ctx, dbPath)
+	if err != nil {
+		return nil, fmt.Errorf("opening database: %w", err)
+	}
+	defer func() { _ = db.Close() }()
+
+	// Create repositories to access the data
+	repos := database.NewRepositories(db)
+
+	// Get all blobs for this snapshot
+	log.Debug("Querying blobs for snapshot", "snapshot_id", snapshotID)
+	blobHashes, err := repos.Snapshots.GetBlobHashes(ctx, snapshotID)
+	if err != nil {
+		return nil, fmt.Errorf("getting snapshot blobs: %w", err)
+	}
+	log.Debug("Found blobs", "count", len(blobHashes))
+
+	// Get blob details including sizes
+	blobs := make([]BlobInfo, 0, len(blobHashes))
+	totalCompressedSize := int64(0)
+
+	for _, hash := range blobHashes {
+		blob, err := repos.Blobs.GetByHash(ctx, hash)
+		if err != nil {
+			log.Warn("Failed to get blob details", "hash", hash, "error", err)
+			continue
+		}
+		if blob != nil {
+			blobs = append(blobs, BlobInfo{
+				Hash:           hash,
+				CompressedSize: blob.CompressedSize,
+			})
+			totalCompressedSize += blob.CompressedSize
+		}
+	}
+
+	// Create manifest
+	manifest := &Manifest{
+		SnapshotID:          snapshotID,
+		Timestamp:           time.Now().UTC().Format(time.RFC3339),
+		BlobCount:           len(blobs),
+		TotalCompressedSize: totalCompressedSize,
+		Blobs:               blobs,
+	}
+
+	// Encode manifest
+	log.Debug("Encoding manifest")
+	compressedData, err := EncodeManifest(manifest, sm.config.CompressionLevel)
+	if err != nil {
+		return nil, fmt.Errorf("encoding manifest: %w", err)
+	}
+
+	log.Info("Generated blob manifest",
+		"snapshot_id", snapshotID,
+		"blob_count", len(blobs),
+		"total_compressed_size", totalCompressedSize,
+		"manifest_size", len(compressedData))
+
+	return compressedData, nil
+}
+
+// compressData compresses data using zstd
+
+// getFileSize returns the size of a file in bytes, or -1 if error
+func getFileSize(path string) int64 {
+	info, err := os.Stat(path)
+	if err != nil {
+		return -1
+	}
+	return info.Size()
+}
+
+// BackupStats contains statistics from a backup operation
+type BackupStats struct {
+	FilesScanned  int
+	BytesScanned  int64
+	ChunksCreated int
+	BlobsCreated  int
+	BytesUploaded int64
+}
+
+// ExtendedBackupStats contains additional statistics for comprehensive tracking
+type ExtendedBackupStats struct {
+	BackupStats
+	BlobUncompressedSize int64 // Total uncompressed size of all referenced blobs
+	CompressionLevel     int   // Compression level used for this snapshot
+	UploadDurationMs     int64 // Total milliseconds spent uploading to S3
+}
+
+// CleanupIncompleteSnapshots removes incomplete snapshots that don't have metadata in S3.
+// This is critical for data safety: incomplete snapshots can cause deduplication to skip
+// files that were never successfully backed up, resulting in data loss.
+func (sm *SnapshotManager) CleanupIncompleteSnapshots(ctx context.Context, hostname string) error {
+	log.Info("Checking for incomplete snapshots", "hostname", hostname)
+
+	// Get all incomplete snapshots for this hostname
+	incompleteSnapshots, err := sm.repos.Snapshots.GetIncompleteByHostname(ctx, hostname)
+	if err != nil {
+		return fmt.Errorf("getting incomplete snapshots: %w", err)
+	}
+
+	if len(incompleteSnapshots) == 0 {
+		log.Debug("No incomplete snapshots found")
+		return nil
+	}
+
+	log.Info("Found incomplete snapshots", "count", len(incompleteSnapshots))
+
+	// Check each incomplete snapshot for metadata in S3
+	for _, snapshot := range incompleteSnapshots {
+		// Check if metadata exists in S3
+		metadataKey := fmt.Sprintf("metadata/%s/db.zst", snapshot.ID)
+		_, err := sm.s3Client.StatObject(ctx, metadataKey)
+
+		if err != nil {
+			// Metadata doesn't exist in S3 - this is an incomplete snapshot
+			log.Info("Cleaning up incomplete snapshot record", "snapshot_id", snapshot.ID, "started_at", snapshot.StartedAt)
+
+			// Delete the snapshot and all its associations
+			if err := sm.deleteSnapshot(ctx, snapshot.ID); err != nil {
+				return fmt.Errorf("deleting incomplete snapshot %s: %w", snapshot.ID, err)
+			}
+
+			log.Info("Deleted incomplete snapshot record and associated data", "snapshot_id", snapshot.ID)
+		} else {
+			// Metadata exists - this snapshot was completed but database wasn't updated
+			// This shouldn't happen in normal operation, but mark it complete
+			log.Warn("Found snapshot with S3 metadata but incomplete in database", "snapshot_id", snapshot.ID)
+			if err := sm.repos.Snapshots.MarkComplete(ctx, nil, snapshot.ID); err != nil {
+				log.Error("Failed to mark snapshot as complete in database", "snapshot_id", snapshot.ID, "error", err)
+			}
+		}
+	}
+
+	return nil
+}
+
+// deleteSnapshot removes a snapshot and all its associations from the database
+func (sm *SnapshotManager) deleteSnapshot(ctx context.Context, snapshotID string) error {
+	// Delete snapshot_files entries
+	if err := sm.repos.Snapshots.DeleteSnapshotFiles(ctx, snapshotID); err != nil {
+		return fmt.Errorf("deleting snapshot files: %w", err)
+	}
+
+	// Delete snapshot_blobs entries
+	if err := sm.repos.Snapshots.DeleteSnapshotBlobs(ctx, snapshotID); err != nil {
+		return fmt.Errorf("deleting snapshot blobs: %w", err)
+	}
+
+	// Delete the snapshot itself
+	if err := sm.repos.Snapshots.Delete(ctx, snapshotID); err != nil {
+		return fmt.Errorf("deleting snapshot: %w", err)
+	}
+
+	// Clean up orphaned data
+	log.Debug("Cleaning up orphaned records in main database")
+	if err := sm.cleanupOrphanedData(ctx); err != nil {
+		return fmt.Errorf("cleaning up orphaned data: %w", err)
+	}
+
+	return nil
+}
+
+// cleanupOrphanedData removes files, chunks, and blobs that are no longer referenced by any snapshot
+func (sm *SnapshotManager) cleanupOrphanedData(ctx context.Context) error {
+	// Order is important to respect foreign key constraints:
+	// 1. Delete orphaned files (will cascade delete file_chunks)
+	// 2. Delete orphaned blobs (will cascade delete blob_chunks for deleted blobs)
+	// 3. Delete orphaned blob_chunks (where blob exists but chunk doesn't)
+	// 4. Delete orphaned chunks (now safe after all blob_chunks are gone)
+
+	// Delete orphaned files (files not in any snapshot)
+	log.Debug("Deleting orphaned file records from database")
+	if err := sm.repos.Files.DeleteOrphaned(ctx); err != nil {
+		return fmt.Errorf("deleting orphaned files: %w", err)
+	}
+
+	// Delete orphaned blobs (blobs not in any snapshot)
+	// This will cascade delete blob_chunks for deleted blobs
+	log.Debug("Deleting orphaned blob records from database")
+	if err := sm.repos.Blobs.DeleteOrphaned(ctx); err != nil {
+		return fmt.Errorf("deleting orphaned blobs: %w", err)
+	}
+
+	// Delete orphaned blob_chunks entries
+	// This handles cases where the blob still exists but chunks were deleted
+	log.Debug("Deleting orphaned blob_chunks associations from database")
+	if err := sm.repos.BlobChunks.DeleteOrphaned(ctx); err != nil {
+		return fmt.Errorf("deleting orphaned blob_chunks: %w", err)
+	}
+
+	// Delete orphaned chunks (chunks not referenced by any file)
+	// This must come after cleaning up blob_chunks to avoid foreign key violations
+	log.Debug("Deleting orphaned chunk records from database")
+	if err := sm.repos.Chunks.DeleteOrphaned(ctx); err != nil {
+		return fmt.Errorf("deleting orphaned chunks: %w", err)
+	}
+
+	return nil
+}
+
+// deleteOtherSnapshots deletes all snapshots except the current one
+func (sm *SnapshotManager) deleteOtherSnapshots(ctx context.Context, tx *sql.Tx, currentSnapshotID string) error {
+	log.Debug("[Temp DB Cleanup] Deleting all snapshot records except current", "keeping", currentSnapshotID)
+	database.LogSQL("Execute", "DELETE FROM snapshots WHERE id != ?", currentSnapshotID)
+	result, err := tx.ExecContext(ctx, "DELETE FROM snapshots WHERE id != ?", currentSnapshotID)
+	if err != nil {
+		return fmt.Errorf("deleting other snapshots: %w", err)
+	}
+	rowsAffected, _ := result.RowsAffected()
+	log.Debug("[Temp DB Cleanup] Deleted snapshot records from database", "count", rowsAffected)
+	return nil
+}
+
+// deleteOrphanedSnapshotAssociations deletes snapshot_files and snapshot_blobs for deleted snapshots
+func (sm *SnapshotManager) deleteOrphanedSnapshotAssociations(ctx context.Context, tx *sql.Tx, currentSnapshotID string) error {
+	// Delete orphaned snapshot_files
+	log.Debug("[Temp DB Cleanup] Deleting orphaned snapshot_files associations")
+	database.LogSQL("Execute", "DELETE FROM snapshot_files WHERE snapshot_id != ?", currentSnapshotID)
+	result, err := tx.ExecContext(ctx, "DELETE FROM snapshot_files WHERE snapshot_id != ?", currentSnapshotID)
+	if err != nil {
+		return fmt.Errorf("deleting orphaned snapshot_files: %w", err)
+	}
+	rowsAffected, _ := result.RowsAffected()
+	log.Debug("[Temp DB Cleanup] Deleted snapshot_files associations", "count", rowsAffected)
+
+	// Delete orphaned snapshot_blobs
+	log.Debug("[Temp DB Cleanup] Deleting orphaned snapshot_blobs associations")
+	database.LogSQL("Execute", "DELETE FROM snapshot_blobs WHERE snapshot_id != ?", currentSnapshotID)
+	result, err = tx.ExecContext(ctx, "DELETE FROM snapshot_blobs WHERE snapshot_id != ?", currentSnapshotID)
+	if err != nil {
+		return fmt.Errorf("deleting orphaned snapshot_blobs: %w", err)
+	}
+	rowsAffected, _ = result.RowsAffected()
+	log.Debug("[Temp DB Cleanup] Deleted snapshot_blobs associations", "count", rowsAffected)
+	return nil
+}
+
+// deleteOrphanedFiles deletes files not in the current snapshot
+func (sm *SnapshotManager) deleteOrphanedFiles(ctx context.Context, tx *sql.Tx, currentSnapshotID string) error {
+	log.Debug("[Temp DB Cleanup] Deleting file records not referenced by current snapshot")
+	database.LogSQL("Execute", `DELETE FROM files WHERE NOT EXISTS (SELECT 1 FROM snapshot_files WHERE snapshot_files.file_id = files.id AND snapshot_files.snapshot_id = ?)`, currentSnapshotID)
+	result, err := tx.ExecContext(ctx, `
+		DELETE FROM files 
+		WHERE NOT EXISTS (
+			SELECT 1 FROM snapshot_files 
+			WHERE snapshot_files.file_id = files.id 
+			AND snapshot_files.snapshot_id = ?
+		)`, currentSnapshotID)
+	if err != nil {
+		return fmt.Errorf("deleting orphaned files: %w", err)
+	}
+	rowsAffected, _ := result.RowsAffected()
+	log.Debug("[Temp DB Cleanup] Deleted file records from database", "count", rowsAffected)
+
+	// Note: file_chunks will be deleted via CASCADE
+	log.Debug("[Temp DB Cleanup] file_chunks associations deleted via CASCADE")
+	return nil
+}
+
+// deleteOrphanedChunkToFileMappings deletes chunk_files entries for deleted files
+func (sm *SnapshotManager) deleteOrphanedChunkToFileMappings(ctx context.Context, tx *sql.Tx) error {
+	log.Debug("[Temp DB Cleanup] Deleting orphaned chunk_files associations")
+	database.LogSQL("Execute", `DELETE FROM chunk_files WHERE NOT EXISTS (SELECT 1 FROM files WHERE files.id = chunk_files.file_id)`)
+	result, err := tx.ExecContext(ctx, `
+		DELETE FROM chunk_files 
+		WHERE NOT EXISTS (
+			SELECT 1 FROM files 
+			WHERE files.id = chunk_files.file_id
+		)`)
+	if err != nil {
+		return fmt.Errorf("deleting orphaned chunk_files: %w", err)
+	}
+	rowsAffected, _ := result.RowsAffected()
+	log.Debug("[Temp DB Cleanup] Deleted chunk_files associations", "count", rowsAffected)
+	return nil
+}
+
+// deleteOrphanedBlobs deletes blobs not in the current snapshot
+func (sm *SnapshotManager) deleteOrphanedBlobs(ctx context.Context, tx *sql.Tx, currentSnapshotID string) error {
+	log.Debug("[Temp DB Cleanup] Deleting blob records not referenced by current snapshot")
+	database.LogSQL("Execute", `DELETE FROM blobs WHERE NOT EXISTS (SELECT 1 FROM snapshot_blobs WHERE snapshot_blobs.blob_hash = blobs.blob_hash AND snapshot_blobs.snapshot_id = ?)`, currentSnapshotID)
+	result, err := tx.ExecContext(ctx, `
+		DELETE FROM blobs 
+		WHERE NOT EXISTS (
+			SELECT 1 FROM snapshot_blobs 
+			WHERE snapshot_blobs.blob_hash = blobs.blob_hash 
+			AND snapshot_blobs.snapshot_id = ?
+		)`, currentSnapshotID)
+	if err != nil {
+		return fmt.Errorf("deleting orphaned blobs: %w", err)
+	}
+	rowsAffected, _ := result.RowsAffected()
+	log.Debug("[Temp DB Cleanup] Deleted blob records from database", "count", rowsAffected)
+	return nil
+}
+
+// deleteOrphanedBlobToChunkMappings deletes blob_chunks entries for deleted blobs
+func (sm *SnapshotManager) deleteOrphanedBlobToChunkMappings(ctx context.Context, tx *sql.Tx) error {
+	log.Debug("[Temp DB Cleanup] Deleting orphaned blob_chunks associations")
+	database.LogSQL("Execute", `DELETE FROM blob_chunks WHERE NOT EXISTS (SELECT 1 FROM blobs WHERE blobs.id = blob_chunks.blob_id)`)
+	result, err := tx.ExecContext(ctx, `
+		DELETE FROM blob_chunks 
+		WHERE NOT EXISTS (
+			SELECT 1 FROM blobs 
+			WHERE blobs.id = blob_chunks.blob_id
+		)`)
+	if err != nil {
+		return fmt.Errorf("deleting orphaned blob_chunks: %w", err)
+	}
+	rowsAffected, _ := result.RowsAffected()
+	log.Debug("[Temp DB Cleanup] Deleted blob_chunks associations", "count", rowsAffected)
+	return nil
+}
+
+// deleteOrphanedChunks deletes chunks not referenced by any file
+func (sm *SnapshotManager) deleteOrphanedChunks(ctx context.Context, tx *sql.Tx) error {
+	log.Debug("[Temp DB Cleanup] Deleting orphaned chunk records")
+	database.LogSQL("Execute", `DELETE FROM chunks WHERE NOT EXISTS (SELECT 1 FROM file_chunks WHERE file_chunks.chunk_hash = chunks.chunk_hash)`)
+	result, err := tx.ExecContext(ctx, `
+		DELETE FROM chunks 
+		WHERE NOT EXISTS (
+			SELECT 1 FROM file_chunks 
+			WHERE file_chunks.chunk_hash = chunks.chunk_hash
+		)`)
+	if err != nil {
+		return fmt.Errorf("deleting orphaned chunks: %w", err)
+	}
+	rowsAffected, _ := result.RowsAffected()
+	log.Debug("[Temp DB Cleanup] Deleted chunk records from database", "count", rowsAffected)
+	return nil
+}
--- a/internal/snapshot/snapshot_test.go
+++ b/internal/snapshot/snapshot_test.go
@@ -0,0 +1,163 @@
+package snapshot
+
+import (
+	"context"
+	"database/sql"
+	"path/filepath"
+	"testing"
+
+	"git.eeqj.de/sneak/vaultik/internal/config"
+	"git.eeqj.de/sneak/vaultik/internal/database"
+	"git.eeqj.de/sneak/vaultik/internal/log"
+)
+
+const (
+	// Test age public key for encryption
+	testAgeRecipient = "age1ezrjmfpwsc95svdg0y54mums3zevgzu0x0ecq2f7tp8a05gl0sjq9q9wjg"
+)
+
+func TestCleanSnapshotDBEmptySnapshot(t *testing.T) {
+	// Initialize logger
+	log.Initialize(log.Config{})
+
+	ctx := context.Background()
+
+	// Create a test database
+	tempDir := t.TempDir()
+	dbPath := filepath.Join(tempDir, "test.db")
+	db, err := database.New(ctx, dbPath)
+	if err != nil {
+		t.Fatalf("failed to create database: %v", err)
+	}
+
+	repos := database.NewRepositories(db)
+
+	// Create an empty snapshot
+	snapshot := &database.Snapshot{
+		ID:       "empty-snapshot",
+		Hostname: "test-host",
+	}
+
+	err = repos.WithTx(ctx, func(ctx context.Context, tx *sql.Tx) error {
+		return repos.Snapshots.Create(ctx, tx, snapshot)
+	})
+	if err != nil {
+		t.Fatalf("failed to create snapshot: %v", err)
+	}
+
+	// Create some files and chunks not associated with any snapshot
+	file := &database.File{Path: "/orphan/file.txt", Size: 1000}
+	chunk := &database.Chunk{ChunkHash: "orphan-chunk", Size: 500}
+
+	err = repos.WithTx(ctx, func(ctx context.Context, tx *sql.Tx) error {
+		if err := repos.Files.Create(ctx, tx, file); err != nil {
+			return err
+		}
+		return repos.Chunks.Create(ctx, tx, chunk)
+	})
+	if err != nil {
+		t.Fatalf("failed to create orphan data: %v", err)
+	}
+
+	// Close the database
+	if err := db.Close(); err != nil {
+		t.Fatalf("failed to close database: %v", err)
+	}
+
+	// Copy database
+	tempDBPath := filepath.Join(tempDir, "temp.db")
+	if err := copyFile(dbPath, tempDBPath); err != nil {
+		t.Fatalf("failed to copy database: %v", err)
+	}
+
+	// Create a mock config for testing
+	cfg := &config.Config{
+		CompressionLevel: 3,
+		AgeRecipients:    []string{testAgeRecipient},
+	}
+	// Clean the database
+	sm := &SnapshotManager{config: cfg}
+	if _, err := sm.cleanSnapshotDB(ctx, tempDBPath, snapshot.ID); err != nil {
+		t.Fatalf("failed to clean snapshot database: %v", err)
+	}
+
+	// Verify the cleaned database
+	cleanedDB, err := database.New(ctx, tempDBPath)
+	if err != nil {
+		t.Fatalf("failed to open cleaned database: %v", err)
+	}
+	defer func() {
+		if err := cleanedDB.Close(); err != nil {
+			t.Errorf("failed to close database: %v", err)
+		}
+	}()
+
+	cleanedRepos := database.NewRepositories(cleanedDB)
+
+	// Verify snapshot exists
+	verifySnapshot, err := cleanedRepos.Snapshots.GetByID(ctx, snapshot.ID)
+	if err != nil {
+		t.Fatalf("failed to get snapshot: %v", err)
+	}
+	if verifySnapshot == nil {
+		t.Error("snapshot should exist")
+	}
+
+	// Verify orphan file is gone
+	f, err := cleanedRepos.Files.GetByPath(ctx, file.Path)
+	if err != nil {
+		t.Fatalf("failed to check file: %v", err)
+	}
+	if f != nil {
+		t.Error("orphan file should not exist")
+	}
+
+	// Verify orphan chunk is gone
+	c, err := cleanedRepos.Chunks.GetByHash(ctx, chunk.ChunkHash)
+	if err != nil {
+		t.Fatalf("failed to check chunk: %v", err)
+	}
+	if c != nil {
+		t.Error("orphan chunk should not exist")
+	}
+}
+
+func TestCleanSnapshotDBNonExistentSnapshot(t *testing.T) {
+	// Initialize logger
+	log.Initialize(log.Config{})
+
+	ctx := context.Background()
+
+	// Create a test database
+	tempDir := t.TempDir()
+	dbPath := filepath.Join(tempDir, "test.db")
+	db, err := database.New(ctx, dbPath)
+	if err != nil {
+		t.Fatalf("failed to create database: %v", err)
+	}
+
+	// Close immediately
+	if err := db.Close(); err != nil {
+		t.Fatalf("failed to close database: %v", err)
+	}
+
+	// Copy database
+	tempDBPath := filepath.Join(tempDir, "temp.db")
+	if err := copyFile(dbPath, tempDBPath); err != nil {
+		t.Fatalf("failed to copy database: %v", err)
+	}
+
+	// Create a mock config for testing
+	cfg := &config.Config{
+		CompressionLevel: 3,
+		AgeRecipients:    []string{testAgeRecipient},
+	}
+	// Try to clean with non-existent snapshot
+	sm := &SnapshotManager{config: cfg}
+	_, err = sm.cleanSnapshotDB(ctx, tempDBPath, "non-existent-snapshot")
+
+	// Should not error - it will just delete everything
+	if err != nil {
+		t.Fatalf("unexpected error: %v", err)
+	}
+}