Add custom types, version command, and restore --verify flag

- Add internal/types package with type-safe wrappers for IDs, hashes, paths, and credentials (FileID, BlobID, ChunkHash, etc.)
- Implement driver.Valuer and sql.Scanner for UUID-based types
- Add `vaultik version` command showing version, commit, and Go version
- Add `--verify` flag to restore command that checksums all restored files against expected chunk hashes, with a progress bar
- Remove fetch.go (dead code; functionality lives in restore)
- Clean up TODO.md, removing completed items
- Update all database and snapshot code to use the new custom types
2026-01-14 17:11:52 -08:00
parent 2afd54d693
commit 417b25a5f5
53 changed files with 2330 additions and 1581 deletions
--- a/internal/snapshot/scanner.go
+++ b/internal/snapshot/scanner.go
@@ -16,9 +16,9 @@ import (
 	"git.eeqj.de/sneak/vaultik/internal/database"
 	"git.eeqj.de/sneak/vaultik/internal/log"
 	"git.eeqj.de/sneak/vaultik/internal/storage"
+	"git.eeqj.de/sneak/vaultik/internal/types"
 	"github.com/dustin/go-humanize"
 	"github.com/gobwas/glob"
-	"github.com/google/uuid"
 	"github.com/spf13/afero"
 )

@@ -45,18 +45,20 @@ type compiledPattern struct {

 // Scanner scans directories and populates the database with file and chunk information
 type Scanner struct {
-	fs               afero.Fs
-	chunker          *chunker.Chunker
-	packer           *blob.Packer
-	repos            *database.Repositories
-	storage          storage.Storer
-	maxBlobSize      int64
-	compressionLevel int
-	ageRecipient     string
-	snapshotID       string            // Current snapshot being processed
-	exclude          []string          // Glob patterns for files/directories to exclude
-	compiledExclude  []compiledPattern // Compiled glob patterns
-	progress         *ProgressReporter
+	fs                afero.Fs
+	chunker           *chunker.Chunker
+	packer            *blob.Packer
+	repos             *database.Repositories
+	storage           storage.Storer
+	maxBlobSize       int64
+	compressionLevel  int
+	ageRecipient      string
+	snapshotID        string            // Current snapshot being processed
+	currentSourcePath string            // Current source directory being scanned (for restore path stripping)
+	exclude           []string          // Glob patterns for files/directories to exclude
+	compiledExclude   []compiledPattern // Compiled glob patterns
+	progress          *ProgressReporter
+	skipErrors        bool // Skip file read errors (log loudly but continue)

 	// In-memory cache of known chunk hashes for fast existence checks
 	knownChunks   map[string]struct{}
@@ -90,6 +92,7 @@ type ScannerConfig struct {
 	AgeRecipients    []string // Optional, empty means no encryption
 	EnableProgress   bool     // Enable progress reporting
 	Exclude          []string // Glob patterns for files/directories to exclude
+	SkipErrors       bool     // Skip file read errors (log loudly but continue)
 }

 // ScanResult contains the results of a scan operation
@@ -148,6 +151,7 @@ func NewScanner(cfg ScannerConfig) *Scanner {
 		exclude:            cfg.Exclude,
 		compiledExclude:    compiledExclude,
 		progress:           progress,
+		skipErrors:         cfg.SkipErrors,
 		pendingChunkHashes: make(map[string]struct{}),
 	}
 }
@@ -155,6 +159,7 @@ func NewScanner(cfg ScannerConfig) *Scanner {
 // Scan scans a directory and populates the database
 func (s *Scanner) Scan(ctx context.Context, path string, snapshotID string) (*ScanResult, error) {
 	s.snapshotID = snapshotID
+	s.currentSourcePath = path // Store source path for file records (used during restore)
 	s.scanCtx = ctx
 	result := &ScanResult{
 		StartTime: time.Now().UTC(),
@@ -284,7 +289,7 @@ func (s *Scanner) loadKnownFiles(ctx context.Context, path string) (map[string]*

 	result := make(map[string]*database.File, len(files))
 	for _, f := range files {
-		result[f.Path] = f
+		result[f.Path.String()] = f
 	}

 	return result, nil
@@ -301,7 +306,7 @@ func (s *Scanner) loadKnownChunks(ctx context.Context) error {
 	s.knownChunksMu.Lock()
 	s.knownChunks = make(map[string]struct{}, len(chunks))
 	for _, c := range chunks {
-		s.knownChunks[c.ChunkHash] = struct{}{}
+		s.knownChunks[c.ChunkHash.String()] = struct{}{}
 	}
 	s.knownChunksMu.Unlock()

@@ -432,7 +437,7 @@ func (s *Scanner) flushCompletedPendingFiles(ctx context.Context) error {
 	for _, data := range s.pendingFiles {
 		allChunksCommitted := true
 		for _, fc := range data.fileChunks {
-			if s.isChunkPending(fc.ChunkHash) {
+			if s.isChunkPending(fc.ChunkHash.String()) {
 				allChunksCommitted = false
 				break
 			}
@@ -463,7 +468,7 @@ func (s *Scanner) flushCompletedPendingFiles(ctx context.Context) error {
 	collectStart := time.Now()
 	var allFileChunks []database.FileChunk
 	var allChunkFiles []database.ChunkFile
-	var allFileIDs []string
+	var allFileIDs []types.FileID
 	var allFiles []*database.File

 	for _, data := range canFlush {
@@ -542,7 +547,7 @@ func (s *Scanner) flushCompletedPendingFiles(ctx context.Context) error {
 // ScanPhaseResult contains the results of the scan phase
 type ScanPhaseResult struct {
 	FilesToProcess   []*FileToProcess
-	UnchangedFileIDs []string // IDs of unchanged files to associate with snapshot
+	UnchangedFileIDs []types.FileID // IDs of unchanged files to associate with snapshot
 }

 // scanPhase performs the initial directory scan to identify files to process
@@ -554,7 +559,7 @@ func (s *Scanner) scanPhase(ctx context.Context, path string, result *ScanResult
 	estimatedTotal := int64(len(knownFiles))

 	var filesToProcess []*FileToProcess
-	var unchangedFileIDs []string // Just IDs - no new records needed
+	var unchangedFileIDs []types.FileID // Just IDs - no new records needed
 	var mu sync.Mutex

 	// Set up periodic status output
@@ -566,6 +571,11 @@ func (s *Scanner) scanPhase(ctx context.Context, path string, result *ScanResult
 	log.Debug("Starting directory walk", "path", path)
 	err := afero.Walk(s.fs, path, func(filePath string, info os.FileInfo, err error) error {
 		if err != nil {
+			if s.skipErrors {
+				log.Error("ERROR: Failed to access file (skipping due to --skip-errors)", "path", filePath, "error", err)
+				fmt.Printf("ERROR: Failed to access %s: %v (skipping)\n", filePath, err)
+				return nil // Continue scanning
+			}
 			log.Debug("Error accessing filesystem entry", "path", filePath, "error", err)
 			return err
 		}
@@ -604,7 +614,7 @@ func (s *Scanner) scanPhase(ctx context.Context, path string, result *ScanResult
 				FileInfo: info,
 				File:     file,
 			})
-		} else if file.ID != "" {
+		} else if !file.ID.IsZero() {
 			// Unchanged file with existing ID - just need snapshot association
 			unchangedFileIDs = append(unchangedFileIDs, file.ID)
 		}
@@ -696,22 +706,23 @@ func (s *Scanner) checkFileInMemory(path string, info os.FileInfo, knownFiles ma
 	// Create file record with ID set upfront
 	// For new files, generate UUID immediately so it's available for chunk associations
 	// For existing files, reuse the existing ID
-	var fileID string
+	var fileID types.FileID
 	if exists {
 		fileID = existingFile.ID
 	} else {
-		fileID = uuid.New().String()
+		fileID = types.NewFileID()
 	}

 	file := &database.File{
-		ID:    fileID,
-		Path:  path,
-		MTime: info.ModTime(),
-		CTime: info.ModTime(), // afero doesn't provide ctime
-		Size:  info.Size(),
-		Mode:  uint32(info.Mode()),
-		UID:   uid,
-		GID:   gid,
+		ID:         fileID,
+		Path:       types.FilePath(path),
+		SourcePath: types.SourcePath(s.currentSourcePath), // Store source directory for restore path stripping
+		MTime:      info.ModTime(),
+		CTime:      info.ModTime(), // afero doesn't provide ctime
+		Size:       info.Size(),
+		Mode:       uint32(info.Mode()),
+		UID:        uid,
+		GID:        gid,
 	}

 	// New file - needs processing
@@ -734,7 +745,7 @@ func (s *Scanner) checkFileInMemory(path string, info os.FileInfo, knownFiles ma

 // batchAddFilesToSnapshot adds existing file IDs to the snapshot association table
 // This is used for unchanged files that already have records in the database
-func (s *Scanner) batchAddFilesToSnapshot(ctx context.Context, fileIDs []string) error {
+func (s *Scanner) batchAddFilesToSnapshot(ctx context.Context, fileIDs []types.FileID) error {
 	const batchSize = 1000

 	startTime := time.Now()
@@ -817,6 +828,13 @@ func (s *Scanner) processPhase(ctx context.Context, filesToProcess []*FileToProc
 				result.FilesSkipped++
 				continue
 			}
+			// Skip file read errors if --skip-errors is enabled
+			if s.skipErrors {
+				log.Error("ERROR: Failed to process file (skipping due to --skip-errors)", "path", fileToProcess.Path, "error", err)
+				fmt.Printf("ERROR: Failed to process %s: %v (skipping)\n", fileToProcess.Path, err)
+				result.FilesSkipped++
+				continue
+			}
 			return fmt.Errorf("processing file %s: %w", fileToProcess.Path, err)
 		}

@@ -881,8 +899,12 @@ func (s *Scanner) processPhase(ctx context.Context, filesToProcess []*FileToProc
 		for _, b := range blobs {
 			// Blob metadata is already stored incrementally during packing
 			// Just add the blob to the snapshot
-			err := s.repos.WithTx(ctx, func(ctx context.Context, tx *sql.Tx) error {
-				return s.repos.Snapshots.AddBlob(ctx, tx, s.snapshotID, b.ID, b.Hash)
+			blobID, err := types.ParseBlobID(b.ID)
+			if err != nil {
+				return fmt.Errorf("parsing blob ID: %w", err)
+			}
+			err = s.repos.WithTx(ctx, func(ctx context.Context, tx *sql.Tx) error {
+				return s.repos.Snapshots.AddBlob(ctx, tx, s.snapshotID, blobID, types.BlobHash(b.Hash))
 			})
 			if err != nil {
 				return fmt.Errorf("storing blob metadata: %w", err)
@@ -984,14 +1006,21 @@ func (s *Scanner) handleBlobReady(blobWithReader *blob.BlobWithReader) error {
 	if dbCtx == nil {
 		dbCtx = context.Background()
 	}
-	err := s.repos.WithTx(dbCtx, func(ctx context.Context, tx *sql.Tx) error {
+
+	// Parse blob ID for typed operations
+	finishedBlobID, err := types.ParseBlobID(finishedBlob.ID)
+	if err != nil {
+		return fmt.Errorf("parsing finished blob ID: %w", err)
+	}
+
+	err = s.repos.WithTx(dbCtx, func(ctx context.Context, tx *sql.Tx) error {
 		// Update blob upload timestamp
 		if err := s.repos.Blobs.UpdateUploaded(ctx, tx, finishedBlob.ID); err != nil {
 			return fmt.Errorf("updating blob upload timestamp: %w", err)
 		}

 		// Add the blob to the snapshot
-		if err := s.repos.Snapshots.AddBlob(ctx, tx, s.snapshotID, finishedBlob.ID, finishedBlob.Hash); err != nil {
+		if err := s.repos.Snapshots.AddBlob(ctx, tx, s.snapshotID, finishedBlobID, types.BlobHash(finishedBlob.Hash)); err != nil {
 			return fmt.Errorf("adding blob to snapshot: %w", err)
 		}

@@ -1094,7 +1123,7 @@ func (s *Scanner) processFileStreaming(ctx context.Context, fileToProcess *FileT
 			fileChunk: database.FileChunk{
 				FileID:    fileToProcess.File.ID,
 				Idx:       chunkIndex,
-				ChunkHash: chunk.Hash,
+				ChunkHash: types.ChunkHash(chunk.Hash),
 			},
 			offset: chunk.Offset,
 			size:   chunk.Size,