Add custom types, version command, and restore --verify flag

- Add internal/types package with type-safe wrappers for IDs, hashes,
  paths, and credentials (FileID, BlobID, ChunkHash, etc.)
- Implement driver.Valuer and sql.Scanner for UUID-based types
- Add `vaultik version` command showing version, commit, and Go version
- Add `--verify` flag to the restore command; it checksums all restored
  files against their expected chunk hashes and shows a progress bar
- Remove fetch.go (dead code, functionality in restore)
- Clean up TODO.md, remove completed items
- Update all database and snapshot code to use new custom types
This commit is contained in:
2026-01-14 17:11:52 -08:00
parent 2afd54d693
commit 417b25a5f5
53 changed files with 2330 additions and 1581 deletions

View File

@@ -16,9 +16,9 @@ import (
"git.eeqj.de/sneak/vaultik/internal/database"
"git.eeqj.de/sneak/vaultik/internal/log"
"git.eeqj.de/sneak/vaultik/internal/storage"
"git.eeqj.de/sneak/vaultik/internal/types"
"github.com/dustin/go-humanize"
"github.com/gobwas/glob"
"github.com/google/uuid"
"github.com/spf13/afero"
)
@@ -45,18 +45,20 @@ type compiledPattern struct {
// Scanner scans directories and populates the database with file and chunk information
type Scanner struct {
fs afero.Fs
chunker *chunker.Chunker
packer *blob.Packer
repos *database.Repositories
storage storage.Storer
maxBlobSize int64
compressionLevel int
ageRecipient string
snapshotID string // Current snapshot being processed
exclude []string // Glob patterns for files/directories to exclude
compiledExclude []compiledPattern // Compiled glob patterns
progress *ProgressReporter
fs afero.Fs
chunker *chunker.Chunker
packer *blob.Packer
repos *database.Repositories
storage storage.Storer
maxBlobSize int64
compressionLevel int
ageRecipient string
snapshotID string // Current snapshot being processed
currentSourcePath string // Current source directory being scanned (for restore path stripping)
exclude []string // Glob patterns for files/directories to exclude
compiledExclude []compiledPattern // Compiled glob patterns
progress *ProgressReporter
skipErrors bool // Skip file read errors (log loudly but continue)
// In-memory cache of known chunk hashes for fast existence checks
knownChunks map[string]struct{}
@@ -90,6 +92,7 @@ type ScannerConfig struct {
AgeRecipients []string // Optional, empty means no encryption
EnableProgress bool // Enable progress reporting
Exclude []string // Glob patterns for files/directories to exclude
SkipErrors bool // Skip file read errors (log loudly but continue)
}
// ScanResult contains the results of a scan operation
@@ -148,6 +151,7 @@ func NewScanner(cfg ScannerConfig) *Scanner {
exclude: cfg.Exclude,
compiledExclude: compiledExclude,
progress: progress,
skipErrors: cfg.SkipErrors,
pendingChunkHashes: make(map[string]struct{}),
}
}
@@ -155,6 +159,7 @@ func NewScanner(cfg ScannerConfig) *Scanner {
// Scan scans a directory and populates the database
func (s *Scanner) Scan(ctx context.Context, path string, snapshotID string) (*ScanResult, error) {
s.snapshotID = snapshotID
s.currentSourcePath = path // Store source path for file records (used during restore)
s.scanCtx = ctx
result := &ScanResult{
StartTime: time.Now().UTC(),
@@ -284,7 +289,7 @@ func (s *Scanner) loadKnownFiles(ctx context.Context, path string) (map[string]*
result := make(map[string]*database.File, len(files))
for _, f := range files {
result[f.Path] = f
result[f.Path.String()] = f
}
return result, nil
@@ -301,7 +306,7 @@ func (s *Scanner) loadKnownChunks(ctx context.Context) error {
s.knownChunksMu.Lock()
s.knownChunks = make(map[string]struct{}, len(chunks))
for _, c := range chunks {
s.knownChunks[c.ChunkHash] = struct{}{}
s.knownChunks[c.ChunkHash.String()] = struct{}{}
}
s.knownChunksMu.Unlock()
@@ -432,7 +437,7 @@ func (s *Scanner) flushCompletedPendingFiles(ctx context.Context) error {
for _, data := range s.pendingFiles {
allChunksCommitted := true
for _, fc := range data.fileChunks {
if s.isChunkPending(fc.ChunkHash) {
if s.isChunkPending(fc.ChunkHash.String()) {
allChunksCommitted = false
break
}
@@ -463,7 +468,7 @@ func (s *Scanner) flushCompletedPendingFiles(ctx context.Context) error {
collectStart := time.Now()
var allFileChunks []database.FileChunk
var allChunkFiles []database.ChunkFile
var allFileIDs []string
var allFileIDs []types.FileID
var allFiles []*database.File
for _, data := range canFlush {
@@ -542,7 +547,7 @@ func (s *Scanner) flushCompletedPendingFiles(ctx context.Context) error {
// ScanPhaseResult contains the results of the scan phase
type ScanPhaseResult struct {
FilesToProcess []*FileToProcess
UnchangedFileIDs []string // IDs of unchanged files to associate with snapshot
UnchangedFileIDs []types.FileID // IDs of unchanged files to associate with snapshot
}
// scanPhase performs the initial directory scan to identify files to process
@@ -554,7 +559,7 @@ func (s *Scanner) scanPhase(ctx context.Context, path string, result *ScanResult
estimatedTotal := int64(len(knownFiles))
var filesToProcess []*FileToProcess
var unchangedFileIDs []string // Just IDs - no new records needed
var unchangedFileIDs []types.FileID // Just IDs - no new records needed
var mu sync.Mutex
// Set up periodic status output
@@ -566,6 +571,11 @@ func (s *Scanner) scanPhase(ctx context.Context, path string, result *ScanResult
log.Debug("Starting directory walk", "path", path)
err := afero.Walk(s.fs, path, func(filePath string, info os.FileInfo, err error) error {
if err != nil {
if s.skipErrors {
log.Error("ERROR: Failed to access file (skipping due to --skip-errors)", "path", filePath, "error", err)
fmt.Printf("ERROR: Failed to access %s: %v (skipping)\n", filePath, err)
return nil // Continue scanning
}
log.Debug("Error accessing filesystem entry", "path", filePath, "error", err)
return err
}
@@ -604,7 +614,7 @@ func (s *Scanner) scanPhase(ctx context.Context, path string, result *ScanResult
FileInfo: info,
File: file,
})
} else if file.ID != "" {
} else if !file.ID.IsZero() {
// Unchanged file with existing ID - just need snapshot association
unchangedFileIDs = append(unchangedFileIDs, file.ID)
}
@@ -696,22 +706,23 @@ func (s *Scanner) checkFileInMemory(path string, info os.FileInfo, knownFiles ma
// Create file record with ID set upfront
// For new files, generate UUID immediately so it's available for chunk associations
// For existing files, reuse the existing ID
var fileID string
var fileID types.FileID
if exists {
fileID = existingFile.ID
} else {
fileID = uuid.New().String()
fileID = types.NewFileID()
}
file := &database.File{
ID: fileID,
Path: path,
MTime: info.ModTime(),
CTime: info.ModTime(), // afero doesn't provide ctime
Size: info.Size(),
Mode: uint32(info.Mode()),
UID: uid,
GID: gid,
ID: fileID,
Path: types.FilePath(path),
SourcePath: types.SourcePath(s.currentSourcePath), // Store source directory for restore path stripping
MTime: info.ModTime(),
CTime: info.ModTime(), // afero doesn't provide ctime
Size: info.Size(),
Mode: uint32(info.Mode()),
UID: uid,
GID: gid,
}
// New file - needs processing
@@ -734,7 +745,7 @@ func (s *Scanner) checkFileInMemory(path string, info os.FileInfo, knownFiles ma
// batchAddFilesToSnapshot adds existing file IDs to the snapshot association table
// This is used for unchanged files that already have records in the database
func (s *Scanner) batchAddFilesToSnapshot(ctx context.Context, fileIDs []string) error {
func (s *Scanner) batchAddFilesToSnapshot(ctx context.Context, fileIDs []types.FileID) error {
const batchSize = 1000
startTime := time.Now()
@@ -817,6 +828,13 @@ func (s *Scanner) processPhase(ctx context.Context, filesToProcess []*FileToProc
result.FilesSkipped++
continue
}
// Skip file read errors if --skip-errors is enabled
if s.skipErrors {
log.Error("ERROR: Failed to process file (skipping due to --skip-errors)", "path", fileToProcess.Path, "error", err)
fmt.Printf("ERROR: Failed to process %s: %v (skipping)\n", fileToProcess.Path, err)
result.FilesSkipped++
continue
}
return fmt.Errorf("processing file %s: %w", fileToProcess.Path, err)
}
@@ -881,8 +899,12 @@ func (s *Scanner) processPhase(ctx context.Context, filesToProcess []*FileToProc
for _, b := range blobs {
// Blob metadata is already stored incrementally during packing
// Just add the blob to the snapshot
err := s.repos.WithTx(ctx, func(ctx context.Context, tx *sql.Tx) error {
return s.repos.Snapshots.AddBlob(ctx, tx, s.snapshotID, b.ID, b.Hash)
blobID, err := types.ParseBlobID(b.ID)
if err != nil {
return fmt.Errorf("parsing blob ID: %w", err)
}
err = s.repos.WithTx(ctx, func(ctx context.Context, tx *sql.Tx) error {
return s.repos.Snapshots.AddBlob(ctx, tx, s.snapshotID, blobID, types.BlobHash(b.Hash))
})
if err != nil {
return fmt.Errorf("storing blob metadata: %w", err)
@@ -984,14 +1006,21 @@ func (s *Scanner) handleBlobReady(blobWithReader *blob.BlobWithReader) error {
if dbCtx == nil {
dbCtx = context.Background()
}
err := s.repos.WithTx(dbCtx, func(ctx context.Context, tx *sql.Tx) error {
// Parse blob ID for typed operations
finishedBlobID, err := types.ParseBlobID(finishedBlob.ID)
if err != nil {
return fmt.Errorf("parsing finished blob ID: %w", err)
}
err = s.repos.WithTx(dbCtx, func(ctx context.Context, tx *sql.Tx) error {
// Update blob upload timestamp
if err := s.repos.Blobs.UpdateUploaded(ctx, tx, finishedBlob.ID); err != nil {
return fmt.Errorf("updating blob upload timestamp: %w", err)
}
// Add the blob to the snapshot
if err := s.repos.Snapshots.AddBlob(ctx, tx, s.snapshotID, finishedBlob.ID, finishedBlob.Hash); err != nil {
if err := s.repos.Snapshots.AddBlob(ctx, tx, s.snapshotID, finishedBlobID, types.BlobHash(finishedBlob.Hash)); err != nil {
return fmt.Errorf("adding blob to snapshot: %w", err)
}
@@ -1094,7 +1123,7 @@ func (s *Scanner) processFileStreaming(ctx context.Context, fileToProcess *FileT
fileChunk: database.FileChunk{
FileID: fileToProcess.File.ID,
Idx: chunkIndex,
ChunkHash: chunk.Hash,
ChunkHash: types.ChunkHash(chunk.Hash),
},
offset: chunk.Offset,
size: chunk.Size,