Add custom types, version command, and restore --verify flag
- Add internal/types package with type-safe wrappers for IDs, hashes, paths, and credentials (FileID, BlobID, ChunkHash, etc.) - Implement driver.Valuer and sql.Scanner for UUID-based types - Add `vaultik version` command showing version, commit, go version - Add `--verify` flag to restore command that checksums all restored files against expected chunk hashes with progress bar - Remove fetch.go (dead code, functionality in restore) - Clean up TODO.md, remove completed items - Update all database and snapshot code to use new custom types
This commit is contained in:
@@ -16,9 +16,9 @@ import (
|
||||
"git.eeqj.de/sneak/vaultik/internal/database"
|
||||
"git.eeqj.de/sneak/vaultik/internal/log"
|
||||
"git.eeqj.de/sneak/vaultik/internal/storage"
|
||||
"git.eeqj.de/sneak/vaultik/internal/types"
|
||||
"github.com/dustin/go-humanize"
|
||||
"github.com/gobwas/glob"
|
||||
"github.com/google/uuid"
|
||||
"github.com/spf13/afero"
|
||||
)
|
||||
|
||||
@@ -45,18 +45,20 @@ type compiledPattern struct {
|
||||
|
||||
// Scanner scans directories and populates the database with file and chunk information
|
||||
type Scanner struct {
|
||||
fs afero.Fs
|
||||
chunker *chunker.Chunker
|
||||
packer *blob.Packer
|
||||
repos *database.Repositories
|
||||
storage storage.Storer
|
||||
maxBlobSize int64
|
||||
compressionLevel int
|
||||
ageRecipient string
|
||||
snapshotID string // Current snapshot being processed
|
||||
exclude []string // Glob patterns for files/directories to exclude
|
||||
compiledExclude []compiledPattern // Compiled glob patterns
|
||||
progress *ProgressReporter
|
||||
fs afero.Fs
|
||||
chunker *chunker.Chunker
|
||||
packer *blob.Packer
|
||||
repos *database.Repositories
|
||||
storage storage.Storer
|
||||
maxBlobSize int64
|
||||
compressionLevel int
|
||||
ageRecipient string
|
||||
snapshotID string // Current snapshot being processed
|
||||
currentSourcePath string // Current source directory being scanned (for restore path stripping)
|
||||
exclude []string // Glob patterns for files/directories to exclude
|
||||
compiledExclude []compiledPattern // Compiled glob patterns
|
||||
progress *ProgressReporter
|
||||
skipErrors bool // Skip file read errors (log loudly but continue)
|
||||
|
||||
// In-memory cache of known chunk hashes for fast existence checks
|
||||
knownChunks map[string]struct{}
|
||||
@@ -90,6 +92,7 @@ type ScannerConfig struct {
|
||||
AgeRecipients []string // Optional, empty means no encryption
|
||||
EnableProgress bool // Enable progress reporting
|
||||
Exclude []string // Glob patterns for files/directories to exclude
|
||||
SkipErrors bool // Skip file read errors (log loudly but continue)
|
||||
}
|
||||
|
||||
// ScanResult contains the results of a scan operation
|
||||
@@ -148,6 +151,7 @@ func NewScanner(cfg ScannerConfig) *Scanner {
|
||||
exclude: cfg.Exclude,
|
||||
compiledExclude: compiledExclude,
|
||||
progress: progress,
|
||||
skipErrors: cfg.SkipErrors,
|
||||
pendingChunkHashes: make(map[string]struct{}),
|
||||
}
|
||||
}
|
||||
@@ -155,6 +159,7 @@ func NewScanner(cfg ScannerConfig) *Scanner {
|
||||
// Scan scans a directory and populates the database
|
||||
func (s *Scanner) Scan(ctx context.Context, path string, snapshotID string) (*ScanResult, error) {
|
||||
s.snapshotID = snapshotID
|
||||
s.currentSourcePath = path // Store source path for file records (used during restore)
|
||||
s.scanCtx = ctx
|
||||
result := &ScanResult{
|
||||
StartTime: time.Now().UTC(),
|
||||
@@ -284,7 +289,7 @@ func (s *Scanner) loadKnownFiles(ctx context.Context, path string) (map[string]*
|
||||
|
||||
result := make(map[string]*database.File, len(files))
|
||||
for _, f := range files {
|
||||
result[f.Path] = f
|
||||
result[f.Path.String()] = f
|
||||
}
|
||||
|
||||
return result, nil
|
||||
@@ -301,7 +306,7 @@ func (s *Scanner) loadKnownChunks(ctx context.Context) error {
|
||||
s.knownChunksMu.Lock()
|
||||
s.knownChunks = make(map[string]struct{}, len(chunks))
|
||||
for _, c := range chunks {
|
||||
s.knownChunks[c.ChunkHash] = struct{}{}
|
||||
s.knownChunks[c.ChunkHash.String()] = struct{}{}
|
||||
}
|
||||
s.knownChunksMu.Unlock()
|
||||
|
||||
@@ -432,7 +437,7 @@ func (s *Scanner) flushCompletedPendingFiles(ctx context.Context) error {
|
||||
for _, data := range s.pendingFiles {
|
||||
allChunksCommitted := true
|
||||
for _, fc := range data.fileChunks {
|
||||
if s.isChunkPending(fc.ChunkHash) {
|
||||
if s.isChunkPending(fc.ChunkHash.String()) {
|
||||
allChunksCommitted = false
|
||||
break
|
||||
}
|
||||
@@ -463,7 +468,7 @@ func (s *Scanner) flushCompletedPendingFiles(ctx context.Context) error {
|
||||
collectStart := time.Now()
|
||||
var allFileChunks []database.FileChunk
|
||||
var allChunkFiles []database.ChunkFile
|
||||
var allFileIDs []string
|
||||
var allFileIDs []types.FileID
|
||||
var allFiles []*database.File
|
||||
|
||||
for _, data := range canFlush {
|
||||
@@ -542,7 +547,7 @@ func (s *Scanner) flushCompletedPendingFiles(ctx context.Context) error {
|
||||
// ScanPhaseResult contains the results of the scan phase
|
||||
type ScanPhaseResult struct {
|
||||
FilesToProcess []*FileToProcess
|
||||
UnchangedFileIDs []string // IDs of unchanged files to associate with snapshot
|
||||
UnchangedFileIDs []types.FileID // IDs of unchanged files to associate with snapshot
|
||||
}
|
||||
|
||||
// scanPhase performs the initial directory scan to identify files to process
|
||||
@@ -554,7 +559,7 @@ func (s *Scanner) scanPhase(ctx context.Context, path string, result *ScanResult
|
||||
estimatedTotal := int64(len(knownFiles))
|
||||
|
||||
var filesToProcess []*FileToProcess
|
||||
var unchangedFileIDs []string // Just IDs - no new records needed
|
||||
var unchangedFileIDs []types.FileID // Just IDs - no new records needed
|
||||
var mu sync.Mutex
|
||||
|
||||
// Set up periodic status output
|
||||
@@ -566,6 +571,11 @@ func (s *Scanner) scanPhase(ctx context.Context, path string, result *ScanResult
|
||||
log.Debug("Starting directory walk", "path", path)
|
||||
err := afero.Walk(s.fs, path, func(filePath string, info os.FileInfo, err error) error {
|
||||
if err != nil {
|
||||
if s.skipErrors {
|
||||
log.Error("ERROR: Failed to access file (skipping due to --skip-errors)", "path", filePath, "error", err)
|
||||
fmt.Printf("ERROR: Failed to access %s: %v (skipping)\n", filePath, err)
|
||||
return nil // Continue scanning
|
||||
}
|
||||
log.Debug("Error accessing filesystem entry", "path", filePath, "error", err)
|
||||
return err
|
||||
}
|
||||
@@ -604,7 +614,7 @@ func (s *Scanner) scanPhase(ctx context.Context, path string, result *ScanResult
|
||||
FileInfo: info,
|
||||
File: file,
|
||||
})
|
||||
} else if file.ID != "" {
|
||||
} else if !file.ID.IsZero() {
|
||||
// Unchanged file with existing ID - just need snapshot association
|
||||
unchangedFileIDs = append(unchangedFileIDs, file.ID)
|
||||
}
|
||||
@@ -696,22 +706,23 @@ func (s *Scanner) checkFileInMemory(path string, info os.FileInfo, knownFiles ma
|
||||
// Create file record with ID set upfront
|
||||
// For new files, generate UUID immediately so it's available for chunk associations
|
||||
// For existing files, reuse the existing ID
|
||||
var fileID string
|
||||
var fileID types.FileID
|
||||
if exists {
|
||||
fileID = existingFile.ID
|
||||
} else {
|
||||
fileID = uuid.New().String()
|
||||
fileID = types.NewFileID()
|
||||
}
|
||||
|
||||
file := &database.File{
|
||||
ID: fileID,
|
||||
Path: path,
|
||||
MTime: info.ModTime(),
|
||||
CTime: info.ModTime(), // afero doesn't provide ctime
|
||||
Size: info.Size(),
|
||||
Mode: uint32(info.Mode()),
|
||||
UID: uid,
|
||||
GID: gid,
|
||||
ID: fileID,
|
||||
Path: types.FilePath(path),
|
||||
SourcePath: types.SourcePath(s.currentSourcePath), // Store source directory for restore path stripping
|
||||
MTime: info.ModTime(),
|
||||
CTime: info.ModTime(), // afero doesn't provide ctime
|
||||
Size: info.Size(),
|
||||
Mode: uint32(info.Mode()),
|
||||
UID: uid,
|
||||
GID: gid,
|
||||
}
|
||||
|
||||
// New file - needs processing
|
||||
@@ -734,7 +745,7 @@ func (s *Scanner) checkFileInMemory(path string, info os.FileInfo, knownFiles ma
|
||||
|
||||
// batchAddFilesToSnapshot adds existing file IDs to the snapshot association table
|
||||
// This is used for unchanged files that already have records in the database
|
||||
func (s *Scanner) batchAddFilesToSnapshot(ctx context.Context, fileIDs []string) error {
|
||||
func (s *Scanner) batchAddFilesToSnapshot(ctx context.Context, fileIDs []types.FileID) error {
|
||||
const batchSize = 1000
|
||||
|
||||
startTime := time.Now()
|
||||
@@ -817,6 +828,13 @@ func (s *Scanner) processPhase(ctx context.Context, filesToProcess []*FileToProc
|
||||
result.FilesSkipped++
|
||||
continue
|
||||
}
|
||||
// Skip file read errors if --skip-errors is enabled
|
||||
if s.skipErrors {
|
||||
log.Error("ERROR: Failed to process file (skipping due to --skip-errors)", "path", fileToProcess.Path, "error", err)
|
||||
fmt.Printf("ERROR: Failed to process %s: %v (skipping)\n", fileToProcess.Path, err)
|
||||
result.FilesSkipped++
|
||||
continue
|
||||
}
|
||||
return fmt.Errorf("processing file %s: %w", fileToProcess.Path, err)
|
||||
}
|
||||
|
||||
@@ -881,8 +899,12 @@ func (s *Scanner) processPhase(ctx context.Context, filesToProcess []*FileToProc
|
||||
for _, b := range blobs {
|
||||
// Blob metadata is already stored incrementally during packing
|
||||
// Just add the blob to the snapshot
|
||||
err := s.repos.WithTx(ctx, func(ctx context.Context, tx *sql.Tx) error {
|
||||
return s.repos.Snapshots.AddBlob(ctx, tx, s.snapshotID, b.ID, b.Hash)
|
||||
blobID, err := types.ParseBlobID(b.ID)
|
||||
if err != nil {
|
||||
return fmt.Errorf("parsing blob ID: %w", err)
|
||||
}
|
||||
err = s.repos.WithTx(ctx, func(ctx context.Context, tx *sql.Tx) error {
|
||||
return s.repos.Snapshots.AddBlob(ctx, tx, s.snapshotID, blobID, types.BlobHash(b.Hash))
|
||||
})
|
||||
if err != nil {
|
||||
return fmt.Errorf("storing blob metadata: %w", err)
|
||||
@@ -984,14 +1006,21 @@ func (s *Scanner) handleBlobReady(blobWithReader *blob.BlobWithReader) error {
|
||||
if dbCtx == nil {
|
||||
dbCtx = context.Background()
|
||||
}
|
||||
err := s.repos.WithTx(dbCtx, func(ctx context.Context, tx *sql.Tx) error {
|
||||
|
||||
// Parse blob ID for typed operations
|
||||
finishedBlobID, err := types.ParseBlobID(finishedBlob.ID)
|
||||
if err != nil {
|
||||
return fmt.Errorf("parsing finished blob ID: %w", err)
|
||||
}
|
||||
|
||||
err = s.repos.WithTx(dbCtx, func(ctx context.Context, tx *sql.Tx) error {
|
||||
// Update blob upload timestamp
|
||||
if err := s.repos.Blobs.UpdateUploaded(ctx, tx, finishedBlob.ID); err != nil {
|
||||
return fmt.Errorf("updating blob upload timestamp: %w", err)
|
||||
}
|
||||
|
||||
// Add the blob to the snapshot
|
||||
if err := s.repos.Snapshots.AddBlob(ctx, tx, s.snapshotID, finishedBlob.ID, finishedBlob.Hash); err != nil {
|
||||
if err := s.repos.Snapshots.AddBlob(ctx, tx, s.snapshotID, finishedBlobID, types.BlobHash(finishedBlob.Hash)); err != nil {
|
||||
return fmt.Errorf("adding blob to snapshot: %w", err)
|
||||
}
|
||||
|
||||
@@ -1094,7 +1123,7 @@ func (s *Scanner) processFileStreaming(ctx context.Context, fileToProcess *FileT
|
||||
fileChunk: database.FileChunk{
|
||||
FileID: fileToProcess.File.ID,
|
||||
Idx: chunkIndex,
|
||||
ChunkHash: chunk.Hash,
|
||||
ChunkHash: types.ChunkHash(chunk.Hash),
|
||||
},
|
||||
offset: chunk.Offset,
|
||||
size: chunk.Size,
|
||||
|
||||
Reference in New Issue
Block a user