Route scanner output through writer, fix S3 error handling, improve error messages
All checks were successful
check / check (push) Successful in 2m38s

Scanner now writes all user-facing output to an io.Writer (os.Stdout
when progress is enabled, io.Discard in --cron mode). This fixes the
long-standing issue where --cron still printed progress lines.

S3 HeadObject now properly distinguishes not-found from other errors
instead of swallowing all errors as not-found.

Config/CLI error messages include actionable hints (where to find the
config, how to generate keys, what storage options exist).
This commit is contained in:
2026-06-09 12:31:50 -04:00
parent 20d3a9ac8c
commit ebd6619638
6 changed files with 84 additions and 133 deletions

View File

@@ -5,6 +5,7 @@ import (
"database/sql"
"errors"
"fmt"
"io"
"os"
"path/filepath"
"strings"
@@ -58,7 +59,8 @@ type Scanner struct {
exclude []string // Glob patterns for files/directories to exclude
compiledExclude []compiledPattern // Compiled glob patterns
progress *ProgressReporter
skipErrors bool // Skip file read errors (log loudly but continue)
skipErrors bool // Skip file read errors (log loudly but continue)
output io.Writer // User-facing output (os.Stdout or io.Discard in cron mode)
// In-memory cache of known chunk hashes for fast existence checks
knownChunks map[string]struct{}
@@ -139,6 +141,11 @@ func NewScanner(cfg ScannerConfig) *Scanner {
// Compile exclude patterns
compiledExclude := compileExcludePatterns(cfg.Exclude)
output := io.Writer(io.Discard)
if cfg.EnableProgress {
output = os.Stdout
}
return &Scanner{
fs: cfg.FS,
chunker: chunker.NewChunker(cfg.ChunkSize),
@@ -152,6 +159,7 @@ func NewScanner(cfg ScannerConfig) *Scanner {
compiledExclude: compiledExclude,
progress: progress,
skipErrors: cfg.SkipErrors,
output: output,
pendingChunkHashes: make(map[string]struct{}),
}
}
@@ -202,7 +210,7 @@ func (s *Scanner) Scan(ctx context.Context, path string, snapshotID string) (*Sc
// Phase 1c: Associate unchanged files with this snapshot (no new records needed)
if len(scanResult.UnchangedFileIDs) > 0 {
fmt.Printf("Associating %s unchanged files with snapshot...\n", formatNumber(len(scanResult.UnchangedFileIDs)))
_, _ = fmt.Fprintf(s.output, "Associating %s unchanged files with snapshot...\n", formatNumber(len(scanResult.UnchangedFileIDs)))
if err := s.batchAddFilesToSnapshot(ctx, scanResult.UnchangedFileIDs); err != nil {
return nil, fmt.Errorf("associating unchanged files: %w", err)
}
@@ -213,13 +221,13 @@ func (s *Scanner) Scan(ctx context.Context, path string, snapshotID string) (*Sc
// Phase 2: Process files and create chunks
if len(filesToProcess) > 0 {
fmt.Printf("Processing %s files...\n", formatNumber(len(filesToProcess)))
_, _ = fmt.Fprintf(s.output, "Processing %s files...\n", formatNumber(len(filesToProcess)))
log.Info("Phase 2/3: Creating snapshot (chunking, compressing, encrypting, and uploading blobs)")
if err := s.processPhase(ctx, filesToProcess, result); err != nil {
return nil, fmt.Errorf("process phase failed: %w", err)
}
} else {
fmt.Printf("No files need processing. Creating metadata-only snapshot.\n")
_, _ = fmt.Fprintf(s.output, "No files need processing. Creating metadata-only snapshot.\n")
log.Info("Phase 2/3: Skipping (no files need processing, metadata-only snapshot)")
}
@@ -232,18 +240,18 @@ func (s *Scanner) Scan(ctx context.Context, path string, snapshotID string) (*Sc
// loadDatabaseState loads known files and chunks from the database into memory for fast lookup
// This avoids per-file and per-chunk database queries during the scan and process phases
func (s *Scanner) loadDatabaseState(ctx context.Context, path string) (map[string]*database.File, error) {
fmt.Println("Loading known files from database...")
_, _ = fmt.Fprintln(s.output, "Loading known files from database...")
knownFiles, err := s.loadKnownFiles(ctx, path)
if err != nil {
return nil, fmt.Errorf("loading known files: %w", err)
}
fmt.Printf("Loaded %s known files from database\n", formatNumber(len(knownFiles)))
_, _ = fmt.Fprintf(s.output, "Loaded %s known files from database\n", formatNumber(len(knownFiles)))
fmt.Println("Loading known chunks from database...")
_, _ = fmt.Fprintln(s.output, "Loading known chunks from database...")
if err := s.loadKnownChunks(ctx); err != nil {
return nil, fmt.Errorf("loading known chunks: %w", err)
}
fmt.Printf("Loaded %s known chunks from database\n", formatNumber(len(s.knownChunks)))
_, _ = fmt.Fprintf(s.output, "Loaded %s known chunks from database\n", formatNumber(len(s.knownChunks)))
return knownFiles, nil
}
@@ -267,17 +275,17 @@ func (s *Scanner) summarizeScanPhase(result *ScanResult, filesToProcess []*FileT
"files_skipped", result.FilesSkipped,
"bytes_skipped", humanize.Bytes(uint64(result.BytesSkipped)))
fmt.Printf("Scan complete: %s examined (%s), %s to process (%s)",
_, _ = fmt.Fprintf(s.output, "Scan complete: %s examined (%s), %s to process (%s)",
formatNumber(result.FilesScanned),
humanize.Bytes(uint64(totalSizeToProcess+result.BytesSkipped)),
formatNumber(len(filesToProcess)),
humanize.Bytes(uint64(totalSizeToProcess)))
if result.FilesDeleted > 0 {
fmt.Printf(", %s deleted (%s)",
_, _ = fmt.Fprintf(s.output, ", %s deleted (%s)",
formatNumber(result.FilesDeleted),
humanize.Bytes(uint64(result.BytesDeleted)))
}
fmt.Println()
_, _ = fmt.Fprintln(s.output)
}
// finalizeScanResult populates final blob statistics in the scan result
@@ -619,7 +627,7 @@ func (s *Scanner) scanPhase(ctx context.Context, path string, result *ScanResult
if err != nil {
if s.skipErrors {
log.Error("ERROR: Failed to access file (skipping due to --skip-errors)", "path", filePath, "error", err)
fmt.Printf("ERROR: Failed to access %s: %v (skipping)\n", filePath, err)
_, _ = fmt.Fprintf(s.output, "ERROR: Failed to access %s: %v (skipping)\n", filePath, err)
return nil // Continue scanning
}
log.Debug("Error accessing filesystem entry", "path", filePath, "error", err)
@@ -673,7 +681,7 @@ func (s *Scanner) scanPhase(ctx context.Context, path string, result *ScanResult
// Output periodic status
if time.Since(lastStatusTime) >= statusInterval {
printScanProgressLine(filesScanned, changedCount, estimatedTotal, startTime)
s.printScanProgressLine(filesScanned, changedCount, estimatedTotal, startTime)
lastStatusTime = time.Now()
}
@@ -714,7 +722,7 @@ func (s *Scanner) updateScanEntryStats(result *ScanResult, needsProcessing bool,
// printScanProgressLine prints a periodic progress line during the scan phase,
// showing files scanned, percentage complete (if estimate available), and ETA
func printScanProgressLine(filesScanned int64, changedCount int, estimatedTotal int64, startTime time.Time) {
func (s *Scanner) printScanProgressLine(filesScanned int64, changedCount int, estimatedTotal int64, startTime time.Time) {
elapsed := time.Since(startTime)
rate := float64(filesScanned) / elapsed.Seconds()
@@ -732,19 +740,19 @@ func printScanProgressLine(filesScanned int64, changedCount int, estimatedTotal
if rate > 0 && remaining > 0 {
eta = time.Duration(float64(remaining)/rate) * time.Second
}
fmt.Printf("Scan: %s files (~%.0f%%), %s changed/new, %.0f files/sec, %s elapsed",
_, _ = fmt.Fprintf(s.output, "Scan: %s files (~%.0f%%), %s changed/new, %.0f files/sec, %s elapsed",
formatNumber(int(filesScanned)),
pct,
formatNumber(changedCount),
rate,
elapsed.Round(time.Second))
if eta > 0 {
fmt.Printf(", ETA %s", eta.Round(time.Second))
_, _ = fmt.Fprintf(s.output, ", ETA %s", eta.Round(time.Second))
}
fmt.Println()
_, _ = fmt.Fprintln(s.output)
} else {
// First backup - no estimate available
fmt.Printf("Scan: %s files, %s changed/new, %.0f files/sec, %s elapsed\n",
_, _ = fmt.Fprintf(s.output, "Scan: %s files, %s changed/new, %.0f files/sec, %s elapsed\n",
formatNumber(int(filesScanned)),
formatNumber(changedCount),
rate,
@@ -849,7 +857,7 @@ func (s *Scanner) batchAddFilesToSnapshot(ctx context.Context, fileIDs []types.F
elapsed := time.Since(startTime)
rate := float64(end) / elapsed.Seconds()
pct := float64(end) / float64(len(fileIDs)) * 100
fmt.Printf("Associating files: %s/%s (%.1f%%), %.0f files/sec\n",
_, _ = fmt.Fprintf(s.output, "Associating files: %s/%s (%.1f%%), %.0f files/sec\n",
formatNumber(end), formatNumber(len(fileIDs)), pct, rate)
lastStatusTime = time.Now()
}
@@ -857,7 +865,7 @@ func (s *Scanner) batchAddFilesToSnapshot(ctx context.Context, fileIDs []types.F
elapsed := time.Since(startTime)
rate := float64(len(fileIDs)) / elapsed.Seconds()
fmt.Printf("Associated %s unchanged files in %s (%.0f files/sec)\n",
_, _ = fmt.Fprintf(s.output, "Associated %s unchanged files in %s (%.0f files/sec)\n",
formatNumber(len(fileIDs)), elapsed.Round(time.Second), rate)
return nil
@@ -905,7 +913,7 @@ func (s *Scanner) processPhase(ctx context.Context, filesToProcess []*FileToProc
// Output periodic status
if time.Since(lastStatusTime) >= statusInterval {
printProcessingProgress(filesProcessed, totalFiles, bytesProcessed, totalBytes, startTime)
s.printProcessingProgress(filesProcessed, totalFiles, bytesProcessed, totalBytes, startTime)
lastStatusTime = time.Now()
}
}
@@ -927,7 +935,7 @@ func (s *Scanner) processFileWithErrorHandling(ctx context.Context, fileToProces
// Skip file read errors if --skip-errors is enabled
if s.skipErrors {
log.Error("ERROR: Failed to process file (skipping due to --skip-errors)", "path", fileToProcess.Path, "error", err)
fmt.Printf("ERROR: Failed to process %s: %v (skipping)\n", fileToProcess.Path, err)
_, _ = fmt.Fprintf(s.output, "ERROR: Failed to process %s: %v (skipping)\n", fileToProcess.Path, err)
result.FilesSkipped++
return true, nil
}
@@ -938,7 +946,7 @@ func (s *Scanner) processFileWithErrorHandling(ctx context.Context, fileToProces
// printProcessingProgress prints a periodic progress line during the process phase,
// showing files processed, bytes transferred, throughput, and ETA
func printProcessingProgress(filesProcessed, totalFiles int, bytesProcessed, totalBytes int64, startTime time.Time) {
func (s *Scanner) printProcessingProgress(filesProcessed, totalFiles int, bytesProcessed, totalBytes int64, startTime time.Time) {
elapsed := time.Since(startTime)
pct := float64(bytesProcessed) / float64(totalBytes) * 100
byteRate := float64(bytesProcessed) / elapsed.Seconds()
@@ -952,7 +960,7 @@ func printProcessingProgress(filesProcessed, totalFiles int, bytesProcessed, tot
}
// Format: Progress [5.7k/610k] 6.7 GB/44 GB (15.4%), 106MB/sec, 500 files/sec, running for 1m30s, ETA: 5m49s
fmt.Printf("Progress [%s/%s] %s/%s (%.1f%%), %s/sec, %.0f files/sec, running for %s",
_, _ = fmt.Fprintf(s.output, "Progress [%s/%s] %s/%s (%.1f%%), %s/sec, %.0f files/sec, running for %s",
formatCompact(filesProcessed),
formatCompact(totalFiles),
humanize.Bytes(uint64(bytesProcessed)),
@@ -962,9 +970,9 @@ func printProcessingProgress(filesProcessed, totalFiles int, bytesProcessed, tot
fileRate,
elapsed.Round(time.Second))
if eta > 0 {
fmt.Printf(", ETA: %s", eta.Round(time.Second))
_, _ = fmt.Fprintf(s.output, ", ETA: %s", eta.Round(time.Second))
}
fmt.Println()
_, _ = fmt.Fprintln(s.output)
}
// finalizeProcessPhase flushes the packer, writes remaining pending files to the database,
@@ -1056,7 +1064,7 @@ func (s *Scanner) uploadBlobIfNeeded(ctx context.Context, blobPath string, blobW
if _, err := s.storage.Stat(ctx, blobPath); err == nil {
log.Info("Blob already exists in storage, skipping upload",
"hash", finishedBlob.Hash, "size", humanize.Bytes(uint64(finishedBlob.Compressed)))
fmt.Printf("Blob exists: %s (%s, skipped upload)\n",
_, _ = fmt.Fprintf(s.output, "Blob exists: %s (%s, skipped upload)\n",
finishedBlob.Hash[:12]+"...", humanize.Bytes(uint64(finishedBlob.Compressed)))
return true, nil
}
@@ -1071,7 +1079,7 @@ func (s *Scanner) uploadBlobIfNeeded(ctx context.Context, blobPath string, blobW
uploadDuration := time.Since(startTime)
uploadSpeedBps := float64(finishedBlob.Compressed) / uploadDuration.Seconds()
fmt.Printf("Blob stored: %s (%s, %s/sec, %s)\n",
_, _ = fmt.Fprintf(s.output, "Blob stored: %s (%s, %s/sec, %s)\n",
finishedBlob.Hash[:12]+"...",
humanize.Bytes(uint64(finishedBlob.Compressed)),
humanize.Bytes(uint64(uploadSpeedBps)),
@@ -1329,7 +1337,7 @@ func (s *Scanner) detectDeletedFilesFromMap(ctx context.Context, knownFiles map[
}
if result.FilesDeleted > 0 {
fmt.Printf("Found %s deleted files\n", formatNumber(result.FilesDeleted))
_, _ = fmt.Fprintf(s.output, "Found %s deleted files\n", formatNumber(result.FilesDeleted))
}
return nil