diff --git a/TODO.md b/TODO.md index 4cc2a5c..5db0d3b 100644 --- a/TODO.md +++ b/TODO.md @@ -1,105 +1,44 @@ # Vaultik 1.0 TODO -Linear list of tasks to complete before 1.0 release. +Remaining tasks before 1.0 release. -## Rclone Storage Backend (Complete) +## Must-fix -Add rclone as a storage backend via Go library import, allowing vaultik to use any of rclone's 70+ supported cloud storage providers. +1. Scanner uses bare `fmt.Printf` (bypasses `--cron` silence) + - Route all user-facing output through a writer gated by progress/cron flags + - Affects `internal/snapshot/scanner.go` (~24 bare print calls) -**Configuration:** -```yaml -storage_url: "rclone://myremote/path/to/backups" -``` -User must have rclone configured separately (via `rclone config`). +1. S3 client error type checking + - `internal/s3/client.go:207` has a TODO for proper error type checking -**Implementation Steps:** -1. [x] Add rclone dependency to go.mod -2. [x] Create `internal/storage/rclone.go` implementing `Storer` interface - - `NewRcloneStorer(remote, path)` - init with `configfile.Install()` and `fs.NewFs()` - - `Put` / `PutWithProgress` - use `operations.Rcat()` - - `Get` - use `fs.NewObject()` then `obj.Open()` - - `Stat` - use `fs.NewObject()` for size/metadata - - `Delete` - use `obj.Remove()` - - `List` / `ListStream` - use `operations.ListFn()` - - `Info` - return remote name -3. [x] Update `internal/storage/url.go` - parse `rclone://remote/path` URLs -4. [x] Update `internal/storage/module.go` - add rclone case to `storerFromURL()` -5. [x] Test with real rclone remote +1. Error message polish + - Add actionable suggestions for common failures (missing config, bad + storage URL, failed S3 auth, missing age key on restore/verify) + - Only `restore.go` currently has the "did you set VAULTIK_AGE_SECRET_KEY?" 
hint -**Error Mapping:** -- `fs.ErrorObjectNotFound` → `ErrNotFound` -- `fs.ErrorDirNotFound` → `ErrNotFound` -- `fs.ErrorNotFoundInConfigFile` → `ErrRemoteNotFound` (new) +## Done ---- +- [x] Rclone storage backend +- [x] Release process (goreleaser, CGO-free cross-compile, checksums) +- [x] End-to-end integration test (backup → restore → verify → byte-compare) +- [x] Restore integration tests +- [x] `--prune` flag on `snapshot create` (per-name retention + orphan blob cleanup) +- [x] Per-name purge retention (`--keep-latest` per snapshot name, `--snapshot` filter) +- [x] CLI surface dedup (removed top-level `purge` and `verify` duplicates) +- [x] Exit codes (create/restore now exit non-zero on failure) +- [x] Deep verify implemented and wired up +- [x] Shallow verify timestamp parsing fixed +- [x] Daemon mode removed +- [x] Makefile targets separated (`lint`/`test`/`fmt`/`check`) +- [x] CGO eliminated (pure-Go SQLite via modernc.org/sqlite) +- [x] Version set correctly in releases via goreleaser ldflags -## CLI Polish (Priority) - -1. Improve error messages throughout - - Ensure all errors include actionable context - - Add suggestions for common issues (e.g., "did you set VAULTIK_AGE_SECRET_KEY?") - -## Security (Priority) - -1. Audit encryption implementation - - Verify age encryption is used correctly - - Ensure no plaintext leaks in logs or errors - - Verify blob hashes are computed correctly - -1. Secure memory handling for secrets - - Clear S3 credentials from memory after client init - - Document that age_secret_key is env-var only (already implemented) - -## Testing - -1. Write integration tests for restore command - -1. Write end-to-end integration test - - Create backup - - Verify backup - - Restore backup - - Compare restored files to originals - -1. Add tests for edge cases - - Empty directories - - Symlinks - - Special characters in filenames - - Very large files (multi-GB) - - Many small files (100k+) - -1. 
Add tests for error conditions - - Network failures during upload - - Disk full during restore - - Corrupted blobs - - Missing blobs - -## Performance - -1. Profile and optimize restore performance - - Parallel blob downloads - - Streaming decompression/decryption - - Efficient chunk reassembly - -1. Add bandwidth limiting option - - `--bwlimit` flag for upload/download speed limiting - -## Documentation - -1. Add man page or --help improvements - - Detailed help for each command - - Examples in help output - -## Final Polish - -1. Ensure version is set correctly in releases - -1. Create release process - - Binary releases for supported platforms - - Checksums for binaries - - Release notes template - -1. Final code review - - Remove debug statements - - Ensure consistent code style +## Post-1.0 +1. Edge-case tests (empty dirs, symlinks, special chars, multi-GB files, 100k+ small files) +1. Error-condition tests (network failures, disk full, corrupted/missing blobs) +1. Parallel blob downloads during restore +1. Bandwidth limiting (`--bwlimit`) +1. Security audit of encryption (verify no plaintext leaks, correct hash computation) +1. Man pages / richer `--help` examples 1. Tag and release v1.0.0 diff --git a/internal/cli/root.go b/internal/cli/root.go index f03a30d..774b7a3 100644 --- a/internal/cli/root.go +++ b/internal/cli/root.go @@ -79,5 +79,5 @@ func ResolveConfigPath() (string, error) { return defaultPath, nil } - return "", fmt.Errorf("no config file specified, VAULTIK_CONFIG not set, and %s not found", defaultPath) + return "", fmt.Errorf("no config file found; specify one with --config, set VAULTIK_CONFIG, or create %s", defaultPath) } diff --git a/internal/config/config.go b/internal/config/config.go index 05602ae..72cd666 100644 --- a/internal/config/config.go +++ b/internal/config/config.go @@ -236,11 +236,11 @@ func Load(path string) (*Config, error) { // Returns an error describing the first validation failure encountered. 
func (c *Config) Validate() error { if len(c.AgeRecipients) == 0 { - return fmt.Errorf("at least one age_recipient is required") + return fmt.Errorf("at least one age_recipient is required (generate with: age-keygen)") } if len(c.Snapshots) == 0 { - return fmt.Errorf("at least one snapshot must be configured") + return fmt.Errorf("at least one snapshot must be configured (see config.example.yml)") } for name, snap := range c.Snapshots { @@ -299,7 +299,7 @@ func (c *Config) validateStorage() error { // Legacy S3 configuration if c.S3.Endpoint == "" { - return fmt.Errorf("s3.endpoint is required (or set storage_url)") + return fmt.Errorf("storage not configured; set storage_url or provide s3.endpoint + s3.bucket + credentials") } if c.S3.Bucket == "" { diff --git a/internal/s3/client.go b/internal/s3/client.go index 3dedb80..2861be7 100644 --- a/internal/s3/client.go +++ b/internal/s3/client.go @@ -2,6 +2,7 @@ package s3 import ( "context" + "errors" "io" "sync/atomic" @@ -10,6 +11,7 @@ import ( "github.com/aws/aws-sdk-go-v2/credentials" "github.com/aws/aws-sdk-go-v2/feature/s3/manager" "github.com/aws/aws-sdk-go-v2/service/s3" + s3types "github.com/aws/aws-sdk-go-v2/service/s3/types" "github.com/aws/smithy-go/logging" ) @@ -203,9 +205,12 @@ func (c *Client) HeadObject(ctx context.Context, key string) (bool, error) { Key: aws.String(fullKey), }) if err != nil { - // Check if it's a not found error - // TODO: Add proper error type checking - return false, nil + var notFound *s3types.NotFound + var noSuchKey *s3types.NoSuchKey + if errors.As(err, &notFound) || errors.As(err, &noSuchKey) { + return false, nil + } + return false, err } return true, nil } diff --git a/internal/snapshot/scanner.go b/internal/snapshot/scanner.go index 6043133..d6833c0 100644 --- a/internal/snapshot/scanner.go +++ b/internal/snapshot/scanner.go @@ -5,6 +5,7 @@ import ( "database/sql" "errors" "fmt" + "io" "os" "path/filepath" "strings" @@ -58,7 +59,8 @@ type Scanner struct { exclude []string // 
Glob patterns for files/directories to exclude compiledExclude []compiledPattern // Compiled glob patterns progress *ProgressReporter - skipErrors bool // Skip file read errors (log loudly but continue) + skipErrors bool // Skip file read errors (log loudly but continue) + output io.Writer // User-facing output (os.Stdout or io.Discard in cron mode) // In-memory cache of known chunk hashes for fast existence checks knownChunks map[string]struct{} @@ -139,6 +141,11 @@ func NewScanner(cfg ScannerConfig) *Scanner { // Compile exclude patterns compiledExclude := compileExcludePatterns(cfg.Exclude) + output := io.Writer(io.Discard) + if cfg.EnableProgress { + output = os.Stdout + } + return &Scanner{ fs: cfg.FS, chunker: chunker.NewChunker(cfg.ChunkSize), @@ -152,6 +159,7 @@ func NewScanner(cfg ScannerConfig) *Scanner { compiledExclude: compiledExclude, progress: progress, skipErrors: cfg.SkipErrors, + output: output, pendingChunkHashes: make(map[string]struct{}), } } @@ -202,7 +210,7 @@ func (s *Scanner) Scan(ctx context.Context, path string, snapshotID string) (*Sc // Phase 1c: Associate unchanged files with this snapshot (no new records needed) if len(scanResult.UnchangedFileIDs) > 0 { - fmt.Printf("Associating %s unchanged files with snapshot...\n", formatNumber(len(scanResult.UnchangedFileIDs))) + _, _ = fmt.Fprintf(s.output, "Associating %s unchanged files with snapshot...\n", formatNumber(len(scanResult.UnchangedFileIDs))) if err := s.batchAddFilesToSnapshot(ctx, scanResult.UnchangedFileIDs); err != nil { return nil, fmt.Errorf("associating unchanged files: %w", err) } @@ -213,13 +221,13 @@ func (s *Scanner) Scan(ctx context.Context, path string, snapshotID string) (*Sc // Phase 2: Process files and create chunks if len(filesToProcess) > 0 { - fmt.Printf("Processing %s files...\n", formatNumber(len(filesToProcess))) + _, _ = fmt.Fprintf(s.output, "Processing %s files...\n", formatNumber(len(filesToProcess))) log.Info("Phase 2/3: Creating snapshot (chunking, 
compressing, encrypting, and uploading blobs)") if err := s.processPhase(ctx, filesToProcess, result); err != nil { return nil, fmt.Errorf("process phase failed: %w", err) } } else { - fmt.Printf("No files need processing. Creating metadata-only snapshot.\n") + _, _ = fmt.Fprintf(s.output, "No files need processing. Creating metadata-only snapshot.\n") log.Info("Phase 2/3: Skipping (no files need processing, metadata-only snapshot)") } @@ -232,18 +240,18 @@ func (s *Scanner) Scan(ctx context.Context, path string, snapshotID string) (*Sc // loadDatabaseState loads known files and chunks from the database into memory for fast lookup // This avoids per-file and per-chunk database queries during the scan and process phases func (s *Scanner) loadDatabaseState(ctx context.Context, path string) (map[string]*database.File, error) { - fmt.Println("Loading known files from database...") + _, _ = fmt.Fprintln(s.output, "Loading known files from database...") knownFiles, err := s.loadKnownFiles(ctx, path) if err != nil { return nil, fmt.Errorf("loading known files: %w", err) } - fmt.Printf("Loaded %s known files from database\n", formatNumber(len(knownFiles))) + _, _ = fmt.Fprintf(s.output, "Loaded %s known files from database\n", formatNumber(len(knownFiles))) - fmt.Println("Loading known chunks from database...") + _, _ = fmt.Fprintln(s.output, "Loading known chunks from database...") if err := s.loadKnownChunks(ctx); err != nil { return nil, fmt.Errorf("loading known chunks: %w", err) } - fmt.Printf("Loaded %s known chunks from database\n", formatNumber(len(s.knownChunks))) + _, _ = fmt.Fprintf(s.output, "Loaded %s known chunks from database\n", formatNumber(len(s.knownChunks))) return knownFiles, nil } @@ -267,17 +275,17 @@ func (s *Scanner) summarizeScanPhase(result *ScanResult, filesToProcess []*FileT "files_skipped", result.FilesSkipped, "bytes_skipped", humanize.Bytes(uint64(result.BytesSkipped))) - fmt.Printf("Scan complete: %s examined (%s), %s to process (%s)", + _, 
_ = fmt.Fprintf(s.output, "Scan complete: %s examined (%s), %s to process (%s)", formatNumber(result.FilesScanned), humanize.Bytes(uint64(totalSizeToProcess+result.BytesSkipped)), formatNumber(len(filesToProcess)), humanize.Bytes(uint64(totalSizeToProcess))) if result.FilesDeleted > 0 { - fmt.Printf(", %s deleted (%s)", + _, _ = fmt.Fprintf(s.output, ", %s deleted (%s)", formatNumber(result.FilesDeleted), humanize.Bytes(uint64(result.BytesDeleted))) } - fmt.Println() + _, _ = fmt.Fprintln(s.output) } // finalizeScanResult populates final blob statistics in the scan result @@ -619,7 +627,7 @@ func (s *Scanner) scanPhase(ctx context.Context, path string, result *ScanResult if err != nil { if s.skipErrors { log.Error("ERROR: Failed to access file (skipping due to --skip-errors)", "path", filePath, "error", err) - fmt.Printf("ERROR: Failed to access %s: %v (skipping)\n", filePath, err) + _, _ = fmt.Fprintf(s.output, "ERROR: Failed to access %s: %v (skipping)\n", filePath, err) return nil // Continue scanning } log.Debug("Error accessing filesystem entry", "path", filePath, "error", err) @@ -673,7 +681,7 @@ func (s *Scanner) scanPhase(ctx context.Context, path string, result *ScanResult // Output periodic status if time.Since(lastStatusTime) >= statusInterval { - printScanProgressLine(filesScanned, changedCount, estimatedTotal, startTime) + s.printScanProgressLine(filesScanned, changedCount, estimatedTotal, startTime) lastStatusTime = time.Now() } @@ -714,7 +722,7 @@ func (s *Scanner) updateScanEntryStats(result *ScanResult, needsProcessing bool, // printScanProgressLine prints a periodic progress line during the scan phase, // showing files scanned, percentage complete (if estimate available), and ETA -func printScanProgressLine(filesScanned int64, changedCount int, estimatedTotal int64, startTime time.Time) { +func (s *Scanner) printScanProgressLine(filesScanned int64, changedCount int, estimatedTotal int64, startTime time.Time) { elapsed := time.Since(startTime) rate 
:= float64(filesScanned) / elapsed.Seconds() @@ -732,19 +740,19 @@ func printScanProgressLine(filesScanned int64, changedCount int, estimatedTotal if rate > 0 && remaining > 0 { eta = time.Duration(float64(remaining)/rate) * time.Second } - fmt.Printf("Scan: %s files (~%.0f%%), %s changed/new, %.0f files/sec, %s elapsed", + _, _ = fmt.Fprintf(s.output, "Scan: %s files (~%.0f%%), %s changed/new, %.0f files/sec, %s elapsed", formatNumber(int(filesScanned)), pct, formatNumber(changedCount), rate, elapsed.Round(time.Second)) if eta > 0 { - fmt.Printf(", ETA %s", eta.Round(time.Second)) + _, _ = fmt.Fprintf(s.output, ", ETA %s", eta.Round(time.Second)) } - fmt.Println() + _, _ = fmt.Fprintln(s.output) } else { // First backup - no estimate available - fmt.Printf("Scan: %s files, %s changed/new, %.0f files/sec, %s elapsed\n", + _, _ = fmt.Fprintf(s.output, "Scan: %s files, %s changed/new, %.0f files/sec, %s elapsed\n", formatNumber(int(filesScanned)), formatNumber(changedCount), rate, @@ -849,7 +857,7 @@ func (s *Scanner) batchAddFilesToSnapshot(ctx context.Context, fileIDs []types.F elapsed := time.Since(startTime) rate := float64(end) / elapsed.Seconds() pct := float64(end) / float64(len(fileIDs)) * 100 - fmt.Printf("Associating files: %s/%s (%.1f%%), %.0f files/sec\n", + _, _ = fmt.Fprintf(s.output, "Associating files: %s/%s (%.1f%%), %.0f files/sec\n", formatNumber(end), formatNumber(len(fileIDs)), pct, rate) lastStatusTime = time.Now() } @@ -857,7 +865,7 @@ func (s *Scanner) batchAddFilesToSnapshot(ctx context.Context, fileIDs []types.F elapsed := time.Since(startTime) rate := float64(len(fileIDs)) / elapsed.Seconds() - fmt.Printf("Associated %s unchanged files in %s (%.0f files/sec)\n", + _, _ = fmt.Fprintf(s.output, "Associated %s unchanged files in %s (%.0f files/sec)\n", formatNumber(len(fileIDs)), elapsed.Round(time.Second), rate) return nil @@ -905,7 +913,7 @@ func (s *Scanner) processPhase(ctx context.Context, filesToProcess []*FileToProc // Output periodic 
status if time.Since(lastStatusTime) >= statusInterval { - printProcessingProgress(filesProcessed, totalFiles, bytesProcessed, totalBytes, startTime) + s.printProcessingProgress(filesProcessed, totalFiles, bytesProcessed, totalBytes, startTime) lastStatusTime = time.Now() } } @@ -927,7 +935,7 @@ func (s *Scanner) processFileWithErrorHandling(ctx context.Context, fileToProces // Skip file read errors if --skip-errors is enabled if s.skipErrors { log.Error("ERROR: Failed to process file (skipping due to --skip-errors)", "path", fileToProcess.Path, "error", err) - fmt.Printf("ERROR: Failed to process %s: %v (skipping)\n", fileToProcess.Path, err) + _, _ = fmt.Fprintf(s.output, "ERROR: Failed to process %s: %v (skipping)\n", fileToProcess.Path, err) result.FilesSkipped++ return true, nil } @@ -938,7 +946,7 @@ func (s *Scanner) processFileWithErrorHandling(ctx context.Context, fileToProces // printProcessingProgress prints a periodic progress line during the process phase, // showing files processed, bytes transferred, throughput, and ETA -func printProcessingProgress(filesProcessed, totalFiles int, bytesProcessed, totalBytes int64, startTime time.Time) { +func (s *Scanner) printProcessingProgress(filesProcessed, totalFiles int, bytesProcessed, totalBytes int64, startTime time.Time) { elapsed := time.Since(startTime) pct := float64(bytesProcessed) / float64(totalBytes) * 100 byteRate := float64(bytesProcessed) / elapsed.Seconds() @@ -952,7 +960,7 @@ func printProcessingProgress(filesProcessed, totalFiles int, bytesProcessed, tot } // Format: Progress [5.7k/610k] 6.7 GB/44 GB (15.4%), 106MB/sec, 500 files/sec, running for 1m30s, ETA: 5m49s - fmt.Printf("Progress [%s/%s] %s/%s (%.1f%%), %s/sec, %.0f files/sec, running for %s", + _, _ = fmt.Fprintf(s.output, "Progress [%s/%s] %s/%s (%.1f%%), %s/sec, %.0f files/sec, running for %s", formatCompact(filesProcessed), formatCompact(totalFiles), humanize.Bytes(uint64(bytesProcessed)), @@ -962,9 +970,9 @@ func 
printProcessingProgress(filesProcessed, totalFiles int, bytesProcessed, tot fileRate, elapsed.Round(time.Second)) if eta > 0 { - fmt.Printf(", ETA: %s", eta.Round(time.Second)) + _, _ = fmt.Fprintf(s.output, ", ETA: %s", eta.Round(time.Second)) } - fmt.Println() + _, _ = fmt.Fprintln(s.output) } // finalizeProcessPhase flushes the packer, writes remaining pending files to the database, @@ -1056,7 +1064,7 @@ func (s *Scanner) uploadBlobIfNeeded(ctx context.Context, blobPath string, blobW if _, err := s.storage.Stat(ctx, blobPath); err == nil { log.Info("Blob already exists in storage, skipping upload", "hash", finishedBlob.Hash, "size", humanize.Bytes(uint64(finishedBlob.Compressed))) - fmt.Printf("Blob exists: %s (%s, skipped upload)\n", + _, _ = fmt.Fprintf(s.output, "Blob exists: %s (%s, skipped upload)\n", finishedBlob.Hash[:12]+"...", humanize.Bytes(uint64(finishedBlob.Compressed))) return true, nil } @@ -1071,7 +1079,7 @@ func (s *Scanner) uploadBlobIfNeeded(ctx context.Context, blobPath string, blobW uploadDuration := time.Since(startTime) uploadSpeedBps := float64(finishedBlob.Compressed) / uploadDuration.Seconds() - fmt.Printf("Blob stored: %s (%s, %s/sec, %s)\n", + _, _ = fmt.Fprintf(s.output, "Blob stored: %s (%s, %s/sec, %s)\n", finishedBlob.Hash[:12]+"...", humanize.Bytes(uint64(finishedBlob.Compressed)), humanize.Bytes(uint64(uploadSpeedBps)), @@ -1329,7 +1337,7 @@ func (s *Scanner) detectDeletedFilesFromMap(ctx context.Context, knownFiles map[ } if result.FilesDeleted > 0 { - fmt.Printf("Found %s deleted files\n", formatNumber(result.FilesDeleted)) + _, _ = fmt.Fprintf(s.output, "Found %s deleted files\n", formatNumber(result.FilesDeleted)) } return nil diff --git a/internal/vaultik/verify.go b/internal/vaultik/verify.go index ba68ce3..542ae9f 100644 --- a/internal/vaultik/verify.go +++ b/internal/vaultik/verify.go @@ -57,9 +57,8 @@ func (v *Vaultik) RunDeepVerify(snapshotID string, opts *VerifyOptions) error { } if !v.CanDecrypt() { - return 
v.deepVerifyFailure(result, opts, - "VAULTIK_AGE_SECRET_KEY environment variable not set - required for deep verification", - fmt.Errorf("VAULTIK_AGE_SECRET_KEY environment variable not set - required for deep verification")) + msg := "VAULTIK_AGE_SECRET_KEY not set; required for deep verification" + return v.deepVerifyFailure(result, opts, msg, fmt.Errorf("%s", msg)) } log.Info("Starting snapshot verification", "snapshot_id", snapshotID, "mode", "deep")