Refactor: Move Vaultik struct and methods to internal/vaultik package

- Created new internal/vaultik package with unified Vaultik struct
- Moved all command methods (snapshot, info, prune, verify) from CLI to vaultik package
- Implemented single constructor that handles crypto capabilities automatically
- Added CanDecrypt() method to check if decryption is available
- Updated all CLI commands to use the new vaultik.Vaultik struct
- Removed old fragmented App structs and WithCrypto wrapper
- Fixed context management - Vaultik now owns its context lifecycle
- Cleaned up package imports and dependencies

This creates a cleaner separation between CLI/Cobra code and business logic,
with all vaultik operations now centralized in the internal/vaultik package.
This commit is contained in:
2025-07-26 14:47:26 +02:00
parent 5c70405a85
commit e29a995120
22 changed files with 1494 additions and 1320 deletions

View File

@@ -22,6 +22,9 @@ const (
// DetailInterval defines how often multi-line detailed status reports are printed.
// These reports include comprehensive statistics about files, chunks, blobs, and uploads.
DetailInterval = 60 * time.Second
// UploadProgressInterval defines how often upload progress messages are logged.
UploadProgressInterval = 15 * time.Second
)
// ProgressStats holds atomic counters for progress tracking
@@ -52,9 +55,10 @@ type ProgressStats struct {
// UploadInfo tracks current upload progress
type UploadInfo struct {
BlobHash string
Size int64
StartTime time.Time
BlobHash string
Size int64
StartTime time.Time
LastLogTime time.Time
}
// ProgressReporter handles periodic progress reporting
@@ -330,6 +334,11 @@ func (pr *ProgressReporter) ReportUploadStart(blobHash string, size int64) {
StartTime: time.Now().UTC(),
}
pr.stats.CurrentUpload.Store(info)
// Log the start of upload
log.Info("Starting blob upload to S3",
"hash", blobHash[:8]+"...",
"size", humanize.Bytes(uint64(size)))
}
// ReportUploadComplete marks the completion of a blob upload
@@ -377,36 +386,34 @@ func (pr *ProgressReporter) UpdateChunkingActivity() {
func (pr *ProgressReporter) ReportUploadProgress(blobHash string, bytesUploaded, totalSize int64, instantSpeed float64) {
// Update the current upload info with progress
if uploadInfo, ok := pr.stats.CurrentUpload.Load().(*UploadInfo); ok && uploadInfo != nil {
// Format speed in bits/second
bitsPerSec := instantSpeed * 8
var speedStr string
if bitsPerSec >= 1e9 {
speedStr = fmt.Sprintf("%.1fGbit/sec", bitsPerSec/1e9)
} else if bitsPerSec >= 1e6 {
speedStr = fmt.Sprintf("%.0fMbit/sec", bitsPerSec/1e6)
} else if bitsPerSec >= 1e3 {
speedStr = fmt.Sprintf("%.0fKbit/sec", bitsPerSec/1e3)
} else {
speedStr = fmt.Sprintf("%.0fbit/sec", bitsPerSec)
now := time.Now()
// Only log at the configured interval
if now.Sub(uploadInfo.LastLogTime) >= UploadProgressInterval {
// Format speed in bits/second using humanize
bitsPerSec := instantSpeed * 8
speedStr := humanize.SI(bitsPerSec, "bit/sec")
percent := float64(bytesUploaded) / float64(totalSize) * 100
// Calculate ETA based on current speed
etaStr := "unknown"
if instantSpeed > 0 && bytesUploaded < totalSize {
remainingBytes := totalSize - bytesUploaded
remainingSeconds := float64(remainingBytes) / instantSpeed
eta := time.Duration(remainingSeconds * float64(time.Second))
etaStr = formatDuration(eta)
}
log.Info("Blob upload progress",
"hash", blobHash[:8]+"...",
"progress", fmt.Sprintf("%.1f%%", percent),
"uploaded", humanize.Bytes(uint64(bytesUploaded)),
"total", humanize.Bytes(uint64(totalSize)),
"speed", speedStr,
"eta", etaStr)
uploadInfo.LastLogTime = now
}
percent := float64(bytesUploaded) / float64(totalSize) * 100
// Calculate ETA based on current speed
etaStr := "unknown"
if instantSpeed > 0 && bytesUploaded < totalSize {
remainingBytes := totalSize - bytesUploaded
remainingSeconds := float64(remainingBytes) / instantSpeed
eta := time.Duration(remainingSeconds * float64(time.Second))
etaStr = formatDuration(eta)
}
log.Info("Blob upload progress",
"hash", blobHash[:8]+"...",
"progress", fmt.Sprintf("%.1f%%", percent),
"uploaded", humanize.Bytes(uint64(bytesUploaded)),
"total", humanize.Bytes(uint64(totalSize)),
"speed", speedStr,
"eta", etaStr)
}
}

View File

@@ -69,8 +69,10 @@ type ScannerConfig struct {
type ScanResult struct {
FilesScanned int
FilesSkipped int
FilesDeleted int
BytesScanned int64
BytesSkipped int64
BytesDeleted int64
ChunksCreated int
BlobsCreated int
StartTime time.Time
@@ -138,6 +140,11 @@ func (s *Scanner) Scan(ctx context.Context, path string, snapshotID string) (*Sc
defer s.progress.Stop()
}
// Phase 0: Check for deleted files from previous snapshots
if err := s.detectDeletedFiles(ctx, path, result); err != nil {
return nil, fmt.Errorf("detecting deleted files: %w", err)
}
// Phase 1: Scan directory and collect files to process
log.Info("Phase 1/3: Scanning directory structure")
filesToProcess, err := s.scanPhase(ctx, path, result)
@@ -163,28 +170,29 @@ func (s *Scanner) Scan(ctx context.Context, path string, snapshotID string) (*Sc
"files_skipped", result.FilesSkipped,
"bytes_skipped", humanize.Bytes(uint64(result.BytesSkipped)))
// Print detailed scan summary
fmt.Printf("\n=== Scan Summary ===\n")
fmt.Printf("Total files examined: %d\n", result.FilesScanned)
fmt.Printf("Files with content changes: %d\n", len(filesToProcess))
fmt.Printf("Files with unchanged content: %d\n", result.FilesSkipped)
fmt.Printf("Total size of changed files: %s\n", humanize.Bytes(uint64(totalSizeToProcess)))
fmt.Printf("Total size of unchanged files: %s\n", humanize.Bytes(uint64(result.BytesSkipped)))
if len(filesToProcess) > 0 {
fmt.Printf("\nStarting snapshot of %d changed files...\n\n", len(filesToProcess))
} else {
fmt.Printf("\nNo file contents have changed.\n")
fmt.Printf("Creating metadata-only snapshot to capture current state...\n\n")
// Print scan summary
fmt.Printf("Scan complete: %s examined (%s), %s to process (%s)",
formatNumber(result.FilesScanned),
humanize.Bytes(uint64(totalSizeToProcess+result.BytesSkipped)),
formatNumber(len(filesToProcess)),
humanize.Bytes(uint64(totalSizeToProcess)))
if result.FilesDeleted > 0 {
fmt.Printf(", %s deleted (%s)",
formatNumber(result.FilesDeleted),
humanize.Bytes(uint64(result.BytesDeleted)))
}
fmt.Println()
// Phase 2: Process files and create chunks
if len(filesToProcess) > 0 {
fmt.Printf("Processing %s files...\n", formatNumber(len(filesToProcess)))
log.Info("Phase 2/3: Creating snapshot (chunking, compressing, encrypting, and uploading blobs)")
if err := s.processPhase(ctx, filesToProcess, result); err != nil {
return nil, fmt.Errorf("process phase failed: %w", err)
}
} else {
log.Info("Phase 2/3: Skipping (no file contents changed, metadata-only snapshot)")
fmt.Printf("No files need processing. Creating metadata-only snapshot.\n")
log.Info("Phase 2/3: Skipping (no files need processing, metadata-only snapshot)")
}
// Get final stats from packer
@@ -266,10 +274,9 @@ func (s *Scanner) scanPhase(ctx context.Context, path string, result *ScanResult
changedCount := len(filesToProcess)
mu.Unlock()
fmt.Printf("Scan progress: %d files examined, %s total size, %d files changed\n",
filesScanned,
humanize.Bytes(uint64(bytesScanned)),
changedCount)
fmt.Printf("Scan progress: %s files examined, %s changed\n",
formatNumber(int(filesScanned)),
formatNumber(changedCount))
lastStatusTime = time.Now()
}
@@ -320,8 +327,7 @@ func (s *Scanner) processPhase(ctx context.Context, filesToProcess []*FileToProc
eta = elapsed / time.Duration(filesProcessed) * time.Duration(remaining)
}
fmt.Printf("Snapshot progress: %d/%d files processed, %d chunks created, %d blobs uploaded",
filesProcessed, totalFiles, result.ChunksCreated, result.BlobsCreated)
fmt.Printf("Progress: %s/%s files", formatNumber(filesProcessed), formatNumber(totalFiles))
if remaining > 0 && eta > 0 {
fmt.Printf(", ETA: %s", eta.Round(time.Second))
}
@@ -558,8 +564,6 @@ func (s *Scanner) associateExistingChunks(ctx context.Context, path string) erro
// handleBlobReady is called by the packer when a blob is finalized
func (s *Scanner) handleBlobReady(blobWithReader *blob.BlobWithReader) error {
log.Debug("Invoking blob upload handler", "blob_hash", blobWithReader.Hash[:8]+"...")
startTime := time.Now().UTC()
finishedBlob := blobWithReader.FinishedBlob
@@ -854,3 +858,33 @@ func (s *Scanner) processFileStreaming(ctx context.Context, fileToProcess *FileT
func (s *Scanner) GetProgress() *ProgressReporter {
return s.progress
}
// detectDeletedFiles finds files that were recorded under the given path
// prefix in previous snapshots but no longer exist on disk, accumulating
// their count and total size into result.FilesDeleted and result.BytesDeleted.
//
// Detection is best-effort: files whose Stat fails for any reason other than
// non-existence (e.g. permission denied) are treated as still present, but the
// error is logged at debug level instead of being silently discarded.
func (s *Scanner) detectDeletedFiles(ctx context.Context, path string, result *ScanResult) error {
	// Get all files with this path prefix from the database
	files, err := s.repos.Files.ListByPrefix(ctx, path)
	if err != nil {
		return fmt.Errorf("listing files by prefix: %w", err)
	}

	for _, file := range files {
		// The file list can be large; honor cancellation between stats.
		if err := ctx.Err(); err != nil {
			return err
		}

		switch _, err := s.fs.Stat(file.Path); {
		case err == nil:
			// File still exists on disk; nothing to record.
		case os.IsNotExist(err):
			// File has been deleted
			result.FilesDeleted++
			result.BytesDeleted += file.Size
			log.Debug("Detected deleted file", "path", file.Path, "size", file.Size)
		default:
			// NOTE(review): unexpected stat errors were previously swallowed
			// silently; surface them for debugging without failing the scan.
			log.Debug("Could not stat file while detecting deletions",
				"path", file.Path, "error", err)
		}
	}
	return nil
}
// formatNumber formats a number with comma separators
func formatNumber(n int) string {
if n < 1000 {
return fmt.Sprintf("%d", n)
}
return humanize.Comma(int64(n))
}

View File

@@ -214,7 +214,7 @@ func (sm *SnapshotManager) ExportSnapshotMetadata(ctx context.Context, dbPath st
log.Debug("Database copy complete", "size", getFileSize(tempDBPath))
// Step 2: Clean the temp database to only contain current snapshot data
log.Debug("Cleaning temporary database to contain only current snapshot data", "snapshot_id", snapshotID, "db_path", tempDBPath)
log.Debug("Cleaning temporary database", "snapshot_id", snapshotID)
stats, err := sm.cleanSnapshotDB(ctx, tempDBPath, snapshotID)
if err != nil {
return fmt.Errorf("cleaning snapshot database: %w", err)
@@ -231,29 +231,27 @@ func (sm *SnapshotManager) ExportSnapshotMetadata(ctx context.Context, dbPath st
// Step 3: Dump the cleaned database to SQL
dumpPath := filepath.Join(tempDir, "snapshot.sql")
log.Debug("Dumping database to SQL", "source", tempDBPath, "destination", dumpPath)
if err := sm.dumpDatabase(tempDBPath, dumpPath); err != nil {
return fmt.Errorf("dumping database: %w", err)
}
log.Debug("SQL dump complete", "size", getFileSize(dumpPath))
log.Debug("SQL dump complete", "size", humanize.Bytes(uint64(getFileSize(dumpPath))))
// Step 4: Compress and encrypt the SQL dump
compressedPath := filepath.Join(tempDir, "snapshot.sql.zst.age")
log.Debug("Compressing and encrypting SQL dump", "source", dumpPath, "destination", compressedPath)
if err := sm.compressDump(dumpPath, compressedPath); err != nil {
return fmt.Errorf("compressing dump: %w", err)
}
log.Debug("Compression complete", "original_size", getFileSize(dumpPath), "compressed_size", getFileSize(compressedPath))
log.Debug("Compression complete",
"original_size", humanize.Bytes(uint64(getFileSize(dumpPath))),
"compressed_size", humanize.Bytes(uint64(getFileSize(compressedPath))))
// Step 5: Read compressed and encrypted data for upload
log.Debug("Reading compressed and encrypted data for upload", "path", compressedPath)
finalData, err := os.ReadFile(compressedPath)
if err != nil {
return fmt.Errorf("reading compressed dump: %w", err)
}
// Step 6: Generate blob manifest (before closing temp DB)
log.Debug("Generating blob manifest from temporary database", "db_path", tempDBPath)
blobManifest, err := sm.generateBlobManifest(ctx, tempDBPath, snapshotID)
if err != nil {
return fmt.Errorf("generating blob manifest: %w", err)
@@ -263,7 +261,6 @@ func (sm *SnapshotManager) ExportSnapshotMetadata(ctx context.Context, dbPath st
// Upload database backup (compressed and encrypted)
dbKey := fmt.Sprintf("metadata/%s/db.zst.age", snapshotID)
log.Debug("Uploading snapshot database to S3", "key", dbKey, "size", len(finalData))
dbUploadStart := time.Now()
if err := sm.s3Client.PutObject(ctx, dbKey, bytes.NewReader(finalData)); err != nil {
return fmt.Errorf("uploading snapshot database: %w", err)
@@ -278,7 +275,6 @@ func (sm *SnapshotManager) ExportSnapshotMetadata(ctx context.Context, dbPath st
// Upload blob manifest (compressed only, not encrypted)
manifestKey := fmt.Sprintf("metadata/%s/manifest.json.zst", snapshotID)
log.Debug("Uploading blob manifest to S3", "key", manifestKey, "size", len(blobManifest))
manifestUploadStart := time.Now()
if err := sm.s3Client.PutObject(ctx, manifestKey, bytes.NewReader(blobManifest)); err != nil {
return fmt.Errorf("uploading blob manifest: %w", err)
@@ -411,7 +407,6 @@ func (sm *SnapshotManager) cleanSnapshotDB(ctx context.Context, dbPath string, s
stats.CompressedSize = compressedSize.Int64
stats.UncompressedSize = uncompressedSize.Int64
log.Debug("[Temp DB Cleanup] Database cleanup complete", "stats", stats)
return stats, nil
}
@@ -425,7 +420,7 @@ func (sm *SnapshotManager) dumpDatabase(dbPath, dumpPath string) error {
return fmt.Errorf("running sqlite3 dump: %w", err)
}
log.Debug("SQL dump generated", "size", len(output))
log.Debug("SQL dump generated", "size", humanize.Bytes(uint64(len(output))))
if err := os.WriteFile(dumpPath, output, 0644); err != nil {
return fmt.Errorf("writing dump file: %w", err)
}
@@ -435,43 +430,43 @@ func (sm *SnapshotManager) dumpDatabase(dbPath, dumpPath string) error {
// compressDump compresses the SQL dump using zstd
func (sm *SnapshotManager) compressDump(inputPath, outputPath string) error {
log.Debug("Opening SQL dump for compression", "path", inputPath)
input, err := os.Open(inputPath)
if err != nil {
return fmt.Errorf("opening input file: %w", err)
}
defer func() {
log.Debug("Closing input file", "path", inputPath)
if err := input.Close(); err != nil {
log.Debug("Failed to close input file", "path", inputPath, "error", err)
}
}()
log.Debug("Creating output file for compressed and encrypted data", "path", outputPath)
output, err := os.Create(outputPath)
if err != nil {
return fmt.Errorf("creating output file: %w", err)
}
defer func() {
log.Debug("Closing output file", "path", outputPath)
if err := output.Close(); err != nil {
log.Debug("Failed to close output file", "path", outputPath, "error", err)
}
}()
// Use blobgen for compression and encryption
log.Debug("Creating compressor/encryptor", "level", sm.config.CompressionLevel)
log.Debug("Compressing and encrypting data")
writer, err := blobgen.NewWriter(output, sm.config.CompressionLevel, sm.config.AgeRecipients)
if err != nil {
return fmt.Errorf("creating blobgen writer: %w", err)
}
// Track if writer has been closed to avoid double-close
writerClosed := false
defer func() {
if err := writer.Close(); err != nil {
log.Debug("Failed to close writer", "error", err)
if !writerClosed {
if err := writer.Close(); err != nil {
log.Debug("Failed to close writer", "error", err)
}
}
}()
log.Debug("Compressing and encrypting data")
if _, err := io.Copy(writer, input); err != nil {
return fmt.Errorf("compressing data: %w", err)
}
@@ -480,6 +475,7 @@ func (sm *SnapshotManager) compressDump(inputPath, outputPath string) error {
if err := writer.Close(); err != nil {
return fmt.Errorf("closing writer: %w", err)
}
writerClosed = true
log.Debug("Compression complete", "hash", fmt.Sprintf("%x", writer.Sum256()))
@@ -524,7 +520,6 @@ func copyFile(src, dst string) error {
// generateBlobManifest creates a compressed JSON list of all blobs in the snapshot
func (sm *SnapshotManager) generateBlobManifest(ctx context.Context, dbPath string, snapshotID string) ([]byte, error) {
log.Debug("Generating blob manifest", "db_path", dbPath, "snapshot_id", snapshotID)
// Open the cleaned database using the database package
db, err := database.New(ctx, dbPath)
@@ -573,7 +568,6 @@ func (sm *SnapshotManager) generateBlobManifest(ctx context.Context, dbPath stri
}
// Encode manifest
log.Debug("Encoding manifest")
compressedData, err := EncodeManifest(manifest, sm.config.CompressionLevel)
if err != nil {
return nil, fmt.Errorf("encoding manifest: %w", err)
@@ -731,6 +725,17 @@ func (sm *SnapshotManager) cleanupOrphanedData(ctx context.Context) error {
// deleteOtherSnapshots deletes all snapshots except the current one
func (sm *SnapshotManager) deleteOtherSnapshots(ctx context.Context, tx *sql.Tx, currentSnapshotID string) error {
log.Debug("[Temp DB Cleanup] Deleting all snapshot records except current", "keeping", currentSnapshotID)
// First delete uploads that reference other snapshots (no CASCADE DELETE on this FK)
database.LogSQL("Execute", "DELETE FROM uploads WHERE snapshot_id != ?", currentSnapshotID)
uploadResult, err := tx.ExecContext(ctx, "DELETE FROM uploads WHERE snapshot_id != ?", currentSnapshotID)
if err != nil {
return fmt.Errorf("deleting uploads for other snapshots: %w", err)
}
uploadsDeleted, _ := uploadResult.RowsAffected()
log.Debug("[Temp DB Cleanup] Deleted upload records", "count", uploadsDeleted)
// Now we can safely delete the snapshots
database.LogSQL("Execute", "DELETE FROM snapshots WHERE id != ?", currentSnapshotID)
result, err := tx.ExecContext(ctx, "DELETE FROM snapshots WHERE id != ?", currentSnapshotID)
if err != nil {
@@ -842,16 +847,21 @@ func (sm *SnapshotManager) deleteOrphanedBlobToChunkMappings(ctx context.Context
return nil
}
// deleteOrphanedChunks deletes chunks not referenced by any file
// deleteOrphanedChunks deletes chunks not referenced by any file or blob
func (sm *SnapshotManager) deleteOrphanedChunks(ctx context.Context, tx *sql.Tx) error {
log.Debug("[Temp DB Cleanup] Deleting orphaned chunk records")
database.LogSQL("Execute", `DELETE FROM chunks WHERE NOT EXISTS (SELECT 1 FROM file_chunks WHERE file_chunks.chunk_hash = chunks.chunk_hash)`)
result, err := tx.ExecContext(ctx, `
query := `
DELETE FROM chunks
WHERE NOT EXISTS (
SELECT 1 FROM file_chunks
WHERE file_chunks.chunk_hash = chunks.chunk_hash
)`)
)
AND NOT EXISTS (
SELECT 1 FROM blob_chunks
WHERE blob_chunks.chunk_hash = chunks.chunk_hash
)`
database.LogSQL("Execute", query)
result, err := tx.ExecContext(ctx, query)
if err != nil {
return fmt.Errorf("deleting orphaned chunks: %w", err)
}