Fix foreign key constraints and improve snapshot tracking

- Add unified compression/encryption package in internal/blobgen
- Update DATAMODEL.md to reflect current schema implementation
- Refactor snapshot cleanup into well-named methods for clarity
- Add snapshot_id to uploads table to track new blobs per snapshot
- Fix blob count reporting for incremental backups
- Add DeleteOrphaned method to BlobChunkRepository
- Fix cleanup order to respect foreign key constraints
- Update tests to reflect schema changes
Commit d3afa65420 (parent 78af626759), authored 2025-07-26 02:22:25 +02:00.
28 changed files with 994 additions and 534 deletions.

View File

@@ -4,6 +4,7 @@ import (
"context"
"encoding/json"
"fmt"
"io"
"os"
"path/filepath"
"sort"
@@ -13,7 +14,6 @@ import (
"git.eeqj.de/sneak/vaultik/internal/backup"
"git.eeqj.de/sneak/vaultik/internal/config"
"git.eeqj.de/sneak/vaultik/internal/crypto"
"git.eeqj.de/sneak/vaultik/internal/database"
"git.eeqj.de/sneak/vaultik/internal/globals"
"git.eeqj.de/sneak/vaultik/internal/log"
@@ -33,14 +33,18 @@ type SnapshotCreateOptions struct {
// SnapshotCreateApp contains all dependencies needed for creating snapshots.
// The diff rendering had merged the pre- and post-commit field lists into one
// struct with duplicate fields; this is the post-commit (new) field set, which
// adds the shared SnapshotManager and the injectable standard I/O streams.
type SnapshotCreateApp struct {
	Globals         *globals.Globals
	Config          *config.Config
	Repositories    *database.Repositories
	ScannerFactory  backup.ScannerFactory
	SnapshotManager *backup.SnapshotManager // constructed once via fx instead of per-run
	S3Client        *s3.Client
	DB              *database.DB
	Lifecycle       fx.Lifecycle
	Shutdowner      fx.Shutdowner
	Stdout          io.Writer // injectable for tests; os.Stdout in production wiring
	Stderr          io.Writer // injectable for tests; os.Stderr in production wiring
	Stdin           io.Reader // injectable for tests; os.Stdin in production wiring
}
// SnapshotApp contains dependencies for snapshot commands
@@ -106,17 +110,22 @@ specifying a path using --config or by setting VAULTIK_CONFIG to a path.`,
s3.Module,
fx.Provide(fx.Annotate(
func(g *globals.Globals, cfg *config.Config, repos *database.Repositories,
scannerFactory backup.ScannerFactory, s3Client *s3.Client, db *database.DB,
scannerFactory backup.ScannerFactory, snapshotManager *backup.SnapshotManager,
s3Client *s3.Client, db *database.DB,
lc fx.Lifecycle, shutdowner fx.Shutdowner) *SnapshotCreateApp {
return &SnapshotCreateApp{
Globals: g,
Config: cfg,
Repositories: repos,
ScannerFactory: scannerFactory,
S3Client: s3Client,
DB: db,
Lifecycle: lc,
Shutdowner: shutdowner,
Globals: g,
Config: cfg,
Repositories: repos,
ScannerFactory: scannerFactory,
SnapshotManager: snapshotManager,
S3Client: s3Client,
DB: db,
Lifecycle: lc,
Shutdowner: shutdowner,
Stdout: os.Stdout,
Stderr: os.Stderr,
Stdin: os.Stdin,
}
},
)),
@@ -181,21 +190,10 @@ func (app *SnapshotCreateApp) runSnapshot(ctx context.Context, opts *SnapshotCre
hostname, _ = os.Hostname()
}
// Create encryptor if needed for snapshot manager
var encryptor backup.Encryptor
if len(app.Config.AgeRecipients) > 0 {
cryptoEncryptor, err := crypto.NewEncryptor(app.Config.AgeRecipients)
if err != nil {
return fmt.Errorf("creating encryptor: %w", err)
}
encryptor = cryptoEncryptor
}
snapshotManager := backup.NewSnapshotManager(app.Repositories, app.S3Client, encryptor)
// CRITICAL: This MUST succeed. If we fail to clean up incomplete snapshots,
// the deduplication logic will think files from the incomplete snapshot were
// already backed up and skip them, resulting in data loss.
if err := snapshotManager.CleanupIncompleteSnapshots(ctx, hostname); err != nil {
if err := app.SnapshotManager.CleanupIncompleteSnapshots(ctx, hostname); err != nil {
return fmt.Errorf("cleanup incomplete snapshots: %w", err)
}
@@ -234,8 +232,10 @@ func (app *SnapshotCreateApp) runSnapshot(ctx context.Context, opts *SnapshotCre
// Perform a single snapshot run
log.Notice("Starting snapshot", "source_dirs", len(resolvedDirs))
_, _ = fmt.Fprintf(app.Stdout, "Starting snapshot with %d source directories\n", len(resolvedDirs))
for i, dir := range resolvedDirs {
log.Info("Source directory", "index", i+1, "path", dir)
_, _ = fmt.Fprintf(app.Stdout, "Source directory %d: %s\n", i+1, dir)
}
// Statistics tracking
@@ -250,12 +250,12 @@ func (app *SnapshotCreateApp) runSnapshot(ctx context.Context, opts *SnapshotCre
uploadDuration := time.Duration(0)
// Create a new snapshot at the beginning
// (hostname, encryptor, and snapshotManager already created above for cleanup)
snapshotID, err := snapshotManager.CreateSnapshot(ctx, hostname, app.Globals.Version, app.Globals.Commit)
snapshotID, err := app.SnapshotManager.CreateSnapshot(ctx, hostname, app.Globals.Version, app.Globals.Commit)
if err != nil {
return fmt.Errorf("creating snapshot: %w", err)
}
log.Info("Created snapshot", "snapshot_id", snapshotID)
_, _ = fmt.Fprintf(app.Stdout, "\nCreated snapshot: %s\n", snapshotID)
for _, dir := range resolvedDirs {
// Check if context is cancelled
@@ -288,6 +288,13 @@ func (app *SnapshotCreateApp) runSnapshot(ctx context.Context, opts *SnapshotCre
"chunks", result.ChunksCreated,
"blobs", result.BlobsCreated,
"duration", result.EndTime.Sub(result.StartTime))
// Human-friendly output
_, _ = fmt.Fprintf(app.Stdout, "\nDirectory: %s\n", dir)
_, _ = fmt.Fprintf(app.Stdout, " Scanned: %d files (%s)\n", result.FilesScanned, humanize.Bytes(uint64(result.BytesScanned)))
_, _ = fmt.Fprintf(app.Stdout, " Skipped: %d files (%s) - already backed up\n", result.FilesSkipped, humanize.Bytes(uint64(result.BytesSkipped)))
_, _ = fmt.Fprintf(app.Stdout, " Created: %d chunks, %d blobs\n", result.ChunksCreated, result.BlobsCreated)
_, _ = fmt.Fprintf(app.Stdout, " Duration: %s\n", result.EndTime.Sub(result.StartTime).Round(time.Millisecond))
}
// Get upload statistics from scanner progress if available
@@ -312,19 +319,19 @@ func (app *SnapshotCreateApp) runSnapshot(ctx context.Context, opts *SnapshotCre
UploadDurationMs: uploadDuration.Milliseconds(),
}
if err := snapshotManager.UpdateSnapshotStatsExtended(ctx, snapshotID, extStats); err != nil {
if err := app.SnapshotManager.UpdateSnapshotStatsExtended(ctx, snapshotID, extStats); err != nil {
return fmt.Errorf("updating snapshot stats: %w", err)
}
// Mark snapshot as complete
if err := snapshotManager.CompleteSnapshot(ctx, snapshotID); err != nil {
if err := app.SnapshotManager.CompleteSnapshot(ctx, snapshotID); err != nil {
return fmt.Errorf("completing snapshot: %w", err)
}
// Export snapshot metadata
// Export snapshot metadata without closing the database
// The export function should handle its own database connection
if err := snapshotManager.ExportSnapshotMetadata(ctx, app.Config.IndexPath, snapshotID); err != nil {
if err := app.SnapshotManager.ExportSnapshotMetadata(ctx, app.Config.IndexPath, snapshotID); err != nil {
return fmt.Errorf("exporting snapshot metadata: %w", err)
}
@@ -373,29 +380,29 @@ func (app *SnapshotCreateApp) runSnapshot(ctx context.Context, opts *SnapshotCre
}
// Print comprehensive summary
log.Notice("=== Snapshot Summary ===")
log.Info("Snapshot ID", "id", snapshotID)
log.Info("Source files",
"total_count", formatNumber(totalFiles),
"total_size", humanize.Bytes(uint64(totalBytesAll)))
log.Info("Changed files",
"count", formatNumber(totalFilesChanged),
"size", humanize.Bytes(uint64(totalBytesChanged)))
log.Info("Unchanged files",
"count", formatNumber(totalFilesSkipped),
"size", humanize.Bytes(uint64(totalBytesSkipped)))
log.Info("Blob storage",
"total_uncompressed", humanize.Bytes(uint64(totalBlobSizeUncompressed)),
"total_compressed", humanize.Bytes(uint64(totalBlobSizeCompressed)),
"compression_ratio", fmt.Sprintf("%.2fx", compressionRatio),
"compression_level", app.Config.CompressionLevel)
log.Info("Upload activity",
"bytes_uploaded", humanize.Bytes(uint64(totalBytesUploaded)),
"blobs_uploaded", totalBlobsUploaded,
"upload_time", formatDuration(uploadDuration),
"avg_speed", avgUploadSpeed)
log.Info("Total time", "duration", formatDuration(snapshotDuration))
log.Notice("==========================")
_, _ = fmt.Fprintln(app.Stdout, "\n=== Snapshot Summary ===")
_, _ = fmt.Fprintf(app.Stdout, "Snapshot ID: %s\n", snapshotID)
_, _ = fmt.Fprintf(app.Stdout, "Source files: %s files, %s total\n",
formatNumber(totalFiles),
humanize.Bytes(uint64(totalBytesAll)))
_, _ = fmt.Fprintf(app.Stdout, "Changed files: %s files, %s\n",
formatNumber(totalFilesChanged),
humanize.Bytes(uint64(totalBytesChanged)))
_, _ = fmt.Fprintf(app.Stdout, "Unchanged files: %s files, %s\n",
formatNumber(totalFilesSkipped),
humanize.Bytes(uint64(totalBytesSkipped)))
_, _ = fmt.Fprintf(app.Stdout, "Blob storage: %s uncompressed, %s compressed (%.2fx ratio, level %d)\n",
humanize.Bytes(uint64(totalBlobSizeUncompressed)),
humanize.Bytes(uint64(totalBlobSizeCompressed)),
compressionRatio,
app.Config.CompressionLevel)
_, _ = fmt.Fprintf(app.Stdout, "Upload activity: %s uploaded, %d blobs, %s duration, %s avg speed\n",
humanize.Bytes(uint64(totalBytesUploaded)),
totalBlobsUploaded,
formatDuration(uploadDuration),
avgUploadSpeed)
_, _ = fmt.Fprintf(app.Stdout, "Total time: %s\n", formatDuration(snapshotDuration))
_, _ = fmt.Fprintln(app.Stdout, "==========================")
if opts.Prune {
log.Info("Pruning enabled - will delete old snapshots after snapshot")
@@ -729,13 +736,18 @@ func (app *SnapshotApp) downloadManifest(ctx context.Context, snapshotID string)
}
defer zr.Close()
// Decode JSON
var manifest []string
// Decode JSON - manifest is an object with a "blobs" field
var manifest struct {
SnapshotID string `json:"snapshot_id"`
Timestamp string `json:"timestamp"`
BlobCount int `json:"blob_count"`
Blobs []string `json:"blobs"`
}
if err := json.NewDecoder(zr).Decode(&manifest); err != nil {
return nil, fmt.Errorf("decoding manifest: %w", err)
}
return manifest, nil
return manifest.Blobs, nil
}
// deleteSnapshot removes a snapshot and its metadata
@@ -765,29 +777,21 @@ func (app *SnapshotApp) deleteSnapshot(ctx context.Context, snapshotID string) e
// parseSnapshotTimestamp extracts timestamp from snapshot ID
// Format: hostname-20240115-143052Z
func parseSnapshotTimestamp(snapshotID string) (time.Time, error) {
// Find the last hyphen to separate hostname from timestamp
lastHyphen := strings.LastIndex(snapshotID, "-")
if lastHyphen == -1 {
return time.Time{}, fmt.Errorf("invalid snapshot ID format")
// The snapshot ID format is: hostname-YYYYMMDD-HHMMSSZ
// We need to find the timestamp part which starts after the hostname
// Split by hyphen
parts := strings.Split(snapshotID, "-")
if len(parts) < 3 {
return time.Time{}, fmt.Errorf("invalid snapshot ID format: expected hostname-YYYYMMDD-HHMMSSZ")
}
// Extract timestamp part (everything after hostname)
timestampPart := snapshotID[lastHyphen+1:]
// The last two parts should be the date and time with Z suffix
dateStr := parts[len(parts)-2]
timeStr := parts[len(parts)-1]
// The timestamp format is YYYYMMDD-HHMMSSZ
// We need to find where the date ends and time begins
if len(timestampPart) < 8 {
return time.Time{}, fmt.Errorf("invalid snapshot ID format: timestamp too short")
}
// Find where the hostname ends by looking for pattern YYYYMMDD
hostnameEnd := strings.LastIndex(snapshotID[:lastHyphen], "-")
if hostnameEnd == -1 {
return time.Time{}, fmt.Errorf("invalid snapshot ID format: missing date separator")
}
// Get the full timestamp including date from before the last hyphen
fullTimestamp := snapshotID[hostnameEnd+1:]
// Reconstruct the full timestamp
fullTimestamp := dateStr + "-" + timeStr
// Parse the timestamp with Z suffix
return time.Parse("20060102-150405Z", fullTimestamp)