package backup

// Snapshot Metadata Export Process
// ================================
//
// The snapshot metadata contains all information needed to restore a backup.
// Instead of creating a custom format, we use a trimmed copy of the SQLite
// database containing only data relevant to the current snapshot.
//
// Process Overview:
// 1. After all files/chunks/blobs are backed up, create a snapshot record
// 2. Close the main database to ensure consistency
// 3. Copy the entire database to a temporary file
// 4. Open the temporary database
// 5. Delete all snapshots except the current one
// 6. Delete all orphaned records:
//    - Files not referenced by any remaining snapshot
//    - Chunks not referenced by any remaining files
//    - Blobs not containing any remaining chunks
//    - All related mapping tables (file_chunks, chunk_files, blob_chunks)
// 7. Close the temporary database
// 8. Use sqlite3 to dump the cleaned database to SQL
// 9. Compress the SQL dump with zstd
// 10. Encrypt the compressed dump with age (if encryption is enabled)
// 11. Generate a blob manifest from the cleaned database
// 12. Upload both artifacts to S3 under metadata/{snapshot-id}/
// 13. Remove the temporary files and reopen the main database
//
// Advantages of this approach:
// - No custom metadata format needed
// - Reuses existing database schema and relationships
// - SQL dumps are portable and compress well
// - Restore process can simply execute the SQL
// - Atomic and consistent snapshot of all metadata
//
// TODO: Future improvements:
// - Implement incremental snapshots that reference previous snapshots
// - Extend the blob manifest with additional metadata (size, chunk count, etc.)
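// The resulting S3 layout for a snapshot (key formats taken from the upload
// code below) is:
//
//	metadata/{snapshot-id}/db.zst[.age]      - compressed (and optionally
//	                                           encrypted) SQL dump
//	metadata/{snapshot-id}/manifest.json.zst - compressed blob manifest
//	                                           (unencrypted)
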
import (
	"bytes"
	"context"
	"database/sql"
	"encoding/json"
	"fmt"
	"io"
	"os"
	"os/exec"
	"path/filepath"
	"runtime"
	"time"

	"git.eeqj.de/sneak/vaultik/internal/database"
	"git.eeqj.de/sneak/vaultik/internal/log"
	"github.com/klauspost/compress/zstd"
)

// SnapshotManager handles snapshot creation and metadata export
type SnapshotManager struct {
	repos     *database.Repositories
	s3Client  S3Client
	encryptor Encryptor
}

// Encryptor interface for snapshot encryption
type Encryptor interface {
	Encrypt(data []byte) ([]byte, error)
}

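// A minimal sketch of an age-backed implementation (assuming filippo.io/age;
// vaultik's real encryptor lives elsewhere and may differ):
//
//	type ageEncryptor struct{ recipients []age.Recipient }
//
//	func (e *ageEncryptor) Encrypt(data []byte) ([]byte, error) {
//		var buf bytes.Buffer
//		w, err := age.Encrypt(&buf, e.recipients...)
//		if err != nil {
//			return nil, err
//		}
//		if _, err := w.Write(data); err != nil {
//			_ = w.Close()
//			return nil, err
//		}
//		// Close finalizes the age stream; skipping it truncates the ciphertext.
//		if err := w.Close(); err != nil {
//			return nil, err
//		}
//		return buf.Bytes(), nil
//	}
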
// NewSnapshotManager creates a new snapshot manager
func NewSnapshotManager(repos *database.Repositories, s3Client S3Client, encryptor Encryptor) *SnapshotManager {
	return &SnapshotManager{
		repos:     repos,
		s3Client:  s3Client,
		encryptor: encryptor,
	}
}

// CreateSnapshot creates a new snapshot record in the database at the start of a backup.
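// The generated ID embeds the hostname and a UTC timestamp, e.g.
// "myhost-20240115-103045Z" (illustrative value).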
func (sm *SnapshotManager) CreateSnapshot(ctx context.Context, hostname, version, gitRevision string) (string, error) {
	snapshotID := fmt.Sprintf("%s-%s", hostname, time.Now().UTC().Format("20060102-150405Z"))

	snapshot := &database.Snapshot{
		ID:                 snapshotID,
		Hostname:           hostname,
		VaultikVersion:     version,
		VaultikGitRevision: gitRevision,
		StartedAt:          time.Now().UTC(),
		CompletedAt:        nil, // Not completed yet
		FileCount:          0,
		ChunkCount:         0,
		BlobCount:          0,
		TotalSize:          0,
		BlobSize:           0,
		CompressionRatio:   1.0,
	}

	err := sm.repos.WithTx(ctx, func(ctx context.Context, tx *sql.Tx) error {
		return sm.repos.Snapshots.Create(ctx, tx, snapshot)
	})

	if err != nil {
		return "", fmt.Errorf("creating snapshot: %w", err)
	}

	log.Info("Created snapshot", "snapshot_id", snapshotID)
	return snapshotID, nil
}

// UpdateSnapshotStats updates the statistics for a snapshot during backup
func (sm *SnapshotManager) UpdateSnapshotStats(ctx context.Context, snapshotID string, stats BackupStats) error {
	err := sm.repos.WithTx(ctx, func(ctx context.Context, tx *sql.Tx) error {
		return sm.repos.Snapshots.UpdateCounts(ctx, tx, snapshotID,
			int64(stats.FilesScanned),
			int64(stats.ChunksCreated),
			int64(stats.BlobsCreated),
			stats.BytesScanned,
			stats.BytesUploaded,
		)
	})

	if err != nil {
		return fmt.Errorf("updating snapshot stats: %w", err)
	}

	return nil
}

// UpdateSnapshotStatsExtended updates snapshot statistics with extended metrics.
// This includes compression level, uncompressed blob size, and upload duration.
func (sm *SnapshotManager) UpdateSnapshotStatsExtended(ctx context.Context, snapshotID string, stats ExtendedBackupStats) error {
	return sm.repos.WithTx(ctx, func(ctx context.Context, tx *sql.Tx) error {
		// First update basic stats
		if err := sm.repos.Snapshots.UpdateCounts(ctx, tx, snapshotID,
			int64(stats.FilesScanned),
			int64(stats.ChunksCreated),
			int64(stats.BlobsCreated),
			stats.BytesScanned,
			stats.BytesUploaded,
		); err != nil {
			return err
		}

		// Then update extended stats
		return sm.repos.Snapshots.UpdateExtendedStats(ctx, tx, snapshotID,
			stats.BlobUncompressedSize,
			stats.CompressionLevel,
			stats.UploadDurationMs,
		)
	})
}

// CompleteSnapshot marks a snapshot as completed. Metadata export is handled
// separately by ExportSnapshotMetadata.
func (sm *SnapshotManager) CompleteSnapshot(ctx context.Context, snapshotID string) error {
	// Mark the snapshot as completed
	err := sm.repos.WithTx(ctx, func(ctx context.Context, tx *sql.Tx) error {
		return sm.repos.Snapshots.MarkComplete(ctx, tx, snapshotID)
	})

	if err != nil {
		return fmt.Errorf("marking snapshot complete: %w", err)
	}

	log.Info("Completed snapshot", "snapshot_id", snapshotID)
	return nil
}

// ExportSnapshotMetadata exports snapshot metadata to S3
//
// This method executes the complete snapshot metadata export process:
// 1. Creates a temporary directory for working files
// 2. Copies the main database to preserve its state
// 3. Cleans the copy to contain only current snapshot data
// 4. Dumps the cleaned database to SQL
// 5. Compresses the SQL dump with zstd
// 6. Encrypts the compressed data (if encryption is enabled)
// 7. Generates a blob manifest from the cleaned database
// 8. Uploads to S3 at: metadata/{snapshot-id}/db.zst[.age] and
//    metadata/{snapshot-id}/manifest.json.zst
//
// The caller is responsible for:
// - Ensuring the main database is closed before calling this method
// - Reopening the main database after this method returns
//
// This ensures database consistency during the copy operation.
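// A sketch of the expected call sequence (the close/reopen steps belong to
// the caller; the exact calls depend on how the caller manages its database
// handle):
//
//	_ = db.Close() // release the main DB before the copy
//	if err := sm.ExportSnapshotMetadata(ctx, dbPath, snapshotID); err != nil {
//		// handle export failure
//	}
//	db, err = database.New(ctx, dbPath) // reopen afterwards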
func (sm *SnapshotManager) ExportSnapshotMetadata(ctx context.Context, dbPath string, snapshotID string) error {
	log.Info("Phase 3/3: Exporting snapshot metadata", "snapshot_id", snapshotID, "source_db", dbPath)

	// Create temp directory for all temporary files
	tempDir, err := os.MkdirTemp("", "vaultik-snapshot-*")
	if err != nil {
		return fmt.Errorf("creating temp dir: %w", err)
	}
	log.Debug("Created temporary directory", "path", tempDir)
	defer func() {
		log.Debug("Cleaning up temporary directory", "path", tempDir)
		if err := os.RemoveAll(tempDir); err != nil {
			log.Debug("Failed to remove temp dir", "path", tempDir, "error", err)
		}
	}()

	// Step 1: Copy database to temp file
	// The main database should be closed at this point
	tempDBPath := filepath.Join(tempDir, "snapshot.db")
	log.Debug("Copying database to temporary location", "source", dbPath, "destination", tempDBPath)
	if err := copyFile(dbPath, tempDBPath); err != nil {
		return fmt.Errorf("copying database: %w", err)
	}
	log.Debug("Database copy complete", "size", getFileSize(tempDBPath))

	// Step 2: Clean the temp database to only contain current snapshot data
	log.Debug("Cleaning snapshot database to contain only current snapshot", "snapshot_id", snapshotID)
	if err := sm.cleanSnapshotDB(ctx, tempDBPath, snapshotID); err != nil {
		return fmt.Errorf("cleaning snapshot database: %w", err)
	}
	log.Debug("Database cleaning complete", "size_after_clean", getFileSize(tempDBPath))

	// Step 3: Dump the cleaned database to SQL
	dumpPath := filepath.Join(tempDir, "snapshot.sql")
	log.Debug("Dumping database to SQL", "source", tempDBPath, "destination", dumpPath)
	if err := sm.dumpDatabase(tempDBPath, dumpPath); err != nil {
		return fmt.Errorf("dumping database: %w", err)
	}
	log.Debug("SQL dump complete", "size", getFileSize(dumpPath))

	// Step 4: Compress the SQL dump
	compressedPath := filepath.Join(tempDir, "snapshot.sql.zst")
	log.Debug("Compressing SQL dump", "source", dumpPath, "destination", compressedPath)
	if err := sm.compressDump(dumpPath, compressedPath); err != nil {
		return fmt.Errorf("compressing dump: %w", err)
	}
	log.Debug("Compression complete", "original_size", getFileSize(dumpPath), "compressed_size", getFileSize(compressedPath))

	// Step 5: Read compressed data for encryption/upload
	log.Debug("Reading compressed data for upload", "path", compressedPath)
	compressedData, err := os.ReadFile(compressedPath)
	if err != nil {
		return fmt.Errorf("reading compressed dump: %w", err)
	}

	// Step 6: Encrypt if encryptor is available
	finalData := compressedData
	if sm.encryptor != nil {
		log.Debug("Encrypting snapshot data", "size_before", len(compressedData))
		encrypted, err := sm.encryptor.Encrypt(compressedData)
		if err != nil {
			return fmt.Errorf("encrypting snapshot: %w", err)
		}
		finalData = encrypted
		log.Debug("Encryption complete", "size_after", len(encrypted))
	} else {
		log.Debug("No encryption configured, using compressed data as-is")
	}

	// Step 7: Generate blob manifest (before closing temp DB)
	log.Debug("Generating blob manifest from temporary database", "db_path", tempDBPath)
	blobManifest, err := sm.generateBlobManifest(ctx, tempDBPath, snapshotID)
	if err != nil {
		return fmt.Errorf("generating blob manifest: %w", err)
	}

	// Step 8: Upload to S3 in snapshot subdirectory
	// Upload database backup (encrypted)
	dbKey := fmt.Sprintf("metadata/%s/db.zst", snapshotID)
	if sm.encryptor != nil {
		dbKey += ".age"
	}

	log.Debug("Uploading snapshot database to S3", "key", dbKey, "size", len(finalData))
	if err := sm.s3Client.PutObject(ctx, dbKey, bytes.NewReader(finalData)); err != nil {
		return fmt.Errorf("uploading snapshot database: %w", err)
	}
	log.Debug("Database upload complete", "key", dbKey)

	// Upload blob manifest (unencrypted, compressed)
	manifestKey := fmt.Sprintf("metadata/%s/manifest.json.zst", snapshotID)
	log.Debug("Uploading blob manifest to S3", "key", manifestKey, "size", len(blobManifest))
	if err := sm.s3Client.PutObject(ctx, manifestKey, bytes.NewReader(blobManifest)); err != nil {
		return fmt.Errorf("uploading blob manifest: %w", err)
	}
	log.Debug("Manifest upload complete", "key", manifestKey)

	log.Info("Uploaded snapshot metadata",
		"snapshot_id", snapshotID,
		"db_size", len(finalData),
		"manifest_size", len(blobManifest))
	return nil
}

// cleanSnapshotDB removes all data except for the specified snapshot
//
// Deletion proceeds in dependency order to maintain referential integrity:
//
// 1. Delete all snapshots except the current one
// 2. Delete files not belonging to the current snapshot
// 3. file_chunks for deleted files are removed via CASCADE
// 4. Delete chunk_files for deleted files
// 5. Delete chunks with no remaining file references
// 6. Delete blob_chunks for deleted chunks
// 7. Delete blobs not referenced by the current snapshot
// 8. Delete snapshot_files/snapshot_blobs rows for other snapshots
//
// The core queries are of the form:
//
//	DELETE FROM snapshots WHERE id != ?;
//	DELETE FROM files WHERE NOT EXISTS (
//	    SELECT 1 FROM snapshot_files
//	    WHERE snapshot_files.file_id = files.id
//	    AND snapshot_files.snapshot_id = ?
//	);
//	DELETE FROM chunks WHERE NOT EXISTS (
//	    SELECT 1 FROM file_chunks
//	    WHERE file_chunks.chunk_hash = chunks.chunk_hash
//	);
//	DELETE FROM blobs WHERE NOT EXISTS (
//	    SELECT 1 FROM snapshot_blobs
//	    WHERE snapshot_blobs.blob_hash = blobs.blob_hash
//	    AND snapshot_blobs.snapshot_id = ?
//	);
func (sm *SnapshotManager) cleanSnapshotDB(ctx context.Context, dbPath string, snapshotID string) error {
	// Open the temp database
	db, err := database.New(ctx, dbPath)
	if err != nil {
		return fmt.Errorf("opening temp database: %w", err)
	}
	defer func() {
		if err := db.Close(); err != nil {
			log.Debug("Failed to close temp database", "error", err)
		}
	}()

	// Start a transaction
	tx, err := db.BeginTx(ctx, nil)
	if err != nil {
		return fmt.Errorf("beginning transaction: %w", err)
	}
	defer func() {
		if rbErr := tx.Rollback(); rbErr != nil && rbErr != sql.ErrTxDone {
			log.Debug("Failed to rollback transaction", "error", rbErr)
		}
	}()

	// Step 1: Delete all other snapshots
	log.Debug("Deleting other snapshots", "keeping", snapshotID)
	database.LogSQL("Execute", "DELETE FROM snapshots WHERE id != ?", snapshotID)
	result, err := tx.ExecContext(ctx, "DELETE FROM snapshots WHERE id != ?", snapshotID)
	if err != nil {
		return fmt.Errorf("deleting other snapshots: %w", err)
	}
	rowsAffected, _ := result.RowsAffected()
	log.Debug("Deleted snapshots", "count", rowsAffected)

	// Step 2: Delete files not in this snapshot
	log.Debug("Deleting files not in current snapshot")
	database.LogSQL("Execute", `DELETE FROM files WHERE NOT EXISTS (SELECT 1 FROM snapshot_files WHERE snapshot_files.file_id = files.id AND snapshot_files.snapshot_id = ?)`, snapshotID)
	result, err = tx.ExecContext(ctx, `
		DELETE FROM files
		WHERE NOT EXISTS (
			SELECT 1 FROM snapshot_files
			WHERE snapshot_files.file_id = files.id
			AND snapshot_files.snapshot_id = ?
		)`, snapshotID)
	if err != nil {
		return fmt.Errorf("deleting orphaned files: %w", err)
	}
	rowsAffected, _ = result.RowsAffected()
	log.Debug("Deleted files", "count", rowsAffected)

	// Step 3: file_chunks will be deleted via CASCADE from files
	log.Debug("file_chunks will be deleted via CASCADE")

	// Step 4: Delete chunk_files for deleted files
	log.Debug("Deleting orphaned chunk_files")
	database.LogSQL("Execute", `DELETE FROM chunk_files WHERE NOT EXISTS (SELECT 1 FROM files WHERE files.id = chunk_files.file_id)`)
	result, err = tx.ExecContext(ctx, `
		DELETE FROM chunk_files
		WHERE NOT EXISTS (
			SELECT 1 FROM files
			WHERE files.id = chunk_files.file_id
		)`)
	if err != nil {
		return fmt.Errorf("deleting orphaned chunk_files: %w", err)
	}
	rowsAffected, _ = result.RowsAffected()
	log.Debug("Deleted chunk_files", "count", rowsAffected)

	// Step 5: Delete chunks with no remaining file references
	log.Debug("Deleting orphaned chunks")
	database.LogSQL("Execute", `DELETE FROM chunks WHERE NOT EXISTS (SELECT 1 FROM file_chunks WHERE file_chunks.chunk_hash = chunks.chunk_hash)`)
	result, err = tx.ExecContext(ctx, `
		DELETE FROM chunks
		WHERE NOT EXISTS (
			SELECT 1 FROM file_chunks
			WHERE file_chunks.chunk_hash = chunks.chunk_hash
		)`)
	if err != nil {
		return fmt.Errorf("deleting orphaned chunks: %w", err)
	}
	rowsAffected, _ = result.RowsAffected()
	log.Debug("Deleted chunks", "count", rowsAffected)

	// Step 6: Delete blob_chunks for deleted chunks
	log.Debug("Deleting orphaned blob_chunks")
	database.LogSQL("Execute", `DELETE FROM blob_chunks WHERE NOT EXISTS (SELECT 1 FROM chunks WHERE chunks.chunk_hash = blob_chunks.chunk_hash)`)
	result, err = tx.ExecContext(ctx, `
		DELETE FROM blob_chunks
		WHERE NOT EXISTS (
			SELECT 1 FROM chunks
			WHERE chunks.chunk_hash = blob_chunks.chunk_hash
		)`)
	if err != nil {
		return fmt.Errorf("deleting orphaned blob_chunks: %w", err)
	}
	rowsAffected, _ = result.RowsAffected()
	log.Debug("Deleted blob_chunks", "count", rowsAffected)

	// Step 7: Delete blobs not in this snapshot
	log.Debug("Deleting blobs not in current snapshot")
	database.LogSQL("Execute", `DELETE FROM blobs WHERE NOT EXISTS (SELECT 1 FROM snapshot_blobs WHERE snapshot_blobs.blob_hash = blobs.blob_hash AND snapshot_blobs.snapshot_id = ?)`, snapshotID)
	result, err = tx.ExecContext(ctx, `
		DELETE FROM blobs
		WHERE NOT EXISTS (
			SELECT 1 FROM snapshot_blobs
			WHERE snapshot_blobs.blob_hash = blobs.blob_hash
			AND snapshot_blobs.snapshot_id = ?
		)`, snapshotID)
	if err != nil {
		return fmt.Errorf("deleting orphaned blobs: %w", err)
	}
	rowsAffected, _ = result.RowsAffected()
	log.Debug("Deleted blobs not in snapshot", "count", rowsAffected)

	// Step 8: Delete orphaned snapshot_files and snapshot_blobs
	log.Debug("Deleting orphaned snapshot_files")
	database.LogSQL("Execute", "DELETE FROM snapshot_files WHERE snapshot_id != ?", snapshotID)
	result, err = tx.ExecContext(ctx, "DELETE FROM snapshot_files WHERE snapshot_id != ?", snapshotID)
	if err != nil {
		return fmt.Errorf("deleting orphaned snapshot_files: %w", err)
	}
	rowsAffected, _ = result.RowsAffected()
	log.Debug("Deleted snapshot_files", "count", rowsAffected)

	log.Debug("Deleting orphaned snapshot_blobs")
	database.LogSQL("Execute", "DELETE FROM snapshot_blobs WHERE snapshot_id != ?", snapshotID)
	result, err = tx.ExecContext(ctx, "DELETE FROM snapshot_blobs WHERE snapshot_id != ?", snapshotID)
	if err != nil {
		return fmt.Errorf("deleting orphaned snapshot_blobs: %w", err)
	}
	rowsAffected, _ = result.RowsAffected()
	log.Debug("Deleted snapshot_blobs", "count", rowsAffected)

	// Commit transaction
	log.Debug("Committing cleanup transaction")
	if err := tx.Commit(); err != nil {
		return fmt.Errorf("committing transaction: %w", err)
	}

	log.Debug("Database cleanup complete")
	return nil
}

// dumpDatabase creates a SQL dump of the database
//
// This shells out to the sqlite3 CLI, which must be available on PATH. The
// dump is buffered fully in memory before being written out; the cleaned
// metadata database is expected to be small enough for this to be acceptable.
func (sm *SnapshotManager) dumpDatabase(dbPath, dumpPath string) error {
	log.Debug("Running sqlite3 dump command", "source", dbPath, "destination", dumpPath)
	cmd := exec.Command("sqlite3", dbPath, ".dump")

	output, err := cmd.Output()
	if err != nil {
		return fmt.Errorf("running sqlite3 dump: %w", err)
	}

	log.Debug("SQL dump generated", "size", len(output))
	if err := os.WriteFile(dumpPath, output, 0644); err != nil {
		return fmt.Errorf("writing dump file: %w", err)
	}

	return nil
}

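// Equivalent shell invocation, handy when debugging a snapshot export by
// hand (the path shown is illustrative):
//
//	sqlite3 /tmp/vaultik-snapshot-XXXX/snapshot.db .dump > snapshot.sql
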
// compressDump compresses the SQL dump using zstd
func (sm *SnapshotManager) compressDump(inputPath, outputPath string) error {
	log.Debug("Opening SQL dump for compression", "path", inputPath)
	input, err := os.Open(inputPath)
	if err != nil {
		return fmt.Errorf("opening input file: %w", err)
	}
	defer func() {
		log.Debug("Closing input file", "path", inputPath)
		if err := input.Close(); err != nil {
			log.Debug("Failed to close input file", "path", inputPath, "error", err)
		}
	}()

	log.Debug("Creating output file for compressed data", "path", outputPath)
	output, err := os.Create(outputPath)
	if err != nil {
		return fmt.Errorf("creating output file: %w", err)
	}
	defer func() {
		log.Debug("Closing output file", "path", outputPath)
		if err := output.Close(); err != nil {
			log.Debug("Failed to close output file", "path", outputPath, "error", err)
		}
	}()

	// Create zstd encoder with good compression and multithreading
	log.Debug("Creating zstd compressor", "level", "SpeedBetterCompression", "concurrency", runtime.NumCPU())
	zstdWriter, err := zstd.NewWriter(output,
		zstd.WithEncoderLevel(zstd.SpeedBetterCompression),
		zstd.WithEncoderConcurrency(runtime.NumCPU()),
		zstd.WithWindowSize(4<<20), // 4MB window for metadata files
	)
	if err != nil {
		return fmt.Errorf("creating zstd writer: %w", err)
	}

	log.Debug("Compressing data")
	if _, err := io.Copy(zstdWriter, input); err != nil {
		_ = zstdWriter.Close()
		return fmt.Errorf("compressing data: %w", err)
	}

	// Close flushes the final zstd frame; swallowing this error could leave a
	// silently truncated output file, so it is returned rather than logged.
	if err := zstdWriter.Close(); err != nil {
		return fmt.Errorf("closing zstd writer: %w", err)
	}

	return nil
}

// copyFile copies a file from src to dst
func copyFile(src, dst string) error {
	log.Debug("Opening source file for copy", "path", src)
	sourceFile, err := os.Open(src)
	if err != nil {
		return err
	}
	defer func() {
		log.Debug("Closing source file", "path", src)
		if err := sourceFile.Close(); err != nil {
			log.Debug("Failed to close source file", "path", src, "error", err)
		}
	}()

	log.Debug("Creating destination file", "path", dst)
	destFile, err := os.Create(dst)
	if err != nil {
		return err
	}

	log.Debug("Copying file data")
	n, err := io.Copy(destFile, sourceFile)
	if err != nil {
		_ = destFile.Close()
		return err
	}
	log.Debug("File copy complete", "bytes_copied", n)

	// Close the destination explicitly: a failed close can mean the copied
	// data never reached disk, which must surface as an error.
	if err := destFile.Close(); err != nil {
		return err
	}

	return nil
}

// generateBlobManifest creates a compressed JSON list of all blobs in the snapshot.
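// The uncompressed manifest has this shape (values illustrative):
//
//	{
//	  "snapshot_id": "myhost-20240115-103045Z",
//	  "timestamp": "2024-01-15T10:35:00Z",
//	  "blob_count": 2,
//	  "blobs": ["<blob-hash-1>", "<blob-hash-2>"]
//	}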
func (sm *SnapshotManager) generateBlobManifest(ctx context.Context, dbPath string, snapshotID string) ([]byte, error) {
	log.Debug("Generating blob manifest", "db_path", dbPath, "snapshot_id", snapshotID)

	// Open the cleaned database using the database package
	db, err := database.New(ctx, dbPath)
	if err != nil {
		return nil, fmt.Errorf("opening database: %w", err)
	}
	defer func() { _ = db.Close() }()

	// Create repositories to access the data
	repos := database.NewRepositories(db)

	// Get all blobs for this snapshot
	log.Debug("Querying blobs for snapshot", "snapshot_id", snapshotID)
	blobs, err := repos.Snapshots.GetBlobHashes(ctx, snapshotID)
	if err != nil {
		return nil, fmt.Errorf("getting snapshot blobs: %w", err)
	}
	log.Debug("Found blobs", "count", len(blobs))

	// Create manifest structure
	manifest := struct {
		SnapshotID string   `json:"snapshot_id"`
		Timestamp  string   `json:"timestamp"`
		BlobCount  int      `json:"blob_count"`
		Blobs      []string `json:"blobs"`
	}{
		SnapshotID: snapshotID,
		Timestamp:  time.Now().UTC().Format(time.RFC3339),
		BlobCount:  len(blobs),
		Blobs:      blobs,
	}

	// Marshal to JSON
	log.Debug("Marshaling manifest to JSON")
	jsonData, err := json.MarshalIndent(manifest, "", " ")
	if err != nil {
		return nil, fmt.Errorf("marshaling manifest: %w", err)
	}
	log.Debug("JSON manifest created", "size", len(jsonData))

	// Compress with zstd
	log.Debug("Compressing manifest with zstd")
	compressed, err := compressData(jsonData)
	if err != nil {
		return nil, fmt.Errorf("compressing manifest: %w", err)
	}
	log.Debug("Manifest compressed", "original_size", len(jsonData), "compressed_size", len(compressed))

	log.Info("Generated blob manifest",
		"snapshot_id", snapshotID,
		"blob_count", len(blobs),
		"json_size", len(jsonData),
		"compressed_size", len(compressed))

	return compressed, nil
}

// compressData compresses data using zstd
func compressData(data []byte) ([]byte, error) {
	var buf bytes.Buffer
	w, err := zstd.NewWriter(&buf,
		zstd.WithEncoderLevel(zstd.SpeedBetterCompression),
	)
	if err != nil {
		return nil, err
	}

	if _, err := w.Write(data); err != nil {
		_ = w.Close()
		return nil, err
	}

	if err := w.Close(); err != nil {
		return nil, err
	}

	return buf.Bytes(), nil
}

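// exampleDecompressData is the inverse of compressData. It is not used by
// the backup path; it is included only as a sketch of how a reader (e.g. a
// restore tool) would recover the JSON manifest or SQL dump.
func exampleDecompressData(data []byte) ([]byte, error) {
	reader, err := zstd.NewReader(bytes.NewReader(data))
	if err != nil {
		return nil, err
	}
	// Decoder.Close releases resources and reports no error.
	defer reader.Close()
	return io.ReadAll(reader)
}
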
// getFileSize returns the size of a file in bytes, or -1 if error
func getFileSize(path string) int64 {
	info, err := os.Stat(path)
	if err != nil {
		return -1
	}
	return info.Size()
}

// BackupStats contains statistics from a backup operation
type BackupStats struct {
	FilesScanned  int
	BytesScanned  int64
	ChunksCreated int
	BlobsCreated  int
	BytesUploaded int64
}

// ExtendedBackupStats contains additional statistics for comprehensive tracking
type ExtendedBackupStats struct {
	BackupStats
	BlobUncompressedSize int64 // Total uncompressed size of all referenced blobs
	CompressionLevel     int   // Compression level used for this snapshot
	UploadDurationMs     int64 // Total milliseconds spent uploading to S3
}

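// exampleCompressionRatio sketches how the extended stats relate to the
// snapshot's CompressionRatio field. Vaultik computes the real value
// elsewhere, so both the name and the exact formula are illustrative.
func exampleCompressionRatio(s ExtendedBackupStats) float64 {
	if s.BlobUncompressedSize == 0 {
		return 1.0 // nothing packed; matches the initial value set in CreateSnapshot
	}
	// Compressed bytes uploaded divided by the uncompressed bytes they encode.
	return float64(s.BytesUploaded) / float64(s.BlobUncompressedSize)
}
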
// CleanupIncompleteSnapshots removes incomplete snapshots that don't have metadata in S3.
// This is critical for data safety: incomplete snapshots can cause deduplication to skip
// files that were never successfully backed up, resulting in data loss.
func (sm *SnapshotManager) CleanupIncompleteSnapshots(ctx context.Context, hostname string) error {
	log.Info("Checking for incomplete snapshots", "hostname", hostname)

	// Get all incomplete snapshots for this hostname
	incompleteSnapshots, err := sm.repos.Snapshots.GetIncompleteByHostname(ctx, hostname)
	if err != nil {
		return fmt.Errorf("getting incomplete snapshots: %w", err)
	}

	if len(incompleteSnapshots) == 0 {
		log.Debug("No incomplete snapshots found")
		return nil
	}

	log.Info("Found incomplete snapshots", "count", len(incompleteSnapshots))

	// Check each incomplete snapshot for metadata in S3
	for _, snapshot := range incompleteSnapshots {
		// Check if metadata exists in S3, using the same key format that
		// ExportSnapshotMetadata uploads (encrypted dumps carry a .age
		// suffix). Note that ANY StatObject error is treated as "metadata
		// missing", including transient network errors.
		metadataKey := fmt.Sprintf("metadata/%s/db.zst", snapshot.ID)
		if sm.encryptor != nil {
			metadataKey += ".age"
		}
		_, err := sm.s3Client.StatObject(ctx, metadataKey)

		if err != nil {
			// Metadata doesn't exist in S3 - this is an incomplete snapshot
			log.Info("Cleaning up incomplete snapshot", "snapshot_id", snapshot.ID, "started_at", snapshot.StartedAt)

			// Delete the snapshot and all its associations
			if err := sm.deleteSnapshot(ctx, snapshot.ID); err != nil {
				return fmt.Errorf("deleting incomplete snapshot %s: %w", snapshot.ID, err)
			}

			log.Info("Deleted incomplete snapshot", "snapshot_id", snapshot.ID)
		} else {
			// Metadata exists - this snapshot was completed but the database
			// wasn't updated. This shouldn't happen in normal operation, but
			// mark it complete.
			log.Warn("Found snapshot with metadata but incomplete in DB", "snapshot_id", snapshot.ID)
			if err := sm.repos.Snapshots.MarkComplete(ctx, nil, snapshot.ID); err != nil {
				log.Error("Failed to mark snapshot complete", "snapshot_id", snapshot.ID, "error", err)
			}
		}
	}

	return nil
}

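// If the S3 client can distinguish a definitive "not found" from transient
// failures, the StatObject check above would be safer written along these
// lines (isNotFound is hypothetical; vaultik's S3Client error semantics may
// differ):
//
//	_, err := sm.s3Client.StatObject(ctx, metadataKey)
//	if err != nil && !isNotFound(err) {
//		return fmt.Errorf("checking metadata for %s: %w", snapshot.ID, err)
//	}
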
// deleteSnapshot removes a snapshot and all its associations from the database
func (sm *SnapshotManager) deleteSnapshot(ctx context.Context, snapshotID string) error {
	// Delete snapshot_files entries
	if err := sm.repos.Snapshots.DeleteSnapshotFiles(ctx, snapshotID); err != nil {
		return fmt.Errorf("deleting snapshot files: %w", err)
	}

	// Delete snapshot_blobs entries
	if err := sm.repos.Snapshots.DeleteSnapshotBlobs(ctx, snapshotID); err != nil {
		return fmt.Errorf("deleting snapshot blobs: %w", err)
	}

	// Delete the snapshot itself
	if err := sm.repos.Snapshots.Delete(ctx, snapshotID); err != nil {
		return fmt.Errorf("deleting snapshot: %w", err)
	}

	// Clean up orphaned data
	log.Debug("Cleaning up orphaned data")
	if err := sm.cleanupOrphanedData(ctx); err != nil {
		return fmt.Errorf("cleaning up orphaned data: %w", err)
	}

	return nil
}

// cleanupOrphanedData removes files, chunks, and blobs that are no longer referenced by any snapshot
func (sm *SnapshotManager) cleanupOrphanedData(ctx context.Context) error {
	// Delete orphaned files (files not in any snapshot)
	log.Debug("Deleting orphaned files")
	if err := sm.repos.Files.DeleteOrphaned(ctx); err != nil {
		return fmt.Errorf("deleting orphaned files: %w", err)
	}

	// Delete orphaned chunks (chunks not referenced by any file)
	log.Debug("Deleting orphaned chunks")
	if err := sm.repos.Chunks.DeleteOrphaned(ctx); err != nil {
		return fmt.Errorf("deleting orphaned chunks: %w", err)
	}

	// Delete orphaned blobs (blobs not in any snapshot)
	log.Debug("Deleting orphaned blobs")
	if err := sm.repos.Blobs.DeleteOrphaned(ctx); err != nil {
		return fmt.Errorf("deleting orphaned blobs: %w", err)
	}

	return nil
}