Major refactoring: UUID-based storage, streaming architecture, and CLI improvements
This commit represents a significant architectural overhaul of vaultik.

Database Schema Changes:
- Switch files table to use UUID primary keys instead of path-based keys
- Add UUID primary keys to blobs table for immediate chunk association
- Update all foreign key relationships to use UUIDs
- Add comprehensive schema documentation in DATAMODEL.md
- Add SQLite busy timeout handling for concurrent operations

Streaming and Performance Improvements:
- Implement true streaming blob packing without intermediate storage
- Add streaming chunk processing to reduce memory usage
- Improve progress reporting with real-time metrics
- Add upload metrics tracking in new uploads table

CLI Refactoring:
- Restructure CLI to use subcommands: snapshot create/list/purge/verify
- Add store info command for S3 configuration display
- Add custom duration parser supporting days/weeks/months/years
- Remove old backup.go in favor of enhanced snapshot.go
- Add --cron flag for silent operation

Configuration Changes:
- Remove unused index_prefix configuration option
- Add support for snapshot pruning retention policies
- Improve configuration validation and error messages

Testing Improvements:
- Add comprehensive repository tests with edge cases
- Add cascade delete debugging tests
- Fix concurrent operation tests to use SQLite busy timeout
- Remove tolerance for SQLITE_BUSY errors in tests

Documentation:
- Add MIT LICENSE file
- Update README with new command structure
- Add comprehensive DATAMODEL.md explaining database schema
- Update DESIGN.md with UUID-based architecture

Other Changes:
- Add test-config.yml for testing
- Update Makefile with better test output formatting
- Fix various race conditions in concurrent operations
- Improve error handling throughout
This commit is contained in:
@@ -338,97 +338,103 @@ func (b *BackupEngine) Backup(ctx context.Context, fsys fs.FS, root string) (str
|
||||
return nil
|
||||
}
|
||||
|
||||
// Process this file in a transaction
|
||||
// Create file record in a short transaction
|
||||
file := &database.File{
|
||||
Path: path,
|
||||
Size: info.Size(),
|
||||
Mode: uint32(info.Mode()),
|
||||
MTime: info.ModTime(),
|
||||
CTime: info.ModTime(), // Use mtime as ctime for test
|
||||
UID: 1000, // Default UID for test
|
||||
GID: 1000, // Default GID for test
|
||||
}
|
||||
err = b.repos.WithTx(ctx, func(ctx context.Context, tx *sql.Tx) error {
|
||||
// Create file record
|
||||
file := &database.File{
|
||||
Path: path,
|
||||
Size: info.Size(),
|
||||
Mode: uint32(info.Mode()),
|
||||
MTime: info.ModTime(),
|
||||
CTime: info.ModTime(), // Use mtime as ctime for test
|
||||
UID: 1000, // Default UID for test
|
||||
GID: 1000, // Default GID for test
|
||||
}
|
||||
return b.repos.Files.Create(ctx, tx, file)
|
||||
})
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if err := b.repos.Files.Create(ctx, tx, file); err != nil {
|
||||
fileCount++
|
||||
totalSize += info.Size()
|
||||
|
||||
// Read and process file in chunks
|
||||
f, err := fsys.Open(path)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
defer func() {
|
||||
if err := f.Close(); err != nil {
|
||||
// Log but don't fail since we're already in an error path potentially
|
||||
fmt.Fprintf(os.Stderr, "Failed to close file: %v\n", err)
|
||||
}
|
||||
}()
|
||||
|
||||
// Process file in chunks
|
||||
chunkIndex := 0
|
||||
buffer := make([]byte, defaultChunkSize)
|
||||
|
||||
for {
|
||||
n, err := f.Read(buffer)
|
||||
if err != nil && err != io.EOF {
|
||||
return err
|
||||
}
|
||||
|
||||
fileCount++
|
||||
totalSize += info.Size()
|
||||
|
||||
// Read and process file in chunks
|
||||
f, err := fsys.Open(path)
|
||||
if err != nil {
|
||||
return err
|
||||
if n == 0 {
|
||||
break
|
||||
}
|
||||
defer func() {
|
||||
if err := f.Close(); err != nil {
|
||||
// Log but don't fail since we're already in an error path potentially
|
||||
fmt.Fprintf(os.Stderr, "Failed to close file: %v\n", err)
|
||||
}
|
||||
}()
|
||||
|
||||
// Process file in chunks
|
||||
chunkIndex := 0
|
||||
buffer := make([]byte, defaultChunkSize)
|
||||
chunkData := buffer[:n]
|
||||
chunkHash := calculateHash(chunkData)
|
||||
|
||||
for {
|
||||
n, err := f.Read(buffer)
|
||||
if err != nil && err != io.EOF {
|
||||
return err
|
||||
}
|
||||
if n == 0 {
|
||||
break
|
||||
}
|
||||
|
||||
chunkData := buffer[:n]
|
||||
chunkHash := calculateHash(chunkData)
|
||||
|
||||
// Check if chunk already exists
|
||||
existingChunk, _ := b.repos.Chunks.GetByHash(ctx, chunkHash)
|
||||
if existingChunk == nil {
|
||||
// Create new chunk
|
||||
// Check if chunk already exists (outside of transaction)
|
||||
existingChunk, _ := b.repos.Chunks.GetByHash(ctx, chunkHash)
|
||||
if existingChunk == nil {
|
||||
// Create new chunk in a short transaction
|
||||
err = b.repos.WithTx(ctx, func(ctx context.Context, tx *sql.Tx) error {
|
||||
chunk := &database.Chunk{
|
||||
ChunkHash: chunkHash,
|
||||
SHA256: chunkHash,
|
||||
Size: int64(n),
|
||||
}
|
||||
if err := b.repos.Chunks.Create(ctx, tx, chunk); err != nil {
|
||||
return err
|
||||
}
|
||||
processedChunks[chunkHash] = true
|
||||
return b.repos.Chunks.Create(ctx, tx, chunk)
|
||||
})
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
processedChunks[chunkHash] = true
|
||||
}
|
||||
|
||||
// Create file-chunk mapping
|
||||
// Create file-chunk mapping in a short transaction
|
||||
err = b.repos.WithTx(ctx, func(ctx context.Context, tx *sql.Tx) error {
|
||||
fileChunk := &database.FileChunk{
|
||||
Path: path,
|
||||
FileID: file.ID,
|
||||
Idx: chunkIndex,
|
||||
ChunkHash: chunkHash,
|
||||
}
|
||||
if err := b.repos.FileChunks.Create(ctx, tx, fileChunk); err != nil {
|
||||
return err
|
||||
}
|
||||
return b.repos.FileChunks.Create(ctx, tx, fileChunk)
|
||||
})
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// Create chunk-file mapping
|
||||
// Create chunk-file mapping in a short transaction
|
||||
err = b.repos.WithTx(ctx, func(ctx context.Context, tx *sql.Tx) error {
|
||||
chunkFile := &database.ChunkFile{
|
||||
ChunkHash: chunkHash,
|
||||
FilePath: path,
|
||||
FileID: file.ID,
|
||||
FileOffset: int64(chunkIndex * defaultChunkSize),
|
||||
Length: int64(n),
|
||||
}
|
||||
if err := b.repos.ChunkFiles.Create(ctx, tx, chunkFile); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
chunkIndex++
|
||||
return b.repos.ChunkFiles.Create(ctx, tx, chunkFile)
|
||||
})
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
return nil
|
||||
})
|
||||
chunkIndex++
|
||||
}
|
||||
|
||||
return err
|
||||
return nil
|
||||
})
|
||||
|
||||
if err != nil {
|
||||
@@ -436,61 +442,64 @@ func (b *BackupEngine) Backup(ctx context.Context, fsys fs.FS, root string) (str
|
||||
}
|
||||
|
||||
// After all files are processed, create blobs for new chunks
|
||||
err = b.repos.WithTx(ctx, func(ctx context.Context, tx *sql.Tx) error {
|
||||
for chunkHash := range processedChunks {
|
||||
// Get chunk data
|
||||
chunk, err := b.repos.Chunks.GetByHash(ctx, chunkHash)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
for chunkHash := range processedChunks {
|
||||
// Get chunk data (outside of transaction)
|
||||
chunk, err := b.repos.Chunks.GetByHash(ctx, chunkHash)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
|
||||
chunkCount++
|
||||
chunkCount++
|
||||
|
||||
// In a real system, blobs would contain multiple chunks and be encrypted
|
||||
// For testing, we'll create a blob with a "blob-" prefix to differentiate
|
||||
blobHash := "blob-" + chunkHash
|
||||
// In a real system, blobs would contain multiple chunks and be encrypted
|
||||
// For testing, we'll create a blob with a "blob-" prefix to differentiate
|
||||
blobHash := "blob-" + chunkHash
|
||||
|
||||
// For the test, we'll create dummy data since we don't have the original
|
||||
dummyData := []byte(chunkHash)
|
||||
// For the test, we'll create dummy data since we don't have the original
|
||||
dummyData := []byte(chunkHash)
|
||||
|
||||
// Upload to S3 as a blob
|
||||
if err := b.s3Client.PutBlob(ctx, blobHash, dummyData); err != nil {
|
||||
return err
|
||||
}
|
||||
// Upload to S3 as a blob
|
||||
if err := b.s3Client.PutBlob(ctx, blobHash, dummyData); err != nil {
|
||||
return "", err
|
||||
}
|
||||
|
||||
// Create blob entry
|
||||
// Create blob entry in a short transaction
|
||||
err = b.repos.WithTx(ctx, func(ctx context.Context, tx *sql.Tx) error {
|
||||
blob := &database.Blob{
|
||||
ID: "test-blob-" + blobHash[:8],
|
||||
Hash: blobHash,
|
||||
CreatedTS: time.Now(),
|
||||
}
|
||||
if err := b.repos.Blobs.Create(ctx, tx, blob); err != nil {
|
||||
return err
|
||||
}
|
||||
blobCount++
|
||||
blobSize += chunk.Size
|
||||
return b.repos.Blobs.Create(ctx, tx, blob)
|
||||
})
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
|
||||
// Create blob-chunk mapping
|
||||
blobCount++
|
||||
blobSize += chunk.Size
|
||||
|
||||
// Create blob-chunk mapping in a short transaction
|
||||
err = b.repos.WithTx(ctx, func(ctx context.Context, tx *sql.Tx) error {
|
||||
blobChunk := &database.BlobChunk{
|
||||
BlobID: blob.ID,
|
||||
BlobID: "test-blob-" + blobHash[:8],
|
||||
ChunkHash: chunkHash,
|
||||
Offset: 0,
|
||||
Length: chunk.Size,
|
||||
}
|
||||
if err := b.repos.BlobChunks.Create(ctx, tx, blobChunk); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// Add blob to snapshot
|
||||
if err := b.repos.Snapshots.AddBlob(ctx, tx, snapshotID, blob.ID, blob.Hash); err != nil {
|
||||
return err
|
||||
}
|
||||
return b.repos.BlobChunks.Create(ctx, tx, blobChunk)
|
||||
})
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
return nil
|
||||
})
|
||||
|
||||
if err != nil {
|
||||
return "", err
|
||||
// Add blob to snapshot in a short transaction
|
||||
err = b.repos.WithTx(ctx, func(ctx context.Context, tx *sql.Tx) error {
|
||||
return b.repos.Snapshots.AddBlob(ctx, tx, snapshotID, "test-blob-"+blobHash[:8], blobHash)
|
||||
})
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
}
|
||||
|
||||
// Update snapshot with final counts
|
||||
|
||||
@@ -13,7 +13,9 @@ type ScannerParams struct {
|
||||
EnableProgress bool
|
||||
}
|
||||
|
||||
// Module exports backup functionality
|
||||
// Module exports backup functionality as an fx module.
|
||||
// It provides a ScannerFactory that can create Scanner instances
|
||||
// with custom parameters while sharing common dependencies.
|
||||
var Module = fx.Module("backup",
|
||||
fx.Provide(
|
||||
provideScannerFactory,
|
||||
|
||||
@@ -15,9 +15,13 @@ import (
|
||||
)
|
||||
|
||||
const (
|
||||
// Progress reporting intervals
|
||||
SummaryInterval = 10 * time.Second // One-line status updates
|
||||
DetailInterval = 60 * time.Second // Multi-line detailed status
|
||||
// SummaryInterval defines how often one-line status updates are printed.
|
||||
// These updates show current progress, ETA, and the file being processed.
|
||||
SummaryInterval = 10 * time.Second
|
||||
|
||||
// DetailInterval defines how often multi-line detailed status reports are printed.
|
||||
// These reports include comprehensive statistics about files, chunks, blobs, and uploads.
|
||||
DetailInterval = 60 * time.Second
|
||||
)
|
||||
|
||||
// ProgressStats holds atomic counters for progress tracking
|
||||
@@ -32,6 +36,7 @@ type ProgressStats struct {
|
||||
BlobsCreated atomic.Int64
|
||||
BlobsUploaded atomic.Int64
|
||||
BytesUploaded atomic.Int64
|
||||
UploadDurationMs atomic.Int64 // Total milliseconds spent uploading to S3
|
||||
CurrentFile atomic.Value // stores string
|
||||
TotalSize atomic.Int64 // Total size to process (set after scan phase)
|
||||
TotalFiles atomic.Int64 // Total files to process in phase 2
|
||||
@@ -66,8 +71,8 @@ type ProgressReporter struct {
|
||||
// NewProgressReporter creates a new progress reporter
|
||||
func NewProgressReporter() *ProgressReporter {
|
||||
stats := &ProgressStats{
|
||||
StartTime: time.Now(),
|
||||
lastDetailTime: time.Now(),
|
||||
StartTime: time.Now().UTC(),
|
||||
lastDetailTime: time.Now().UTC(),
|
||||
}
|
||||
stats.CurrentFile.Store("")
|
||||
|
||||
@@ -115,7 +120,7 @@ func (pr *ProgressReporter) GetStats() *ProgressStats {
|
||||
// SetTotalSize sets the total size to process (after scan phase)
|
||||
func (pr *ProgressReporter) SetTotalSize(size int64) {
|
||||
pr.stats.TotalSize.Store(size)
|
||||
pr.stats.ProcessStartTime.Store(time.Now())
|
||||
pr.stats.ProcessStartTime.Store(time.Now().UTC())
|
||||
}
|
||||
|
||||
// run is the main progress reporting loop
|
||||
@@ -186,7 +191,7 @@ func (pr *ProgressReporter) printSummaryStatus() {
|
||||
filesProcessed := pr.stats.FilesProcessed.Load()
|
||||
totalFiles := pr.stats.TotalFiles.Load()
|
||||
|
||||
status := fmt.Sprintf("Progress: %d/%d files, %s/%s (%.1f%%), %s/s%s",
|
||||
status := fmt.Sprintf("Snapshot progress: %d/%d files, %s/%s (%.1f%%), %s/s%s",
|
||||
filesProcessed,
|
||||
totalFiles,
|
||||
humanize.Bytes(uint64(bytesProcessed)),
|
||||
@@ -206,7 +211,7 @@ func (pr *ProgressReporter) printSummaryStatus() {
|
||||
// printDetailedStatus prints a multi-line detailed status
|
||||
func (pr *ProgressReporter) printDetailedStatus() {
|
||||
pr.stats.mu.Lock()
|
||||
pr.stats.lastDetailTime = time.Now()
|
||||
pr.stats.lastDetailTime = time.Now().UTC()
|
||||
pr.stats.mu.Unlock()
|
||||
|
||||
elapsed := time.Since(pr.stats.StartTime)
|
||||
@@ -225,7 +230,7 @@ func (pr *ProgressReporter) printDetailedStatus() {
|
||||
totalBytes := bytesScanned + bytesSkipped
|
||||
rate := float64(totalBytes) / elapsed.Seconds()
|
||||
|
||||
log.Notice("=== Backup Progress Report ===")
|
||||
log.Notice("=== Snapshot Progress Report ===")
|
||||
log.Info("Elapsed time", "duration", formatDuration(elapsed))
|
||||
|
||||
// Calculate and show ETA if we have data
|
||||
@@ -264,7 +269,7 @@ func (pr *ProgressReporter) printDetailedStatus() {
|
||||
"created", blobsCreated,
|
||||
"uploaded", blobsUploaded,
|
||||
"pending", blobsCreated-blobsUploaded)
|
||||
log.Info("Upload progress",
|
||||
log.Info("Total uploaded to S3",
|
||||
"uploaded", humanize.Bytes(uint64(bytesUploaded)),
|
||||
"compression_ratio", formatRatio(bytesUploaded, bytesScanned))
|
||||
if currentFile != "" {
|
||||
@@ -313,31 +318,8 @@ func truncatePath(path string, maxLen int) string {
|
||||
|
||||
// printUploadProgress prints upload progress
|
||||
func (pr *ProgressReporter) printUploadProgress(info *UploadInfo) {
|
||||
elapsed := time.Since(info.StartTime)
|
||||
if elapsed < time.Millisecond {
|
||||
elapsed = time.Millisecond // Avoid division by zero
|
||||
}
|
||||
|
||||
bytesPerSec := float64(info.Size) / elapsed.Seconds()
|
||||
bitsPerSec := bytesPerSec * 8
|
||||
|
||||
// Format speed in bits/second
|
||||
var speedStr string
|
||||
if bitsPerSec >= 1e9 {
|
||||
speedStr = fmt.Sprintf("%.1fGbit/sec", bitsPerSec/1e9)
|
||||
} else if bitsPerSec >= 1e6 {
|
||||
speedStr = fmt.Sprintf("%.0fMbit/sec", bitsPerSec/1e6)
|
||||
} else if bitsPerSec >= 1e3 {
|
||||
speedStr = fmt.Sprintf("%.0fKbit/sec", bitsPerSec/1e3)
|
||||
} else {
|
||||
speedStr = fmt.Sprintf("%.0fbit/sec", bitsPerSec)
|
||||
}
|
||||
|
||||
log.Info("Uploading blob",
|
||||
"hash", info.BlobHash[:8]+"...",
|
||||
"size", humanize.Bytes(uint64(info.Size)),
|
||||
"elapsed", formatDuration(elapsed),
|
||||
"speed", speedStr)
|
||||
// This function is called repeatedly during upload, not just at start
|
||||
// Don't print anything here - the actual progress is shown by ReportUploadProgress
|
||||
}
|
||||
|
||||
// ReportUploadStart marks the beginning of a blob upload
|
||||
@@ -345,7 +327,7 @@ func (pr *ProgressReporter) ReportUploadStart(blobHash string, size int64) {
|
||||
info := &UploadInfo{
|
||||
BlobHash: blobHash,
|
||||
Size: size,
|
||||
StartTime: time.Now(),
|
||||
StartTime: time.Now().UTC(),
|
||||
}
|
||||
pr.stats.CurrentUpload.Store(info)
|
||||
}
|
||||
@@ -355,6 +337,9 @@ func (pr *ProgressReporter) ReportUploadComplete(blobHash string, size int64, du
|
||||
// Clear current upload
|
||||
pr.stats.CurrentUpload.Store((*UploadInfo)(nil))
|
||||
|
||||
// Add to total upload duration
|
||||
pr.stats.UploadDurationMs.Add(duration.Milliseconds())
|
||||
|
||||
// Calculate speed
|
||||
if duration < time.Millisecond {
|
||||
duration = time.Millisecond
|
||||
@@ -374,7 +359,7 @@ func (pr *ProgressReporter) ReportUploadComplete(blobHash string, size int64, du
|
||||
speedStr = fmt.Sprintf("%.0fbit/sec", bitsPerSec)
|
||||
}
|
||||
|
||||
log.Info("Blob uploaded",
|
||||
log.Info("Blob upload completed",
|
||||
"hash", blobHash[:8]+"...",
|
||||
"size", humanize.Bytes(uint64(size)),
|
||||
"duration", formatDuration(duration),
|
||||
@@ -384,6 +369,44 @@ func (pr *ProgressReporter) ReportUploadComplete(blobHash string, size int64, du
|
||||
// UpdateChunkingActivity updates the last chunking time
|
||||
func (pr *ProgressReporter) UpdateChunkingActivity() {
|
||||
pr.stats.mu.Lock()
|
||||
pr.stats.lastChunkingTime = time.Now()
|
||||
pr.stats.lastChunkingTime = time.Now().UTC()
|
||||
pr.stats.mu.Unlock()
|
||||
}
|
||||
|
||||
// ReportUploadProgress reports current upload progress with instantaneous speed
|
||||
func (pr *ProgressReporter) ReportUploadProgress(blobHash string, bytesUploaded, totalSize int64, instantSpeed float64) {
|
||||
// Update the current upload info with progress
|
||||
if uploadInfo, ok := pr.stats.CurrentUpload.Load().(*UploadInfo); ok && uploadInfo != nil {
|
||||
// Format speed in bits/second
|
||||
bitsPerSec := instantSpeed * 8
|
||||
var speedStr string
|
||||
if bitsPerSec >= 1e9 {
|
||||
speedStr = fmt.Sprintf("%.1fGbit/sec", bitsPerSec/1e9)
|
||||
} else if bitsPerSec >= 1e6 {
|
||||
speedStr = fmt.Sprintf("%.0fMbit/sec", bitsPerSec/1e6)
|
||||
} else if bitsPerSec >= 1e3 {
|
||||
speedStr = fmt.Sprintf("%.0fKbit/sec", bitsPerSec/1e3)
|
||||
} else {
|
||||
speedStr = fmt.Sprintf("%.0fbit/sec", bitsPerSec)
|
||||
}
|
||||
|
||||
percent := float64(bytesUploaded) / float64(totalSize) * 100
|
||||
|
||||
// Calculate ETA based on current speed
|
||||
etaStr := "unknown"
|
||||
if instantSpeed > 0 && bytesUploaded < totalSize {
|
||||
remainingBytes := totalSize - bytesUploaded
|
||||
remainingSeconds := float64(remainingBytes) / instantSpeed
|
||||
eta := time.Duration(remainingSeconds * float64(time.Second))
|
||||
etaStr = formatDuration(eta)
|
||||
}
|
||||
|
||||
log.Info("Blob upload progress",
|
||||
"hash", blobHash[:8]+"...",
|
||||
"progress", fmt.Sprintf("%.1f%%", percent),
|
||||
"uploaded", humanize.Bytes(uint64(bytesUploaded)),
|
||||
"total", humanize.Bytes(uint64(totalSize)),
|
||||
"speed", speedStr,
|
||||
"eta", etaStr)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -15,6 +15,7 @@ import (
|
||||
"git.eeqj.de/sneak/vaultik/internal/crypto"
|
||||
"git.eeqj.de/sneak/vaultik/internal/database"
|
||||
"git.eeqj.de/sneak/vaultik/internal/log"
|
||||
"git.eeqj.de/sneak/vaultik/internal/s3"
|
||||
"github.com/dustin/go-humanize"
|
||||
"github.com/spf13/afero"
|
||||
)
|
||||
@@ -49,6 +50,8 @@ type Scanner struct {
|
||||
// S3Client interface for blob storage operations
|
||||
type S3Client interface {
|
||||
PutObject(ctx context.Context, key string, data io.Reader) error
|
||||
PutObjectWithProgress(ctx context.Context, key string, data io.Reader, size int64, progress s3.ProgressCallback) error
|
||||
StatObject(ctx context.Context, key string) (*s3.ObjectInfo, error)
|
||||
}
|
||||
|
||||
// ScannerConfig contains configuration for the scanner
|
||||
@@ -125,7 +128,7 @@ func (s *Scanner) Scan(ctx context.Context, path string, snapshotID string) (*Sc
|
||||
s.snapshotID = snapshotID
|
||||
s.scanCtx = ctx
|
||||
result := &ScanResult{
|
||||
StartTime: time.Now(),
|
||||
StartTime: time.Now().UTC(),
|
||||
}
|
||||
|
||||
// Set blob handler for concurrent upload
|
||||
@@ -143,7 +146,7 @@ func (s *Scanner) Scan(ctx context.Context, path string, snapshotID string) (*Sc
|
||||
}
|
||||
|
||||
// Phase 1: Scan directory and collect files to process
|
||||
log.Info("Phase 1: Scanning directory structure")
|
||||
log.Info("Phase 1/3: Scanning directory structure")
|
||||
filesToProcess, err := s.scanPhase(ctx, path, result)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("scan phase failed: %w", err)
|
||||
@@ -169,7 +172,7 @@ func (s *Scanner) Scan(ctx context.Context, path string, snapshotID string) (*Sc
|
||||
|
||||
// Phase 2: Process files and create chunks
|
||||
if len(filesToProcess) > 0 {
|
||||
log.Info("Phase 2: Processing files and creating chunks")
|
||||
log.Info("Phase 2/3: Creating snapshot (chunking, compressing, encrypting, and uploading blobs)")
|
||||
if err := s.processPhase(ctx, filesToProcess, result); err != nil {
|
||||
return nil, fmt.Errorf("process phase failed: %w", err)
|
||||
}
|
||||
@@ -179,7 +182,7 @@ func (s *Scanner) Scan(ctx context.Context, path string, snapshotID string) (*Sc
|
||||
blobs := s.packer.GetFinishedBlobs()
|
||||
result.BlobsCreated += len(blobs)
|
||||
|
||||
result.EndTime = time.Now()
|
||||
result.EndTime = time.Now().UTC()
|
||||
return result, nil
|
||||
}
|
||||
|
||||
@@ -290,21 +293,12 @@ func (s *Scanner) checkFileAndUpdateMetadata(ctx context.Context, path string, i
|
||||
default:
|
||||
}
|
||||
|
||||
var file *database.File
|
||||
var needsProcessing bool
|
||||
|
||||
// Use a short transaction just for the database operations
|
||||
err := s.repos.WithTx(ctx, func(txCtx context.Context, tx *sql.Tx) error {
|
||||
var err error
|
||||
file, needsProcessing, err = s.checkFile(txCtx, tx, path, info, result)
|
||||
return err
|
||||
})
|
||||
|
||||
return file, needsProcessing, err
|
||||
// Process file without holding a long transaction
|
||||
return s.checkFile(ctx, path, info, result)
|
||||
}
|
||||
|
||||
// checkFile checks if a file needs processing and updates metadata within a transaction
|
||||
func (s *Scanner) checkFile(ctx context.Context, tx *sql.Tx, path string, info os.FileInfo, result *ScanResult) (*database.File, bool, error) {
|
||||
// checkFile checks if a file needs processing and updates metadata
|
||||
func (s *Scanner) checkFile(ctx context.Context, path string, info os.FileInfo, result *ScanResult) (*database.File, bool, error) {
|
||||
// Get file stats
|
||||
stat, ok := info.Sys().(interface {
|
||||
Uid() uint32
|
||||
@@ -338,25 +332,31 @@ func (s *Scanner) checkFile(ctx context.Context, tx *sql.Tx, path string, info o
|
||||
LinkTarget: linkTarget,
|
||||
}
|
||||
|
||||
// Check if file has changed since last backup
|
||||
// Check if file has changed since last backup (no transaction needed for read)
|
||||
log.Debug("Checking if file exists in database", "path", path)
|
||||
existingFile, err := s.repos.Files.GetByPathTx(ctx, tx, path)
|
||||
existingFile, err := s.repos.Files.GetByPath(ctx, path)
|
||||
if err != nil {
|
||||
return nil, false, fmt.Errorf("checking existing file: %w", err)
|
||||
}
|
||||
|
||||
fileChanged := existingFile == nil || s.hasFileChanged(existingFile, file)
|
||||
|
||||
// Always update file metadata
|
||||
// Update file metadata in a short transaction
|
||||
log.Debug("Updating file metadata", "path", path, "changed", fileChanged)
|
||||
if err := s.repos.Files.Create(ctx, tx, file); err != nil {
|
||||
err = s.repos.WithTx(ctx, func(ctx context.Context, tx *sql.Tx) error {
|
||||
return s.repos.Files.Create(ctx, tx, file)
|
||||
})
|
||||
if err != nil {
|
||||
return nil, false, err
|
||||
}
|
||||
log.Debug("File metadata updated", "path", path)
|
||||
|
||||
// Add file to snapshot
|
||||
// Add file to snapshot in a short transaction
|
||||
log.Debug("Adding file to snapshot", "path", path, "snapshot", s.snapshotID)
|
||||
if err := s.repos.Snapshots.AddFile(ctx, tx, s.snapshotID, path); err != nil {
|
||||
err = s.repos.WithTx(ctx, func(ctx context.Context, tx *sql.Tx) error {
|
||||
return s.repos.Snapshots.AddFile(ctx, tx, s.snapshotID, path)
|
||||
})
|
||||
if err != nil {
|
||||
return nil, false, fmt.Errorf("adding file to snapshot: %w", err)
|
||||
}
|
||||
log.Debug("File added to snapshot", "path", path)
|
||||
@@ -381,7 +381,7 @@ func (s *Scanner) checkFile(ctx context.Context, tx *sql.Tx, path string, info o
|
||||
}
|
||||
// File hasn't changed, but we still need to associate existing chunks with this snapshot
|
||||
log.Debug("File hasn't changed, associating existing chunks", "path", path)
|
||||
if err := s.associateExistingChunks(ctx, tx, path); err != nil {
|
||||
if err := s.associateExistingChunks(ctx, path); err != nil {
|
||||
return nil, false, fmt.Errorf("associating existing chunks: %w", err)
|
||||
}
|
||||
log.Debug("Existing chunks associated", "path", path)
|
||||
@@ -421,25 +421,25 @@ func (s *Scanner) hasFileChanged(existingFile, newFile *database.File) bool {
|
||||
}
|
||||
|
||||
// associateExistingChunks links existing chunks from an unchanged file to the current snapshot
|
||||
func (s *Scanner) associateExistingChunks(ctx context.Context, tx *sql.Tx, path string) error {
|
||||
func (s *Scanner) associateExistingChunks(ctx context.Context, path string) error {
|
||||
log.Debug("associateExistingChunks start", "path", path)
|
||||
|
||||
// Get existing file chunks
|
||||
// Get existing file chunks (no transaction needed for read)
|
||||
log.Debug("Getting existing file chunks", "path", path)
|
||||
fileChunks, err := s.repos.FileChunks.GetByFileTx(ctx, tx, path)
|
||||
fileChunks, err := s.repos.FileChunks.GetByFile(ctx, path)
|
||||
if err != nil {
|
||||
return fmt.Errorf("getting existing file chunks: %w", err)
|
||||
}
|
||||
log.Debug("Got file chunks", "path", path, "count", len(fileChunks))
|
||||
|
||||
// For each chunk, find its blob and associate with current snapshot
|
||||
processedBlobs := make(map[string]bool)
|
||||
// Collect unique blob IDs that need to be added to snapshot
|
||||
blobsToAdd := make(map[string]string) // blob ID -> blob hash
|
||||
for i, fc := range fileChunks {
|
||||
log.Debug("Processing chunk", "path", path, "chunk_index", i, "chunk_hash", fc.ChunkHash)
|
||||
|
||||
// Find which blob contains this chunk
|
||||
// Find which blob contains this chunk (no transaction needed for read)
|
||||
log.Debug("Finding blob for chunk", "chunk_hash", fc.ChunkHash)
|
||||
blobChunk, err := s.repos.BlobChunks.GetByChunkHashTx(ctx, tx, fc.ChunkHash)
|
||||
blobChunk, err := s.repos.BlobChunks.GetByChunkHash(ctx, fc.ChunkHash)
|
||||
if err != nil {
|
||||
return fmt.Errorf("finding blob for chunk %s: %w", fc.ChunkHash, err)
|
||||
}
|
||||
@@ -449,28 +449,39 @@ func (s *Scanner) associateExistingChunks(ctx context.Context, tx *sql.Tx, path
|
||||
}
|
||||
log.Debug("Found blob for chunk", "chunk_hash", fc.ChunkHash, "blob_id", blobChunk.BlobID)
|
||||
|
||||
// Get blob to find its hash
|
||||
blob, err := s.repos.Blobs.GetByID(ctx, blobChunk.BlobID)
|
||||
if err != nil {
|
||||
return fmt.Errorf("getting blob %s: %w", blobChunk.BlobID, err)
|
||||
}
|
||||
if blob == nil {
|
||||
log.Warn("Blob record not found", "blob_id", blobChunk.BlobID)
|
||||
continue
|
||||
}
|
||||
|
||||
// Add blob to snapshot if not already processed
|
||||
if !processedBlobs[blobChunk.BlobID] {
|
||||
log.Debug("Adding blob to snapshot", "blob_id", blobChunk.BlobID, "blob_hash", blob.Hash, "snapshot", s.snapshotID)
|
||||
if err := s.repos.Snapshots.AddBlob(ctx, tx, s.snapshotID, blobChunk.BlobID, blob.Hash); err != nil {
|
||||
return fmt.Errorf("adding existing blob to snapshot: %w", err)
|
||||
}
|
||||
log.Debug("Added blob to snapshot", "blob_id", blobChunk.BlobID)
|
||||
processedBlobs[blobChunk.BlobID] = true
|
||||
// Track blob ID for later processing
|
||||
if _, exists := blobsToAdd[blobChunk.BlobID]; !exists {
|
||||
blobsToAdd[blobChunk.BlobID] = "" // We'll get the hash later
|
||||
}
|
||||
}
|
||||
|
||||
log.Debug("associateExistingChunks complete", "path", path, "blobs_processed", len(processedBlobs))
|
||||
// Now get blob hashes outside of transaction operations
|
||||
for blobID := range blobsToAdd {
|
||||
blob, err := s.repos.Blobs.GetByID(ctx, blobID)
|
||||
if err != nil {
|
||||
return fmt.Errorf("getting blob %s: %w", blobID, err)
|
||||
}
|
||||
if blob == nil {
|
||||
log.Warn("Blob record not found", "blob_id", blobID)
|
||||
delete(blobsToAdd, blobID)
|
||||
continue
|
||||
}
|
||||
blobsToAdd[blobID] = blob.Hash
|
||||
}
|
||||
|
||||
// Add blobs to snapshot using short transactions
|
||||
for blobID, blobHash := range blobsToAdd {
|
||||
log.Debug("Adding blob to snapshot", "blob_id", blobID, "blob_hash", blobHash, "snapshot", s.snapshotID)
|
||||
err := s.repos.WithTx(ctx, func(ctx context.Context, tx *sql.Tx) error {
|
||||
return s.repos.Snapshots.AddBlob(ctx, tx, s.snapshotID, blobID, blobHash)
|
||||
})
|
||||
if err != nil {
|
||||
return fmt.Errorf("adding existing blob to snapshot: %w", err)
|
||||
}
|
||||
log.Debug("Added blob to snapshot", "blob_id", blobID)
|
||||
}
|
||||
|
||||
log.Debug("associateExistingChunks complete", "path", path, "blobs_processed", len(blobsToAdd))
|
||||
return nil
|
||||
}
|
||||
|
||||
@@ -478,7 +489,7 @@ func (s *Scanner) associateExistingChunks(ctx context.Context, tx *sql.Tx, path
|
||||
func (s *Scanner) handleBlobReady(blobWithReader *blob.BlobWithReader) error {
|
||||
log.Debug("Blob handler called", "blob_hash", blobWithReader.Hash[:8]+"...")
|
||||
|
||||
startTime := time.Now()
|
||||
startTime := time.Now().UTC()
|
||||
finishedBlob := blobWithReader.FinishedBlob
|
||||
|
||||
// Report upload start
|
||||
@@ -492,7 +503,40 @@ func (s *Scanner) handleBlobReady(blobWithReader *blob.BlobWithReader) error {
|
||||
if ctx == nil {
|
||||
ctx = context.Background()
|
||||
}
|
||||
if err := s.s3Client.PutObject(ctx, "blobs/"+finishedBlob.Hash, blobWithReader.Reader); err != nil {
|
||||
|
||||
// Track bytes uploaded for accurate speed calculation
|
||||
lastProgressTime := time.Now()
|
||||
lastProgressBytes := int64(0)
|
||||
|
||||
progressCallback := func(uploaded int64) error {
|
||||
|
||||
// Calculate instantaneous speed
|
||||
now := time.Now()
|
||||
elapsed := now.Sub(lastProgressTime).Seconds()
|
||||
if elapsed > 0.5 { // Update speed every 0.5 seconds
|
||||
bytesSinceLastUpdate := uploaded - lastProgressBytes
|
||||
speed := float64(bytesSinceLastUpdate) / elapsed
|
||||
|
||||
if s.progress != nil {
|
||||
s.progress.ReportUploadProgress(finishedBlob.Hash, uploaded, finishedBlob.Compressed, speed)
|
||||
}
|
||||
|
||||
lastProgressTime = now
|
||||
lastProgressBytes = uploaded
|
||||
}
|
||||
|
||||
// Check for cancellation
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
return ctx.Err()
|
||||
default:
|
||||
return nil
|
||||
}
|
||||
}
|
||||
|
||||
// Create sharded path: blobs/ca/fe/cafebabe...
|
||||
blobPath := fmt.Sprintf("blobs/%s/%s/%s", finishedBlob.Hash[:2], finishedBlob.Hash[2:4], finishedBlob.Hash)
|
||||
if err := s.s3Client.PutObjectWithProgress(ctx, blobPath, blobWithReader.Reader, finishedBlob.Compressed, progressCallback); err != nil {
|
||||
return fmt.Errorf("uploading blob %s to S3: %w", finishedBlob.Hash, err)
|
||||
}
|
||||
|
||||
@@ -574,8 +618,8 @@ func (s *Scanner) processFileStreaming(ctx context.Context, fileToProcess *FileT
|
||||
var chunks []chunkInfo
|
||||
chunkIndex := 0
|
||||
|
||||
// Process chunks in streaming fashion
|
||||
err = s.chunker.ChunkReaderStreaming(file, func(chunk chunker.Chunk) error {
|
||||
// Process chunks in streaming fashion and get full file hash
|
||||
fileHash, err := s.chunker.ChunkReaderStreaming(file, func(chunk chunker.Chunk) error {
|
||||
// Check for cancellation
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
@@ -589,17 +633,16 @@ func (s *Scanner) processFileStreaming(ctx context.Context, fileToProcess *FileT
|
||||
"hash", chunk.Hash,
|
||||
"size", chunk.Size)
|
||||
|
||||
// Check if chunk already exists
|
||||
chunkExists := false
|
||||
err := s.repos.WithTx(ctx, func(txCtx context.Context, tx *sql.Tx) error {
|
||||
existing, err := s.repos.Chunks.GetByHash(txCtx, chunk.Hash)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
chunkExists = (existing != nil)
|
||||
// Check if chunk already exists (outside of transaction)
|
||||
existing, err := s.repos.Chunks.GetByHash(ctx, chunk.Hash)
|
||||
if err != nil {
|
||||
return fmt.Errorf("checking chunk existence: %w", err)
|
||||
}
|
||||
chunkExists := (existing != nil)
|
||||
|
||||
// Store chunk if new
|
||||
if !chunkExists {
|
||||
// Store chunk if new
|
||||
if !chunkExists {
|
||||
err := s.repos.WithTx(ctx, func(txCtx context.Context, tx *sql.Tx) error {
|
||||
dbChunk := &database.Chunk{
|
||||
ChunkHash: chunk.Hash,
|
||||
SHA256: chunk.Hash,
|
||||
@@ -608,17 +651,17 @@ func (s *Scanner) processFileStreaming(ctx context.Context, fileToProcess *FileT
|
||||
if err := s.repos.Chunks.Create(txCtx, tx, dbChunk); err != nil {
|
||||
return fmt.Errorf("creating chunk: %w", err)
|
||||
}
|
||||
return nil
|
||||
})
|
||||
if err != nil {
|
||||
return fmt.Errorf("storing chunk: %w", err)
|
||||
}
|
||||
return nil
|
||||
})
|
||||
if err != nil {
|
||||
return fmt.Errorf("checking/storing chunk: %w", err)
|
||||
}
|
||||
|
||||
// Track file chunk association for later storage
|
||||
chunks = append(chunks, chunkInfo{
|
||||
fileChunk: database.FileChunk{
|
||||
Path: fileToProcess.Path,
|
||||
FileID: fileToProcess.File.ID,
|
||||
Idx: chunkIndex,
|
||||
ChunkHash: chunk.Hash,
|
||||
},
|
||||
@@ -683,6 +726,11 @@ func (s *Scanner) processFileStreaming(ctx context.Context, fileToProcess *FileT
|
||||
return fmt.Errorf("chunking file: %w", err)
|
||||
}
|
||||
|
||||
log.Debug("Completed chunking file",
|
||||
"path", fileToProcess.Path,
|
||||
"file_hash", fileHash,
|
||||
"chunks", len(chunks))
|
||||
|
||||
// Store file-chunk associations and chunk-file mappings in database
|
||||
err = s.repos.WithTx(ctx, func(txCtx context.Context, tx *sql.Tx) error {
|
||||
for _, ci := range chunks {
|
||||
@@ -694,7 +742,7 @@ func (s *Scanner) processFileStreaming(ctx context.Context, fileToProcess *FileT
|
||||
// Create chunk-file mapping
|
||||
chunkFile := &database.ChunkFile{
|
||||
ChunkHash: ci.fileChunk.ChunkHash,
|
||||
FilePath: fileToProcess.Path,
|
||||
FileID: fileToProcess.File.ID,
|
||||
FileOffset: ci.offset,
|
||||
Length: ci.size,
|
||||
}
|
||||
@@ -704,7 +752,7 @@ func (s *Scanner) processFileStreaming(ctx context.Context, fileToProcess *FileT
|
||||
}
|
||||
|
||||
// Add file to snapshot
|
||||
if err := s.repos.Snapshots.AddFile(txCtx, tx, s.snapshotID, fileToProcess.Path); err != nil {
|
||||
if err := s.repos.Snapshots.AddFileByID(txCtx, tx, s.snapshotID, fileToProcess.File.ID); err != nil {
|
||||
return fmt.Errorf("adding file to snapshot: %w", err)
|
||||
}
|
||||
|
||||
@@ -713,3 +761,8 @@ func (s *Scanner) processFileStreaming(ctx context.Context, fileToProcess *FileT
|
||||
|
||||
return err
|
||||
}
|
||||
|
||||
// GetProgress returns the progress reporter for this scanner
|
||||
func (s *Scanner) GetProgress() *ProgressReporter {
|
||||
return s.progress
|
||||
}
|
||||
|
||||
@@ -213,7 +213,7 @@ func TestScannerWithSymlinks(t *testing.T) {
|
||||
Repositories: repos,
|
||||
MaxBlobSize: int64(1024 * 1024),
|
||||
CompressionLevel: 3,
|
||||
AgeRecipients: []string{},
|
||||
AgeRecipients: []string{"age1ezrjmfpwsc95svdg0y54mums3zevgzu0x0ecq2f7tp8a05gl0sjq9q9wjg"}, // Test public key
|
||||
})
|
||||
|
||||
// Create a snapshot record for testing
|
||||
@@ -314,7 +314,7 @@ func TestScannerLargeFile(t *testing.T) {
|
||||
Repositories: repos,
|
||||
MaxBlobSize: int64(1024 * 1024),
|
||||
CompressionLevel: 3,
|
||||
AgeRecipients: []string{},
|
||||
AgeRecipients: []string{"age1ezrjmfpwsc95svdg0y54mums3zevgzu0x0ecq2f7tp8a05gl0sjq9q9wjg"}, // Test public key
|
||||
})
|
||||
|
||||
// Create a snapshot record for testing
|
||||
|
||||
@@ -78,21 +78,22 @@ func NewSnapshotManager(repos *database.Repositories, s3Client S3Client, encrypt
|
||||
}
|
||||
|
||||
// CreateSnapshot creates a new snapshot record in the database at the start of a backup
|
||||
func (sm *SnapshotManager) CreateSnapshot(ctx context.Context, hostname, version string) (string, error) {
|
||||
snapshotID := fmt.Sprintf("%s-%s", hostname, time.Now().Format("20060102-150405"))
|
||||
func (sm *SnapshotManager) CreateSnapshot(ctx context.Context, hostname, version, gitRevision string) (string, error) {
|
||||
snapshotID := fmt.Sprintf("%s-%s", hostname, time.Now().UTC().Format("20060102-150405Z"))
|
||||
|
||||
snapshot := &database.Snapshot{
|
||||
ID: snapshotID,
|
||||
Hostname: hostname,
|
||||
VaultikVersion: version,
|
||||
StartedAt: time.Now(),
|
||||
CompletedAt: nil, // Not completed yet
|
||||
FileCount: 0,
|
||||
ChunkCount: 0,
|
||||
BlobCount: 0,
|
||||
TotalSize: 0,
|
||||
BlobSize: 0,
|
||||
CompressionRatio: 1.0,
|
||||
ID: snapshotID,
|
||||
Hostname: hostname,
|
||||
VaultikVersion: version,
|
||||
VaultikGitRevision: gitRevision,
|
||||
StartedAt: time.Now().UTC(),
|
||||
CompletedAt: nil, // Not completed yet
|
||||
FileCount: 0,
|
||||
ChunkCount: 0,
|
||||
BlobCount: 0,
|
||||
TotalSize: 0,
|
||||
BlobSize: 0,
|
||||
CompressionRatio: 1.0,
|
||||
}
|
||||
|
||||
err := sm.repos.WithTx(ctx, func(ctx context.Context, tx *sql.Tx) error {
|
||||
@@ -126,6 +127,30 @@ func (sm *SnapshotManager) UpdateSnapshotStats(ctx context.Context, snapshotID s
|
||||
return nil
|
||||
}
|
||||
|
||||
// UpdateSnapshotStatsExtended updates snapshot statistics with extended metrics.
|
||||
// This includes compression level, uncompressed blob size, and upload duration.
|
||||
func (sm *SnapshotManager) UpdateSnapshotStatsExtended(ctx context.Context, snapshotID string, stats ExtendedBackupStats) error {
|
||||
return sm.repos.WithTx(ctx, func(ctx context.Context, tx *sql.Tx) error {
|
||||
// First update basic stats
|
||||
if err := sm.repos.Snapshots.UpdateCounts(ctx, tx, snapshotID,
|
||||
int64(stats.FilesScanned),
|
||||
int64(stats.ChunksCreated),
|
||||
int64(stats.BlobsCreated),
|
||||
stats.BytesScanned,
|
||||
stats.BytesUploaded,
|
||||
); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// Then update extended stats
|
||||
return sm.repos.Snapshots.UpdateExtendedStats(ctx, tx, snapshotID,
|
||||
stats.BlobUncompressedSize,
|
||||
stats.CompressionLevel,
|
||||
stats.UploadDurationMs,
|
||||
)
|
||||
})
|
||||
}
|
||||
|
||||
// CompleteSnapshot marks a snapshot as completed and exports its metadata
|
||||
func (sm *SnapshotManager) CompleteSnapshot(ctx context.Context, snapshotID string) error {
|
||||
// Mark the snapshot as completed
|
||||
@@ -158,14 +183,16 @@ func (sm *SnapshotManager) CompleteSnapshot(ctx context.Context, snapshotID stri
|
||||
//
|
||||
// This ensures database consistency during the copy operation.
|
||||
func (sm *SnapshotManager) ExportSnapshotMetadata(ctx context.Context, dbPath string, snapshotID string) error {
|
||||
log.Info("Exporting snapshot metadata", "snapshot_id", snapshotID)
|
||||
log.Info("Phase 3/3: Exporting snapshot metadata", "snapshot_id", snapshotID, "source_db", dbPath)
|
||||
|
||||
// Create temp directory for all temporary files
|
||||
tempDir, err := os.MkdirTemp("", "vaultik-snapshot-*")
|
||||
if err != nil {
|
||||
return fmt.Errorf("creating temp dir: %w", err)
|
||||
}
|
||||
log.Debug("Created temporary directory", "path", tempDir)
|
||||
defer func() {
|
||||
log.Debug("Cleaning up temporary directory", "path", tempDir)
|
||||
if err := os.RemoveAll(tempDir); err != nil {
|
||||
log.Debug("Failed to remove temp dir", "path", tempDir, "error", err)
|
||||
}
|
||||
@@ -174,28 +201,37 @@ func (sm *SnapshotManager) ExportSnapshotMetadata(ctx context.Context, dbPath st
|
||||
// Step 1: Copy database to temp file
|
||||
// The main database should be closed at this point
|
||||
tempDBPath := filepath.Join(tempDir, "snapshot.db")
|
||||
log.Debug("Copying database to temporary location", "source", dbPath, "destination", tempDBPath)
|
||||
if err := copyFile(dbPath, tempDBPath); err != nil {
|
||||
return fmt.Errorf("copying database: %w", err)
|
||||
}
|
||||
log.Debug("Database copy complete", "size", getFileSize(tempDBPath))
|
||||
|
||||
// Step 2: Clean the temp database to only contain current snapshot data
|
||||
log.Debug("Cleaning snapshot database to contain only current snapshot", "snapshot_id", snapshotID)
|
||||
if err := sm.cleanSnapshotDB(ctx, tempDBPath, snapshotID); err != nil {
|
||||
return fmt.Errorf("cleaning snapshot database: %w", err)
|
||||
}
|
||||
log.Debug("Database cleaning complete", "size_after_clean", getFileSize(tempDBPath))
|
||||
|
||||
// Step 3: Dump the cleaned database to SQL
|
||||
dumpPath := filepath.Join(tempDir, "snapshot.sql")
|
||||
log.Debug("Dumping database to SQL", "source", tempDBPath, "destination", dumpPath)
|
||||
if err := sm.dumpDatabase(tempDBPath, dumpPath); err != nil {
|
||||
return fmt.Errorf("dumping database: %w", err)
|
||||
}
|
||||
log.Debug("SQL dump complete", "size", getFileSize(dumpPath))
|
||||
|
||||
// Step 4: Compress the SQL dump
|
||||
compressedPath := filepath.Join(tempDir, "snapshot.sql.zst")
|
||||
log.Debug("Compressing SQL dump", "source", dumpPath, "destination", compressedPath)
|
||||
if err := sm.compressDump(dumpPath, compressedPath); err != nil {
|
||||
return fmt.Errorf("compressing dump: %w", err)
|
||||
}
|
||||
log.Debug("Compression complete", "original_size", getFileSize(dumpPath), "compressed_size", getFileSize(compressedPath))
|
||||
|
||||
// Step 5: Read compressed data for encryption/upload
|
||||
log.Debug("Reading compressed data for upload", "path", compressedPath)
|
||||
compressedData, err := os.ReadFile(compressedPath)
|
||||
if err != nil {
|
||||
return fmt.Errorf("reading compressed dump: %w", err)
|
||||
@@ -204,14 +240,19 @@ func (sm *SnapshotManager) ExportSnapshotMetadata(ctx context.Context, dbPath st
|
||||
// Step 6: Encrypt if encryptor is available
|
||||
finalData := compressedData
|
||||
if sm.encryptor != nil {
|
||||
log.Debug("Encrypting snapshot data", "size_before", len(compressedData))
|
||||
encrypted, err := sm.encryptor.Encrypt(compressedData)
|
||||
if err != nil {
|
||||
return fmt.Errorf("encrypting snapshot: %w", err)
|
||||
}
|
||||
finalData = encrypted
|
||||
log.Debug("Encryption complete", "size_after", len(encrypted))
|
||||
} else {
|
||||
log.Debug("No encryption configured, using compressed data as-is")
|
||||
}
|
||||
|
||||
// Step 7: Generate blob manifest (before closing temp DB)
|
||||
log.Debug("Generating blob manifest from temporary database", "db_path", tempDBPath)
|
||||
blobManifest, err := sm.generateBlobManifest(ctx, tempDBPath, snapshotID)
|
||||
if err != nil {
|
||||
return fmt.Errorf("generating blob manifest: %w", err)
|
||||
@@ -224,15 +265,19 @@ func (sm *SnapshotManager) ExportSnapshotMetadata(ctx context.Context, dbPath st
|
||||
dbKey += ".age"
|
||||
}
|
||||
|
||||
log.Debug("Uploading snapshot database to S3", "key", dbKey, "size", len(finalData))
|
||||
if err := sm.s3Client.PutObject(ctx, dbKey, bytes.NewReader(finalData)); err != nil {
|
||||
return fmt.Errorf("uploading snapshot database: %w", err)
|
||||
}
|
||||
log.Debug("Database upload complete", "key", dbKey)
|
||||
|
||||
// Upload blob manifest (unencrypted, compressed)
|
||||
manifestKey := fmt.Sprintf("metadata/%s/manifest.json.zst", snapshotID)
|
||||
log.Debug("Uploading blob manifest to S3", "key", manifestKey, "size", len(blobManifest))
|
||||
if err := sm.s3Client.PutObject(ctx, manifestKey, bytes.NewReader(blobManifest)); err != nil {
|
||||
return fmt.Errorf("uploading blob manifest: %w", err)
|
||||
}
|
||||
log.Debug("Manifest upload complete", "key", manifestKey)
|
||||
|
||||
log.Info("Uploaded snapshot metadata",
|
||||
"snapshot_id", snapshotID,
|
||||
@@ -260,14 +305,18 @@ func (sm *SnapshotManager) ExportSnapshotMetadata(ctx context.Context, dbPath st
|
||||
// Future implementation when we have snapshot_files table:
|
||||
//
|
||||
// DELETE FROM snapshots WHERE id != ?;
|
||||
// DELETE FROM files WHERE path NOT IN (
|
||||
// SELECT file_path FROM snapshot_files WHERE snapshot_id = ?
|
||||
// DELETE FROM files WHERE NOT EXISTS (
|
||||
// SELECT 1 FROM snapshot_files
|
||||
// WHERE snapshot_files.file_id = files.id
|
||||
// AND snapshot_files.snapshot_id = ?
|
||||
// );
|
||||
// DELETE FROM chunks WHERE chunk_hash NOT IN (
|
||||
// SELECT DISTINCT chunk_hash FROM file_chunks
|
||||
// DELETE FROM chunks WHERE NOT EXISTS (
|
||||
// SELECT 1 FROM file_chunks
|
||||
// WHERE file_chunks.chunk_hash = chunks.chunk_hash
|
||||
// );
|
||||
// DELETE FROM blobs WHERE blob_hash NOT IN (
|
||||
// SELECT DISTINCT blob_hash FROM blob_chunks
|
||||
// DELETE FROM blobs WHERE NOT EXISTS (
|
||||
// SELECT 1 FROM blob_chunks
|
||||
// WHERE blob_chunks.blob_hash = blobs.blob_hash
|
||||
// );
|
||||
func (sm *SnapshotManager) cleanSnapshotDB(ctx context.Context, dbPath string, snapshotID string) error {
|
||||
// Open the temp database
|
||||
@@ -293,84 +342,127 @@ func (sm *SnapshotManager) cleanSnapshotDB(ctx context.Context, dbPath string, s
|
||||
}()
|
||||
|
||||
// Step 1: Delete all other snapshots
|
||||
_, err = tx.ExecContext(ctx, "DELETE FROM snapshots WHERE id != ?", snapshotID)
|
||||
log.Debug("Deleting other snapshots", "keeping", snapshotID)
|
||||
database.LogSQL("Execute", "DELETE FROM snapshots WHERE id != ?", snapshotID)
|
||||
result, err := tx.ExecContext(ctx, "DELETE FROM snapshots WHERE id != ?", snapshotID)
|
||||
if err != nil {
|
||||
return fmt.Errorf("deleting other snapshots: %w", err)
|
||||
}
|
||||
rowsAffected, _ := result.RowsAffected()
|
||||
log.Debug("Deleted snapshots", "count", rowsAffected)
|
||||
|
||||
// Step 2: Delete files not in this snapshot
|
||||
_, err = tx.ExecContext(ctx, `
|
||||
log.Debug("Deleting files not in current snapshot")
|
||||
database.LogSQL("Execute", `DELETE FROM files WHERE NOT EXISTS (SELECT 1 FROM snapshot_files WHERE snapshot_files.file_id = files.id AND snapshot_files.snapshot_id = ?)`, snapshotID)
|
||||
result, err = tx.ExecContext(ctx, `
|
||||
DELETE FROM files
|
||||
WHERE path NOT IN (
|
||||
SELECT file_path FROM snapshot_files WHERE snapshot_id = ?
|
||||
WHERE NOT EXISTS (
|
||||
SELECT 1 FROM snapshot_files
|
||||
WHERE snapshot_files.file_id = files.id
|
||||
AND snapshot_files.snapshot_id = ?
|
||||
)`, snapshotID)
|
||||
if err != nil {
|
||||
return fmt.Errorf("deleting orphaned files: %w", err)
|
||||
}
|
||||
rowsAffected, _ = result.RowsAffected()
|
||||
log.Debug("Deleted files", "count", rowsAffected)
|
||||
|
||||
// Step 3: file_chunks will be deleted via CASCADE from files
|
||||
log.Debug("file_chunks will be deleted via CASCADE")
|
||||
|
||||
// Step 4: Delete chunk_files for deleted files
|
||||
_, err = tx.ExecContext(ctx, `
|
||||
log.Debug("Deleting orphaned chunk_files")
|
||||
database.LogSQL("Execute", `DELETE FROM chunk_files WHERE NOT EXISTS (SELECT 1 FROM files WHERE files.id = chunk_files.file_id)`)
|
||||
result, err = tx.ExecContext(ctx, `
|
||||
DELETE FROM chunk_files
|
||||
WHERE file_path NOT IN (
|
||||
SELECT path FROM files
|
||||
WHERE NOT EXISTS (
|
||||
SELECT 1 FROM files
|
||||
WHERE files.id = chunk_files.file_id
|
||||
)`)
|
||||
if err != nil {
|
||||
return fmt.Errorf("deleting orphaned chunk_files: %w", err)
|
||||
}
|
||||
rowsAffected, _ = result.RowsAffected()
|
||||
log.Debug("Deleted chunk_files", "count", rowsAffected)
|
||||
|
||||
// Step 5: Delete chunks with no remaining file references
|
||||
_, err = tx.ExecContext(ctx, `
|
||||
log.Debug("Deleting orphaned chunks")
|
||||
database.LogSQL("Execute", `DELETE FROM chunks WHERE NOT EXISTS (SELECT 1 FROM file_chunks WHERE file_chunks.chunk_hash = chunks.chunk_hash)`)
|
||||
result, err = tx.ExecContext(ctx, `
|
||||
DELETE FROM chunks
|
||||
WHERE chunk_hash NOT IN (
|
||||
SELECT DISTINCT chunk_hash FROM file_chunks
|
||||
WHERE NOT EXISTS (
|
||||
SELECT 1 FROM file_chunks
|
||||
WHERE file_chunks.chunk_hash = chunks.chunk_hash
|
||||
)`)
|
||||
if err != nil {
|
||||
return fmt.Errorf("deleting orphaned chunks: %w", err)
|
||||
}
|
||||
rowsAffected, _ = result.RowsAffected()
|
||||
log.Debug("Deleted chunks", "count", rowsAffected)
|
||||
|
||||
// Step 6: Delete blob_chunks for deleted chunks
|
||||
_, err = tx.ExecContext(ctx, `
|
||||
log.Debug("Deleting orphaned blob_chunks")
|
||||
database.LogSQL("Execute", `DELETE FROM blob_chunks WHERE NOT EXISTS (SELECT 1 FROM chunks WHERE chunks.chunk_hash = blob_chunks.chunk_hash)`)
|
||||
result, err = tx.ExecContext(ctx, `
|
||||
DELETE FROM blob_chunks
|
||||
WHERE chunk_hash NOT IN (
|
||||
SELECT chunk_hash FROM chunks
|
||||
WHERE NOT EXISTS (
|
||||
SELECT 1 FROM chunks
|
||||
WHERE chunks.chunk_hash = blob_chunks.chunk_hash
|
||||
)`)
|
||||
if err != nil {
|
||||
return fmt.Errorf("deleting orphaned blob_chunks: %w", err)
|
||||
}
|
||||
rowsAffected, _ = result.RowsAffected()
|
||||
log.Debug("Deleted blob_chunks", "count", rowsAffected)
|
||||
|
||||
// Step 7: Delete blobs not in this snapshot
|
||||
_, err = tx.ExecContext(ctx, `
|
||||
log.Debug("Deleting blobs not in current snapshot")
|
||||
database.LogSQL("Execute", `DELETE FROM blobs WHERE NOT EXISTS (SELECT 1 FROM snapshot_blobs WHERE snapshot_blobs.blob_hash = blobs.blob_hash AND snapshot_blobs.snapshot_id = ?)`, snapshotID)
|
||||
result, err = tx.ExecContext(ctx, `
|
||||
DELETE FROM blobs
|
||||
WHERE blob_hash NOT IN (
|
||||
SELECT blob_hash FROM snapshot_blobs WHERE snapshot_id = ?
|
||||
WHERE NOT EXISTS (
|
||||
SELECT 1 FROM snapshot_blobs
|
||||
WHERE snapshot_blobs.blob_hash = blobs.blob_hash
|
||||
AND snapshot_blobs.snapshot_id = ?
|
||||
)`, snapshotID)
|
||||
if err != nil {
|
||||
return fmt.Errorf("deleting orphaned blobs: %w", err)
|
||||
}
|
||||
rowsAffected, _ = result.RowsAffected()
|
||||
log.Debug("Deleted blobs not in snapshot", "count", rowsAffected)
|
||||
|
||||
// Step 8: Delete orphaned snapshot_files and snapshot_blobs
|
||||
_, err = tx.ExecContext(ctx, "DELETE FROM snapshot_files WHERE snapshot_id != ?", snapshotID)
|
||||
log.Debug("Deleting orphaned snapshot_files")
|
||||
database.LogSQL("Execute", "DELETE FROM snapshot_files WHERE snapshot_id != ?", snapshotID)
|
||||
result, err = tx.ExecContext(ctx, "DELETE FROM snapshot_files WHERE snapshot_id != ?", snapshotID)
|
||||
if err != nil {
|
||||
return fmt.Errorf("deleting orphaned snapshot_files: %w", err)
|
||||
}
|
||||
rowsAffected, _ = result.RowsAffected()
|
||||
log.Debug("Deleted snapshot_files", "count", rowsAffected)
|
||||
|
||||
_, err = tx.ExecContext(ctx, "DELETE FROM snapshot_blobs WHERE snapshot_id != ?", snapshotID)
|
||||
log.Debug("Deleting orphaned snapshot_blobs")
|
||||
database.LogSQL("Execute", "DELETE FROM snapshot_blobs WHERE snapshot_id != ?", snapshotID)
|
||||
result, err = tx.ExecContext(ctx, "DELETE FROM snapshot_blobs WHERE snapshot_id != ?", snapshotID)
|
||||
if err != nil {
|
||||
return fmt.Errorf("deleting orphaned snapshot_blobs: %w", err)
|
||||
}
|
||||
rowsAffected, _ = result.RowsAffected()
|
||||
log.Debug("Deleted snapshot_blobs", "count", rowsAffected)
|
||||
|
||||
// Commit transaction
|
||||
log.Debug("Committing cleanup transaction")
|
||||
if err := tx.Commit(); err != nil {
|
||||
return fmt.Errorf("committing transaction: %w", err)
|
||||
}
|
||||
|
||||
log.Debug("Database cleanup complete")
|
||||
return nil
|
||||
}
|
||||
|
||||
// dumpDatabase creates a SQL dump of the database
|
||||
func (sm *SnapshotManager) dumpDatabase(dbPath, dumpPath string) error {
|
||||
log.Debug("Running sqlite3 dump command", "source", dbPath, "destination", dumpPath)
|
||||
cmd := exec.Command("sqlite3", dbPath, ".dump")
|
||||
|
||||
output, err := cmd.Output()
|
||||
@@ -378,6 +470,7 @@ func (sm *SnapshotManager) dumpDatabase(dbPath, dumpPath string) error {
|
||||
return fmt.Errorf("running sqlite3 dump: %w", err)
|
||||
}
|
||||
|
||||
log.Debug("SQL dump generated", "size", len(output))
|
||||
if err := os.WriteFile(dumpPath, output, 0644); err != nil {
|
||||
return fmt.Errorf("writing dump file: %w", err)
|
||||
}
|
||||
@@ -387,27 +480,32 @@ func (sm *SnapshotManager) dumpDatabase(dbPath, dumpPath string) error {
|
||||
|
||||
// compressDump compresses the SQL dump using zstd
|
||||
func (sm *SnapshotManager) compressDump(inputPath, outputPath string) error {
|
||||
log.Debug("Opening SQL dump for compression", "path", inputPath)
|
||||
input, err := os.Open(inputPath)
|
||||
if err != nil {
|
||||
return fmt.Errorf("opening input file: %w", err)
|
||||
}
|
||||
defer func() {
|
||||
log.Debug("Closing input file", "path", inputPath)
|
||||
if err := input.Close(); err != nil {
|
||||
log.Debug("Failed to close input file", "error", err)
|
||||
log.Debug("Failed to close input file", "path", inputPath, "error", err)
|
||||
}
|
||||
}()
|
||||
|
||||
log.Debug("Creating output file for compressed data", "path", outputPath)
|
||||
output, err := os.Create(outputPath)
|
||||
if err != nil {
|
||||
return fmt.Errorf("creating output file: %w", err)
|
||||
}
|
||||
defer func() {
|
||||
log.Debug("Closing output file", "path", outputPath)
|
||||
if err := output.Close(); err != nil {
|
||||
log.Debug("Failed to close output file", "error", err)
|
||||
log.Debug("Failed to close output file", "path", outputPath, "error", err)
|
||||
}
|
||||
}()
|
||||
|
||||
// Create zstd encoder with good compression and multithreading
|
||||
log.Debug("Creating zstd compressor", "level", "SpeedBetterCompression", "concurrency", runtime.NumCPU())
|
||||
zstdWriter, err := zstd.NewWriter(output,
|
||||
zstd.WithEncoderLevel(zstd.SpeedBetterCompression),
|
||||
zstd.WithEncoderConcurrency(runtime.NumCPU()),
|
||||
@@ -422,6 +520,7 @@ func (sm *SnapshotManager) compressDump(inputPath, outputPath string) error {
|
||||
}
|
||||
}()
|
||||
|
||||
log.Debug("Compressing data")
|
||||
if _, err := io.Copy(zstdWriter, input); err != nil {
|
||||
return fmt.Errorf("compressing data: %w", err)
|
||||
}
|
||||
@@ -431,35 +530,44 @@ func (sm *SnapshotManager) compressDump(inputPath, outputPath string) error {
|
||||
|
||||
// copyFile copies a file from src to dst
|
||||
func copyFile(src, dst string) error {
|
||||
log.Debug("Opening source file for copy", "path", src)
|
||||
sourceFile, err := os.Open(src)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
defer func() {
|
||||
log.Debug("Closing source file", "path", src)
|
||||
if err := sourceFile.Close(); err != nil {
|
||||
log.Debug("Failed to close source file", "error", err)
|
||||
log.Debug("Failed to close source file", "path", src, "error", err)
|
||||
}
|
||||
}()
|
||||
|
||||
log.Debug("Creating destination file", "path", dst)
|
||||
destFile, err := os.Create(dst)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
defer func() {
|
||||
log.Debug("Closing destination file", "path", dst)
|
||||
if err := destFile.Close(); err != nil {
|
||||
log.Debug("Failed to close destination file", "error", err)
|
||||
log.Debug("Failed to close destination file", "path", dst, "error", err)
|
||||
}
|
||||
}()
|
||||
|
||||
if _, err := io.Copy(destFile, sourceFile); err != nil {
|
||||
log.Debug("Copying file data")
|
||||
n, err := io.Copy(destFile, sourceFile)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
log.Debug("File copy complete", "bytes_copied", n)
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// generateBlobManifest creates a compressed JSON list of all blobs in the snapshot
|
||||
func (sm *SnapshotManager) generateBlobManifest(ctx context.Context, dbPath string, snapshotID string) ([]byte, error) {
|
||||
log.Debug("Generating blob manifest", "db_path", dbPath, "snapshot_id", snapshotID)
|
||||
|
||||
// Open the cleaned database using the database package
|
||||
db, err := database.New(ctx, dbPath)
|
||||
if err != nil {
|
||||
@@ -471,10 +579,12 @@ func (sm *SnapshotManager) generateBlobManifest(ctx context.Context, dbPath stri
|
||||
repos := database.NewRepositories(db)
|
||||
|
||||
// Get all blobs for this snapshot
|
||||
log.Debug("Querying blobs for snapshot", "snapshot_id", snapshotID)
|
||||
blobs, err := repos.Snapshots.GetBlobHashes(ctx, snapshotID)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("getting snapshot blobs: %w", err)
|
||||
}
|
||||
log.Debug("Found blobs", "count", len(blobs))
|
||||
|
||||
// Create manifest structure
|
||||
manifest := struct {
|
||||
@@ -490,16 +600,20 @@ func (sm *SnapshotManager) generateBlobManifest(ctx context.Context, dbPath stri
|
||||
}
|
||||
|
||||
// Marshal to JSON
|
||||
log.Debug("Marshaling manifest to JSON")
|
||||
jsonData, err := json.MarshalIndent(manifest, "", " ")
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("marshaling manifest: %w", err)
|
||||
}
|
||||
log.Debug("JSON manifest created", "size", len(jsonData))
|
||||
|
||||
// Compress with zstd
|
||||
log.Debug("Compressing manifest with zstd")
|
||||
compressed, err := compressData(jsonData)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("compressing manifest: %w", err)
|
||||
}
|
||||
log.Debug("Manifest compressed", "original_size", len(jsonData), "compressed_size", len(compressed))
|
||||
|
||||
log.Info("Generated blob manifest",
|
||||
"snapshot_id", snapshotID,
|
||||
@@ -532,6 +646,15 @@ func compressData(data []byte) ([]byte, error) {
|
||||
return buf.Bytes(), nil
|
||||
}
|
||||
|
||||
// getFileSize returns the size of a file in bytes, or -1 if error
|
||||
func getFileSize(path string) int64 {
|
||||
info, err := os.Stat(path)
|
||||
if err != nil {
|
||||
return -1
|
||||
}
|
||||
return info.Size()
|
||||
}
|
||||
|
||||
// BackupStats contains statistics from a backup operation
|
||||
type BackupStats struct {
|
||||
FilesScanned int
|
||||
@@ -540,3 +663,108 @@ type BackupStats struct {
|
||||
BlobsCreated int
|
||||
BytesUploaded int64
|
||||
}
|
||||
|
||||
// ExtendedBackupStats contains additional statistics for comprehensive tracking
|
||||
type ExtendedBackupStats struct {
|
||||
BackupStats
|
||||
BlobUncompressedSize int64 // Total uncompressed size of all referenced blobs
|
||||
CompressionLevel int // Compression level used for this snapshot
|
||||
UploadDurationMs int64 // Total milliseconds spent uploading to S3
|
||||
}
|
||||
|
||||
// CleanupIncompleteSnapshots removes incomplete snapshots that don't have metadata in S3.
|
||||
// This is critical for data safety: incomplete snapshots can cause deduplication to skip
|
||||
// files that were never successfully backed up, resulting in data loss.
|
||||
func (sm *SnapshotManager) CleanupIncompleteSnapshots(ctx context.Context, hostname string) error {
|
||||
log.Info("Checking for incomplete snapshots", "hostname", hostname)
|
||||
|
||||
// Get all incomplete snapshots for this hostname
|
||||
incompleteSnapshots, err := sm.repos.Snapshots.GetIncompleteByHostname(ctx, hostname)
|
||||
if err != nil {
|
||||
return fmt.Errorf("getting incomplete snapshots: %w", err)
|
||||
}
|
||||
|
||||
if len(incompleteSnapshots) == 0 {
|
||||
log.Debug("No incomplete snapshots found")
|
||||
return nil
|
||||
}
|
||||
|
||||
log.Info("Found incomplete snapshots", "count", len(incompleteSnapshots))
|
||||
|
||||
// Check each incomplete snapshot for metadata in S3
|
||||
for _, snapshot := range incompleteSnapshots {
|
||||
// Check if metadata exists in S3
|
||||
metadataKey := fmt.Sprintf("metadata/%s/db.zst", snapshot.ID)
|
||||
_, err := sm.s3Client.StatObject(ctx, metadataKey)
|
||||
|
||||
if err != nil {
|
||||
// Metadata doesn't exist in S3 - this is an incomplete snapshot
|
||||
log.Info("Cleaning up incomplete snapshot", "snapshot_id", snapshot.ID, "started_at", snapshot.StartedAt)
|
||||
|
||||
// Delete the snapshot and all its associations
|
||||
if err := sm.deleteSnapshot(ctx, snapshot.ID); err != nil {
|
||||
return fmt.Errorf("deleting incomplete snapshot %s: %w", snapshot.ID, err)
|
||||
}
|
||||
|
||||
log.Info("Deleted incomplete snapshot", "snapshot_id", snapshot.ID)
|
||||
} else {
|
||||
// Metadata exists - this snapshot was completed but database wasn't updated
|
||||
// This shouldn't happen in normal operation, but mark it complete
|
||||
log.Warn("Found snapshot with metadata but incomplete in DB", "snapshot_id", snapshot.ID)
|
||||
if err := sm.repos.Snapshots.MarkComplete(ctx, nil, snapshot.ID); err != nil {
|
||||
log.Error("Failed to mark snapshot complete", "snapshot_id", snapshot.ID, "error", err)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// deleteSnapshot removes a snapshot and all its associations from the database
|
||||
func (sm *SnapshotManager) deleteSnapshot(ctx context.Context, snapshotID string) error {
|
||||
// Delete snapshot_files entries
|
||||
if err := sm.repos.Snapshots.DeleteSnapshotFiles(ctx, snapshotID); err != nil {
|
||||
return fmt.Errorf("deleting snapshot files: %w", err)
|
||||
}
|
||||
|
||||
// Delete snapshot_blobs entries
|
||||
if err := sm.repos.Snapshots.DeleteSnapshotBlobs(ctx, snapshotID); err != nil {
|
||||
return fmt.Errorf("deleting snapshot blobs: %w", err)
|
||||
}
|
||||
|
||||
// Delete the snapshot itself
|
||||
if err := sm.repos.Snapshots.Delete(ctx, snapshotID); err != nil {
|
||||
return fmt.Errorf("deleting snapshot: %w", err)
|
||||
}
|
||||
|
||||
// Clean up orphaned data
|
||||
log.Debug("Cleaning up orphaned data")
|
||||
if err := sm.cleanupOrphanedData(ctx); err != nil {
|
||||
return fmt.Errorf("cleaning up orphaned data: %w", err)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// cleanupOrphanedData removes files, chunks, and blobs that are no longer referenced by any snapshot
|
||||
func (sm *SnapshotManager) cleanupOrphanedData(ctx context.Context) error {
|
||||
// Delete orphaned files (files not in any snapshot)
|
||||
log.Debug("Deleting orphaned files")
|
||||
if err := sm.repos.Files.DeleteOrphaned(ctx); err != nil {
|
||||
return fmt.Errorf("deleting orphaned files: %w", err)
|
||||
}
|
||||
|
||||
// Delete orphaned chunks (chunks not referenced by any file)
|
||||
log.Debug("Deleting orphaned chunks")
|
||||
if err := sm.repos.Chunks.DeleteOrphaned(ctx); err != nil {
|
||||
return fmt.Errorf("deleting orphaned chunks: %w", err)
|
||||
}
|
||||
|
||||
// Delete orphaned blobs (blobs not in any snapshot)
|
||||
log.Debug("Deleting orphaned blobs")
|
||||
if err := sm.repos.Blobs.DeleteOrphaned(ctx); err != nil {
|
||||
return fmt.Errorf("deleting orphaned blobs: %w", err)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user