Fix foreign key constraints and improve snapshot tracking

- Add unified compression/encryption package in internal/blobgen
- Update DATAMODEL.md to reflect current schema implementation
- Refactor snapshot cleanup into well-named methods for clarity
- Add snapshot_id to uploads table to track new blobs per snapshot
- Fix blob count reporting for incremental backups
- Add DeleteOrphaned method to BlobChunkRepository
- Fix cleanup order to respect foreign key constraints
- Update tests to reflect schema changes
This commit is contained in:
2025-07-26 02:22:25 +02:00
parent 78af626759
commit d3afa65420
28 changed files with 994 additions and 534 deletions

View File

@@ -393,7 +393,6 @@ func (b *BackupEngine) Backup(ctx context.Context, fsys fs.FS, root string) (str
err = b.repos.WithTx(ctx, func(ctx context.Context, tx *sql.Tx) error {
chunk := &database.Chunk{
ChunkHash: chunkHash,
SHA256: chunkHash,
Size: int64(n),
}
return b.repos.Chunks.Create(ctx, tx, chunk)

View File

@@ -19,6 +19,7 @@ type ScannerParams struct {
var Module = fx.Module("backup",
fx.Provide(
provideScannerFactory,
NewSnapshotManager,
),
)

View File

@@ -12,7 +12,6 @@ import (
"git.eeqj.de/sneak/vaultik/internal/blob"
"git.eeqj.de/sneak/vaultik/internal/chunker"
"git.eeqj.de/sneak/vaultik/internal/crypto"
"git.eeqj.de/sneak/vaultik/internal/database"
"git.eeqj.de/sneak/vaultik/internal/log"
"git.eeqj.de/sneak/vaultik/internal/s3"
@@ -86,17 +85,11 @@ func NewScanner(cfg ScannerConfig) *Scanner {
return nil
}
enc, err := crypto.NewEncryptor(cfg.AgeRecipients)
if err != nil {
log.Error("Failed to create encryptor", "error", err)
return nil
}
// Create blob packer with encryption
packerCfg := blob.PackerConfig{
MaxBlobSize: cfg.MaxBlobSize,
CompressionLevel: cfg.CompressionLevel,
Encryptor: enc,
Recipients: cfg.AgeRecipients,
Repositories: cfg.Repositories,
}
packer, err := blob.NewPacker(packerCfg)
@@ -182,6 +175,18 @@ func (s *Scanner) Scan(ctx context.Context, path string, snapshotID string) (*Sc
blobs := s.packer.GetFinishedBlobs()
result.BlobsCreated += len(blobs)
// Query database for actual blob count created during this snapshot
// The database is authoritative, especially for concurrent blob uploads
// We count uploads rather than all snapshot_blobs to get only NEW blobs
if s.snapshotID != "" {
uploadCount, err := s.repos.Uploads.GetCountBySnapshot(ctx, s.snapshotID)
if err != nil {
log.Warn("Failed to get upload count from database", "error", err)
} else {
result.BlobsCreated = int(uploadCount)
}
}
result.EndTime = time.Now().UTC()
return result, nil
}
@@ -341,24 +346,22 @@ func (s *Scanner) checkFile(ctx context.Context, path string, info os.FileInfo,
fileChanged := existingFile == nil || s.hasFileChanged(existingFile, file)
// Update file metadata in a short transaction
log.Debug("Updating file metadata", "path", path, "changed", fileChanged)
// Update file metadata and add to snapshot in a single transaction
log.Debug("Updating file metadata and adding to snapshot", "path", path, "changed", fileChanged, "snapshot", s.snapshotID)
err = s.repos.WithTx(ctx, func(ctx context.Context, tx *sql.Tx) error {
return s.repos.Files.Create(ctx, tx, file)
// First create/update the file
if err := s.repos.Files.Create(ctx, tx, file); err != nil {
return fmt.Errorf("creating file: %w", err)
}
// Then add it to the snapshot using the file ID
if err := s.repos.Snapshots.AddFileByID(ctx, tx, s.snapshotID, file.ID); err != nil {
return fmt.Errorf("adding file to snapshot: %w", err)
}
return nil
})
if err != nil {
return nil, false, err
}
log.Debug("File metadata updated", "path", path)
// Add file to snapshot in a short transaction
log.Debug("Adding file to snapshot", "path", path, "snapshot", s.snapshotID)
err = s.repos.WithTx(ctx, func(ctx context.Context, tx *sql.Tx) error {
return s.repos.Snapshots.AddFile(ctx, tx, s.snapshotID, path)
})
if err != nil {
return nil, false, fmt.Errorf("adding file to snapshot: %w", err)
}
log.Debug("File added to snapshot", "path", path)
result.FilesScanned++
@@ -542,6 +545,14 @@ func (s *Scanner) handleBlobReady(blobWithReader *blob.BlobWithReader) error {
uploadDuration := time.Since(startTime)
// Log upload stats
uploadSpeed := float64(finishedBlob.Compressed) * 8 / uploadDuration.Seconds() // bits per second
log.Info("Uploaded blob to S3",
"path", blobPath,
"size", humanize.Bytes(uint64(finishedBlob.Compressed)),
"duration", uploadDuration,
"speed", humanize.SI(uploadSpeed, "bps"))
// Report upload complete
if s.progress != nil {
s.progress.ReportUploadComplete(finishedBlob.Hash, finishedBlob.Compressed, uploadDuration)
@@ -574,6 +585,7 @@ func (s *Scanner) handleBlobReady(blobWithReader *blob.BlobWithReader) error {
// Record upload metrics
upload := &database.Upload{
BlobHash: finishedBlob.Hash,
SnapshotID: s.snapshotID,
UploadedAt: startTime,
Size: finishedBlob.Compressed,
DurationMs: uploadDuration.Milliseconds(),
@@ -645,7 +657,6 @@ func (s *Scanner) processFileStreaming(ctx context.Context, fileToProcess *FileT
err := s.repos.WithTx(ctx, func(txCtx context.Context, tx *sql.Tx) error {
dbChunk := &database.Chunk{
ChunkHash: chunk.Hash,
SHA256: chunk.Hash,
Size: chunk.Size,
}
if err := s.repos.Chunks.Create(txCtx, tx, dbChunk); err != nil {

View File

@@ -48,32 +48,39 @@ import (
"os"
"os/exec"
"path/filepath"
"runtime"
"time"
"git.eeqj.de/sneak/vaultik/internal/blobgen"
"git.eeqj.de/sneak/vaultik/internal/config"
"git.eeqj.de/sneak/vaultik/internal/database"
"git.eeqj.de/sneak/vaultik/internal/log"
"github.com/klauspost/compress/zstd"
"git.eeqj.de/sneak/vaultik/internal/s3"
"github.com/dustin/go-humanize"
"go.uber.org/fx"
)
// SnapshotManager handles snapshot creation and metadata export
type SnapshotManager struct {
repos *database.Repositories
s3Client S3Client
encryptor Encryptor
repos *database.Repositories
s3Client S3Client
config *config.Config
}
// Encryptor interface for snapshot encryption
type Encryptor interface {
Encrypt(data []byte) ([]byte, error)
// SnapshotManagerParams holds dependencies for NewSnapshotManager
type SnapshotManagerParams struct {
fx.In
Repos *database.Repositories
S3Client *s3.Client
Config *config.Config
}
// NewSnapshotManager creates a new snapshot manager
func NewSnapshotManager(repos *database.Repositories, s3Client S3Client, encryptor Encryptor) *SnapshotManager {
// NewSnapshotManager creates a new snapshot manager for dependency injection
func NewSnapshotManager(params SnapshotManagerParams) *SnapshotManager {
return &SnapshotManager{
repos: repos,
s3Client: s3Client,
encryptor: encryptor,
repos: params.Repos,
s3Client: params.S3Client,
config: params.Config,
}
}
@@ -208,11 +215,20 @@ func (sm *SnapshotManager) ExportSnapshotMetadata(ctx context.Context, dbPath st
log.Debug("Database copy complete", "size", getFileSize(tempDBPath))
// Step 2: Clean the temp database to only contain current snapshot data
log.Debug("Cleaning snapshot database to contain only current snapshot", "snapshot_id", snapshotID)
if err := sm.cleanSnapshotDB(ctx, tempDBPath, snapshotID); err != nil {
log.Debug("Cleaning temporary snapshot database to contain only current snapshot", "snapshot_id", snapshotID, "db_path", tempDBPath)
stats, err := sm.cleanSnapshotDB(ctx, tempDBPath, snapshotID)
if err != nil {
return fmt.Errorf("cleaning snapshot database: %w", err)
}
log.Debug("Database cleaning complete", "size_after_clean", getFileSize(tempDBPath))
log.Info("Snapshot database cleanup complete",
"db_path", tempDBPath,
"size_after_clean", humanize.Bytes(uint64(getFileSize(tempDBPath))),
"files", stats.FileCount,
"chunks", stats.ChunkCount,
"blobs", stats.BlobCount,
"total_compressed_size", humanize.Bytes(uint64(stats.CompressedSize)),
"total_uncompressed_size", humanize.Bytes(uint64(stats.UncompressedSize)),
"compression_ratio", fmt.Sprintf("%.2fx", float64(stats.UncompressedSize)/float64(stats.CompressedSize)))
// Step 3: Dump the cleaned database to SQL
dumpPath := filepath.Join(tempDir, "snapshot.sql")
@@ -222,62 +238,59 @@ func (sm *SnapshotManager) ExportSnapshotMetadata(ctx context.Context, dbPath st
}
log.Debug("SQL dump complete", "size", getFileSize(dumpPath))
// Step 4: Compress the SQL dump
compressedPath := filepath.Join(tempDir, "snapshot.sql.zst")
log.Debug("Compressing SQL dump", "source", dumpPath, "destination", compressedPath)
// Step 4: Compress and encrypt the SQL dump
compressedPath := filepath.Join(tempDir, "snapshot.sql.zst.age")
log.Debug("Compressing and encrypting SQL dump", "source", dumpPath, "destination", compressedPath)
if err := sm.compressDump(dumpPath, compressedPath); err != nil {
return fmt.Errorf("compressing dump: %w", err)
}
log.Debug("Compression complete", "original_size", getFileSize(dumpPath), "compressed_size", getFileSize(compressedPath))
// Step 5: Read compressed data for encryption/upload
log.Debug("Reading compressed data for upload", "path", compressedPath)
compressedData, err := os.ReadFile(compressedPath)
// Step 5: Read compressed and encrypted data for upload
log.Debug("Reading compressed and encrypted data for upload", "path", compressedPath)
finalData, err := os.ReadFile(compressedPath)
if err != nil {
return fmt.Errorf("reading compressed dump: %w", err)
}
// Step 6: Encrypt if encryptor is available
finalData := compressedData
if sm.encryptor != nil {
log.Debug("Encrypting snapshot data", "size_before", len(compressedData))
encrypted, err := sm.encryptor.Encrypt(compressedData)
if err != nil {
return fmt.Errorf("encrypting snapshot: %w", err)
}
finalData = encrypted
log.Debug("Encryption complete", "size_after", len(encrypted))
} else {
log.Debug("No encryption configured, using compressed data as-is")
}
// Step 7: Generate blob manifest (before closing temp DB)
// Step 6: Generate blob manifest (before closing temp DB)
log.Debug("Generating blob manifest from temporary database", "db_path", tempDBPath)
blobManifest, err := sm.generateBlobManifest(ctx, tempDBPath, snapshotID)
if err != nil {
return fmt.Errorf("generating blob manifest: %w", err)
}
// Step 8: Upload to S3 in snapshot subdirectory
// Upload database backup (encrypted)
dbKey := fmt.Sprintf("metadata/%s/db.zst", snapshotID)
if sm.encryptor != nil {
dbKey += ".age"
}
// Step 7: Upload to S3 in snapshot subdirectory
// Upload database backup (compressed and encrypted)
dbKey := fmt.Sprintf("metadata/%s/db.zst.age", snapshotID)
log.Debug("Uploading snapshot database to S3", "key", dbKey, "size", len(finalData))
dbUploadStart := time.Now()
if err := sm.s3Client.PutObject(ctx, dbKey, bytes.NewReader(finalData)); err != nil {
return fmt.Errorf("uploading snapshot database: %w", err)
}
log.Debug("Database upload complete", "key", dbKey)
dbUploadDuration := time.Since(dbUploadStart)
dbUploadSpeed := float64(len(finalData)) * 8 / dbUploadDuration.Seconds() // bits per second
log.Info("Uploaded snapshot database to S3",
"path", dbKey,
"size", humanize.Bytes(uint64(len(finalData))),
"duration", dbUploadDuration,
"speed", humanize.SI(dbUploadSpeed, "bps"))
// Upload blob manifest (unencrypted, compressed)
manifestKey := fmt.Sprintf("metadata/%s/manifest.json.zst", snapshotID)
// Upload blob manifest (compressed and encrypted)
manifestKey := fmt.Sprintf("metadata/%s/manifest.json.zst.age", snapshotID)
log.Debug("Uploading blob manifest to S3", "key", manifestKey, "size", len(blobManifest))
manifestUploadStart := time.Now()
if err := sm.s3Client.PutObject(ctx, manifestKey, bytes.NewReader(blobManifest)); err != nil {
return fmt.Errorf("uploading blob manifest: %w", err)
}
log.Debug("Manifest upload complete", "key", manifestKey)
manifestUploadDuration := time.Since(manifestUploadStart)
manifestUploadSpeed := float64(len(blobManifest)) * 8 / manifestUploadDuration.Seconds() // bits per second
log.Info("Uploaded blob manifest to S3",
"path", manifestKey,
"size", humanize.Bytes(uint64(len(blobManifest))),
"duration", manifestUploadDuration,
"speed", humanize.SI(manifestUploadSpeed, "bps"))
log.Info("Uploaded snapshot metadata",
"snapshot_id", snapshotID,
@@ -286,43 +299,32 @@ func (sm *SnapshotManager) ExportSnapshotMetadata(ctx context.Context, dbPath st
return nil
}
// CleanupStats summarizes what remains in a snapshot database after
// cleanSnapshotDB has pruned it down to a single snapshot.
type CleanupStats struct {
// FileCount is the number of rows left in the files table.
FileCount int
// ChunkCount is the number of rows left in the chunks table.
ChunkCount int
// BlobCount is the number of blobs listed in snapshot_blobs for the snapshot.
BlobCount int
// CompressedSize is the sum of compressed_size over those blobs.
CompressedSize int64
// UncompressedSize is the sum of uncompressed_size over those blobs.
UncompressedSize int64
}
// cleanSnapshotDB removes all data except for the specified snapshot
//
// Current implementation:
// Since we don't yet have snapshot-file relationships, this currently only
// removes other snapshots. In a complete implementation, it would:
// The cleanup is performed in a specific order to maintain referential integrity:
// 1. Delete other snapshots
// 2. Delete orphaned snapshot associations (snapshot_files, snapshot_blobs) for deleted snapshots
// 3. Delete orphaned files (not in the current snapshot)
// 4. Delete orphaned chunk-to-file mappings (references to deleted files)
// 5. Delete orphaned blobs (not in the current snapshot)
// 6. Delete orphaned blob-to-chunk mappings (references to deleted blobs)
// 7. Delete orphaned chunks (not referenced by any file)
//
// 1. Delete all snapshots except the current one
// 2. Delete files not belonging to the current snapshot
// 3. Delete file_chunks for deleted files (CASCADE)
// 4. Delete chunk_files for deleted files
// 5. Delete chunks with no remaining file references
// 6. Delete blob_chunks for deleted chunks
// 7. Delete blobs with no remaining chunks
//
// The order is important to maintain referential integrity.
//
// Future implementation when we have snapshot_files table:
//
// DELETE FROM snapshots WHERE id != ?;
// DELETE FROM files WHERE NOT EXISTS (
// SELECT 1 FROM snapshot_files
// WHERE snapshot_files.file_id = files.id
// AND snapshot_files.snapshot_id = ?
// );
// DELETE FROM chunks WHERE NOT EXISTS (
// SELECT 1 FROM file_chunks
// WHERE file_chunks.chunk_hash = chunks.chunk_hash
// );
// DELETE FROM blobs WHERE NOT EXISTS (
// SELECT 1 FROM blob_chunks
// WHERE blob_chunks.blob_hash = blobs.blob_hash
// );
func (sm *SnapshotManager) cleanSnapshotDB(ctx context.Context, dbPath string, snapshotID string) error {
// Each step is implemented as a separate method for clarity and maintainability.
func (sm *SnapshotManager) cleanSnapshotDB(ctx context.Context, dbPath string, snapshotID string) (*CleanupStats, error) {
// Open the temp database
db, err := database.New(ctx, dbPath)
if err != nil {
return fmt.Errorf("opening temp database: %w", err)
return nil, fmt.Errorf("opening temp database: %w", err)
}
defer func() {
if err := db.Close(); err != nil {
@@ -333,7 +335,7 @@ func (sm *SnapshotManager) cleanSnapshotDB(ctx context.Context, dbPath string, s
// Start a transaction
tx, err := db.BeginTx(ctx, nil)
if err != nil {
return fmt.Errorf("beginning transaction: %w", err)
return nil, fmt.Errorf("beginning transaction: %w", err)
}
defer func() {
if rbErr := tx.Rollback(); rbErr != nil && rbErr != sql.ErrTxDone {
@@ -341,123 +343,77 @@ func (sm *SnapshotManager) cleanSnapshotDB(ctx context.Context, dbPath string, s
}
}()
// Step 1: Delete all other snapshots
log.Debug("Deleting other snapshots", "keeping", snapshotID)
database.LogSQL("Execute", "DELETE FROM snapshots WHERE id != ?", snapshotID)
result, err := tx.ExecContext(ctx, "DELETE FROM snapshots WHERE id != ?", snapshotID)
if err != nil {
return fmt.Errorf("deleting other snapshots: %w", err)
// Execute cleanup steps in order
if err := sm.deleteOtherSnapshots(ctx, tx, snapshotID); err != nil {
return nil, fmt.Errorf("step 1 - delete other snapshots: %w", err)
}
rowsAffected, _ := result.RowsAffected()
log.Debug("Deleted snapshots", "count", rowsAffected)
// Step 2: Delete files not in this snapshot
log.Debug("Deleting files not in current snapshot")
database.LogSQL("Execute", `DELETE FROM files WHERE NOT EXISTS (SELECT 1 FROM snapshot_files WHERE snapshot_files.file_id = files.id AND snapshot_files.snapshot_id = ?)`, snapshotID)
result, err = tx.ExecContext(ctx, `
DELETE FROM files
WHERE NOT EXISTS (
SELECT 1 FROM snapshot_files
WHERE snapshot_files.file_id = files.id
AND snapshot_files.snapshot_id = ?
)`, snapshotID)
if err != nil {
return fmt.Errorf("deleting orphaned files: %w", err)
if err := sm.deleteOrphanedSnapshotAssociations(ctx, tx, snapshotID); err != nil {
return nil, fmt.Errorf("step 2 - delete orphaned snapshot associations: %w", err)
}
rowsAffected, _ = result.RowsAffected()
log.Debug("Deleted files", "count", rowsAffected)
// Step 3: file_chunks will be deleted via CASCADE from files
log.Debug("file_chunks will be deleted via CASCADE")
// Step 4: Delete chunk_files for deleted files
log.Debug("Deleting orphaned chunk_files")
database.LogSQL("Execute", `DELETE FROM chunk_files WHERE NOT EXISTS (SELECT 1 FROM files WHERE files.id = chunk_files.file_id)`)
result, err = tx.ExecContext(ctx, `
DELETE FROM chunk_files
WHERE NOT EXISTS (
SELECT 1 FROM files
WHERE files.id = chunk_files.file_id
)`)
if err != nil {
return fmt.Errorf("deleting orphaned chunk_files: %w", err)
if err := sm.deleteOrphanedFiles(ctx, tx, snapshotID); err != nil {
return nil, fmt.Errorf("step 3 - delete orphaned files: %w", err)
}
rowsAffected, _ = result.RowsAffected()
log.Debug("Deleted chunk_files", "count", rowsAffected)
// Step 5: Delete chunks with no remaining file references
log.Debug("Deleting orphaned chunks")
database.LogSQL("Execute", `DELETE FROM chunks WHERE NOT EXISTS (SELECT 1 FROM file_chunks WHERE file_chunks.chunk_hash = chunks.chunk_hash)`)
result, err = tx.ExecContext(ctx, `
DELETE FROM chunks
WHERE NOT EXISTS (
SELECT 1 FROM file_chunks
WHERE file_chunks.chunk_hash = chunks.chunk_hash
)`)
if err != nil {
return fmt.Errorf("deleting orphaned chunks: %w", err)
if err := sm.deleteOrphanedChunkToFileMappings(ctx, tx); err != nil {
return nil, fmt.Errorf("step 4 - delete orphaned chunk-to-file mappings: %w", err)
}
rowsAffected, _ = result.RowsAffected()
log.Debug("Deleted chunks", "count", rowsAffected)
// Step 6: Delete blob_chunks for deleted chunks
log.Debug("Deleting orphaned blob_chunks")
database.LogSQL("Execute", `DELETE FROM blob_chunks WHERE NOT EXISTS (SELECT 1 FROM chunks WHERE chunks.chunk_hash = blob_chunks.chunk_hash)`)
result, err = tx.ExecContext(ctx, `
DELETE FROM blob_chunks
WHERE NOT EXISTS (
SELECT 1 FROM chunks
WHERE chunks.chunk_hash = blob_chunks.chunk_hash
)`)
if err != nil {
return fmt.Errorf("deleting orphaned blob_chunks: %w", err)
if err := sm.deleteOrphanedBlobs(ctx, tx, snapshotID); err != nil {
return nil, fmt.Errorf("step 5 - delete orphaned blobs: %w", err)
}
rowsAffected, _ = result.RowsAffected()
log.Debug("Deleted blob_chunks", "count", rowsAffected)
// Step 7: Delete blobs not in this snapshot
log.Debug("Deleting blobs not in current snapshot")
database.LogSQL("Execute", `DELETE FROM blobs WHERE NOT EXISTS (SELECT 1 FROM snapshot_blobs WHERE snapshot_blobs.blob_hash = blobs.blob_hash AND snapshot_blobs.snapshot_id = ?)`, snapshotID)
result, err = tx.ExecContext(ctx, `
DELETE FROM blobs
WHERE NOT EXISTS (
SELECT 1 FROM snapshot_blobs
WHERE snapshot_blobs.blob_hash = blobs.blob_hash
AND snapshot_blobs.snapshot_id = ?
)`, snapshotID)
if err != nil {
return fmt.Errorf("deleting orphaned blobs: %w", err)
if err := sm.deleteOrphanedBlobToChunkMappings(ctx, tx); err != nil {
return nil, fmt.Errorf("step 6 - delete orphaned blob-to-chunk mappings: %w", err)
}
rowsAffected, _ = result.RowsAffected()
log.Debug("Deleted blobs not in snapshot", "count", rowsAffected)
// Step 8: Delete orphaned snapshot_files and snapshot_blobs
log.Debug("Deleting orphaned snapshot_files")
database.LogSQL("Execute", "DELETE FROM snapshot_files WHERE snapshot_id != ?", snapshotID)
result, err = tx.ExecContext(ctx, "DELETE FROM snapshot_files WHERE snapshot_id != ?", snapshotID)
if err != nil {
return fmt.Errorf("deleting orphaned snapshot_files: %w", err)
if err := sm.deleteOrphanedChunks(ctx, tx); err != nil {
return nil, fmt.Errorf("step 7 - delete orphaned chunks: %w", err)
}
rowsAffected, _ = result.RowsAffected()
log.Debug("Deleted snapshot_files", "count", rowsAffected)
log.Debug("Deleting orphaned snapshot_blobs")
database.LogSQL("Execute", "DELETE FROM snapshot_blobs WHERE snapshot_id != ?", snapshotID)
result, err = tx.ExecContext(ctx, "DELETE FROM snapshot_blobs WHERE snapshot_id != ?", snapshotID)
if err != nil {
return fmt.Errorf("deleting orphaned snapshot_blobs: %w", err)
}
rowsAffected, _ = result.RowsAffected()
log.Debug("Deleted snapshot_blobs", "count", rowsAffected)
// Commit transaction
log.Debug("Committing cleanup transaction")
log.Debug("[Temp DB Cleanup] Committing cleanup transaction")
if err := tx.Commit(); err != nil {
return fmt.Errorf("committing transaction: %w", err)
return nil, fmt.Errorf("committing transaction: %w", err)
}
log.Debug("Database cleanup complete")
return nil
// Collect statistics about the cleaned database
stats := &CleanupStats{}
// Count files
var fileCount int
err = db.QueryRowWithLog(ctx, "SELECT COUNT(*) FROM files").Scan(&fileCount)
if err != nil {
return nil, fmt.Errorf("counting files: %w", err)
}
stats.FileCount = fileCount
// Count chunks
var chunkCount int
err = db.QueryRowWithLog(ctx, "SELECT COUNT(*) FROM chunks").Scan(&chunkCount)
if err != nil {
return nil, fmt.Errorf("counting chunks: %w", err)
}
stats.ChunkCount = chunkCount
// Count blobs and get sizes
var blobCount int
var compressedSize, uncompressedSize sql.NullInt64
err = db.QueryRowWithLog(ctx, `
SELECT COUNT(*), COALESCE(SUM(compressed_size), 0), COALESCE(SUM(uncompressed_size), 0)
FROM blobs
WHERE blob_hash IN (SELECT blob_hash FROM snapshot_blobs WHERE snapshot_id = ?)
`, snapshotID).Scan(&blobCount, &compressedSize, &uncompressedSize)
if err != nil {
return nil, fmt.Errorf("counting blobs and sizes: %w", err)
}
stats.BlobCount = blobCount
stats.CompressedSize = compressedSize.Int64
stats.UncompressedSize = uncompressedSize.Int64
log.Debug("[Temp DB Cleanup] Database cleanup complete", "stats", stats)
return stats, nil
}
// dumpDatabase creates a SQL dump of the database
@@ -492,7 +448,7 @@ func (sm *SnapshotManager) compressDump(inputPath, outputPath string) error {
}
}()
log.Debug("Creating output file for compressed data", "path", outputPath)
log.Debug("Creating output file for compressed and encrypted data", "path", outputPath)
output, err := os.Create(outputPath)
if err != nil {
return fmt.Errorf("creating output file: %w", err)
@@ -504,27 +460,30 @@ func (sm *SnapshotManager) compressDump(inputPath, outputPath string) error {
}
}()
// Create zstd encoder with good compression and multithreading
log.Debug("Creating zstd compressor", "level", "SpeedBetterCompression", "concurrency", runtime.NumCPU())
zstdWriter, err := zstd.NewWriter(output,
zstd.WithEncoderLevel(zstd.SpeedBetterCompression),
zstd.WithEncoderConcurrency(runtime.NumCPU()),
zstd.WithWindowSize(4<<20), // 4MB window for metadata files
)
// Use blobgen for compression and encryption
log.Debug("Creating compressor/encryptor", "level", sm.config.CompressionLevel)
writer, err := blobgen.NewWriter(output, sm.config.CompressionLevel, sm.config.AgeRecipients)
if err != nil {
return fmt.Errorf("creating zstd writer: %w", err)
return fmt.Errorf("creating blobgen writer: %w", err)
}
defer func() {
if err := zstdWriter.Close(); err != nil {
log.Debug("Failed to close zstd writer", "error", err)
if err := writer.Close(); err != nil {
log.Debug("Failed to close writer", "error", err)
}
}()
log.Debug("Compressing data")
if _, err := io.Copy(zstdWriter, input); err != nil {
log.Debug("Compressing and encrypting data")
if _, err := io.Copy(writer, input); err != nil {
return fmt.Errorf("compressing data: %w", err)
}
// Close writer to flush all data
if err := writer.Close(); err != nil {
return fmt.Errorf("closing writer: %w", err)
}
log.Debug("Compression complete", "hash", fmt.Sprintf("%x", writer.Sum256()))
return nil
}
@@ -607,44 +566,28 @@ func (sm *SnapshotManager) generateBlobManifest(ctx context.Context, dbPath stri
}
log.Debug("JSON manifest created", "size", len(jsonData))
// Compress with zstd
log.Debug("Compressing manifest with zstd")
compressed, err := compressData(jsonData)
// Compress and encrypt with blobgen
log.Debug("Compressing and encrypting manifest")
result, err := blobgen.CompressData(jsonData, sm.config.CompressionLevel, sm.config.AgeRecipients)
if err != nil {
return nil, fmt.Errorf("compressing manifest: %w", err)
}
log.Debug("Manifest compressed", "original_size", len(jsonData), "compressed_size", len(compressed))
log.Debug("Manifest compressed and encrypted",
"original_size", len(jsonData),
"compressed_size", result.CompressedSize,
"hash", result.SHA256)
log.Info("Generated blob manifest",
"snapshot_id", snapshotID,
"blob_count", len(blobs),
"json_size", len(jsonData),
"compressed_size", len(compressed))
"compressed_size", result.CompressedSize)
return compressed, nil
return result.Data, nil
}
// compressData returns the zstd-compressed form of data, encoded at the
// SpeedBetterCompression level into an in-memory buffer. Any error from
// creating, writing to, or closing the encoder is returned unchanged.
func compressData(data []byte) ([]byte, error) {
	out := new(bytes.Buffer)

	enc, err := zstd.NewWriter(out, zstd.WithEncoderLevel(zstd.SpeedBetterCompression))
	if err != nil {
		return nil, err
	}

	// The encoder is always closed; a write failure takes precedence over
	// whatever Close reports.
	_, writeErr := enc.Write(data)
	closeErr := enc.Close()

	switch {
	case writeErr != nil:
		return nil, writeErr
	case closeErr != nil:
		return nil, closeErr
	}
	return out.Bytes(), nil
}
// getFileSize returns the size of a file in bytes, or -1 if error
func getFileSize(path string) int64 {
@@ -738,7 +681,7 @@ func (sm *SnapshotManager) deleteSnapshot(ctx context.Context, snapshotID string
}
// Clean up orphaned data
log.Debug("Cleaning up orphaned data")
log.Debug("Cleaning up orphaned data in main database")
if err := sm.cleanupOrphanedData(ctx); err != nil {
return fmt.Errorf("cleaning up orphaned data: %w", err)
}
@@ -748,23 +691,170 @@ func (sm *SnapshotManager) deleteSnapshot(ctx context.Context, snapshotID string
// cleanupOrphanedData removes files, blobs, blob_chunks and chunks that are no
// longer referenced by any snapshot, by calling each repository's
// DeleteOrphaned method in turn. It stops at, and returns (wrapped), the first
// error encountered.
//
// Order is important to respect foreign key constraints:
//  1. Delete orphaned files (will cascade delete file_chunks)
//  2. Delete orphaned blobs (will cascade delete blob_chunks for deleted blobs)
//  3. Delete orphaned blob_chunks (where blob exists but chunk doesn't)
//  4. Delete orphaned chunks (now safe after all blob_chunks are gone)
//
// Chunks are deliberately deleted exactly once, as the last step: removing
// them before blob_chunks has been cleaned up would violate the ordering
// above.
func (sm *SnapshotManager) cleanupOrphanedData(ctx context.Context) error {
	// Delete orphaned files (files not in any snapshot)
	log.Debug("Deleting orphaned files")
	if err := sm.repos.Files.DeleteOrphaned(ctx); err != nil {
		return fmt.Errorf("deleting orphaned files: %w", err)
	}

	// Delete orphaned blobs (blobs not in any snapshot)
	// This will cascade delete blob_chunks for deleted blobs
	log.Debug("Deleting orphaned blobs")
	if err := sm.repos.Blobs.DeleteOrphaned(ctx); err != nil {
		return fmt.Errorf("deleting orphaned blobs: %w", err)
	}

	// Delete orphaned blob_chunks entries
	// This handles cases where the blob still exists but chunks were deleted
	log.Debug("Deleting orphaned blob_chunks")
	if err := sm.repos.BlobChunks.DeleteOrphaned(ctx); err != nil {
		return fmt.Errorf("deleting orphaned blob_chunks: %w", err)
	}

	// Delete orphaned chunks (chunks not referenced by any file)
	// This must come after cleaning up blob_chunks to avoid foreign key violations
	log.Debug("Deleting orphaned chunks")
	if err := sm.repos.Chunks.DeleteOrphaned(ctx); err != nil {
		return fmt.Errorf("deleting orphaned chunks: %w", err)
	}

	return nil
}
// deleteOtherSnapshots removes, inside tx, every row of the snapshots table
// whose id differs from currentSnapshotID. A failed DELETE is returned as a
// wrapped error; the number of removed rows is only written to the debug log.
func (sm *SnapshotManager) deleteOtherSnapshots(ctx context.Context, tx *sql.Tx, currentSnapshotID string) error {
	const query = "DELETE FROM snapshots WHERE id != ?"

	log.Debug("[Temp DB Cleanup] Deleting other snapshots", "keeping", currentSnapshotID)
	database.LogSQL("Execute", query, currentSnapshotID)

	res, execErr := tx.ExecContext(ctx, query, currentSnapshotID)
	if execErr != nil {
		return fmt.Errorf("deleting other snapshots: %w", execErr)
	}

	// The RowsAffected error is dropped: the count only feeds the debug log.
	deleted, _ := res.RowsAffected()
	log.Debug("[Temp DB Cleanup] Deleted snapshots", "count", deleted)
	return nil
}
// deleteOrphanedSnapshotAssociations removes, inside tx, the rows of
// snapshot_files and then snapshot_blobs whose snapshot_id differs from
// currentSnapshotID. It stops at the first failing DELETE and returns it
// wrapped; row counts are only written to the debug log.
func (sm *SnapshotManager) deleteOrphanedSnapshotAssociations(ctx context.Context, tx *sql.Tx, currentSnapshotID string) error {
	// Both association tables are pruned the same way, in this order.
	steps := []struct {
		table string
		query string
	}{
		{table: "snapshot_files", query: "DELETE FROM snapshot_files WHERE snapshot_id != ?"},
		{table: "snapshot_blobs", query: "DELETE FROM snapshot_blobs WHERE snapshot_id != ?"},
	}

	for _, step := range steps {
		log.Debug("[Temp DB Cleanup] Deleting orphaned " + step.table)
		database.LogSQL("Execute", step.query, currentSnapshotID)

		res, err := tx.ExecContext(ctx, step.query, currentSnapshotID)
		if err != nil {
			return fmt.Errorf("deleting orphaned %s: %w", step.table, err)
		}

		// The RowsAffected error is dropped: the count only feeds the debug log.
		deleted, _ := res.RowsAffected()
		log.Debug("[Temp DB Cleanup] Deleted "+step.table, "count", deleted)
	}

	return nil
}
// deleteOrphanedFiles removes, inside tx, every files row that has no
// snapshot_files entry linking it to currentSnapshotID. A failed DELETE is
// returned as a wrapped error; the row count is only written to the debug log.
func (sm *SnapshotManager) deleteOrphanedFiles(ctx context.Context, tx *sql.Tx, currentSnapshotID string) error {
	const (
		// loggedSQL is the single-line form handed to the SQL logger.
		loggedSQL = `DELETE FROM files WHERE NOT EXISTS (SELECT 1 FROM snapshot_files WHERE snapshot_files.file_id = files.id AND snapshot_files.snapshot_id = ?)`
		// execSQL is the statement actually executed.
		execSQL = `
DELETE FROM files
WHERE NOT EXISTS (
SELECT 1 FROM snapshot_files
WHERE snapshot_files.file_id = files.id
AND snapshot_files.snapshot_id = ?
)`
	)

	log.Debug("[Temp DB Cleanup] Deleting files not in current snapshot")
	database.LogSQL("Execute", loggedSQL, currentSnapshotID)

	res, execErr := tx.ExecContext(ctx, execSQL, currentSnapshotID)
	if execErr != nil {
		return fmt.Errorf("deleting orphaned files: %w", execErr)
	}

	// The RowsAffected error is dropped: the count only feeds the debug log.
	deleted, _ := res.RowsAffected()
	log.Debug("[Temp DB Cleanup] Deleted files", "count", deleted)

	// No explicit file_chunks cleanup happens here; the log line records that
	// it is left to CASCADE.
	log.Debug("[Temp DB Cleanup] file_chunks will be deleted via CASCADE")
	return nil
}
// deleteOrphanedChunkToFileMappings removes, inside tx, every chunk_files row
// whose file_id no longer matches any files.id. A failed DELETE is returned
// as a wrapped error; the row count is only written to the debug log.
func (sm *SnapshotManager) deleteOrphanedChunkToFileMappings(ctx context.Context, tx *sql.Tx) error {
	const (
		// loggedSQL is the single-line form handed to the SQL logger.
		loggedSQL = `DELETE FROM chunk_files WHERE NOT EXISTS (SELECT 1 FROM files WHERE files.id = chunk_files.file_id)`
		// execSQL is the statement actually executed.
		execSQL = `
DELETE FROM chunk_files
WHERE NOT EXISTS (
SELECT 1 FROM files
WHERE files.id = chunk_files.file_id
)`
	)

	log.Debug("[Temp DB Cleanup] Deleting orphaned chunk_files")
	database.LogSQL("Execute", loggedSQL)

	res, execErr := tx.ExecContext(ctx, execSQL)
	if execErr != nil {
		return fmt.Errorf("deleting orphaned chunk_files: %w", execErr)
	}

	// The RowsAffected error is dropped: the count only feeds the debug log.
	deleted, _ := res.RowsAffected()
	log.Debug("[Temp DB Cleanup] Deleted chunk_files", "count", deleted)
	return nil
}
// deleteOrphanedBlobs removes, inside tx, every blobs row that has no
// snapshot_blobs entry linking its blob_hash to currentSnapshotID. A failed
// DELETE is returned as a wrapped error; the row count is only written to the
// debug log.
func (sm *SnapshotManager) deleteOrphanedBlobs(ctx context.Context, tx *sql.Tx, currentSnapshotID string) error {
	const (
		// loggedSQL is the single-line form handed to the SQL logger.
		loggedSQL = `DELETE FROM blobs WHERE NOT EXISTS (SELECT 1 FROM snapshot_blobs WHERE snapshot_blobs.blob_hash = blobs.blob_hash AND snapshot_blobs.snapshot_id = ?)`
		// execSQL is the statement actually executed.
		execSQL = `
DELETE FROM blobs
WHERE NOT EXISTS (
SELECT 1 FROM snapshot_blobs
WHERE snapshot_blobs.blob_hash = blobs.blob_hash
AND snapshot_blobs.snapshot_id = ?
)`
	)

	log.Debug("[Temp DB Cleanup] Deleting blobs not in current snapshot")
	database.LogSQL("Execute", loggedSQL, currentSnapshotID)

	res, execErr := tx.ExecContext(ctx, execSQL, currentSnapshotID)
	if execErr != nil {
		return fmt.Errorf("deleting orphaned blobs: %w", execErr)
	}

	// The RowsAffected error is dropped: the count only feeds the debug log.
	deleted, _ := res.RowsAffected()
	log.Debug("[Temp DB Cleanup] Deleted blobs not in snapshot", "count", deleted)
	return nil
}
// deleteOrphanedBlobToChunkMappings removes, inside tx, every blob_chunks row
// whose blob_id no longer matches any blobs.id. A failed DELETE is returned
// as a wrapped error; the row count is only written to the debug log.
func (sm *SnapshotManager) deleteOrphanedBlobToChunkMappings(ctx context.Context, tx *sql.Tx) error {
	const (
		// loggedSQL is the single-line form handed to the SQL logger.
		loggedSQL = `DELETE FROM blob_chunks WHERE NOT EXISTS (SELECT 1 FROM blobs WHERE blobs.id = blob_chunks.blob_id)`
		// execSQL is the statement actually executed.
		execSQL = `
DELETE FROM blob_chunks
WHERE NOT EXISTS (
SELECT 1 FROM blobs
WHERE blobs.id = blob_chunks.blob_id
)`
	)

	log.Debug("[Temp DB Cleanup] Deleting orphaned blob_chunks")
	database.LogSQL("Execute", loggedSQL)

	res, execErr := tx.ExecContext(ctx, execSQL)
	if execErr != nil {
		return fmt.Errorf("deleting orphaned blob_chunks: %w", execErr)
	}

	// The RowsAffected error is dropped: the count only feeds the debug log.
	deleted, _ := res.RowsAffected()
	log.Debug("[Temp DB Cleanup] Deleted blob_chunks", "count", deleted)
	return nil
}
// deleteOrphanedChunks removes, inside tx, every chunks row whose chunk_hash
// does not appear in file_chunks. A failed DELETE is returned as a wrapped
// error; the row count is only written to the debug log.
func (sm *SnapshotManager) deleteOrphanedChunks(ctx context.Context, tx *sql.Tx) error {
	const (
		// loggedSQL is the single-line form handed to the SQL logger.
		loggedSQL = `DELETE FROM chunks WHERE NOT EXISTS (SELECT 1 FROM file_chunks WHERE file_chunks.chunk_hash = chunks.chunk_hash)`
		// execSQL is the statement actually executed.
		execSQL = `
DELETE FROM chunks
WHERE NOT EXISTS (
SELECT 1 FROM file_chunks
WHERE file_chunks.chunk_hash = chunks.chunk_hash
)`
	)

	log.Debug("[Temp DB Cleanup] Deleting orphaned chunks")
	database.LogSQL("Execute", loggedSQL)

	res, execErr := tx.ExecContext(ctx, execSQL)
	if execErr != nil {
		return fmt.Errorf("deleting orphaned chunks: %w", execErr)
	}

	// The RowsAffected error is dropped: the count only feeds the debug log.
	deleted, _ := res.RowsAffected()
	log.Debug("[Temp DB Cleanup] Deleted chunks", "count", deleted)
	return nil
}

View File

@@ -6,10 +6,16 @@ import (
"path/filepath"
"testing"
"git.eeqj.de/sneak/vaultik/internal/config"
"git.eeqj.de/sneak/vaultik/internal/database"
"git.eeqj.de/sneak/vaultik/internal/log"
)
const (
// testAgeRecipient is an age public key (recipient string) used by the
// tests in this file as the sole entry of config.Config.AgeRecipients.
testAgeRecipient = "age1ezrjmfpwsc95svdg0y54mums3zevgzu0x0ecq2f7tp8a05gl0sjq9q9wjg"
)
func TestCleanSnapshotDBEmptySnapshot(t *testing.T) {
// Initialize logger
log.Initialize(log.Config{})
@@ -41,7 +47,7 @@ func TestCleanSnapshotDBEmptySnapshot(t *testing.T) {
// Create some files and chunks not associated with any snapshot
file := &database.File{Path: "/orphan/file.txt", Size: 1000}
chunk := &database.Chunk{ChunkHash: "orphan-chunk", SHA256: "orphan-chunk", Size: 500}
chunk := &database.Chunk{ChunkHash: "orphan-chunk", Size: 500}
err = repos.WithTx(ctx, func(ctx context.Context, tx *sql.Tx) error {
if err := repos.Files.Create(ctx, tx, file); err != nil {
@@ -64,9 +70,14 @@ func TestCleanSnapshotDBEmptySnapshot(t *testing.T) {
t.Fatalf("failed to copy database: %v", err)
}
// Create a mock config for testing
cfg := &config.Config{
CompressionLevel: 3,
AgeRecipients: []string{testAgeRecipient},
}
// Clean the database
sm := &SnapshotManager{}
if err := sm.cleanSnapshotDB(ctx, tempDBPath, snapshot.ID); err != nil {
sm := &SnapshotManager{config: cfg}
if _, err := sm.cleanSnapshotDB(ctx, tempDBPath, snapshot.ID); err != nil {
t.Fatalf("failed to clean snapshot database: %v", err)
}
@@ -136,9 +147,14 @@ func TestCleanSnapshotDBNonExistentSnapshot(t *testing.T) {
t.Fatalf("failed to copy database: %v", err)
}
// Create a mock config for testing
cfg := &config.Config{
CompressionLevel: 3,
AgeRecipients: []string{testAgeRecipient},
}
// Try to clean with non-existent snapshot
sm := &SnapshotManager{}
err = sm.cleanSnapshotDB(ctx, tempDBPath, "non-existent-snapshot")
sm := &SnapshotManager{config: cfg}
_, err = sm.cleanSnapshotDB(ctx, tempDBPath, "non-existent-snapshot")
// Should not error - it will just delete everything
if err != nil {