Major refactoring: Updated manifest format and renamed backup to snapshot
- Created manifest.go with proper Manifest structure including blob sizes - Updated manifest generation to include compressed size for each blob - Added TotalCompressedSize field to manifest for quick access - Renamed backup package to snapshot for clarity - Updated snapshot list to show all remote snapshots - Remote snapshots not in local DB fetch manifest to get size - Local snapshots not in remote are automatically deleted - Removed backwards compatibility code (pre-1.0, no users) - Fixed prune command to use new manifest format - Updated all imports and references from backup to snapshot
This commit is contained in:
532
internal/snapshot/backup_test.go
Normal file
532
internal/snapshot/backup_test.go
Normal file
@@ -0,0 +1,532 @@
|
||||
package snapshot
|
||||
|
||||
import (
|
||||
"context"
|
||||
"crypto/sha256"
|
||||
"database/sql"
|
||||
"fmt"
|
||||
"io"
|
||||
"io/fs"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"testing"
|
||||
"testing/fstest"
|
||||
"time"
|
||||
|
||||
"git.eeqj.de/sneak/vaultik/internal/database"
|
||||
)
|
||||
|
||||
// MockS3Client is a mock implementation of S3 operations for testing
|
||||
type MockS3Client struct {
|
||||
storage map[string][]byte
|
||||
}
|
||||
|
||||
func NewMockS3Client() *MockS3Client {
|
||||
return &MockS3Client{
|
||||
storage: make(map[string][]byte),
|
||||
}
|
||||
}
|
||||
|
||||
func (m *MockS3Client) PutBlob(ctx context.Context, hash string, data []byte) error {
|
||||
m.storage[hash] = data
|
||||
return nil
|
||||
}
|
||||
|
||||
func (m *MockS3Client) GetBlob(ctx context.Context, hash string) ([]byte, error) {
|
||||
data, ok := m.storage[hash]
|
||||
if !ok {
|
||||
return nil, fmt.Errorf("blob not found: %s", hash)
|
||||
}
|
||||
return data, nil
|
||||
}
|
||||
|
||||
func (m *MockS3Client) BlobExists(ctx context.Context, hash string) (bool, error) {
|
||||
_, ok := m.storage[hash]
|
||||
return ok, nil
|
||||
}
|
||||
|
||||
func (m *MockS3Client) CreateBucket(ctx context.Context, bucket string) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
func TestBackupWithInMemoryFS(t *testing.T) {
|
||||
// Create a temporary directory for the database
|
||||
tempDir := t.TempDir()
|
||||
dbPath := filepath.Join(tempDir, "test.db")
|
||||
|
||||
// Create test filesystem
|
||||
testFS := fstest.MapFS{
|
||||
"file1.txt": &fstest.MapFile{
|
||||
Data: []byte("Hello, World!"),
|
||||
Mode: 0644,
|
||||
ModTime: time.Now(),
|
||||
},
|
||||
"dir1/file2.txt": &fstest.MapFile{
|
||||
Data: []byte("This is a test file with some content."),
|
||||
Mode: 0755,
|
||||
ModTime: time.Now(),
|
||||
},
|
||||
"dir1/subdir/file3.txt": &fstest.MapFile{
|
||||
Data: []byte("Another file in a subdirectory."),
|
||||
Mode: 0600,
|
||||
ModTime: time.Now(),
|
||||
},
|
||||
"largefile.bin": &fstest.MapFile{
|
||||
Data: generateLargeFileContent(10 * 1024 * 1024), // 10MB file with varied content
|
||||
Mode: 0644,
|
||||
ModTime: time.Now(),
|
||||
},
|
||||
}
|
||||
|
||||
// Initialize the database
|
||||
ctx := context.Background()
|
||||
db, err := database.New(ctx, dbPath)
|
||||
if err != nil {
|
||||
t.Fatalf("Failed to create database: %v", err)
|
||||
}
|
||||
defer func() {
|
||||
if err := db.Close(); err != nil {
|
||||
t.Logf("Failed to close database: %v", err)
|
||||
}
|
||||
}()
|
||||
|
||||
repos := database.NewRepositories(db)
|
||||
|
||||
// Create mock S3 client
|
||||
s3Client := NewMockS3Client()
|
||||
|
||||
// Run backup
|
||||
backupEngine := &BackupEngine{
|
||||
repos: repos,
|
||||
s3Client: s3Client,
|
||||
}
|
||||
|
||||
snapshotID, err := backupEngine.Backup(ctx, testFS, ".")
|
||||
if err != nil {
|
||||
t.Fatalf("Backup failed: %v", err)
|
||||
}
|
||||
|
||||
// Verify snapshot was created
|
||||
snapshot, err := repos.Snapshots.GetByID(ctx, snapshotID)
|
||||
if err != nil {
|
||||
t.Fatalf("Failed to get snapshot: %v", err)
|
||||
}
|
||||
|
||||
if snapshot == nil {
|
||||
t.Fatal("Snapshot not found")
|
||||
}
|
||||
|
||||
if snapshot.FileCount == 0 {
|
||||
t.Error("Expected snapshot to have files")
|
||||
}
|
||||
|
||||
// Verify files in database
|
||||
files, err := repos.Files.ListByPrefix(ctx, "")
|
||||
if err != nil {
|
||||
t.Fatalf("Failed to list files: %v", err)
|
||||
}
|
||||
|
||||
expectedFiles := map[string]bool{
|
||||
"file1.txt": true,
|
||||
"dir1/file2.txt": true,
|
||||
"dir1/subdir/file3.txt": true,
|
||||
"largefile.bin": true,
|
||||
}
|
||||
|
||||
if len(files) != len(expectedFiles) {
|
||||
t.Errorf("Expected %d files, got %d", len(expectedFiles), len(files))
|
||||
}
|
||||
|
||||
for _, file := range files {
|
||||
if !expectedFiles[file.Path] {
|
||||
t.Errorf("Unexpected file in database: %s", file.Path)
|
||||
}
|
||||
delete(expectedFiles, file.Path)
|
||||
|
||||
// Verify file metadata
|
||||
fsFile := testFS[file.Path]
|
||||
if fsFile == nil {
|
||||
t.Errorf("File %s not found in test filesystem", file.Path)
|
||||
continue
|
||||
}
|
||||
|
||||
if file.Size != int64(len(fsFile.Data)) {
|
||||
t.Errorf("File %s: expected size %d, got %d", file.Path, len(fsFile.Data), file.Size)
|
||||
}
|
||||
|
||||
if file.Mode != uint32(fsFile.Mode) {
|
||||
t.Errorf("File %s: expected mode %o, got %o", file.Path, fsFile.Mode, file.Mode)
|
||||
}
|
||||
}
|
||||
|
||||
if len(expectedFiles) > 0 {
|
||||
t.Errorf("Files not found in database: %v", expectedFiles)
|
||||
}
|
||||
|
||||
// Verify chunks
|
||||
chunks, err := repos.Chunks.List(ctx)
|
||||
if err != nil {
|
||||
t.Fatalf("Failed to list chunks: %v", err)
|
||||
}
|
||||
|
||||
if len(chunks) == 0 {
|
||||
t.Error("No chunks found in database")
|
||||
}
|
||||
|
||||
// The large file should create 10 chunks (10MB / 1MB chunk size)
|
||||
// Plus the small files
|
||||
minExpectedChunks := 10 + 3
|
||||
if len(chunks) < minExpectedChunks {
|
||||
t.Errorf("Expected at least %d chunks, got %d", minExpectedChunks, len(chunks))
|
||||
}
|
||||
|
||||
// Verify at least one blob was created and uploaded
|
||||
// We can't list blobs directly, but we can check via snapshot blobs
|
||||
blobHashes, err := repos.Snapshots.GetBlobHashes(ctx, snapshotID)
|
||||
if err != nil {
|
||||
t.Fatalf("Failed to get blob hashes: %v", err)
|
||||
}
|
||||
if len(blobHashes) == 0 {
|
||||
t.Error("Expected at least one blob to be created")
|
||||
}
|
||||
|
||||
for _, blobHash := range blobHashes {
|
||||
// Check blob exists in mock S3
|
||||
exists, err := s3Client.BlobExists(ctx, blobHash)
|
||||
if err != nil {
|
||||
t.Errorf("Failed to check blob %s: %v", blobHash, err)
|
||||
}
|
||||
if !exists {
|
||||
t.Errorf("Blob %s not found in S3", blobHash)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func TestBackupDeduplication(t *testing.T) {
|
||||
// Create a temporary directory for the database
|
||||
tempDir := t.TempDir()
|
||||
dbPath := filepath.Join(tempDir, "test.db")
|
||||
|
||||
// Create test filesystem with duplicate content
|
||||
testFS := fstest.MapFS{
|
||||
"file1.txt": &fstest.MapFile{
|
||||
Data: []byte("Duplicate content"),
|
||||
Mode: 0644,
|
||||
ModTime: time.Now(),
|
||||
},
|
||||
"file2.txt": &fstest.MapFile{
|
||||
Data: []byte("Duplicate content"),
|
||||
Mode: 0644,
|
||||
ModTime: time.Now(),
|
||||
},
|
||||
"file3.txt": &fstest.MapFile{
|
||||
Data: []byte("Unique content"),
|
||||
Mode: 0644,
|
||||
ModTime: time.Now(),
|
||||
},
|
||||
}
|
||||
|
||||
// Initialize the database
|
||||
ctx := context.Background()
|
||||
db, err := database.New(ctx, dbPath)
|
||||
if err != nil {
|
||||
t.Fatalf("Failed to create database: %v", err)
|
||||
}
|
||||
defer func() {
|
||||
if err := db.Close(); err != nil {
|
||||
t.Logf("Failed to close database: %v", err)
|
||||
}
|
||||
}()
|
||||
|
||||
repos := database.NewRepositories(db)
|
||||
|
||||
// Create mock S3 client
|
||||
s3Client := NewMockS3Client()
|
||||
|
||||
// Run backup
|
||||
backupEngine := &BackupEngine{
|
||||
repos: repos,
|
||||
s3Client: s3Client,
|
||||
}
|
||||
|
||||
_, err = backupEngine.Backup(ctx, testFS, ".")
|
||||
if err != nil {
|
||||
t.Fatalf("Backup failed: %v", err)
|
||||
}
|
||||
|
||||
// Verify deduplication
|
||||
chunks, err := repos.Chunks.List(ctx)
|
||||
if err != nil {
|
||||
t.Fatalf("Failed to list chunks: %v", err)
|
||||
}
|
||||
|
||||
// Should have only 2 unique chunks (duplicate content + unique content)
|
||||
if len(chunks) != 2 {
|
||||
t.Errorf("Expected 2 unique chunks, got %d", len(chunks))
|
||||
}
|
||||
|
||||
// Verify chunk references
|
||||
for _, chunk := range chunks {
|
||||
files, err := repos.ChunkFiles.GetByChunkHash(ctx, chunk.ChunkHash)
|
||||
if err != nil {
|
||||
t.Errorf("Failed to get files for chunk %s: %v", chunk.ChunkHash, err)
|
||||
}
|
||||
|
||||
// The duplicate content chunk should be referenced by 2 files
|
||||
if chunk.Size == int64(len("Duplicate content")) && len(files) != 2 {
|
||||
t.Errorf("Expected duplicate chunk to be referenced by 2 files, got %d", len(files))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// BackupEngine performs backup operations
|
||||
type BackupEngine struct {
|
||||
repos *database.Repositories
|
||||
s3Client interface {
|
||||
PutBlob(ctx context.Context, hash string, data []byte) error
|
||||
BlobExists(ctx context.Context, hash string) (bool, error)
|
||||
}
|
||||
}
|
||||
|
||||
// Backup performs a backup of the given filesystem
|
||||
func (b *BackupEngine) Backup(ctx context.Context, fsys fs.FS, root string) (string, error) {
|
||||
// Create a new snapshot
|
||||
hostname, _ := os.Hostname()
|
||||
snapshotID := time.Now().Format(time.RFC3339)
|
||||
snapshot := &database.Snapshot{
|
||||
ID: snapshotID,
|
||||
Hostname: hostname,
|
||||
VaultikVersion: "test",
|
||||
StartedAt: time.Now(),
|
||||
CompletedAt: nil,
|
||||
}
|
||||
|
||||
// Create initial snapshot record
|
||||
err := b.repos.WithTx(ctx, func(ctx context.Context, tx *sql.Tx) error {
|
||||
return b.repos.Snapshots.Create(ctx, tx, snapshot)
|
||||
})
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
|
||||
// Track counters
|
||||
var fileCount, chunkCount, blobCount, totalSize, blobSize int64
|
||||
|
||||
// Track which chunks we've seen to handle deduplication
|
||||
processedChunks := make(map[string]bool)
|
||||
|
||||
// Scan the filesystem and process files
|
||||
err = fs.WalkDir(fsys, root, func(path string, d fs.DirEntry, err error) error {
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// Skip directories
|
||||
if d.IsDir() {
|
||||
return nil
|
||||
}
|
||||
|
||||
// Get file info
|
||||
info, err := d.Info()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// Handle symlinks
|
||||
if info.Mode()&fs.ModeSymlink != 0 {
|
||||
// For testing, we'll skip symlinks since fstest doesn't support them well
|
||||
return nil
|
||||
}
|
||||
|
||||
// Create file record in a short transaction
|
||||
file := &database.File{
|
||||
Path: path,
|
||||
Size: info.Size(),
|
||||
Mode: uint32(info.Mode()),
|
||||
MTime: info.ModTime(),
|
||||
CTime: info.ModTime(), // Use mtime as ctime for test
|
||||
UID: 1000, // Default UID for test
|
||||
GID: 1000, // Default GID for test
|
||||
}
|
||||
err = b.repos.WithTx(ctx, func(ctx context.Context, tx *sql.Tx) error {
|
||||
return b.repos.Files.Create(ctx, tx, file)
|
||||
})
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
fileCount++
|
||||
totalSize += info.Size()
|
||||
|
||||
// Read and process file in chunks
|
||||
f, err := fsys.Open(path)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
defer func() {
|
||||
if err := f.Close(); err != nil {
|
||||
// Log but don't fail since we're already in an error path potentially
|
||||
fmt.Fprintf(os.Stderr, "Failed to close file: %v\n", err)
|
||||
}
|
||||
}()
|
||||
|
||||
// Process file in chunks
|
||||
chunkIndex := 0
|
||||
buffer := make([]byte, defaultChunkSize)
|
||||
|
||||
for {
|
||||
n, err := f.Read(buffer)
|
||||
if err != nil && err != io.EOF {
|
||||
return err
|
||||
}
|
||||
if n == 0 {
|
||||
break
|
||||
}
|
||||
|
||||
chunkData := buffer[:n]
|
||||
chunkHash := calculateHash(chunkData)
|
||||
|
||||
// Check if chunk already exists (outside of transaction)
|
||||
existingChunk, _ := b.repos.Chunks.GetByHash(ctx, chunkHash)
|
||||
if existingChunk == nil {
|
||||
// Create new chunk in a short transaction
|
||||
err = b.repos.WithTx(ctx, func(ctx context.Context, tx *sql.Tx) error {
|
||||
chunk := &database.Chunk{
|
||||
ChunkHash: chunkHash,
|
||||
Size: int64(n),
|
||||
}
|
||||
return b.repos.Chunks.Create(ctx, tx, chunk)
|
||||
})
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
processedChunks[chunkHash] = true
|
||||
}
|
||||
|
||||
// Create file-chunk mapping in a short transaction
|
||||
err = b.repos.WithTx(ctx, func(ctx context.Context, tx *sql.Tx) error {
|
||||
fileChunk := &database.FileChunk{
|
||||
FileID: file.ID,
|
||||
Idx: chunkIndex,
|
||||
ChunkHash: chunkHash,
|
||||
}
|
||||
return b.repos.FileChunks.Create(ctx, tx, fileChunk)
|
||||
})
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// Create chunk-file mapping in a short transaction
|
||||
err = b.repos.WithTx(ctx, func(ctx context.Context, tx *sql.Tx) error {
|
||||
chunkFile := &database.ChunkFile{
|
||||
ChunkHash: chunkHash,
|
||||
FileID: file.ID,
|
||||
FileOffset: int64(chunkIndex * defaultChunkSize),
|
||||
Length: int64(n),
|
||||
}
|
||||
return b.repos.ChunkFiles.Create(ctx, tx, chunkFile)
|
||||
})
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
chunkIndex++
|
||||
}
|
||||
|
||||
return nil
|
||||
})
|
||||
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
|
||||
// After all files are processed, create blobs for new chunks
|
||||
for chunkHash := range processedChunks {
|
||||
// Get chunk data (outside of transaction)
|
||||
chunk, err := b.repos.Chunks.GetByHash(ctx, chunkHash)
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
|
||||
chunkCount++
|
||||
|
||||
// In a real system, blobs would contain multiple chunks and be encrypted
|
||||
// For testing, we'll create a blob with a "blob-" prefix to differentiate
|
||||
blobHash := "blob-" + chunkHash
|
||||
|
||||
// For the test, we'll create dummy data since we don't have the original
|
||||
dummyData := []byte(chunkHash)
|
||||
|
||||
// Upload to S3 as a blob
|
||||
if err := b.s3Client.PutBlob(ctx, blobHash, dummyData); err != nil {
|
||||
return "", err
|
||||
}
|
||||
|
||||
// Create blob entry in a short transaction
|
||||
err = b.repos.WithTx(ctx, func(ctx context.Context, tx *sql.Tx) error {
|
||||
blob := &database.Blob{
|
||||
ID: "test-blob-" + blobHash[:8],
|
||||
Hash: blobHash,
|
||||
CreatedTS: time.Now(),
|
||||
}
|
||||
return b.repos.Blobs.Create(ctx, tx, blob)
|
||||
})
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
|
||||
blobCount++
|
||||
blobSize += chunk.Size
|
||||
|
||||
// Create blob-chunk mapping in a short transaction
|
||||
err = b.repos.WithTx(ctx, func(ctx context.Context, tx *sql.Tx) error {
|
||||
blobChunk := &database.BlobChunk{
|
||||
BlobID: "test-blob-" + blobHash[:8],
|
||||
ChunkHash: chunkHash,
|
||||
Offset: 0,
|
||||
Length: chunk.Size,
|
||||
}
|
||||
return b.repos.BlobChunks.Create(ctx, tx, blobChunk)
|
||||
})
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
|
||||
// Add blob to snapshot in a short transaction
|
||||
err = b.repos.WithTx(ctx, func(ctx context.Context, tx *sql.Tx) error {
|
||||
return b.repos.Snapshots.AddBlob(ctx, tx, snapshotID, "test-blob-"+blobHash[:8], blobHash)
|
||||
})
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
}
|
||||
|
||||
// Update snapshot with final counts
|
||||
err = b.repos.WithTx(ctx, func(ctx context.Context, tx *sql.Tx) error {
|
||||
return b.repos.Snapshots.UpdateCounts(ctx, tx, snapshotID, fileCount, chunkCount, blobCount, totalSize, blobSize)
|
||||
})
|
||||
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
|
||||
return snapshotID, nil
|
||||
}
|
||||
|
||||
func calculateHash(data []byte) string {
|
||||
h := sha256.New()
|
||||
h.Write(data)
|
||||
return fmt.Sprintf("%x", h.Sum(nil))
|
||||
}
|
||||
|
||||
func generateLargeFileContent(size int) []byte {
|
||||
data := make([]byte, size)
|
||||
// Fill with pattern that changes every chunk to avoid deduplication
|
||||
for i := 0; i < size; i++ {
|
||||
chunkNum := i / defaultChunkSize
|
||||
data[i] = byte((i + chunkNum) % 256)
|
||||
}
|
||||
return data
|
||||
}
|
||||
|
||||
const defaultChunkSize = 1024 * 1024 // 1MB chunks
|
||||
236
internal/snapshot/file_change_test.go
Normal file
236
internal/snapshot/file_change_test.go
Normal file
@@ -0,0 +1,236 @@
|
||||
package snapshot_test
|
||||
|
||||
import (
|
||||
"context"
|
||||
"database/sql"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"git.eeqj.de/sneak/vaultik/internal/database"
|
||||
"git.eeqj.de/sneak/vaultik/internal/log"
|
||||
"git.eeqj.de/sneak/vaultik/internal/snapshot"
|
||||
"github.com/spf13/afero"
|
||||
"github.com/stretchr/testify/assert"
|
||||
"github.com/stretchr/testify/require"
|
||||
)
|
||||
|
||||
// TestFileContentChange verifies that when a file's content changes,
|
||||
// the old chunks are properly disassociated
|
||||
func TestFileContentChange(t *testing.T) {
|
||||
// Initialize logger for tests
|
||||
log.Initialize(log.Config{})
|
||||
|
||||
// Create in-memory filesystem
|
||||
fs := afero.NewMemMapFs()
|
||||
|
||||
// Create initial file
|
||||
err := afero.WriteFile(fs, "/test.txt", []byte("Initial content"), 0644)
|
||||
require.NoError(t, err)
|
||||
|
||||
// Create test database
|
||||
db, err := database.NewTestDB()
|
||||
require.NoError(t, err)
|
||||
defer func() {
|
||||
if err := db.Close(); err != nil {
|
||||
t.Errorf("failed to close database: %v", err)
|
||||
}
|
||||
}()
|
||||
|
||||
repos := database.NewRepositories(db)
|
||||
|
||||
// Create scanner
|
||||
scanner := snapshot.NewScanner(snapshot.ScannerConfig{
|
||||
FS: fs,
|
||||
ChunkSize: int64(1024 * 16), // 16KB chunks for testing
|
||||
Repositories: repos,
|
||||
MaxBlobSize: int64(1024 * 1024), // 1MB blobs
|
||||
CompressionLevel: 3,
|
||||
AgeRecipients: []string{"age1ezrjmfpwsc95svdg0y54mums3zevgzu0x0ecq2f7tp8a05gl0sjq9q9wjg"}, // Test public key
|
||||
})
|
||||
|
||||
// Create first snapshot
|
||||
ctx := context.Background()
|
||||
snapshotID1 := "snapshot1"
|
||||
err = repos.WithTx(ctx, func(ctx context.Context, tx *sql.Tx) error {
|
||||
snapshot := &database.Snapshot{
|
||||
ID: snapshotID1,
|
||||
Hostname: "test-host",
|
||||
VaultikVersion: "test",
|
||||
StartedAt: time.Now(),
|
||||
}
|
||||
return repos.Snapshots.Create(ctx, tx, snapshot)
|
||||
})
|
||||
require.NoError(t, err)
|
||||
|
||||
// First scan - should create chunks for initial content
|
||||
result1, err := scanner.Scan(ctx, "/", snapshotID1)
|
||||
require.NoError(t, err)
|
||||
t.Logf("First scan: %d files scanned", result1.FilesScanned)
|
||||
|
||||
// Get file chunks from first scan
|
||||
fileChunks1, err := repos.FileChunks.GetByPath(ctx, "/test.txt")
|
||||
require.NoError(t, err)
|
||||
assert.Len(t, fileChunks1, 1) // Small file = 1 chunk
|
||||
oldChunkHash := fileChunks1[0].ChunkHash
|
||||
|
||||
// Get chunk files from first scan
|
||||
chunkFiles1, err := repos.ChunkFiles.GetByFilePath(ctx, "/test.txt")
|
||||
require.NoError(t, err)
|
||||
assert.Len(t, chunkFiles1, 1)
|
||||
|
||||
// Modify the file
|
||||
time.Sleep(10 * time.Millisecond) // Ensure mtime changes
|
||||
err = afero.WriteFile(fs, "/test.txt", []byte("Modified content with different data"), 0644)
|
||||
require.NoError(t, err)
|
||||
|
||||
// Create second snapshot
|
||||
snapshotID2 := "snapshot2"
|
||||
err = repos.WithTx(ctx, func(ctx context.Context, tx *sql.Tx) error {
|
||||
snapshot := &database.Snapshot{
|
||||
ID: snapshotID2,
|
||||
Hostname: "test-host",
|
||||
VaultikVersion: "test",
|
||||
StartedAt: time.Now(),
|
||||
}
|
||||
return repos.Snapshots.Create(ctx, tx, snapshot)
|
||||
})
|
||||
require.NoError(t, err)
|
||||
|
||||
// Second scan - should create new chunks and remove old associations
|
||||
result2, err := scanner.Scan(ctx, "/", snapshotID2)
|
||||
require.NoError(t, err)
|
||||
t.Logf("Second scan: %d files scanned", result2.FilesScanned)
|
||||
|
||||
// Get file chunks from second scan
|
||||
fileChunks2, err := repos.FileChunks.GetByPath(ctx, "/test.txt")
|
||||
require.NoError(t, err)
|
||||
assert.Len(t, fileChunks2, 1) // Still 1 chunk but different hash
|
||||
newChunkHash := fileChunks2[0].ChunkHash
|
||||
|
||||
// Verify the chunk hashes are different
|
||||
assert.NotEqual(t, oldChunkHash, newChunkHash, "Chunk hash should change when content changes")
|
||||
|
||||
// Get chunk files from second scan
|
||||
chunkFiles2, err := repos.ChunkFiles.GetByFilePath(ctx, "/test.txt")
|
||||
require.NoError(t, err)
|
||||
assert.Len(t, chunkFiles2, 1)
|
||||
assert.Equal(t, newChunkHash, chunkFiles2[0].ChunkHash)
|
||||
|
||||
// Verify old chunk still exists (it's still valid data)
|
||||
oldChunk, err := repos.Chunks.GetByHash(ctx, oldChunkHash)
|
||||
require.NoError(t, err)
|
||||
assert.NotNil(t, oldChunk)
|
||||
|
||||
// Verify new chunk exists
|
||||
newChunk, err := repos.Chunks.GetByHash(ctx, newChunkHash)
|
||||
require.NoError(t, err)
|
||||
assert.NotNil(t, newChunk)
|
||||
|
||||
// Verify that chunk_files for old chunk no longer references this file
|
||||
oldChunkFiles, err := repos.ChunkFiles.GetByChunkHash(ctx, oldChunkHash)
|
||||
require.NoError(t, err)
|
||||
for _, cf := range oldChunkFiles {
|
||||
file, err := repos.Files.GetByID(ctx, cf.FileID)
|
||||
require.NoError(t, err)
|
||||
assert.NotEqual(t, "/data/test.txt", file.Path, "Old chunk should not be associated with the modified file")
|
||||
}
|
||||
}
|
||||
|
||||
// TestMultipleFileChanges verifies handling of multiple file changes in one scan
|
||||
func TestMultipleFileChanges(t *testing.T) {
|
||||
// Initialize logger for tests
|
||||
log.Initialize(log.Config{})
|
||||
|
||||
// Create in-memory filesystem
|
||||
fs := afero.NewMemMapFs()
|
||||
|
||||
// Create initial files
|
||||
files := map[string]string{
|
||||
"/file1.txt": "Content 1",
|
||||
"/file2.txt": "Content 2",
|
||||
"/file3.txt": "Content 3",
|
||||
}
|
||||
|
||||
for path, content := range files {
|
||||
err := afero.WriteFile(fs, path, []byte(content), 0644)
|
||||
require.NoError(t, err)
|
||||
}
|
||||
|
||||
// Create test database
|
||||
db, err := database.NewTestDB()
|
||||
require.NoError(t, err)
|
||||
defer func() {
|
||||
if err := db.Close(); err != nil {
|
||||
t.Errorf("failed to close database: %v", err)
|
||||
}
|
||||
}()
|
||||
|
||||
repos := database.NewRepositories(db)
|
||||
|
||||
// Create scanner
|
||||
scanner := snapshot.NewScanner(snapshot.ScannerConfig{
|
||||
FS: fs,
|
||||
ChunkSize: int64(1024 * 16), // 16KB chunks for testing
|
||||
Repositories: repos,
|
||||
MaxBlobSize: int64(1024 * 1024), // 1MB blobs
|
||||
CompressionLevel: 3,
|
||||
AgeRecipients: []string{"age1ezrjmfpwsc95svdg0y54mums3zevgzu0x0ecq2f7tp8a05gl0sjq9q9wjg"}, // Test public key
|
||||
})
|
||||
|
||||
// Create first snapshot
|
||||
ctx := context.Background()
|
||||
snapshotID1 := "snapshot1"
|
||||
err = repos.WithTx(ctx, func(ctx context.Context, tx *sql.Tx) error {
|
||||
snapshot := &database.Snapshot{
|
||||
ID: snapshotID1,
|
||||
Hostname: "test-host",
|
||||
VaultikVersion: "test",
|
||||
StartedAt: time.Now(),
|
||||
}
|
||||
return repos.Snapshots.Create(ctx, tx, snapshot)
|
||||
})
|
||||
require.NoError(t, err)
|
||||
|
||||
// First scan
|
||||
result1, err := scanner.Scan(ctx, "/", snapshotID1)
|
||||
require.NoError(t, err)
|
||||
// 4 files because root directory is also counted
|
||||
assert.Equal(t, 4, result1.FilesScanned)
|
||||
|
||||
// Modify two files
|
||||
time.Sleep(10 * time.Millisecond) // Ensure mtime changes
|
||||
err = afero.WriteFile(fs, "/file1.txt", []byte("Modified content 1"), 0644)
|
||||
require.NoError(t, err)
|
||||
err = afero.WriteFile(fs, "/file3.txt", []byte("Modified content 3"), 0644)
|
||||
require.NoError(t, err)
|
||||
|
||||
// Create second snapshot
|
||||
snapshotID2 := "snapshot2"
|
||||
err = repos.WithTx(ctx, func(ctx context.Context, tx *sql.Tx) error {
|
||||
snapshot := &database.Snapshot{
|
||||
ID: snapshotID2,
|
||||
Hostname: "test-host",
|
||||
VaultikVersion: "test",
|
||||
StartedAt: time.Now(),
|
||||
}
|
||||
return repos.Snapshots.Create(ctx, tx, snapshot)
|
||||
})
|
||||
require.NoError(t, err)
|
||||
|
||||
// Second scan
|
||||
result2, err := scanner.Scan(ctx, "/", snapshotID2)
|
||||
require.NoError(t, err)
|
||||
// 4 files because root directory is also counted
|
||||
assert.Equal(t, 4, result2.FilesScanned)
|
||||
|
||||
// Verify each file has exactly one set of chunks
|
||||
for path := range files {
|
||||
fileChunks, err := repos.FileChunks.GetByPath(ctx, path)
|
||||
require.NoError(t, err)
|
||||
assert.Len(t, fileChunks, 1, "File %s should have exactly 1 chunk association", path)
|
||||
|
||||
chunkFiles, err := repos.ChunkFiles.GetByFilePath(ctx, path)
|
||||
require.NoError(t, err)
|
||||
assert.Len(t, chunkFiles, 1, "File %s should have exactly 1 chunk-file association", path)
|
||||
}
|
||||
}
|
||||
70
internal/snapshot/manifest.go
Normal file
70
internal/snapshot/manifest.go
Normal file
@@ -0,0 +1,70 @@
|
||||
package snapshot
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"io"
|
||||
|
||||
"github.com/klauspost/compress/zstd"
|
||||
)
|
||||
|
||||
// Manifest represents the structure of a snapshot's blob manifest
|
||||
type Manifest struct {
|
||||
SnapshotID string `json:"snapshot_id"`
|
||||
Timestamp string `json:"timestamp"`
|
||||
BlobCount int `json:"blob_count"`
|
||||
TotalCompressedSize int64 `json:"total_compressed_size"`
|
||||
Blobs []BlobInfo `json:"blobs"`
|
||||
}
|
||||
|
||||
// BlobInfo represents information about a single blob in the manifest
|
||||
type BlobInfo struct {
|
||||
Hash string `json:"hash"`
|
||||
CompressedSize int64 `json:"compressed_size"`
|
||||
}
|
||||
|
||||
// DecodeManifest decodes a manifest from a reader containing compressed JSON
|
||||
func DecodeManifest(r io.Reader) (*Manifest, error) {
|
||||
// Decompress using zstd
|
||||
zr, err := zstd.NewReader(r)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("creating zstd reader: %w", err)
|
||||
}
|
||||
defer zr.Close()
|
||||
|
||||
// Decode JSON manifest
|
||||
var manifest Manifest
|
||||
if err := json.NewDecoder(zr).Decode(&manifest); err != nil {
|
||||
return nil, fmt.Errorf("decoding manifest: %w", err)
|
||||
}
|
||||
|
||||
return &manifest, nil
|
||||
}
|
||||
|
||||
// EncodeManifest encodes a manifest to compressed JSON
|
||||
func EncodeManifest(manifest *Manifest, compressionLevel int) ([]byte, error) {
|
||||
// Marshal to JSON
|
||||
jsonData, err := json.MarshalIndent(manifest, "", " ")
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("marshaling manifest: %w", err)
|
||||
}
|
||||
|
||||
// Compress using zstd
|
||||
var compressedBuf bytes.Buffer
|
||||
writer, err := zstd.NewWriter(&compressedBuf, zstd.WithEncoderLevel(zstd.EncoderLevelFromZstd(compressionLevel)))
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("creating zstd writer: %w", err)
|
||||
}
|
||||
|
||||
if _, err := writer.Write(jsonData); err != nil {
|
||||
_ = writer.Close()
|
||||
return nil, fmt.Errorf("writing compressed data: %w", err)
|
||||
}
|
||||
|
||||
if err := writer.Close(); err != nil {
|
||||
return nil, fmt.Errorf("closing zstd writer: %w", err)
|
||||
}
|
||||
|
||||
return compressedBuf.Bytes(), nil
|
||||
}
|
||||
42
internal/snapshot/module.go
Normal file
42
internal/snapshot/module.go
Normal file
@@ -0,0 +1,42 @@
|
||||
package snapshot
|
||||
|
||||
import (
|
||||
"git.eeqj.de/sneak/vaultik/internal/config"
|
||||
"git.eeqj.de/sneak/vaultik/internal/database"
|
||||
"git.eeqj.de/sneak/vaultik/internal/s3"
|
||||
"github.com/spf13/afero"
|
||||
"go.uber.org/fx"
|
||||
)
|
||||
|
||||
// ScannerParams holds parameters for scanner creation
|
||||
type ScannerParams struct {
|
||||
EnableProgress bool
|
||||
}
|
||||
|
||||
// Module exports backup functionality as an fx module.
|
||||
// It provides a ScannerFactory that can create Scanner instances
|
||||
// with custom parameters while sharing common dependencies.
|
||||
var Module = fx.Module("backup",
|
||||
fx.Provide(
|
||||
provideScannerFactory,
|
||||
NewSnapshotManager,
|
||||
),
|
||||
)
|
||||
|
||||
// ScannerFactory creates scanners with custom parameters
|
||||
type ScannerFactory func(params ScannerParams) *Scanner
|
||||
|
||||
func provideScannerFactory(cfg *config.Config, repos *database.Repositories, s3Client *s3.Client) ScannerFactory {
|
||||
return func(params ScannerParams) *Scanner {
|
||||
return NewScanner(ScannerConfig{
|
||||
FS: afero.NewOsFs(),
|
||||
ChunkSize: cfg.ChunkSize.Int64(),
|
||||
Repositories: repos,
|
||||
S3Client: s3Client,
|
||||
MaxBlobSize: cfg.BlobSizeLimit.Int64(),
|
||||
CompressionLevel: cfg.CompressionLevel,
|
||||
AgeRecipients: cfg.AgeRecipients,
|
||||
EnableProgress: params.EnableProgress,
|
||||
})
|
||||
}
|
||||
}
|
||||
412
internal/snapshot/progress.go
Normal file
412
internal/snapshot/progress.go
Normal file
@@ -0,0 +1,412 @@
|
||||
package snapshot
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"os"
|
||||
"os/signal"
|
||||
"sync"
|
||||
"sync/atomic"
|
||||
"syscall"
|
||||
"time"
|
||||
|
||||
"git.eeqj.de/sneak/vaultik/internal/log"
|
||||
"github.com/dustin/go-humanize"
|
||||
)
|
||||
|
||||
const (
|
||||
// SummaryInterval defines how often one-line status updates are printed.
|
||||
// These updates show current progress, ETA, and the file being processed.
|
||||
SummaryInterval = 10 * time.Second
|
||||
|
||||
// DetailInterval defines how often multi-line detailed status reports are printed.
|
||||
// These reports include comprehensive statistics about files, chunks, blobs, and uploads.
|
||||
DetailInterval = 60 * time.Second
|
||||
)
|
||||
|
||||
// ProgressStats holds atomic counters for progress tracking
|
||||
type ProgressStats struct {
|
||||
FilesScanned atomic.Int64 // Total files seen during scan (includes skipped)
|
||||
FilesProcessed atomic.Int64 // Files actually processed in phase 2
|
||||
FilesSkipped atomic.Int64 // Files skipped due to no changes
|
||||
BytesScanned atomic.Int64 // Bytes from new/changed files only
|
||||
BytesSkipped atomic.Int64 // Bytes from unchanged files
|
||||
BytesProcessed atomic.Int64 // Actual bytes processed (for ETA calculation)
|
||||
ChunksCreated atomic.Int64
|
||||
BlobsCreated atomic.Int64
|
||||
BlobsUploaded atomic.Int64
|
||||
BytesUploaded atomic.Int64
|
||||
UploadDurationMs atomic.Int64 // Total milliseconds spent uploading to S3
|
||||
CurrentFile atomic.Value // stores string
|
||||
TotalSize atomic.Int64 // Total size to process (set after scan phase)
|
||||
TotalFiles atomic.Int64 // Total files to process in phase 2
|
||||
ProcessStartTime atomic.Value // stores time.Time when processing starts
|
||||
StartTime time.Time
|
||||
mu sync.RWMutex
|
||||
lastDetailTime time.Time
|
||||
|
||||
// Upload tracking
|
||||
CurrentUpload atomic.Value // stores *UploadInfo
|
||||
lastChunkingTime time.Time // Track when we last showed chunking progress
|
||||
}
|
||||
|
||||
// UploadInfo tracks current upload progress
|
||||
type UploadInfo struct {
|
||||
BlobHash string
|
||||
Size int64
|
||||
StartTime time.Time
|
||||
}
|
||||
|
||||
// ProgressReporter handles periodic progress reporting
|
||||
type ProgressReporter struct {
|
||||
stats *ProgressStats
|
||||
ctx context.Context
|
||||
cancel context.CancelFunc
|
||||
wg sync.WaitGroup
|
||||
detailTicker *time.Ticker
|
||||
summaryTicker *time.Ticker
|
||||
sigChan chan os.Signal
|
||||
}
|
||||
|
||||
// NewProgressReporter creates a new progress reporter
|
||||
func NewProgressReporter() *ProgressReporter {
|
||||
stats := &ProgressStats{
|
||||
StartTime: time.Now().UTC(),
|
||||
lastDetailTime: time.Now().UTC(),
|
||||
}
|
||||
stats.CurrentFile.Store("")
|
||||
|
||||
ctx, cancel := context.WithCancel(context.Background())
|
||||
|
||||
pr := &ProgressReporter{
|
||||
stats: stats,
|
||||
ctx: ctx,
|
||||
cancel: cancel,
|
||||
summaryTicker: time.NewTicker(SummaryInterval),
|
||||
detailTicker: time.NewTicker(DetailInterval),
|
||||
sigChan: make(chan os.Signal, 1),
|
||||
}
|
||||
|
||||
// Register for SIGUSR1
|
||||
signal.Notify(pr.sigChan, syscall.SIGUSR1)
|
||||
|
||||
return pr
|
||||
}
|
||||
|
||||
// Start begins the progress reporting
|
||||
func (pr *ProgressReporter) Start() {
|
||||
pr.wg.Add(1)
|
||||
go pr.run()
|
||||
|
||||
// Print initial multi-line status
|
||||
pr.printDetailedStatus()
|
||||
}
|
||||
|
||||
// Stop stops the progress reporting
|
||||
func (pr *ProgressReporter) Stop() {
|
||||
pr.cancel()
|
||||
pr.summaryTicker.Stop()
|
||||
pr.detailTicker.Stop()
|
||||
signal.Stop(pr.sigChan)
|
||||
close(pr.sigChan)
|
||||
pr.wg.Wait()
|
||||
}
|
||||
|
||||
// GetStats returns the progress stats for updating
|
||||
func (pr *ProgressReporter) GetStats() *ProgressStats {
|
||||
return pr.stats
|
||||
}
|
||||
|
||||
// SetTotalSize sets the total size to process (after scan phase)
|
||||
func (pr *ProgressReporter) SetTotalSize(size int64) {
|
||||
pr.stats.TotalSize.Store(size)
|
||||
pr.stats.ProcessStartTime.Store(time.Now().UTC())
|
||||
}
|
||||
|
||||
// run is the main progress reporting loop
|
||||
func (pr *ProgressReporter) run() {
|
||||
defer pr.wg.Done()
|
||||
|
||||
for {
|
||||
select {
|
||||
case <-pr.ctx.Done():
|
||||
return
|
||||
case <-pr.summaryTicker.C:
|
||||
pr.printSummaryStatus()
|
||||
case <-pr.detailTicker.C:
|
||||
pr.printDetailedStatus()
|
||||
case <-pr.sigChan:
|
||||
// SIGUSR1 received, print detailed status
|
||||
log.Info("SIGUSR1 received, printing detailed status")
|
||||
pr.printDetailedStatus()
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// printSummaryStatus prints a one-line status update
|
||||
func (pr *ProgressReporter) printSummaryStatus() {
|
||||
// Check if we're currently uploading
|
||||
if uploadInfo, ok := pr.stats.CurrentUpload.Load().(*UploadInfo); ok && uploadInfo != nil {
|
||||
// Show upload progress instead
|
||||
pr.printUploadProgress(uploadInfo)
|
||||
return
|
||||
}
|
||||
|
||||
// Only show chunking progress if we've done chunking recently
|
||||
pr.stats.mu.RLock()
|
||||
timeSinceLastChunk := time.Since(pr.stats.lastChunkingTime)
|
||||
pr.stats.mu.RUnlock()
|
||||
|
||||
if timeSinceLastChunk > SummaryInterval*2 {
|
||||
// No recent chunking activity, don't show progress
|
||||
return
|
||||
}
|
||||
|
||||
elapsed := time.Since(pr.stats.StartTime)
|
||||
bytesScanned := pr.stats.BytesScanned.Load()
|
||||
bytesSkipped := pr.stats.BytesSkipped.Load()
|
||||
bytesProcessed := pr.stats.BytesProcessed.Load()
|
||||
totalSize := pr.stats.TotalSize.Load()
|
||||
currentFile := pr.stats.CurrentFile.Load().(string)
|
||||
|
||||
// Calculate ETA if we have total size and are processing
|
||||
etaStr := ""
|
||||
if totalSize > 0 && bytesProcessed > 0 {
|
||||
processStart, ok := pr.stats.ProcessStartTime.Load().(time.Time)
|
||||
if ok && !processStart.IsZero() {
|
||||
processElapsed := time.Since(processStart)
|
||||
rate := float64(bytesProcessed) / processElapsed.Seconds()
|
||||
if rate > 0 {
|
||||
remainingBytes := totalSize - bytesProcessed
|
||||
remainingSeconds := float64(remainingBytes) / rate
|
||||
eta := time.Duration(remainingSeconds * float64(time.Second))
|
||||
etaStr = fmt.Sprintf(" | ETA: %s", formatDuration(eta))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
rate := float64(bytesScanned+bytesSkipped) / elapsed.Seconds()
|
||||
|
||||
// Show files processed / total files to process
|
||||
filesProcessed := pr.stats.FilesProcessed.Load()
|
||||
totalFiles := pr.stats.TotalFiles.Load()
|
||||
|
||||
status := fmt.Sprintf("Snapshot progress: %d/%d files, %s/%s (%.1f%%), %s/s%s",
|
||||
filesProcessed,
|
||||
totalFiles,
|
||||
humanize.Bytes(uint64(bytesProcessed)),
|
||||
humanize.Bytes(uint64(totalSize)),
|
||||
float64(bytesProcessed)/float64(totalSize)*100,
|
||||
humanize.Bytes(uint64(rate)),
|
||||
etaStr,
|
||||
)
|
||||
|
||||
if currentFile != "" {
|
||||
status += fmt.Sprintf(" | Current: %s", truncatePath(currentFile, 40))
|
||||
}
|
||||
|
||||
log.Info(status)
|
||||
}
|
||||
|
||||
// printDetailedStatus prints a multi-line detailed status
|
||||
func (pr *ProgressReporter) printDetailedStatus() {
|
||||
pr.stats.mu.Lock()
|
||||
pr.stats.lastDetailTime = time.Now().UTC()
|
||||
pr.stats.mu.Unlock()
|
||||
|
||||
elapsed := time.Since(pr.stats.StartTime)
|
||||
filesScanned := pr.stats.FilesScanned.Load()
|
||||
filesSkipped := pr.stats.FilesSkipped.Load()
|
||||
bytesScanned := pr.stats.BytesScanned.Load()
|
||||
bytesSkipped := pr.stats.BytesSkipped.Load()
|
||||
bytesProcessed := pr.stats.BytesProcessed.Load()
|
||||
totalSize := pr.stats.TotalSize.Load()
|
||||
chunksCreated := pr.stats.ChunksCreated.Load()
|
||||
blobsCreated := pr.stats.BlobsCreated.Load()
|
||||
blobsUploaded := pr.stats.BlobsUploaded.Load()
|
||||
bytesUploaded := pr.stats.BytesUploaded.Load()
|
||||
currentFile := pr.stats.CurrentFile.Load().(string)
|
||||
|
||||
totalBytes := bytesScanned + bytesSkipped
|
||||
rate := float64(totalBytes) / elapsed.Seconds()
|
||||
|
||||
log.Notice("=== Snapshot Progress Report ===")
|
||||
log.Info("Elapsed time", "duration", formatDuration(elapsed))
|
||||
|
||||
// Calculate and show ETA if we have data
|
||||
if totalSize > 0 && bytesProcessed > 0 {
|
||||
processStart, ok := pr.stats.ProcessStartTime.Load().(time.Time)
|
||||
if ok && !processStart.IsZero() {
|
||||
processElapsed := time.Since(processStart)
|
||||
processRate := float64(bytesProcessed) / processElapsed.Seconds()
|
||||
if processRate > 0 {
|
||||
remainingBytes := totalSize - bytesProcessed
|
||||
remainingSeconds := float64(remainingBytes) / processRate
|
||||
eta := time.Duration(remainingSeconds * float64(time.Second))
|
||||
percentComplete := float64(bytesProcessed) / float64(totalSize) * 100
|
||||
log.Info("Overall progress",
|
||||
"percent", fmt.Sprintf("%.1f%%", percentComplete),
|
||||
"processed", humanize.Bytes(uint64(bytesProcessed)),
|
||||
"total", humanize.Bytes(uint64(totalSize)),
|
||||
"rate", humanize.Bytes(uint64(processRate))+"/s",
|
||||
"eta", formatDuration(eta))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
log.Info("Files processed",
|
||||
"scanned", filesScanned,
|
||||
"skipped", filesSkipped,
|
||||
"total", filesScanned,
|
||||
"skip_rate", formatPercent(filesSkipped, filesScanned))
|
||||
log.Info("Data scanned",
|
||||
"new", humanize.Bytes(uint64(bytesScanned)),
|
||||
"skipped", humanize.Bytes(uint64(bytesSkipped)),
|
||||
"total", humanize.Bytes(uint64(totalBytes)),
|
||||
"scan_rate", humanize.Bytes(uint64(rate))+"/s")
|
||||
log.Info("Chunks created", "count", chunksCreated)
|
||||
log.Info("Blobs status",
|
||||
"created", blobsCreated,
|
||||
"uploaded", blobsUploaded,
|
||||
"pending", blobsCreated-blobsUploaded)
|
||||
log.Info("Total uploaded to S3",
|
||||
"uploaded", humanize.Bytes(uint64(bytesUploaded)),
|
||||
"compression_ratio", formatRatio(bytesUploaded, bytesScanned))
|
||||
if currentFile != "" {
|
||||
log.Info("Current file", "path", currentFile)
|
||||
}
|
||||
log.Notice("=============================")
|
||||
}
|
||||
|
||||
// Helper functions
|
||||
|
||||
func formatDuration(d time.Duration) string {
|
||||
if d < 0 {
|
||||
return "unknown"
|
||||
}
|
||||
if d < time.Minute {
|
||||
return fmt.Sprintf("%ds", int(d.Seconds()))
|
||||
}
|
||||
if d < time.Hour {
|
||||
return fmt.Sprintf("%dm%ds", int(d.Minutes()), int(d.Seconds())%60)
|
||||
}
|
||||
return fmt.Sprintf("%dh%dm", int(d.Hours()), int(d.Minutes())%60)
|
||||
}
|
||||
|
||||
func formatPercent(numerator, denominator int64) string {
|
||||
if denominator == 0 {
|
||||
return "0.0%"
|
||||
}
|
||||
return fmt.Sprintf("%.1f%%", float64(numerator)/float64(denominator)*100)
|
||||
}
|
||||
|
||||
func formatRatio(compressed, uncompressed int64) string {
|
||||
if uncompressed == 0 {
|
||||
return "1.00"
|
||||
}
|
||||
ratio := float64(compressed) / float64(uncompressed)
|
||||
return fmt.Sprintf("%.2f", ratio)
|
||||
}
|
||||
|
||||
func truncatePath(path string, maxLen int) string {
|
||||
if len(path) <= maxLen {
|
||||
return path
|
||||
}
|
||||
// Keep the last maxLen-3 characters and prepend "..."
|
||||
return "..." + path[len(path)-(maxLen-3):]
|
||||
}
|
||||
|
||||
// printUploadProgress prints upload progress
|
||||
func (pr *ProgressReporter) printUploadProgress(info *UploadInfo) {
|
||||
// This function is called repeatedly during upload, not just at start
|
||||
// Don't print anything here - the actual progress is shown by ReportUploadProgress
|
||||
}
|
||||
|
||||
// ReportUploadStart marks the beginning of a blob upload
|
||||
func (pr *ProgressReporter) ReportUploadStart(blobHash string, size int64) {
|
||||
info := &UploadInfo{
|
||||
BlobHash: blobHash,
|
||||
Size: size,
|
||||
StartTime: time.Now().UTC(),
|
||||
}
|
||||
pr.stats.CurrentUpload.Store(info)
|
||||
}
|
||||
|
||||
// ReportUploadComplete marks the completion of a blob upload
|
||||
func (pr *ProgressReporter) ReportUploadComplete(blobHash string, size int64, duration time.Duration) {
|
||||
// Clear current upload
|
||||
pr.stats.CurrentUpload.Store((*UploadInfo)(nil))
|
||||
|
||||
// Add to total upload duration
|
||||
pr.stats.UploadDurationMs.Add(duration.Milliseconds())
|
||||
|
||||
// Calculate speed
|
||||
if duration < time.Millisecond {
|
||||
duration = time.Millisecond
|
||||
}
|
||||
bytesPerSec := float64(size) / duration.Seconds()
|
||||
bitsPerSec := bytesPerSec * 8
|
||||
|
||||
// Format speed
|
||||
var speedStr string
|
||||
if bitsPerSec >= 1e9 {
|
||||
speedStr = fmt.Sprintf("%.1fGbit/sec", bitsPerSec/1e9)
|
||||
} else if bitsPerSec >= 1e6 {
|
||||
speedStr = fmt.Sprintf("%.0fMbit/sec", bitsPerSec/1e6)
|
||||
} else if bitsPerSec >= 1e3 {
|
||||
speedStr = fmt.Sprintf("%.0fKbit/sec", bitsPerSec/1e3)
|
||||
} else {
|
||||
speedStr = fmt.Sprintf("%.0fbit/sec", bitsPerSec)
|
||||
}
|
||||
|
||||
log.Info("Blob upload completed",
|
||||
"hash", blobHash[:8]+"...",
|
||||
"size", humanize.Bytes(uint64(size)),
|
||||
"duration", formatDuration(duration),
|
||||
"speed", speedStr)
|
||||
}
|
||||
|
||||
// UpdateChunkingActivity updates the last chunking time
|
||||
func (pr *ProgressReporter) UpdateChunkingActivity() {
|
||||
pr.stats.mu.Lock()
|
||||
pr.stats.lastChunkingTime = time.Now().UTC()
|
||||
pr.stats.mu.Unlock()
|
||||
}
|
||||
|
||||
// ReportUploadProgress reports current upload progress with instantaneous speed
|
||||
func (pr *ProgressReporter) ReportUploadProgress(blobHash string, bytesUploaded, totalSize int64, instantSpeed float64) {
|
||||
// Update the current upload info with progress
|
||||
if uploadInfo, ok := pr.stats.CurrentUpload.Load().(*UploadInfo); ok && uploadInfo != nil {
|
||||
// Format speed in bits/second
|
||||
bitsPerSec := instantSpeed * 8
|
||||
var speedStr string
|
||||
if bitsPerSec >= 1e9 {
|
||||
speedStr = fmt.Sprintf("%.1fGbit/sec", bitsPerSec/1e9)
|
||||
} else if bitsPerSec >= 1e6 {
|
||||
speedStr = fmt.Sprintf("%.0fMbit/sec", bitsPerSec/1e6)
|
||||
} else if bitsPerSec >= 1e3 {
|
||||
speedStr = fmt.Sprintf("%.0fKbit/sec", bitsPerSec/1e3)
|
||||
} else {
|
||||
speedStr = fmt.Sprintf("%.0fbit/sec", bitsPerSec)
|
||||
}
|
||||
|
||||
percent := float64(bytesUploaded) / float64(totalSize) * 100
|
||||
|
||||
// Calculate ETA based on current speed
|
||||
etaStr := "unknown"
|
||||
if instantSpeed > 0 && bytesUploaded < totalSize {
|
||||
remainingBytes := totalSize - bytesUploaded
|
||||
remainingSeconds := float64(remainingBytes) / instantSpeed
|
||||
eta := time.Duration(remainingSeconds * float64(time.Second))
|
||||
etaStr = formatDuration(eta)
|
||||
}
|
||||
|
||||
log.Info("Blob upload progress",
|
||||
"hash", blobHash[:8]+"...",
|
||||
"progress", fmt.Sprintf("%.1f%%", percent),
|
||||
"uploaded", humanize.Bytes(uint64(bytesUploaded)),
|
||||
"total", humanize.Bytes(uint64(totalSize)),
|
||||
"speed", speedStr,
|
||||
"eta", etaStr)
|
||||
}
|
||||
}
|
||||
856
internal/snapshot/scanner.go
Normal file
856
internal/snapshot/scanner.go
Normal file
@@ -0,0 +1,856 @@
|
||||
package snapshot
|
||||
|
||||
import (
|
||||
"context"
|
||||
"database/sql"
|
||||
"fmt"
|
||||
"io"
|
||||
"os"
|
||||
"strings"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"git.eeqj.de/sneak/vaultik/internal/blob"
|
||||
"git.eeqj.de/sneak/vaultik/internal/chunker"
|
||||
"git.eeqj.de/sneak/vaultik/internal/database"
|
||||
"git.eeqj.de/sneak/vaultik/internal/log"
|
||||
"git.eeqj.de/sneak/vaultik/internal/s3"
|
||||
"github.com/dustin/go-humanize"
|
||||
"github.com/spf13/afero"
|
||||
)
|
||||
|
||||
// FileToProcess holds information about a file that needs processing
|
||||
type FileToProcess struct {
|
||||
Path string
|
||||
FileInfo os.FileInfo
|
||||
File *database.File
|
||||
}
|
||||
|
||||
// Scanner scans directories and populates the database with file and chunk information
|
||||
type Scanner struct {
|
||||
fs afero.Fs
|
||||
chunker *chunker.Chunker
|
||||
packer *blob.Packer
|
||||
repos *database.Repositories
|
||||
s3Client S3Client
|
||||
maxBlobSize int64
|
||||
compressionLevel int
|
||||
ageRecipient string
|
||||
snapshotID string // Current snapshot being processed
|
||||
progress *ProgressReporter
|
||||
|
||||
// Mutex for coordinating blob creation
|
||||
packerMu sync.Mutex // Blocks chunk production during blob creation
|
||||
|
||||
// Context for cancellation
|
||||
scanCtx context.Context
|
||||
}
|
||||
|
||||
// S3Client interface for blob storage operations
|
||||
type S3Client interface {
|
||||
PutObject(ctx context.Context, key string, data io.Reader) error
|
||||
PutObjectWithProgress(ctx context.Context, key string, data io.Reader, size int64, progress s3.ProgressCallback) error
|
||||
StatObject(ctx context.Context, key string) (*s3.ObjectInfo, error)
|
||||
}
|
||||
|
||||
// ScannerConfig contains configuration for the scanner
|
||||
type ScannerConfig struct {
|
||||
FS afero.Fs
|
||||
ChunkSize int64
|
||||
Repositories *database.Repositories
|
||||
S3Client S3Client
|
||||
MaxBlobSize int64
|
||||
CompressionLevel int
|
||||
AgeRecipients []string // Optional, empty means no encryption
|
||||
EnableProgress bool // Enable progress reporting
|
||||
}
|
||||
|
||||
// ScanResult contains the results of a scan operation
|
||||
type ScanResult struct {
|
||||
FilesScanned int
|
||||
FilesSkipped int
|
||||
BytesScanned int64
|
||||
BytesSkipped int64
|
||||
ChunksCreated int
|
||||
BlobsCreated int
|
||||
StartTime time.Time
|
||||
EndTime time.Time
|
||||
}
|
||||
|
||||
// NewScanner creates a new scanner instance
|
||||
func NewScanner(cfg ScannerConfig) *Scanner {
|
||||
// Create encryptor (required for blob packing)
|
||||
if len(cfg.AgeRecipients) == 0 {
|
||||
log.Error("No age recipients configured - encryption is required")
|
||||
return nil
|
||||
}
|
||||
|
||||
// Create blob packer with encryption
|
||||
packerCfg := blob.PackerConfig{
|
||||
MaxBlobSize: cfg.MaxBlobSize,
|
||||
CompressionLevel: cfg.CompressionLevel,
|
||||
Recipients: cfg.AgeRecipients,
|
||||
Repositories: cfg.Repositories,
|
||||
}
|
||||
packer, err := blob.NewPacker(packerCfg)
|
||||
if err != nil {
|
||||
log.Error("Failed to create packer", "error", err)
|
||||
return nil
|
||||
}
|
||||
|
||||
var progress *ProgressReporter
|
||||
if cfg.EnableProgress {
|
||||
progress = NewProgressReporter()
|
||||
}
|
||||
|
||||
return &Scanner{
|
||||
fs: cfg.FS,
|
||||
chunker: chunker.NewChunker(cfg.ChunkSize),
|
||||
packer: packer,
|
||||
repos: cfg.Repositories,
|
||||
s3Client: cfg.S3Client,
|
||||
maxBlobSize: cfg.MaxBlobSize,
|
||||
compressionLevel: cfg.CompressionLevel,
|
||||
ageRecipient: strings.Join(cfg.AgeRecipients, ","),
|
||||
progress: progress,
|
||||
}
|
||||
}
|
||||
|
||||
// Scan scans a directory and populates the database
|
||||
func (s *Scanner) Scan(ctx context.Context, path string, snapshotID string) (*ScanResult, error) {
|
||||
s.snapshotID = snapshotID
|
||||
s.scanCtx = ctx
|
||||
result := &ScanResult{
|
||||
StartTime: time.Now().UTC(),
|
||||
}
|
||||
|
||||
// Set blob handler for concurrent upload
|
||||
if s.s3Client != nil {
|
||||
log.Debug("Setting blob handler for S3 uploads")
|
||||
s.packer.SetBlobHandler(s.handleBlobReady)
|
||||
} else {
|
||||
log.Debug("No S3 client configured, blobs will not be uploaded")
|
||||
}
|
||||
|
||||
// Start progress reporting if enabled
|
||||
if s.progress != nil {
|
||||
s.progress.Start()
|
||||
defer s.progress.Stop()
|
||||
}
|
||||
|
||||
// Phase 1: Scan directory and collect files to process
|
||||
log.Info("Phase 1/3: Scanning directory structure")
|
||||
filesToProcess, err := s.scanPhase(ctx, path, result)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("scan phase failed: %w", err)
|
||||
}
|
||||
|
||||
// Calculate total size to process
|
||||
var totalSizeToProcess int64
|
||||
for _, file := range filesToProcess {
|
||||
totalSizeToProcess += file.FileInfo.Size()
|
||||
}
|
||||
|
||||
// Update progress with total size and file count
|
||||
if s.progress != nil {
|
||||
s.progress.SetTotalSize(totalSizeToProcess)
|
||||
s.progress.GetStats().TotalFiles.Store(int64(len(filesToProcess)))
|
||||
}
|
||||
|
||||
log.Info("Phase 1 complete",
|
||||
"total_files", len(filesToProcess),
|
||||
"total_size", humanize.Bytes(uint64(totalSizeToProcess)),
|
||||
"files_skipped", result.FilesSkipped,
|
||||
"bytes_skipped", humanize.Bytes(uint64(result.BytesSkipped)))
|
||||
|
||||
// Print detailed scan summary
|
||||
fmt.Printf("\n=== Scan Summary ===\n")
|
||||
fmt.Printf("Total files examined: %d\n", result.FilesScanned)
|
||||
fmt.Printf("Files with content changes: %d\n", len(filesToProcess))
|
||||
fmt.Printf("Files with unchanged content: %d\n", result.FilesSkipped)
|
||||
fmt.Printf("Total size of changed files: %s\n", humanize.Bytes(uint64(totalSizeToProcess)))
|
||||
fmt.Printf("Total size of unchanged files: %s\n", humanize.Bytes(uint64(result.BytesSkipped)))
|
||||
if len(filesToProcess) > 0 {
|
||||
fmt.Printf("\nStarting snapshot of %d changed files...\n\n", len(filesToProcess))
|
||||
} else {
|
||||
fmt.Printf("\nNo file contents have changed.\n")
|
||||
fmt.Printf("Creating metadata-only snapshot to capture current state...\n\n")
|
||||
}
|
||||
|
||||
// Phase 2: Process files and create chunks
|
||||
if len(filesToProcess) > 0 {
|
||||
log.Info("Phase 2/3: Creating snapshot (chunking, compressing, encrypting, and uploading blobs)")
|
||||
if err := s.processPhase(ctx, filesToProcess, result); err != nil {
|
||||
return nil, fmt.Errorf("process phase failed: %w", err)
|
||||
}
|
||||
} else {
|
||||
log.Info("Phase 2/3: Skipping (no file contents changed, metadata-only snapshot)")
|
||||
}
|
||||
|
||||
// Get final stats from packer
|
||||
blobs := s.packer.GetFinishedBlobs()
|
||||
result.BlobsCreated += len(blobs)
|
||||
|
||||
// Query database for actual blob count created during this snapshot
|
||||
// The database is authoritative, especially for concurrent blob uploads
|
||||
// We count uploads rather than all snapshot_blobs to get only NEW blobs
|
||||
if s.snapshotID != "" {
|
||||
uploadCount, err := s.repos.Uploads.GetCountBySnapshot(ctx, s.snapshotID)
|
||||
if err != nil {
|
||||
log.Warn("Failed to query upload count from database", "error", err)
|
||||
} else {
|
||||
result.BlobsCreated = int(uploadCount)
|
||||
}
|
||||
}
|
||||
|
||||
result.EndTime = time.Now().UTC()
|
||||
return result, nil
|
||||
}
|
||||
|
||||
// scanPhase performs the initial directory scan to identify files to process
|
||||
func (s *Scanner) scanPhase(ctx context.Context, path string, result *ScanResult) ([]*FileToProcess, error) {
|
||||
var filesToProcess []*FileToProcess
|
||||
var mu sync.Mutex
|
||||
|
||||
// Set up periodic status output
|
||||
lastStatusTime := time.Now()
|
||||
statusInterval := 15 * time.Second
|
||||
var filesScanned int64
|
||||
var bytesScanned int64
|
||||
|
||||
log.Debug("Starting directory walk", "path", path)
|
||||
err := afero.Walk(s.fs, path, func(path string, info os.FileInfo, err error) error {
|
||||
log.Debug("Scanning filesystem entry", "path", path)
|
||||
if err != nil {
|
||||
log.Debug("Error accessing filesystem entry", "path", path, "error", err)
|
||||
return err
|
||||
}
|
||||
|
||||
// Check context cancellation
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
return ctx.Err()
|
||||
default:
|
||||
}
|
||||
|
||||
// Check file and update metadata
|
||||
file, needsProcessing, err := s.checkFileAndUpdateMetadata(ctx, path, info, result)
|
||||
if err != nil {
|
||||
// Don't log context cancellation as an error
|
||||
if err == context.Canceled {
|
||||
return err
|
||||
}
|
||||
return fmt.Errorf("failed to check %s: %w", path, err)
|
||||
}
|
||||
|
||||
// If file needs processing, add to list
|
||||
if needsProcessing && info.Mode().IsRegular() && info.Size() > 0 {
|
||||
mu.Lock()
|
||||
filesToProcess = append(filesToProcess, &FileToProcess{
|
||||
Path: path,
|
||||
FileInfo: info,
|
||||
File: file,
|
||||
})
|
||||
mu.Unlock()
|
||||
}
|
||||
|
||||
// Update scan statistics
|
||||
if info.Mode().IsRegular() {
|
||||
filesScanned++
|
||||
bytesScanned += info.Size()
|
||||
}
|
||||
|
||||
// Output periodic status
|
||||
if time.Since(lastStatusTime) >= statusInterval {
|
||||
mu.Lock()
|
||||
changedCount := len(filesToProcess)
|
||||
mu.Unlock()
|
||||
|
||||
fmt.Printf("Scan progress: %d files examined, %s total size, %d files changed\n",
|
||||
filesScanned,
|
||||
humanize.Bytes(uint64(bytesScanned)),
|
||||
changedCount)
|
||||
lastStatusTime = time.Now()
|
||||
}
|
||||
|
||||
return nil
|
||||
})
|
||||
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return filesToProcess, nil
|
||||
}
|
||||
|
||||
// processPhase processes the files that need backing up
|
||||
func (s *Scanner) processPhase(ctx context.Context, filesToProcess []*FileToProcess, result *ScanResult) error {
|
||||
// Set up periodic status output
|
||||
lastStatusTime := time.Now()
|
||||
statusInterval := 15 * time.Second
|
||||
startTime := time.Now()
|
||||
filesProcessed := 0
|
||||
totalFiles := len(filesToProcess)
|
||||
|
||||
// Process each file
|
||||
for _, fileToProcess := range filesToProcess {
|
||||
// Update progress
|
||||
if s.progress != nil {
|
||||
s.progress.GetStats().CurrentFile.Store(fileToProcess.Path)
|
||||
}
|
||||
|
||||
// Process file in streaming fashion
|
||||
if err := s.processFileStreaming(ctx, fileToProcess, result); err != nil {
|
||||
return fmt.Errorf("processing file %s: %w", fileToProcess.Path, err)
|
||||
}
|
||||
|
||||
// Update files processed counter
|
||||
if s.progress != nil {
|
||||
s.progress.GetStats().FilesProcessed.Add(1)
|
||||
}
|
||||
|
||||
filesProcessed++
|
||||
|
||||
// Output periodic status
|
||||
if time.Since(lastStatusTime) >= statusInterval {
|
||||
elapsed := time.Since(startTime)
|
||||
remaining := totalFiles - filesProcessed
|
||||
var eta time.Duration
|
||||
if filesProcessed > 0 {
|
||||
eta = elapsed / time.Duration(filesProcessed) * time.Duration(remaining)
|
||||
}
|
||||
|
||||
fmt.Printf("Snapshot progress: %d/%d files processed, %d chunks created, %d blobs uploaded",
|
||||
filesProcessed, totalFiles, result.ChunksCreated, result.BlobsCreated)
|
||||
if remaining > 0 && eta > 0 {
|
||||
fmt.Printf(", ETA: %s", eta.Round(time.Second))
|
||||
}
|
||||
fmt.Println()
|
||||
lastStatusTime = time.Now()
|
||||
}
|
||||
}
|
||||
|
||||
// Final flush (outside any transaction)
|
||||
s.packerMu.Lock()
|
||||
if err := s.packer.Flush(); err != nil {
|
||||
s.packerMu.Unlock()
|
||||
return fmt.Errorf("flushing packer: %w", err)
|
||||
}
|
||||
s.packerMu.Unlock()
|
||||
|
||||
// If no S3 client, store any remaining blobs
|
||||
if s.s3Client == nil {
|
||||
blobs := s.packer.GetFinishedBlobs()
|
||||
for _, b := range blobs {
|
||||
// Blob metadata is already stored incrementally during packing
|
||||
// Just add the blob to the snapshot
|
||||
err := s.repos.WithTx(ctx, func(ctx context.Context, tx *sql.Tx) error {
|
||||
return s.repos.Snapshots.AddBlob(ctx, tx, s.snapshotID, b.ID, b.Hash)
|
||||
})
|
||||
if err != nil {
|
||||
return fmt.Errorf("storing blob metadata: %w", err)
|
||||
}
|
||||
}
|
||||
result.BlobsCreated += len(blobs)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// checkFileAndUpdateMetadata checks if a file needs processing and updates metadata
|
||||
func (s *Scanner) checkFileAndUpdateMetadata(ctx context.Context, path string, info os.FileInfo, result *ScanResult) (*database.File, bool, error) {
|
||||
// Check context cancellation
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
return nil, false, ctx.Err()
|
||||
default:
|
||||
}
|
||||
|
||||
// Process file without holding a long transaction
|
||||
return s.checkFile(ctx, path, info, result)
|
||||
}
|
||||
|
||||
// checkFile checks if a file needs processing and updates metadata
|
||||
func (s *Scanner) checkFile(ctx context.Context, path string, info os.FileInfo, result *ScanResult) (*database.File, bool, error) {
|
||||
// Get file stats
|
||||
stat, ok := info.Sys().(interface {
|
||||
Uid() uint32
|
||||
Gid() uint32
|
||||
})
|
||||
|
||||
var uid, gid uint32
|
||||
if ok {
|
||||
uid = stat.Uid()
|
||||
gid = stat.Gid()
|
||||
}
|
||||
|
||||
// Check if it's a symlink
|
||||
var linkTarget string
|
||||
if info.Mode()&os.ModeSymlink != 0 {
|
||||
// Read the symlink target
|
||||
if linker, ok := s.fs.(afero.LinkReader); ok {
|
||||
linkTarget, _ = linker.ReadlinkIfPossible(path)
|
||||
}
|
||||
}
|
||||
|
||||
// Create file record
|
||||
file := &database.File{
|
||||
Path: path,
|
||||
MTime: info.ModTime(),
|
||||
CTime: info.ModTime(), // afero doesn't provide ctime
|
||||
Size: info.Size(),
|
||||
Mode: uint32(info.Mode()),
|
||||
UID: uid,
|
||||
GID: gid,
|
||||
LinkTarget: linkTarget,
|
||||
}
|
||||
|
||||
// Check if file has changed since last backup (no transaction needed for read)
|
||||
log.Debug("Querying database for existing file record", "path", path)
|
||||
existingFile, err := s.repos.Files.GetByPath(ctx, path)
|
||||
if err != nil {
|
||||
return nil, false, fmt.Errorf("checking existing file: %w", err)
|
||||
}
|
||||
|
||||
fileChanged := existingFile == nil || s.hasFileChanged(existingFile, file)
|
||||
|
||||
// Update file metadata and add to snapshot in a single transaction
|
||||
log.Debug("Updating file record in database and adding to snapshot", "path", path, "changed", fileChanged, "snapshot", s.snapshotID)
|
||||
err = s.repos.WithTx(ctx, func(ctx context.Context, tx *sql.Tx) error {
|
||||
// First create/update the file
|
||||
if err := s.repos.Files.Create(ctx, tx, file); err != nil {
|
||||
return fmt.Errorf("creating file: %w", err)
|
||||
}
|
||||
// Then add it to the snapshot using the file ID
|
||||
if err := s.repos.Snapshots.AddFileByID(ctx, tx, s.snapshotID, file.ID); err != nil {
|
||||
return fmt.Errorf("adding file to snapshot: %w", err)
|
||||
}
|
||||
return nil
|
||||
})
|
||||
if err != nil {
|
||||
return nil, false, err
|
||||
}
|
||||
log.Debug("File record added to snapshot association", "path", path)
|
||||
|
||||
result.FilesScanned++
|
||||
|
||||
// Update progress
|
||||
if s.progress != nil {
|
||||
stats := s.progress.GetStats()
|
||||
stats.FilesScanned.Add(1)
|
||||
stats.CurrentFile.Store(path)
|
||||
}
|
||||
|
||||
// Track skipped files
|
||||
if info.Mode().IsRegular() && info.Size() > 0 && !fileChanged {
|
||||
result.FilesSkipped++
|
||||
result.BytesSkipped += info.Size()
|
||||
if s.progress != nil {
|
||||
stats := s.progress.GetStats()
|
||||
stats.FilesSkipped.Add(1)
|
||||
stats.BytesSkipped.Add(info.Size())
|
||||
}
|
||||
// File hasn't changed, but we still need to associate existing chunks with this snapshot
|
||||
log.Debug("File content unchanged, reusing existing chunks and blobs", "path", path)
|
||||
if err := s.associateExistingChunks(ctx, path); err != nil {
|
||||
return nil, false, fmt.Errorf("associating existing chunks: %w", err)
|
||||
}
|
||||
log.Debug("Existing chunks and blobs associated with snapshot", "path", path)
|
||||
} else {
|
||||
// File changed or is not a regular file
|
||||
result.BytesScanned += info.Size()
|
||||
if s.progress != nil {
|
||||
s.progress.GetStats().BytesScanned.Add(info.Size())
|
||||
}
|
||||
}
|
||||
|
||||
return file, fileChanged, nil
|
||||
}
|
||||
|
||||
// hasFileChanged determines if a file has changed since last backup
|
||||
func (s *Scanner) hasFileChanged(existingFile, newFile *database.File) bool {
|
||||
// Check if any metadata has changed
|
||||
if existingFile.Size != newFile.Size {
|
||||
return true
|
||||
}
|
||||
if existingFile.MTime.Unix() != newFile.MTime.Unix() {
|
||||
return true
|
||||
}
|
||||
if existingFile.Mode != newFile.Mode {
|
||||
return true
|
||||
}
|
||||
if existingFile.UID != newFile.UID {
|
||||
return true
|
||||
}
|
||||
if existingFile.GID != newFile.GID {
|
||||
return true
|
||||
}
|
||||
if existingFile.LinkTarget != newFile.LinkTarget {
|
||||
return true
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
// associateExistingChunks links existing chunks from an unchanged file to the current snapshot
|
||||
func (s *Scanner) associateExistingChunks(ctx context.Context, path string) error {
|
||||
log.Debug("associateExistingChunks start", "path", path)
|
||||
|
||||
// Get existing file chunks (no transaction needed for read)
|
||||
log.Debug("Querying database for file's chunk associations", "path", path)
|
||||
fileChunks, err := s.repos.FileChunks.GetByFile(ctx, path)
|
||||
if err != nil {
|
||||
return fmt.Errorf("getting existing file chunks: %w", err)
|
||||
}
|
||||
log.Debug("Retrieved file chunk associations from database", "path", path, "count", len(fileChunks))
|
||||
|
||||
// Collect unique blob IDs that need to be added to snapshot
|
||||
blobsToAdd := make(map[string]string) // blob ID -> blob hash
|
||||
for i, fc := range fileChunks {
|
||||
log.Debug("Looking up blob containing chunk", "path", path, "chunk_index", i, "chunk_hash", fc.ChunkHash)
|
||||
|
||||
// Find which blob contains this chunk (no transaction needed for read)
|
||||
log.Debug("Querying database for blob containing chunk", "chunk_hash", fc.ChunkHash)
|
||||
blobChunk, err := s.repos.BlobChunks.GetByChunkHash(ctx, fc.ChunkHash)
|
||||
if err != nil {
|
||||
return fmt.Errorf("finding blob for chunk %s: %w", fc.ChunkHash, err)
|
||||
}
|
||||
if blobChunk == nil {
|
||||
log.Warn("Chunk record exists in database but not associated with any blob", "chunk", fc.ChunkHash, "file", path)
|
||||
continue
|
||||
}
|
||||
log.Debug("Found blob record containing chunk", "chunk_hash", fc.ChunkHash, "blob_id", blobChunk.BlobID)
|
||||
|
||||
// Track blob ID for later processing
|
||||
if _, exists := blobsToAdd[blobChunk.BlobID]; !exists {
|
||||
blobsToAdd[blobChunk.BlobID] = "" // We'll get the hash later
|
||||
}
|
||||
}
|
||||
|
||||
// Now get blob hashes outside of transaction operations
|
||||
for blobID := range blobsToAdd {
|
||||
blob, err := s.repos.Blobs.GetByID(ctx, blobID)
|
||||
if err != nil {
|
||||
return fmt.Errorf("getting blob %s: %w", blobID, err)
|
||||
}
|
||||
if blob == nil {
|
||||
log.Warn("Blob record missing from database", "blob_id", blobID)
|
||||
delete(blobsToAdd, blobID)
|
||||
continue
|
||||
}
|
||||
blobsToAdd[blobID] = blob.Hash
|
||||
}
|
||||
|
||||
// Add blobs to snapshot using short transactions
|
||||
for blobID, blobHash := range blobsToAdd {
|
||||
log.Debug("Adding blob reference to snapshot association", "blob_id", blobID, "blob_hash", blobHash, "snapshot", s.snapshotID)
|
||||
err := s.repos.WithTx(ctx, func(ctx context.Context, tx *sql.Tx) error {
|
||||
return s.repos.Snapshots.AddBlob(ctx, tx, s.snapshotID, blobID, blobHash)
|
||||
})
|
||||
if err != nil {
|
||||
return fmt.Errorf("adding existing blob to snapshot: %w", err)
|
||||
}
|
||||
log.Debug("Created snapshot-blob association in database", "blob_id", blobID)
|
||||
}
|
||||
|
||||
log.Debug("associateExistingChunks complete", "path", path, "blobs_processed", len(blobsToAdd))
|
||||
return nil
|
||||
}
|
||||
|
||||
// handleBlobReady is called by the packer when a blob is finalized
|
||||
func (s *Scanner) handleBlobReady(blobWithReader *blob.BlobWithReader) error {
|
||||
log.Debug("Invoking blob upload handler", "blob_hash", blobWithReader.Hash[:8]+"...")
|
||||
|
||||
startTime := time.Now().UTC()
|
||||
finishedBlob := blobWithReader.FinishedBlob
|
||||
|
||||
// Report upload start
|
||||
if s.progress != nil {
|
||||
s.progress.ReportUploadStart(finishedBlob.Hash, finishedBlob.Compressed)
|
||||
}
|
||||
|
||||
// Upload to S3 first (without holding any locks)
|
||||
// Use scan context for cancellation support
|
||||
ctx := s.scanCtx
|
||||
if ctx == nil {
|
||||
ctx = context.Background()
|
||||
}
|
||||
|
||||
// Track bytes uploaded for accurate speed calculation
|
||||
lastProgressTime := time.Now()
|
||||
lastProgressBytes := int64(0)
|
||||
|
||||
progressCallback := func(uploaded int64) error {
|
||||
|
||||
// Calculate instantaneous speed
|
||||
now := time.Now()
|
||||
elapsed := now.Sub(lastProgressTime).Seconds()
|
||||
if elapsed > 0.5 { // Update speed every 0.5 seconds
|
||||
bytesSinceLastUpdate := uploaded - lastProgressBytes
|
||||
speed := float64(bytesSinceLastUpdate) / elapsed
|
||||
|
||||
if s.progress != nil {
|
||||
s.progress.ReportUploadProgress(finishedBlob.Hash, uploaded, finishedBlob.Compressed, speed)
|
||||
}
|
||||
|
||||
lastProgressTime = now
|
||||
lastProgressBytes = uploaded
|
||||
}
|
||||
|
||||
// Check for cancellation
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
return ctx.Err()
|
||||
default:
|
||||
return nil
|
||||
}
|
||||
}
|
||||
|
||||
// Create sharded path: blobs/ca/fe/cafebabe...
|
||||
blobPath := fmt.Sprintf("blobs/%s/%s/%s", finishedBlob.Hash[:2], finishedBlob.Hash[2:4], finishedBlob.Hash)
|
||||
if err := s.s3Client.PutObjectWithProgress(ctx, blobPath, blobWithReader.Reader, finishedBlob.Compressed, progressCallback); err != nil {
|
||||
return fmt.Errorf("uploading blob %s to S3: %w", finishedBlob.Hash, err)
|
||||
}
|
||||
|
||||
uploadDuration := time.Since(startTime)
|
||||
|
||||
// Log upload stats
|
||||
uploadSpeed := float64(finishedBlob.Compressed) * 8 / uploadDuration.Seconds() // bits per second
|
||||
log.Info("Successfully uploaded blob to S3 storage",
|
||||
"path", blobPath,
|
||||
"size", humanize.Bytes(uint64(finishedBlob.Compressed)),
|
||||
"duration", uploadDuration,
|
||||
"speed", humanize.SI(uploadSpeed, "bps"))
|
||||
|
||||
// Report upload complete
|
||||
if s.progress != nil {
|
||||
s.progress.ReportUploadComplete(finishedBlob.Hash, finishedBlob.Compressed, uploadDuration)
|
||||
}
|
||||
|
||||
// Update progress
|
||||
if s.progress != nil {
|
||||
stats := s.progress.GetStats()
|
||||
stats.BlobsUploaded.Add(1)
|
||||
stats.BytesUploaded.Add(finishedBlob.Compressed)
|
||||
stats.BlobsCreated.Add(1)
|
||||
}
|
||||
|
||||
// Store metadata in database (after upload is complete)
|
||||
dbCtx := s.scanCtx
|
||||
if dbCtx == nil {
|
||||
dbCtx = context.Background()
|
||||
}
|
||||
err := s.repos.WithTx(dbCtx, func(ctx context.Context, tx *sql.Tx) error {
|
||||
// Update blob upload timestamp
|
||||
if err := s.repos.Blobs.UpdateUploaded(ctx, tx, finishedBlob.ID); err != nil {
|
||||
return fmt.Errorf("updating blob upload timestamp: %w", err)
|
||||
}
|
||||
|
||||
// Add the blob to the snapshot
|
||||
if err := s.repos.Snapshots.AddBlob(ctx, tx, s.snapshotID, finishedBlob.ID, finishedBlob.Hash); err != nil {
|
||||
return fmt.Errorf("adding blob to snapshot: %w", err)
|
||||
}
|
||||
|
||||
// Record upload metrics
|
||||
upload := &database.Upload{
|
||||
BlobHash: finishedBlob.Hash,
|
||||
SnapshotID: s.snapshotID,
|
||||
UploadedAt: startTime,
|
||||
Size: finishedBlob.Compressed,
|
||||
DurationMs: uploadDuration.Milliseconds(),
|
||||
}
|
||||
if err := s.repos.Uploads.Create(ctx, tx, upload); err != nil {
|
||||
return fmt.Errorf("recording upload metrics: %w", err)
|
||||
}
|
||||
|
||||
return nil
|
||||
})
|
||||
|
||||
// Cleanup temp file if needed
|
||||
if blobWithReader.TempFile != nil {
|
||||
tempName := blobWithReader.TempFile.Name()
|
||||
if err := blobWithReader.TempFile.Close(); err != nil {
|
||||
log.Fatal("Failed to close temp file", "file", tempName, "error", err)
|
||||
}
|
||||
if err := os.Remove(tempName); err != nil {
|
||||
log.Fatal("Failed to remove temp file", "file", tempName, "error", err)
|
||||
}
|
||||
}
|
||||
|
||||
return err
|
||||
}
|
||||
|
||||
// processFileStreaming processes a file by streaming chunks directly to the packer
|
||||
func (s *Scanner) processFileStreaming(ctx context.Context, fileToProcess *FileToProcess, result *ScanResult) error {
|
||||
// Open the file
|
||||
file, err := s.fs.Open(fileToProcess.Path)
|
||||
if err != nil {
|
||||
return fmt.Errorf("opening file: %w", err)
|
||||
}
|
||||
defer func() { _ = file.Close() }()
|
||||
|
||||
// We'll collect file chunks for database storage
|
||||
// but process them for packing as we go
|
||||
type chunkInfo struct {
|
||||
fileChunk database.FileChunk
|
||||
offset int64
|
||||
size int64
|
||||
}
|
||||
var chunks []chunkInfo
|
||||
chunkIndex := 0
|
||||
|
||||
// Process chunks in streaming fashion and get full file hash
|
||||
fileHash, err := s.chunker.ChunkReaderStreaming(file, func(chunk chunker.Chunk) error {
|
||||
// Check for cancellation
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
return ctx.Err()
|
||||
default:
|
||||
}
|
||||
|
||||
log.Debug("Processing content-defined chunk from file",
|
||||
"file", fileToProcess.Path,
|
||||
"chunk_index", chunkIndex,
|
||||
"hash", chunk.Hash,
|
||||
"size", chunk.Size)
|
||||
|
||||
// Check if chunk already exists (outside of transaction)
|
||||
existing, err := s.repos.Chunks.GetByHash(ctx, chunk.Hash)
|
||||
if err != nil {
|
||||
return fmt.Errorf("checking chunk existence: %w", err)
|
||||
}
|
||||
chunkExists := (existing != nil)
|
||||
|
||||
// Store chunk if new
|
||||
if !chunkExists {
|
||||
err := s.repos.WithTx(ctx, func(txCtx context.Context, tx *sql.Tx) error {
|
||||
dbChunk := &database.Chunk{
|
||||
ChunkHash: chunk.Hash,
|
||||
Size: chunk.Size,
|
||||
}
|
||||
if err := s.repos.Chunks.Create(txCtx, tx, dbChunk); err != nil {
|
||||
return fmt.Errorf("creating chunk: %w", err)
|
||||
}
|
||||
return nil
|
||||
})
|
||||
if err != nil {
|
||||
return fmt.Errorf("storing chunk: %w", err)
|
||||
}
|
||||
}
|
||||
|
||||
// Track file chunk association for later storage
|
||||
chunks = append(chunks, chunkInfo{
|
||||
fileChunk: database.FileChunk{
|
||||
FileID: fileToProcess.File.ID,
|
||||
Idx: chunkIndex,
|
||||
ChunkHash: chunk.Hash,
|
||||
},
|
||||
offset: chunk.Offset,
|
||||
size: chunk.Size,
|
||||
})
|
||||
|
||||
// Update stats
|
||||
if chunkExists {
|
||||
result.FilesSkipped++ // Track as skipped for now
|
||||
result.BytesSkipped += chunk.Size
|
||||
if s.progress != nil {
|
||||
s.progress.GetStats().BytesSkipped.Add(chunk.Size)
|
||||
}
|
||||
} else {
|
||||
result.ChunksCreated++
|
||||
result.BytesScanned += chunk.Size
|
||||
if s.progress != nil {
|
||||
s.progress.GetStats().ChunksCreated.Add(1)
|
||||
s.progress.GetStats().BytesProcessed.Add(chunk.Size)
|
||||
s.progress.UpdateChunkingActivity()
|
||||
}
|
||||
}
|
||||
|
||||
// Add chunk to packer immediately (streaming)
|
||||
// This happens outside the database transaction
|
||||
if !chunkExists {
|
||||
s.packerMu.Lock()
|
||||
err := s.packer.AddChunk(&blob.ChunkRef{
|
||||
Hash: chunk.Hash,
|
||||
Data: chunk.Data,
|
||||
})
|
||||
if err == blob.ErrBlobSizeLimitExceeded {
|
||||
// Finalize current blob and retry
|
||||
if err := s.packer.FinalizeBlob(); err != nil {
|
||||
s.packerMu.Unlock()
|
||||
return fmt.Errorf("finalizing blob: %w", err)
|
||||
}
|
||||
// Retry adding the chunk
|
||||
if err := s.packer.AddChunk(&blob.ChunkRef{
|
||||
Hash: chunk.Hash,
|
||||
Data: chunk.Data,
|
||||
}); err != nil {
|
||||
s.packerMu.Unlock()
|
||||
return fmt.Errorf("adding chunk after finalize: %w", err)
|
||||
}
|
||||
} else if err != nil {
|
||||
s.packerMu.Unlock()
|
||||
return fmt.Errorf("adding chunk to packer: %w", err)
|
||||
}
|
||||
s.packerMu.Unlock()
|
||||
}
|
||||
|
||||
// Clear chunk data from memory immediately after use
|
||||
chunk.Data = nil
|
||||
|
||||
chunkIndex++
|
||||
return nil
|
||||
})
|
||||
|
||||
if err != nil {
|
||||
return fmt.Errorf("chunking file: %w", err)
|
||||
}
|
||||
|
||||
log.Debug("Completed snapshotting file",
|
||||
"path", fileToProcess.Path,
|
||||
"file_hash", fileHash,
|
||||
"chunks", len(chunks))
|
||||
|
||||
// Store file-chunk associations and chunk-file mappings in database
|
||||
err = s.repos.WithTx(ctx, func(txCtx context.Context, tx *sql.Tx) error {
|
||||
// First, delete all existing file_chunks and chunk_files for this file
|
||||
// This ensures old chunks are no longer associated when file content changes
|
||||
if err := s.repos.FileChunks.DeleteByFileID(txCtx, tx, fileToProcess.File.ID); err != nil {
|
||||
return fmt.Errorf("deleting old file chunks: %w", err)
|
||||
}
|
||||
if err := s.repos.ChunkFiles.DeleteByFileID(txCtx, tx, fileToProcess.File.ID); err != nil {
|
||||
return fmt.Errorf("deleting old chunk files: %w", err)
|
||||
}
|
||||
|
||||
for _, ci := range chunks {
|
||||
// Create file-chunk mapping
|
||||
if err := s.repos.FileChunks.Create(txCtx, tx, &ci.fileChunk); err != nil {
|
||||
return fmt.Errorf("creating file chunk: %w", err)
|
||||
}
|
||||
|
||||
// Create chunk-file mapping
|
||||
chunkFile := &database.ChunkFile{
|
||||
ChunkHash: ci.fileChunk.ChunkHash,
|
||||
FileID: fileToProcess.File.ID,
|
||||
FileOffset: ci.offset,
|
||||
Length: ci.size,
|
||||
}
|
||||
if err := s.repos.ChunkFiles.Create(txCtx, tx, chunkFile); err != nil {
|
||||
return fmt.Errorf("creating chunk file: %w", err)
|
||||
}
|
||||
}
|
||||
|
||||
// Add file to snapshot
|
||||
if err := s.repos.Snapshots.AddFileByID(txCtx, tx, s.snapshotID, fileToProcess.File.ID); err != nil {
|
||||
return fmt.Errorf("adding file to snapshot: %w", err)
|
||||
}
|
||||
|
||||
return nil
|
||||
})
|
||||
|
||||
return err
|
||||
}
|
||||
|
||||
// GetProgress returns the progress reporter for this scanner
|
||||
func (s *Scanner) GetProgress() *ProgressReporter {
|
||||
return s.progress
|
||||
}
|
||||
381
internal/snapshot/scanner_test.go
Normal file
381
internal/snapshot/scanner_test.go
Normal file
@@ -0,0 +1,381 @@
|
||||
package snapshot_test
|
||||
|
||||
import (
|
||||
"context"
|
||||
"database/sql"
|
||||
"path/filepath"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"git.eeqj.de/sneak/vaultik/internal/database"
|
||||
"git.eeqj.de/sneak/vaultik/internal/log"
|
||||
"git.eeqj.de/sneak/vaultik/internal/snapshot"
|
||||
"github.com/spf13/afero"
|
||||
)
|
||||
|
||||
func TestScannerSimpleDirectory(t *testing.T) {
|
||||
// Initialize logger for tests
|
||||
log.Initialize(log.Config{})
|
||||
|
||||
// Create in-memory filesystem
|
||||
fs := afero.NewMemMapFs()
|
||||
|
||||
// Create test directory structure
|
||||
testFiles := map[string]string{
|
||||
"/source/file1.txt": "Hello, world!", // 13 bytes
|
||||
"/source/file2.txt": "This is another file", // 20 bytes
|
||||
"/source/subdir/file3.txt": "File in subdirectory", // 20 bytes
|
||||
"/source/subdir/file4.txt": "Another file in subdirectory", // 28 bytes
|
||||
"/source/empty.txt": "", // 0 bytes
|
||||
"/source/subdir2/file5.txt": "Yet another file", // 16 bytes
|
||||
}
|
||||
|
||||
// Create files with specific times
|
||||
testTime := time.Date(2024, 1, 1, 12, 0, 0, 0, time.UTC)
|
||||
for path, content := range testFiles {
|
||||
dir := filepath.Dir(path)
|
||||
if err := fs.MkdirAll(dir, 0755); err != nil {
|
||||
t.Fatalf("failed to create directory %s: %v", dir, err)
|
||||
}
|
||||
if err := afero.WriteFile(fs, path, []byte(content), 0644); err != nil {
|
||||
t.Fatalf("failed to write file %s: %v", path, err)
|
||||
}
|
||||
// Set times
|
||||
if err := fs.Chtimes(path, testTime, testTime); err != nil {
|
||||
t.Fatalf("failed to set times for %s: %v", path, err)
|
||||
}
|
||||
}
|
||||
|
||||
// Create test database
|
||||
db, err := database.NewTestDB()
|
||||
if err != nil {
|
||||
t.Fatalf("failed to create test database: %v", err)
|
||||
}
|
||||
defer func() {
|
||||
if err := db.Close(); err != nil {
|
||||
t.Errorf("failed to close database: %v", err)
|
||||
}
|
||||
}()
|
||||
|
||||
repos := database.NewRepositories(db)
|
||||
|
||||
// Create scanner
|
||||
scanner := snapshot.NewScanner(snapshot.ScannerConfig{
|
||||
FS: fs,
|
||||
ChunkSize: int64(1024 * 16), // 16KB chunks for testing
|
||||
Repositories: repos,
|
||||
MaxBlobSize: int64(1024 * 1024), // 1MB blobs
|
||||
CompressionLevel: 3,
|
||||
AgeRecipients: []string{"age1ezrjmfpwsc95svdg0y54mums3zevgzu0x0ecq2f7tp8a05gl0sjq9q9wjg"}, // Test public key
|
||||
})
|
||||
|
||||
// Create a snapshot record for testing
|
||||
ctx := context.Background()
|
||||
snapshotID := "test-snapshot-001"
|
||||
err = repos.WithTx(ctx, func(ctx context.Context, tx *sql.Tx) error {
|
||||
snapshot := &database.Snapshot{
|
||||
ID: snapshotID,
|
||||
Hostname: "test-host",
|
||||
VaultikVersion: "test",
|
||||
StartedAt: time.Now(),
|
||||
CompletedAt: nil,
|
||||
FileCount: 0,
|
||||
ChunkCount: 0,
|
||||
BlobCount: 0,
|
||||
TotalSize: 0,
|
||||
BlobSize: 0,
|
||||
CompressionRatio: 1.0,
|
||||
}
|
||||
return repos.Snapshots.Create(ctx, tx, snapshot)
|
||||
})
|
||||
if err != nil {
|
||||
t.Fatalf("failed to create snapshot: %v", err)
|
||||
}
|
||||
|
||||
// Scan the directory
|
||||
var result *snapshot.ScanResult
|
||||
result, err = scanner.Scan(ctx, "/source", snapshotID)
|
||||
if err != nil {
|
||||
t.Fatalf("scan failed: %v", err)
|
||||
}
|
||||
|
||||
// Verify results
|
||||
// We now scan 6 files + 3 directories (source, subdir, subdir2) = 9 entries
|
||||
if result.FilesScanned != 9 {
|
||||
t.Errorf("expected 9 entries scanned, got %d", result.FilesScanned)
|
||||
}
|
||||
|
||||
// Directories have their own sizes, so the total will be more than just file content
|
||||
if result.BytesScanned < 97 { // At minimum we have 97 bytes of file content
|
||||
t.Errorf("expected at least 97 bytes scanned, got %d", result.BytesScanned)
|
||||
}
|
||||
|
||||
// Verify files in database
|
||||
files, err := repos.Files.ListByPrefix(ctx, "/source")
|
||||
if err != nil {
|
||||
t.Fatalf("failed to list files: %v", err)
|
||||
}
|
||||
|
||||
// We should have 6 files + 3 directories = 9 entries
|
||||
if len(files) != 9 {
|
||||
t.Errorf("expected 9 entries in database, got %d", len(files))
|
||||
}
|
||||
|
||||
// Verify specific file
|
||||
file1, err := repos.Files.GetByPath(ctx, "/source/file1.txt")
|
||||
if err != nil {
|
||||
t.Fatalf("failed to get file1.txt: %v", err)
|
||||
}
|
||||
|
||||
if file1.Size != 13 {
|
||||
t.Errorf("expected file1.txt size 13, got %d", file1.Size)
|
||||
}
|
||||
|
||||
if file1.Mode != 0644 {
|
||||
t.Errorf("expected file1.txt mode 0644, got %o", file1.Mode)
|
||||
}
|
||||
|
||||
// Verify chunks were created
|
||||
chunks, err := repos.FileChunks.GetByFile(ctx, "/source/file1.txt")
|
||||
if err != nil {
|
||||
t.Fatalf("failed to get chunks for file1.txt: %v", err)
|
||||
}
|
||||
|
||||
if len(chunks) != 1 { // Small file should be one chunk
|
||||
t.Errorf("expected 1 chunk for file1.txt, got %d", len(chunks))
|
||||
}
|
||||
|
||||
// Verify deduplication - file3.txt and file4.txt have different content
|
||||
// but we should still have the correct number of unique chunks
|
||||
allChunks, err := repos.Chunks.List(ctx)
|
||||
if err != nil {
|
||||
t.Fatalf("failed to list all chunks: %v", err)
|
||||
}
|
||||
|
||||
// We should have at most 6 chunks (one per unique file content)
|
||||
// Empty file might not create a chunk
|
||||
if len(allChunks) > 6 {
|
||||
t.Errorf("expected at most 6 chunks, got %d", len(allChunks))
|
||||
}
|
||||
}
|
||||
|
||||
func TestScannerWithSymlinks(t *testing.T) {
|
||||
// Initialize logger for tests
|
||||
log.Initialize(log.Config{})
|
||||
|
||||
// Create in-memory filesystem
|
||||
fs := afero.NewMemMapFs()
|
||||
|
||||
// Create test files
|
||||
if err := fs.MkdirAll("/source", 0755); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
if err := afero.WriteFile(fs, "/source/target.txt", []byte("target content"), 0644); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
if err := afero.WriteFile(fs, "/outside/file.txt", []byte("outside content"), 0644); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
// Create symlinks (if supported by the filesystem)
|
||||
linker, ok := fs.(afero.Symlinker)
|
||||
if !ok {
|
||||
t.Skip("filesystem does not support symlinks")
|
||||
}
|
||||
|
||||
// Symlink to file in source
|
||||
if err := linker.SymlinkIfPossible("target.txt", "/source/link1.txt"); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
// Symlink to file outside source
|
||||
if err := linker.SymlinkIfPossible("/outside/file.txt", "/source/link2.txt"); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
// Create test database
|
||||
db, err := database.NewTestDB()
|
||||
if err != nil {
|
||||
t.Fatalf("failed to create test database: %v", err)
|
||||
}
|
||||
defer func() {
|
||||
if err := db.Close(); err != nil {
|
||||
t.Errorf("failed to close database: %v", err)
|
||||
}
|
||||
}()
|
||||
|
||||
repos := database.NewRepositories(db)
|
||||
|
||||
// Create scanner
|
||||
scanner := snapshot.NewScanner(snapshot.ScannerConfig{
|
||||
FS: fs,
|
||||
ChunkSize: 1024 * 16,
|
||||
Repositories: repos,
|
||||
MaxBlobSize: int64(1024 * 1024),
|
||||
CompressionLevel: 3,
|
||||
AgeRecipients: []string{"age1ezrjmfpwsc95svdg0y54mums3zevgzu0x0ecq2f7tp8a05gl0sjq9q9wjg"}, // Test public key
|
||||
})
|
||||
|
||||
// Create a snapshot record for testing
|
||||
ctx := context.Background()
|
||||
snapshotID := "test-snapshot-001"
|
||||
err = repos.WithTx(ctx, func(ctx context.Context, tx *sql.Tx) error {
|
||||
snapshot := &database.Snapshot{
|
||||
ID: snapshotID,
|
||||
Hostname: "test-host",
|
||||
VaultikVersion: "test",
|
||||
StartedAt: time.Now(),
|
||||
CompletedAt: nil,
|
||||
FileCount: 0,
|
||||
ChunkCount: 0,
|
||||
BlobCount: 0,
|
||||
TotalSize: 0,
|
||||
BlobSize: 0,
|
||||
CompressionRatio: 1.0,
|
||||
}
|
||||
return repos.Snapshots.Create(ctx, tx, snapshot)
|
||||
})
|
||||
if err != nil {
|
||||
t.Fatalf("failed to create snapshot: %v", err)
|
||||
}
|
||||
|
||||
// Scan the directory
|
||||
var result *snapshot.ScanResult
|
||||
result, err = scanner.Scan(ctx, "/source", snapshotID)
|
||||
if err != nil {
|
||||
t.Fatalf("scan failed: %v", err)
|
||||
}
|
||||
|
||||
// Should have scanned 3 files (target + 2 symlinks)
|
||||
if result.FilesScanned != 3 {
|
||||
t.Errorf("expected 3 files scanned, got %d", result.FilesScanned)
|
||||
}
|
||||
|
||||
// Check symlinks in database
|
||||
link1, err := repos.Files.GetByPath(ctx, "/source/link1.txt")
|
||||
if err != nil {
|
||||
t.Fatalf("failed to get link1.txt: %v", err)
|
||||
}
|
||||
|
||||
if link1.LinkTarget != "target.txt" {
|
||||
t.Errorf("expected link1.txt target 'target.txt', got %q", link1.LinkTarget)
|
||||
}
|
||||
|
||||
link2, err := repos.Files.GetByPath(ctx, "/source/link2.txt")
|
||||
if err != nil {
|
||||
t.Fatalf("failed to get link2.txt: %v", err)
|
||||
}
|
||||
|
||||
if link2.LinkTarget != "/outside/file.txt" {
|
||||
t.Errorf("expected link2.txt target '/outside/file.txt', got %q", link2.LinkTarget)
|
||||
}
|
||||
}
|
||||
|
||||
func TestScannerLargeFile(t *testing.T) {
|
||||
// Initialize logger for tests
|
||||
log.Initialize(log.Config{})
|
||||
|
||||
// Create in-memory filesystem
|
||||
fs := afero.NewMemMapFs()
|
||||
|
||||
// Create a large file that will require multiple chunks
|
||||
// Use random content to ensure good chunk boundaries
|
||||
largeContent := make([]byte, 1024*1024) // 1MB
|
||||
// Fill with pseudo-random data to ensure chunk boundaries
|
||||
for i := 0; i < len(largeContent); i++ {
|
||||
// Simple pseudo-random generator for deterministic tests
|
||||
largeContent[i] = byte((i * 7919) ^ (i >> 3))
|
||||
}
|
||||
|
||||
if err := fs.MkdirAll("/source", 0755); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
if err := afero.WriteFile(fs, "/source/large.bin", largeContent, 0644); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
// Create test database
|
||||
db, err := database.NewTestDB()
|
||||
if err != nil {
|
||||
t.Fatalf("failed to create test database: %v", err)
|
||||
}
|
||||
defer func() {
|
||||
if err := db.Close(); err != nil {
|
||||
t.Errorf("failed to close database: %v", err)
|
||||
}
|
||||
}()
|
||||
|
||||
repos := database.NewRepositories(db)
|
||||
|
||||
// Create scanner with 64KB average chunk size
|
||||
scanner := snapshot.NewScanner(snapshot.ScannerConfig{
|
||||
FS: fs,
|
||||
ChunkSize: int64(1024 * 64), // 64KB average chunks
|
||||
Repositories: repos,
|
||||
MaxBlobSize: int64(1024 * 1024),
|
||||
CompressionLevel: 3,
|
||||
AgeRecipients: []string{"age1ezrjmfpwsc95svdg0y54mums3zevgzu0x0ecq2f7tp8a05gl0sjq9q9wjg"}, // Test public key
|
||||
})
|
||||
|
||||
// Create a snapshot record for testing
|
||||
ctx := context.Background()
|
||||
snapshotID := "test-snapshot-001"
|
||||
err = repos.WithTx(ctx, func(ctx context.Context, tx *sql.Tx) error {
|
||||
snapshot := &database.Snapshot{
|
||||
ID: snapshotID,
|
||||
Hostname: "test-host",
|
||||
VaultikVersion: "test",
|
||||
StartedAt: time.Now(),
|
||||
CompletedAt: nil,
|
||||
FileCount: 0,
|
||||
ChunkCount: 0,
|
||||
BlobCount: 0,
|
||||
TotalSize: 0,
|
||||
BlobSize: 0,
|
||||
CompressionRatio: 1.0,
|
||||
}
|
||||
return repos.Snapshots.Create(ctx, tx, snapshot)
|
||||
})
|
||||
if err != nil {
|
||||
t.Fatalf("failed to create snapshot: %v", err)
|
||||
}
|
||||
|
||||
// Scan the directory
|
||||
var result *snapshot.ScanResult
|
||||
result, err = scanner.Scan(ctx, "/source", snapshotID)
|
||||
if err != nil {
|
||||
t.Fatalf("scan failed: %v", err)
|
||||
}
|
||||
|
||||
// We scan 1 file + 1 directory = 2 entries
|
||||
if result.FilesScanned != 2 {
|
||||
t.Errorf("expected 2 entries scanned, got %d", result.FilesScanned)
|
||||
}
|
||||
|
||||
// The file size should be at least 1MB
|
||||
if result.BytesScanned < 1024*1024 {
|
||||
t.Errorf("expected at least %d bytes scanned, got %d", 1024*1024, result.BytesScanned)
|
||||
}
|
||||
|
||||
// Verify chunks
|
||||
chunks, err := repos.FileChunks.GetByFile(ctx, "/source/large.bin")
|
||||
if err != nil {
|
||||
t.Fatalf("failed to get chunks: %v", err)
|
||||
}
|
||||
|
||||
// With content-defined chunking, the number of chunks depends on content
|
||||
// For a 1MB file, we should get at least 1 chunk
|
||||
if len(chunks) < 1 {
|
||||
t.Errorf("expected at least 1 chunk, got %d", len(chunks))
|
||||
}
|
||||
|
||||
// Log the actual number of chunks for debugging
|
||||
t.Logf("1MB file produced %d chunks with 64KB average chunk size", len(chunks))
|
||||
|
||||
// Verify chunk sequence
|
||||
for i, fc := range chunks {
|
||||
if fc.Idx != i {
|
||||
t.Errorf("chunk %d has incorrect sequence %d", i, fc.Idx)
|
||||
}
|
||||
}
|
||||
}
|
||||
861
internal/snapshot/snapshot.go
Normal file
861
internal/snapshot/snapshot.go
Normal file
@@ -0,0 +1,861 @@
|
||||
package snapshot
|
||||
|
||||
// Snapshot Metadata Export Process
|
||||
// ================================
|
||||
//
|
||||
// The snapshot metadata contains all information needed to restore a snapshot.
|
||||
// Instead of creating a custom format, we use a trimmed copy of the SQLite
|
||||
// database containing only data relevant to the current snapshot.
|
||||
//
|
||||
// Process Overview:
|
||||
// 1. After all files/chunks/blobs are backed up, create a snapshot record
|
||||
// 2. Close the main database to ensure consistency
|
||||
// 3. Copy the entire database to a temporary file
|
||||
// 4. Open the temporary database
|
||||
// 5. Delete all snapshots except the current one
|
||||
// 6. Delete all orphaned records:
|
||||
// - Files not referenced by any remaining snapshot
|
||||
// - Chunks not referenced by any remaining files
|
||||
// - Blobs not containing any remaining chunks
|
||||
// - All related mapping tables (file_chunks, chunk_files, blob_chunks)
|
||||
// 7. Close the temporary database
|
||||
// 8. Use sqlite3 to dump the cleaned database to SQL
|
||||
// 9. Delete the temporary database file
|
||||
// 10. Compress the SQL dump with zstd
|
||||
// 11. Encrypt the compressed dump with age (if encryption is enabled)
|
||||
// 12. Upload to S3 as: snapshots/{snapshot-id}.sql.zst[.age]
|
||||
// 13. Reopen the main database
|
||||
//
|
||||
// Advantages of this approach:
|
||||
// - No custom metadata format needed
|
||||
// - Reuses existing database schema and relationships
|
||||
// - SQL dumps are portable and compress well
|
||||
// - Restore process can simply execute the SQL
|
||||
// - Atomic and consistent snapshot of all metadata
|
||||
//
|
||||
// TODO: Future improvements:
|
||||
// - Add snapshot-file relationships to track which files belong to which snapshot
|
||||
// - Implement incremental snapshots that reference previous snapshots
|
||||
// - Add snapshot manifest with additional metadata (size, chunk count, etc.)
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"context"
|
||||
"database/sql"
|
||||
"fmt"
|
||||
"io"
|
||||
"os"
|
||||
"os/exec"
|
||||
"path/filepath"
|
||||
"time"
|
||||
|
||||
"git.eeqj.de/sneak/vaultik/internal/blobgen"
|
||||
"git.eeqj.de/sneak/vaultik/internal/config"
|
||||
"git.eeqj.de/sneak/vaultik/internal/database"
|
||||
"git.eeqj.de/sneak/vaultik/internal/log"
|
||||
"git.eeqj.de/sneak/vaultik/internal/s3"
|
||||
"github.com/dustin/go-humanize"
|
||||
"go.uber.org/fx"
|
||||
)
|
||||
|
||||
// SnapshotManager handles snapshot creation and metadata export
|
||||
type SnapshotManager struct {
|
||||
repos *database.Repositories
|
||||
s3Client S3Client
|
||||
config *config.Config
|
||||
}
|
||||
|
||||
// SnapshotManagerParams holds dependencies for NewSnapshotManager
|
||||
type SnapshotManagerParams struct {
|
||||
fx.In
|
||||
|
||||
Repos *database.Repositories
|
||||
S3Client *s3.Client
|
||||
Config *config.Config
|
||||
}
|
||||
|
||||
// NewSnapshotManager creates a new snapshot manager for dependency injection
|
||||
func NewSnapshotManager(params SnapshotManagerParams) *SnapshotManager {
|
||||
return &SnapshotManager{
|
||||
repos: params.Repos,
|
||||
s3Client: params.S3Client,
|
||||
config: params.Config,
|
||||
}
|
||||
}
|
||||
|
||||
// CreateSnapshot creates a new snapshot record in the database at the start of a backup
|
||||
func (sm *SnapshotManager) CreateSnapshot(ctx context.Context, hostname, version, gitRevision string) (string, error) {
|
||||
snapshotID := fmt.Sprintf("%s-%s", hostname, time.Now().UTC().Format("20060102-150405Z"))
|
||||
|
||||
snapshot := &database.Snapshot{
|
||||
ID: snapshotID,
|
||||
Hostname: hostname,
|
||||
VaultikVersion: version,
|
||||
VaultikGitRevision: gitRevision,
|
||||
StartedAt: time.Now().UTC(),
|
||||
CompletedAt: nil, // Not completed yet
|
||||
FileCount: 0,
|
||||
ChunkCount: 0,
|
||||
BlobCount: 0,
|
||||
TotalSize: 0,
|
||||
BlobSize: 0,
|
||||
CompressionRatio: 1.0,
|
||||
}
|
||||
|
||||
err := sm.repos.WithTx(ctx, func(ctx context.Context, tx *sql.Tx) error {
|
||||
return sm.repos.Snapshots.Create(ctx, tx, snapshot)
|
||||
})
|
||||
|
||||
if err != nil {
|
||||
return "", fmt.Errorf("creating snapshot: %w", err)
|
||||
}
|
||||
|
||||
log.Info("Created snapshot", "snapshot_id", snapshotID)
|
||||
return snapshotID, nil
|
||||
}
|
||||
|
||||
// UpdateSnapshotStats updates the statistics for a snapshot during backup
|
||||
func (sm *SnapshotManager) UpdateSnapshotStats(ctx context.Context, snapshotID string, stats BackupStats) error {
|
||||
err := sm.repos.WithTx(ctx, func(ctx context.Context, tx *sql.Tx) error {
|
||||
return sm.repos.Snapshots.UpdateCounts(ctx, tx, snapshotID,
|
||||
int64(stats.FilesScanned),
|
||||
int64(stats.ChunksCreated),
|
||||
int64(stats.BlobsCreated),
|
||||
stats.BytesScanned,
|
||||
stats.BytesUploaded,
|
||||
)
|
||||
})
|
||||
|
||||
if err != nil {
|
||||
return fmt.Errorf("updating snapshot stats: %w", err)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// UpdateSnapshotStatsExtended updates snapshot statistics with extended metrics.
|
||||
// This includes compression level, uncompressed blob size, and upload duration.
|
||||
func (sm *SnapshotManager) UpdateSnapshotStatsExtended(ctx context.Context, snapshotID string, stats ExtendedBackupStats) error {
|
||||
return sm.repos.WithTx(ctx, func(ctx context.Context, tx *sql.Tx) error {
|
||||
// First update basic stats
|
||||
if err := sm.repos.Snapshots.UpdateCounts(ctx, tx, snapshotID,
|
||||
int64(stats.FilesScanned),
|
||||
int64(stats.ChunksCreated),
|
||||
int64(stats.BlobsCreated),
|
||||
stats.BytesScanned,
|
||||
stats.BytesUploaded,
|
||||
); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// Then update extended stats
|
||||
return sm.repos.Snapshots.UpdateExtendedStats(ctx, tx, snapshotID,
|
||||
stats.BlobUncompressedSize,
|
||||
stats.CompressionLevel,
|
||||
stats.UploadDurationMs,
|
||||
)
|
||||
})
|
||||
}
|
||||
|
||||
// CompleteSnapshot marks a snapshot as completed and exports its metadata
|
||||
func (sm *SnapshotManager) CompleteSnapshot(ctx context.Context, snapshotID string) error {
|
||||
// Mark the snapshot as completed
|
||||
err := sm.repos.WithTx(ctx, func(ctx context.Context, tx *sql.Tx) error {
|
||||
return sm.repos.Snapshots.MarkComplete(ctx, tx, snapshotID)
|
||||
})
|
||||
|
||||
if err != nil {
|
||||
return fmt.Errorf("marking snapshot complete: %w", err)
|
||||
}
|
||||
|
||||
log.Info("Completed snapshot", "snapshot_id", snapshotID)
|
||||
return nil
|
||||
}
|
||||
|
||||
// ExportSnapshotMetadata exports snapshot metadata to S3
|
||||
//
|
||||
// This method executes the complete snapshot metadata export process:
|
||||
// 1. Creates a temporary directory for working files
|
||||
// 2. Copies the main database to preserve its state
|
||||
// 3. Cleans the copy to contain only current snapshot data
|
||||
// 4. Dumps the cleaned database to SQL
|
||||
// 5. Compresses the SQL dump with zstd
|
||||
// 6. Encrypts the compressed data (if encryption is enabled)
|
||||
// 7. Uploads to S3 at: snapshots/{snapshot-id}.sql.zst[.age]
|
||||
//
|
||||
// The caller is responsible for:
|
||||
// - Ensuring the main database is closed before calling this method
|
||||
// - Reopening the main database after this method returns
|
||||
//
|
||||
// This ensures database consistency during the copy operation.
|
||||
func (sm *SnapshotManager) ExportSnapshotMetadata(ctx context.Context, dbPath string, snapshotID string) error {
|
||||
log.Info("Phase 3/3: Exporting snapshot metadata", "snapshot_id", snapshotID, "source_db", dbPath)
|
||||
|
||||
// Create temp directory for all temporary files
|
||||
tempDir, err := os.MkdirTemp("", "vaultik-snapshot-*")
|
||||
if err != nil {
|
||||
return fmt.Errorf("creating temp dir: %w", err)
|
||||
}
|
||||
log.Debug("Created temporary directory", "path", tempDir)
|
||||
defer func() {
|
||||
log.Debug("Cleaning up temporary directory", "path", tempDir)
|
||||
if err := os.RemoveAll(tempDir); err != nil {
|
||||
log.Debug("Failed to remove temp dir", "path", tempDir, "error", err)
|
||||
}
|
||||
}()
|
||||
|
||||
// Step 1: Copy database to temp file
|
||||
// The main database should be closed at this point
|
||||
tempDBPath := filepath.Join(tempDir, "snapshot.db")
|
||||
log.Debug("Copying database to temporary location", "source", dbPath, "destination", tempDBPath)
|
||||
if err := copyFile(dbPath, tempDBPath); err != nil {
|
||||
return fmt.Errorf("copying database: %w", err)
|
||||
}
|
||||
log.Debug("Database copy complete", "size", getFileSize(tempDBPath))
|
||||
|
||||
// Step 2: Clean the temp database to only contain current snapshot data
|
||||
log.Debug("Cleaning temporary database to contain only current snapshot data", "snapshot_id", snapshotID, "db_path", tempDBPath)
|
||||
stats, err := sm.cleanSnapshotDB(ctx, tempDBPath, snapshotID)
|
||||
if err != nil {
|
||||
return fmt.Errorf("cleaning snapshot database: %w", err)
|
||||
}
|
||||
log.Info("Temporary database cleanup complete",
|
||||
"db_path", tempDBPath,
|
||||
"size_after_clean", humanize.Bytes(uint64(getFileSize(tempDBPath))),
|
||||
"files", stats.FileCount,
|
||||
"chunks", stats.ChunkCount,
|
||||
"blobs", stats.BlobCount,
|
||||
"total_compressed_size", humanize.Bytes(uint64(stats.CompressedSize)),
|
||||
"total_uncompressed_size", humanize.Bytes(uint64(stats.UncompressedSize)),
|
||||
"compression_ratio", fmt.Sprintf("%.2fx", float64(stats.UncompressedSize)/float64(stats.CompressedSize)))
|
||||
|
||||
// Step 3: Dump the cleaned database to SQL
|
||||
dumpPath := filepath.Join(tempDir, "snapshot.sql")
|
||||
log.Debug("Dumping database to SQL", "source", tempDBPath, "destination", dumpPath)
|
||||
if err := sm.dumpDatabase(tempDBPath, dumpPath); err != nil {
|
||||
return fmt.Errorf("dumping database: %w", err)
|
||||
}
|
||||
log.Debug("SQL dump complete", "size", getFileSize(dumpPath))
|
||||
|
||||
// Step 4: Compress and encrypt the SQL dump
|
||||
compressedPath := filepath.Join(tempDir, "snapshot.sql.zst.age")
|
||||
log.Debug("Compressing and encrypting SQL dump", "source", dumpPath, "destination", compressedPath)
|
||||
if err := sm.compressDump(dumpPath, compressedPath); err != nil {
|
||||
return fmt.Errorf("compressing dump: %w", err)
|
||||
}
|
||||
log.Debug("Compression complete", "original_size", getFileSize(dumpPath), "compressed_size", getFileSize(compressedPath))
|
||||
|
||||
// Step 5: Read compressed and encrypted data for upload
|
||||
log.Debug("Reading compressed and encrypted data for upload", "path", compressedPath)
|
||||
finalData, err := os.ReadFile(compressedPath)
|
||||
if err != nil {
|
||||
return fmt.Errorf("reading compressed dump: %w", err)
|
||||
}
|
||||
|
||||
// Step 6: Generate blob manifest (before closing temp DB)
|
||||
log.Debug("Generating blob manifest from temporary database", "db_path", tempDBPath)
|
||||
blobManifest, err := sm.generateBlobManifest(ctx, tempDBPath, snapshotID)
|
||||
if err != nil {
|
||||
return fmt.Errorf("generating blob manifest: %w", err)
|
||||
}
|
||||
|
||||
// Step 7: Upload to S3 in snapshot subdirectory
|
||||
// Upload database backup (compressed and encrypted)
|
||||
dbKey := fmt.Sprintf("metadata/%s/db.zst.age", snapshotID)
|
||||
|
||||
log.Debug("Uploading snapshot database to S3", "key", dbKey, "size", len(finalData))
|
||||
dbUploadStart := time.Now()
|
||||
if err := sm.s3Client.PutObject(ctx, dbKey, bytes.NewReader(finalData)); err != nil {
|
||||
return fmt.Errorf("uploading snapshot database: %w", err)
|
||||
}
|
||||
dbUploadDuration := time.Since(dbUploadStart)
|
||||
dbUploadSpeed := float64(len(finalData)) * 8 / dbUploadDuration.Seconds() // bits per second
|
||||
log.Info("Uploaded snapshot database to S3",
|
||||
"path", dbKey,
|
||||
"size", humanize.Bytes(uint64(len(finalData))),
|
||||
"duration", dbUploadDuration,
|
||||
"speed", humanize.SI(dbUploadSpeed, "bps"))
|
||||
|
||||
// Upload blob manifest (compressed only, not encrypted)
|
||||
manifestKey := fmt.Sprintf("metadata/%s/manifest.json.zst", snapshotID)
|
||||
log.Debug("Uploading blob manifest to S3", "key", manifestKey, "size", len(blobManifest))
|
||||
manifestUploadStart := time.Now()
|
||||
if err := sm.s3Client.PutObject(ctx, manifestKey, bytes.NewReader(blobManifest)); err != nil {
|
||||
return fmt.Errorf("uploading blob manifest: %w", err)
|
||||
}
|
||||
manifestUploadDuration := time.Since(manifestUploadStart)
|
||||
manifestUploadSpeed := float64(len(blobManifest)) * 8 / manifestUploadDuration.Seconds() // bits per second
|
||||
log.Info("Uploaded blob manifest to S3",
|
||||
"path", manifestKey,
|
||||
"size", humanize.Bytes(uint64(len(blobManifest))),
|
||||
"duration", manifestUploadDuration,
|
||||
"speed", humanize.SI(manifestUploadSpeed, "bps"))
|
||||
|
||||
log.Info("Uploaded snapshot metadata",
|
||||
"snapshot_id", snapshotID,
|
||||
"db_size", len(finalData),
|
||||
"manifest_size", len(blobManifest))
|
||||
return nil
|
||||
}
|
||||
|
||||
// CleanupStats contains statistics about cleaned snapshot database
|
||||
type CleanupStats struct {
|
||||
FileCount int
|
||||
ChunkCount int
|
||||
BlobCount int
|
||||
CompressedSize int64
|
||||
UncompressedSize int64
|
||||
}
|
||||
|
||||
// cleanSnapshotDB removes all data except for the specified snapshot
|
||||
//
|
||||
// The cleanup is performed in a specific order to maintain referential integrity:
|
||||
// 1. Delete other snapshots
|
||||
// 2. Delete orphaned snapshot associations (snapshot_files, snapshot_blobs) for deleted snapshots
|
||||
// 3. Delete orphaned files (not in the current snapshot)
|
||||
// 4. Delete orphaned chunk-to-file mappings (references to deleted files)
|
||||
// 5. Delete orphaned blobs (not in the current snapshot)
|
||||
// 6. Delete orphaned blob-to-chunk mappings (references to deleted chunks)
|
||||
// 7. Delete orphaned chunks (not referenced by any file)
|
||||
//
|
||||
// Each step is implemented as a separate method for clarity and maintainability.
|
||||
func (sm *SnapshotManager) cleanSnapshotDB(ctx context.Context, dbPath string, snapshotID string) (*CleanupStats, error) {
|
||||
// Open the temp database
|
||||
db, err := database.New(ctx, dbPath)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("opening temp database: %w", err)
|
||||
}
|
||||
defer func() {
|
||||
if err := db.Close(); err != nil {
|
||||
log.Debug("Failed to close temp database", "error", err)
|
||||
}
|
||||
}()
|
||||
|
||||
// Start a transaction
|
||||
tx, err := db.BeginTx(ctx, nil)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("beginning transaction: %w", err)
|
||||
}
|
||||
defer func() {
|
||||
if rbErr := tx.Rollback(); rbErr != nil && rbErr != sql.ErrTxDone {
|
||||
log.Debug("Failed to rollback transaction", "error", rbErr)
|
||||
}
|
||||
}()
|
||||
|
||||
// Execute cleanup steps in order
|
||||
if err := sm.deleteOtherSnapshots(ctx, tx, snapshotID); err != nil {
|
||||
return nil, fmt.Errorf("step 1 - delete other snapshots: %w", err)
|
||||
}
|
||||
|
||||
if err := sm.deleteOrphanedSnapshotAssociations(ctx, tx, snapshotID); err != nil {
|
||||
return nil, fmt.Errorf("step 2 - delete orphaned snapshot associations: %w", err)
|
||||
}
|
||||
|
||||
if err := sm.deleteOrphanedFiles(ctx, tx, snapshotID); err != nil {
|
||||
return nil, fmt.Errorf("step 3 - delete orphaned files: %w", err)
|
||||
}
|
||||
|
||||
if err := sm.deleteOrphanedChunkToFileMappings(ctx, tx); err != nil {
|
||||
return nil, fmt.Errorf("step 4 - delete orphaned chunk-to-file mappings: %w", err)
|
||||
}
|
||||
|
||||
if err := sm.deleteOrphanedBlobs(ctx, tx, snapshotID); err != nil {
|
||||
return nil, fmt.Errorf("step 5 - delete orphaned blobs: %w", err)
|
||||
}
|
||||
|
||||
if err := sm.deleteOrphanedBlobToChunkMappings(ctx, tx); err != nil {
|
||||
return nil, fmt.Errorf("step 6 - delete orphaned blob-to-chunk mappings: %w", err)
|
||||
}
|
||||
|
||||
if err := sm.deleteOrphanedChunks(ctx, tx); err != nil {
|
||||
return nil, fmt.Errorf("step 7 - delete orphaned chunks: %w", err)
|
||||
}
|
||||
|
||||
// Commit transaction
|
||||
log.Debug("[Temp DB Cleanup] Committing cleanup transaction")
|
||||
if err := tx.Commit(); err != nil {
|
||||
return nil, fmt.Errorf("committing transaction: %w", err)
|
||||
}
|
||||
|
||||
// Collect statistics about the cleaned database
|
||||
stats := &CleanupStats{}
|
||||
|
||||
// Count files
|
||||
var fileCount int
|
||||
err = db.QueryRowWithLog(ctx, "SELECT COUNT(*) FROM files").Scan(&fileCount)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("counting files: %w", err)
|
||||
}
|
||||
stats.FileCount = fileCount
|
||||
|
||||
// Count chunks
|
||||
var chunkCount int
|
||||
err = db.QueryRowWithLog(ctx, "SELECT COUNT(*) FROM chunks").Scan(&chunkCount)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("counting chunks: %w", err)
|
||||
}
|
||||
stats.ChunkCount = chunkCount
|
||||
|
||||
// Count blobs and get sizes
|
||||
var blobCount int
|
||||
var compressedSize, uncompressedSize sql.NullInt64
|
||||
err = db.QueryRowWithLog(ctx, `
|
||||
SELECT COUNT(*), COALESCE(SUM(compressed_size), 0), COALESCE(SUM(uncompressed_size), 0)
|
||||
FROM blobs
|
||||
WHERE blob_hash IN (SELECT blob_hash FROM snapshot_blobs WHERE snapshot_id = ?)
|
||||
`, snapshotID).Scan(&blobCount, &compressedSize, &uncompressedSize)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("counting blobs and sizes: %w", err)
|
||||
}
|
||||
stats.BlobCount = blobCount
|
||||
stats.CompressedSize = compressedSize.Int64
|
||||
stats.UncompressedSize = uncompressedSize.Int64
|
||||
|
||||
log.Debug("[Temp DB Cleanup] Database cleanup complete", "stats", stats)
|
||||
return stats, nil
|
||||
}
|
||||
|
||||
// dumpDatabase creates a SQL dump of the database
|
||||
func (sm *SnapshotManager) dumpDatabase(dbPath, dumpPath string) error {
|
||||
log.Debug("Running sqlite3 dump command", "source", dbPath, "destination", dumpPath)
|
||||
cmd := exec.Command("sqlite3", dbPath, ".dump")
|
||||
|
||||
output, err := cmd.Output()
|
||||
if err != nil {
|
||||
return fmt.Errorf("running sqlite3 dump: %w", err)
|
||||
}
|
||||
|
||||
log.Debug("SQL dump generated", "size", len(output))
|
||||
if err := os.WriteFile(dumpPath, output, 0644); err != nil {
|
||||
return fmt.Errorf("writing dump file: %w", err)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// compressDump compresses the SQL dump using zstd
|
||||
func (sm *SnapshotManager) compressDump(inputPath, outputPath string) error {
|
||||
log.Debug("Opening SQL dump for compression", "path", inputPath)
|
||||
input, err := os.Open(inputPath)
|
||||
if err != nil {
|
||||
return fmt.Errorf("opening input file: %w", err)
|
||||
}
|
||||
defer func() {
|
||||
log.Debug("Closing input file", "path", inputPath)
|
||||
if err := input.Close(); err != nil {
|
||||
log.Debug("Failed to close input file", "path", inputPath, "error", err)
|
||||
}
|
||||
}()
|
||||
|
||||
log.Debug("Creating output file for compressed and encrypted data", "path", outputPath)
|
||||
output, err := os.Create(outputPath)
|
||||
if err != nil {
|
||||
return fmt.Errorf("creating output file: %w", err)
|
||||
}
|
||||
defer func() {
|
||||
log.Debug("Closing output file", "path", outputPath)
|
||||
if err := output.Close(); err != nil {
|
||||
log.Debug("Failed to close output file", "path", outputPath, "error", err)
|
||||
}
|
||||
}()
|
||||
|
||||
// Use blobgen for compression and encryption
|
||||
log.Debug("Creating compressor/encryptor", "level", sm.config.CompressionLevel)
|
||||
writer, err := blobgen.NewWriter(output, sm.config.CompressionLevel, sm.config.AgeRecipients)
|
||||
if err != nil {
|
||||
return fmt.Errorf("creating blobgen writer: %w", err)
|
||||
}
|
||||
defer func() {
|
||||
if err := writer.Close(); err != nil {
|
||||
log.Debug("Failed to close writer", "error", err)
|
||||
}
|
||||
}()
|
||||
|
||||
log.Debug("Compressing and encrypting data")
|
||||
if _, err := io.Copy(writer, input); err != nil {
|
||||
return fmt.Errorf("compressing data: %w", err)
|
||||
}
|
||||
|
||||
// Close writer to flush all data
|
||||
if err := writer.Close(); err != nil {
|
||||
return fmt.Errorf("closing writer: %w", err)
|
||||
}
|
||||
|
||||
log.Debug("Compression complete", "hash", fmt.Sprintf("%x", writer.Sum256()))
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// copyFile copies a file from src to dst
|
||||
func copyFile(src, dst string) error {
|
||||
log.Debug("Opening source file for copy", "path", src)
|
||||
sourceFile, err := os.Open(src)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
defer func() {
|
||||
log.Debug("Closing source file", "path", src)
|
||||
if err := sourceFile.Close(); err != nil {
|
||||
log.Debug("Failed to close source file", "path", src, "error", err)
|
||||
}
|
||||
}()
|
||||
|
||||
log.Debug("Creating destination file", "path", dst)
|
||||
destFile, err := os.Create(dst)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
defer func() {
|
||||
log.Debug("Closing destination file", "path", dst)
|
||||
if err := destFile.Close(); err != nil {
|
||||
log.Debug("Failed to close destination file", "path", dst, "error", err)
|
||||
}
|
||||
}()
|
||||
|
||||
log.Debug("Copying file data")
|
||||
n, err := io.Copy(destFile, sourceFile)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
log.Debug("File copy complete", "bytes_copied", n)
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// generateBlobManifest creates a compressed JSON list of all blobs in the snapshot
|
||||
func (sm *SnapshotManager) generateBlobManifest(ctx context.Context, dbPath string, snapshotID string) ([]byte, error) {
|
||||
log.Debug("Generating blob manifest", "db_path", dbPath, "snapshot_id", snapshotID)
|
||||
|
||||
// Open the cleaned database using the database package
|
||||
db, err := database.New(ctx, dbPath)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("opening database: %w", err)
|
||||
}
|
||||
defer func() { _ = db.Close() }()
|
||||
|
||||
// Create repositories to access the data
|
||||
repos := database.NewRepositories(db)
|
||||
|
||||
// Get all blobs for this snapshot
|
||||
log.Debug("Querying blobs for snapshot", "snapshot_id", snapshotID)
|
||||
blobHashes, err := repos.Snapshots.GetBlobHashes(ctx, snapshotID)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("getting snapshot blobs: %w", err)
|
||||
}
|
||||
log.Debug("Found blobs", "count", len(blobHashes))
|
||||
|
||||
// Get blob details including sizes
|
||||
blobs := make([]BlobInfo, 0, len(blobHashes))
|
||||
totalCompressedSize := int64(0)
|
||||
|
||||
for _, hash := range blobHashes {
|
||||
blob, err := repos.Blobs.GetByHash(ctx, hash)
|
||||
if err != nil {
|
||||
log.Warn("Failed to get blob details", "hash", hash, "error", err)
|
||||
continue
|
||||
}
|
||||
if blob != nil {
|
||||
blobs = append(blobs, BlobInfo{
|
||||
Hash: hash,
|
||||
CompressedSize: blob.CompressedSize,
|
||||
})
|
||||
totalCompressedSize += blob.CompressedSize
|
||||
}
|
||||
}
|
||||
|
||||
// Create manifest
|
||||
manifest := &Manifest{
|
||||
SnapshotID: snapshotID,
|
||||
Timestamp: time.Now().UTC().Format(time.RFC3339),
|
||||
BlobCount: len(blobs),
|
||||
TotalCompressedSize: totalCompressedSize,
|
||||
Blobs: blobs,
|
||||
}
|
||||
|
||||
// Encode manifest
|
||||
log.Debug("Encoding manifest")
|
||||
compressedData, err := EncodeManifest(manifest, sm.config.CompressionLevel)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("encoding manifest: %w", err)
|
||||
}
|
||||
|
||||
log.Info("Generated blob manifest",
|
||||
"snapshot_id", snapshotID,
|
||||
"blob_count", len(blobs),
|
||||
"total_compressed_size", totalCompressedSize,
|
||||
"manifest_size", len(compressedData))
|
||||
|
||||
return compressedData, nil
|
||||
}
|
||||
|
||||
// compressData compresses data using zstd
|
||||
|
||||
// getFileSize returns the size of a file in bytes, or -1 if error
|
||||
func getFileSize(path string) int64 {
|
||||
info, err := os.Stat(path)
|
||||
if err != nil {
|
||||
return -1
|
||||
}
|
||||
return info.Size()
|
||||
}
|
||||
|
||||
// BackupStats contains statistics from a backup operation
|
||||
type BackupStats struct {
|
||||
FilesScanned int
|
||||
BytesScanned int64
|
||||
ChunksCreated int
|
||||
BlobsCreated int
|
||||
BytesUploaded int64
|
||||
}
|
||||
|
||||
// ExtendedBackupStats contains additional statistics for comprehensive tracking
|
||||
type ExtendedBackupStats struct {
|
||||
BackupStats
|
||||
BlobUncompressedSize int64 // Total uncompressed size of all referenced blobs
|
||||
CompressionLevel int // Compression level used for this snapshot
|
||||
UploadDurationMs int64 // Total milliseconds spent uploading to S3
|
||||
}
|
||||
|
||||
// CleanupIncompleteSnapshots removes incomplete snapshots that don't have metadata in S3.
|
||||
// This is critical for data safety: incomplete snapshots can cause deduplication to skip
|
||||
// files that were never successfully backed up, resulting in data loss.
|
||||
func (sm *SnapshotManager) CleanupIncompleteSnapshots(ctx context.Context, hostname string) error {
|
||||
log.Info("Checking for incomplete snapshots", "hostname", hostname)
|
||||
|
||||
// Get all incomplete snapshots for this hostname
|
||||
incompleteSnapshots, err := sm.repos.Snapshots.GetIncompleteByHostname(ctx, hostname)
|
||||
if err != nil {
|
||||
return fmt.Errorf("getting incomplete snapshots: %w", err)
|
||||
}
|
||||
|
||||
if len(incompleteSnapshots) == 0 {
|
||||
log.Debug("No incomplete snapshots found")
|
||||
return nil
|
||||
}
|
||||
|
||||
log.Info("Found incomplete snapshots", "count", len(incompleteSnapshots))
|
||||
|
||||
// Check each incomplete snapshot for metadata in S3
|
||||
for _, snapshot := range incompleteSnapshots {
|
||||
// Check if metadata exists in S3
|
||||
metadataKey := fmt.Sprintf("metadata/%s/db.zst", snapshot.ID)
|
||||
_, err := sm.s3Client.StatObject(ctx, metadataKey)
|
||||
|
||||
if err != nil {
|
||||
// Metadata doesn't exist in S3 - this is an incomplete snapshot
|
||||
log.Info("Cleaning up incomplete snapshot record", "snapshot_id", snapshot.ID, "started_at", snapshot.StartedAt)
|
||||
|
||||
// Delete the snapshot and all its associations
|
||||
if err := sm.deleteSnapshot(ctx, snapshot.ID); err != nil {
|
||||
return fmt.Errorf("deleting incomplete snapshot %s: %w", snapshot.ID, err)
|
||||
}
|
||||
|
||||
log.Info("Deleted incomplete snapshot record and associated data", "snapshot_id", snapshot.ID)
|
||||
} else {
|
||||
// Metadata exists - this snapshot was completed but database wasn't updated
|
||||
// This shouldn't happen in normal operation, but mark it complete
|
||||
log.Warn("Found snapshot with S3 metadata but incomplete in database", "snapshot_id", snapshot.ID)
|
||||
if err := sm.repos.Snapshots.MarkComplete(ctx, nil, snapshot.ID); err != nil {
|
||||
log.Error("Failed to mark snapshot as complete in database", "snapshot_id", snapshot.ID, "error", err)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// deleteSnapshot removes a snapshot and all its associations from the database
|
||||
func (sm *SnapshotManager) deleteSnapshot(ctx context.Context, snapshotID string) error {
|
||||
// Delete snapshot_files entries
|
||||
if err := sm.repos.Snapshots.DeleteSnapshotFiles(ctx, snapshotID); err != nil {
|
||||
return fmt.Errorf("deleting snapshot files: %w", err)
|
||||
}
|
||||
|
||||
// Delete snapshot_blobs entries
|
||||
if err := sm.repos.Snapshots.DeleteSnapshotBlobs(ctx, snapshotID); err != nil {
|
||||
return fmt.Errorf("deleting snapshot blobs: %w", err)
|
||||
}
|
||||
|
||||
// Delete the snapshot itself
|
||||
if err := sm.repos.Snapshots.Delete(ctx, snapshotID); err != nil {
|
||||
return fmt.Errorf("deleting snapshot: %w", err)
|
||||
}
|
||||
|
||||
// Clean up orphaned data
|
||||
log.Debug("Cleaning up orphaned records in main database")
|
||||
if err := sm.cleanupOrphanedData(ctx); err != nil {
|
||||
return fmt.Errorf("cleaning up orphaned data: %w", err)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// cleanupOrphanedData removes files, chunks, and blobs that are no longer referenced by any snapshot
|
||||
func (sm *SnapshotManager) cleanupOrphanedData(ctx context.Context) error {
|
||||
// Order is important to respect foreign key constraints:
|
||||
// 1. Delete orphaned files (will cascade delete file_chunks)
|
||||
// 2. Delete orphaned blobs (will cascade delete blob_chunks for deleted blobs)
|
||||
// 3. Delete orphaned blob_chunks (where blob exists but chunk doesn't)
|
||||
// 4. Delete orphaned chunks (now safe after all blob_chunks are gone)
|
||||
|
||||
// Delete orphaned files (files not in any snapshot)
|
||||
log.Debug("Deleting orphaned file records from database")
|
||||
if err := sm.repos.Files.DeleteOrphaned(ctx); err != nil {
|
||||
return fmt.Errorf("deleting orphaned files: %w", err)
|
||||
}
|
||||
|
||||
// Delete orphaned blobs (blobs not in any snapshot)
|
||||
// This will cascade delete blob_chunks for deleted blobs
|
||||
log.Debug("Deleting orphaned blob records from database")
|
||||
if err := sm.repos.Blobs.DeleteOrphaned(ctx); err != nil {
|
||||
return fmt.Errorf("deleting orphaned blobs: %w", err)
|
||||
}
|
||||
|
||||
// Delete orphaned blob_chunks entries
|
||||
// This handles cases where the blob still exists but chunks were deleted
|
||||
log.Debug("Deleting orphaned blob_chunks associations from database")
|
||||
if err := sm.repos.BlobChunks.DeleteOrphaned(ctx); err != nil {
|
||||
return fmt.Errorf("deleting orphaned blob_chunks: %w", err)
|
||||
}
|
||||
|
||||
// Delete orphaned chunks (chunks not referenced by any file)
|
||||
// This must come after cleaning up blob_chunks to avoid foreign key violations
|
||||
log.Debug("Deleting orphaned chunk records from database")
|
||||
if err := sm.repos.Chunks.DeleteOrphaned(ctx); err != nil {
|
||||
return fmt.Errorf("deleting orphaned chunks: %w", err)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// deleteOtherSnapshots deletes all snapshots except the current one
|
||||
func (sm *SnapshotManager) deleteOtherSnapshots(ctx context.Context, tx *sql.Tx, currentSnapshotID string) error {
|
||||
log.Debug("[Temp DB Cleanup] Deleting all snapshot records except current", "keeping", currentSnapshotID)
|
||||
database.LogSQL("Execute", "DELETE FROM snapshots WHERE id != ?", currentSnapshotID)
|
||||
result, err := tx.ExecContext(ctx, "DELETE FROM snapshots WHERE id != ?", currentSnapshotID)
|
||||
if err != nil {
|
||||
return fmt.Errorf("deleting other snapshots: %w", err)
|
||||
}
|
||||
rowsAffected, _ := result.RowsAffected()
|
||||
log.Debug("[Temp DB Cleanup] Deleted snapshot records from database", "count", rowsAffected)
|
||||
return nil
|
||||
}
|
||||
|
||||
// deleteOrphanedSnapshotAssociations deletes snapshot_files and snapshot_blobs for deleted snapshots
|
||||
func (sm *SnapshotManager) deleteOrphanedSnapshotAssociations(ctx context.Context, tx *sql.Tx, currentSnapshotID string) error {
|
||||
// Delete orphaned snapshot_files
|
||||
log.Debug("[Temp DB Cleanup] Deleting orphaned snapshot_files associations")
|
||||
database.LogSQL("Execute", "DELETE FROM snapshot_files WHERE snapshot_id != ?", currentSnapshotID)
|
||||
result, err := tx.ExecContext(ctx, "DELETE FROM snapshot_files WHERE snapshot_id != ?", currentSnapshotID)
|
||||
if err != nil {
|
||||
return fmt.Errorf("deleting orphaned snapshot_files: %w", err)
|
||||
}
|
||||
rowsAffected, _ := result.RowsAffected()
|
||||
log.Debug("[Temp DB Cleanup] Deleted snapshot_files associations", "count", rowsAffected)
|
||||
|
||||
// Delete orphaned snapshot_blobs
|
||||
log.Debug("[Temp DB Cleanup] Deleting orphaned snapshot_blobs associations")
|
||||
database.LogSQL("Execute", "DELETE FROM snapshot_blobs WHERE snapshot_id != ?", currentSnapshotID)
|
||||
result, err = tx.ExecContext(ctx, "DELETE FROM snapshot_blobs WHERE snapshot_id != ?", currentSnapshotID)
|
||||
if err != nil {
|
||||
return fmt.Errorf("deleting orphaned snapshot_blobs: %w", err)
|
||||
}
|
||||
rowsAffected, _ = result.RowsAffected()
|
||||
log.Debug("[Temp DB Cleanup] Deleted snapshot_blobs associations", "count", rowsAffected)
|
||||
return nil
|
||||
}
|
||||
|
||||
// deleteOrphanedFiles deletes files not in the current snapshot
|
||||
func (sm *SnapshotManager) deleteOrphanedFiles(ctx context.Context, tx *sql.Tx, currentSnapshotID string) error {
|
||||
log.Debug("[Temp DB Cleanup] Deleting file records not referenced by current snapshot")
|
||||
database.LogSQL("Execute", `DELETE FROM files WHERE NOT EXISTS (SELECT 1 FROM snapshot_files WHERE snapshot_files.file_id = files.id AND snapshot_files.snapshot_id = ?)`, currentSnapshotID)
|
||||
result, err := tx.ExecContext(ctx, `
|
||||
DELETE FROM files
|
||||
WHERE NOT EXISTS (
|
||||
SELECT 1 FROM snapshot_files
|
||||
WHERE snapshot_files.file_id = files.id
|
||||
AND snapshot_files.snapshot_id = ?
|
||||
)`, currentSnapshotID)
|
||||
if err != nil {
|
||||
return fmt.Errorf("deleting orphaned files: %w", err)
|
||||
}
|
||||
rowsAffected, _ := result.RowsAffected()
|
||||
log.Debug("[Temp DB Cleanup] Deleted file records from database", "count", rowsAffected)
|
||||
|
||||
// Note: file_chunks will be deleted via CASCADE
|
||||
log.Debug("[Temp DB Cleanup] file_chunks associations deleted via CASCADE")
|
||||
return nil
|
||||
}
|
||||
|
||||
// deleteOrphanedChunkToFileMappings deletes chunk_files entries for deleted files
|
||||
func (sm *SnapshotManager) deleteOrphanedChunkToFileMappings(ctx context.Context, tx *sql.Tx) error {
|
||||
log.Debug("[Temp DB Cleanup] Deleting orphaned chunk_files associations")
|
||||
database.LogSQL("Execute", `DELETE FROM chunk_files WHERE NOT EXISTS (SELECT 1 FROM files WHERE files.id = chunk_files.file_id)`)
|
||||
result, err := tx.ExecContext(ctx, `
|
||||
DELETE FROM chunk_files
|
||||
WHERE NOT EXISTS (
|
||||
SELECT 1 FROM files
|
||||
WHERE files.id = chunk_files.file_id
|
||||
)`)
|
||||
if err != nil {
|
||||
return fmt.Errorf("deleting orphaned chunk_files: %w", err)
|
||||
}
|
||||
rowsAffected, _ := result.RowsAffected()
|
||||
log.Debug("[Temp DB Cleanup] Deleted chunk_files associations", "count", rowsAffected)
|
||||
return nil
|
||||
}
|
||||
|
||||
// deleteOrphanedBlobs deletes blobs not in the current snapshot
|
||||
func (sm *SnapshotManager) deleteOrphanedBlobs(ctx context.Context, tx *sql.Tx, currentSnapshotID string) error {
|
||||
log.Debug("[Temp DB Cleanup] Deleting blob records not referenced by current snapshot")
|
||||
database.LogSQL("Execute", `DELETE FROM blobs WHERE NOT EXISTS (SELECT 1 FROM snapshot_blobs WHERE snapshot_blobs.blob_hash = blobs.blob_hash AND snapshot_blobs.snapshot_id = ?)`, currentSnapshotID)
|
||||
result, err := tx.ExecContext(ctx, `
|
||||
DELETE FROM blobs
|
||||
WHERE NOT EXISTS (
|
||||
SELECT 1 FROM snapshot_blobs
|
||||
WHERE snapshot_blobs.blob_hash = blobs.blob_hash
|
||||
AND snapshot_blobs.snapshot_id = ?
|
||||
)`, currentSnapshotID)
|
||||
if err != nil {
|
||||
return fmt.Errorf("deleting orphaned blobs: %w", err)
|
||||
}
|
||||
rowsAffected, _ := result.RowsAffected()
|
||||
log.Debug("[Temp DB Cleanup] Deleted blob records from database", "count", rowsAffected)
|
||||
return nil
|
||||
}
|
||||
|
||||
// deleteOrphanedBlobToChunkMappings deletes blob_chunks entries for deleted blobs
|
||||
func (sm *SnapshotManager) deleteOrphanedBlobToChunkMappings(ctx context.Context, tx *sql.Tx) error {
|
||||
log.Debug("[Temp DB Cleanup] Deleting orphaned blob_chunks associations")
|
||||
database.LogSQL("Execute", `DELETE FROM blob_chunks WHERE NOT EXISTS (SELECT 1 FROM blobs WHERE blobs.id = blob_chunks.blob_id)`)
|
||||
result, err := tx.ExecContext(ctx, `
|
||||
DELETE FROM blob_chunks
|
||||
WHERE NOT EXISTS (
|
||||
SELECT 1 FROM blobs
|
||||
WHERE blobs.id = blob_chunks.blob_id
|
||||
)`)
|
||||
if err != nil {
|
||||
return fmt.Errorf("deleting orphaned blob_chunks: %w", err)
|
||||
}
|
||||
rowsAffected, _ := result.RowsAffected()
|
||||
log.Debug("[Temp DB Cleanup] Deleted blob_chunks associations", "count", rowsAffected)
|
||||
return nil
|
||||
}
|
||||
|
||||
// deleteOrphanedChunks deletes chunks not referenced by any file
|
||||
func (sm *SnapshotManager) deleteOrphanedChunks(ctx context.Context, tx *sql.Tx) error {
|
||||
log.Debug("[Temp DB Cleanup] Deleting orphaned chunk records")
|
||||
database.LogSQL("Execute", `DELETE FROM chunks WHERE NOT EXISTS (SELECT 1 FROM file_chunks WHERE file_chunks.chunk_hash = chunks.chunk_hash)`)
|
||||
result, err := tx.ExecContext(ctx, `
|
||||
DELETE FROM chunks
|
||||
WHERE NOT EXISTS (
|
||||
SELECT 1 FROM file_chunks
|
||||
WHERE file_chunks.chunk_hash = chunks.chunk_hash
|
||||
)`)
|
||||
if err != nil {
|
||||
return fmt.Errorf("deleting orphaned chunks: %w", err)
|
||||
}
|
||||
rowsAffected, _ := result.RowsAffected()
|
||||
log.Debug("[Temp DB Cleanup] Deleted chunk records from database", "count", rowsAffected)
|
||||
return nil
|
||||
}
|
||||
163
internal/snapshot/snapshot_test.go
Normal file
163
internal/snapshot/snapshot_test.go
Normal file
@@ -0,0 +1,163 @@
|
||||
package snapshot
|
||||
|
||||
import (
|
||||
"context"
|
||||
"database/sql"
|
||||
"path/filepath"
|
||||
"testing"
|
||||
|
||||
"git.eeqj.de/sneak/vaultik/internal/config"
|
||||
"git.eeqj.de/sneak/vaultik/internal/database"
|
||||
"git.eeqj.de/sneak/vaultik/internal/log"
|
||||
)
|
||||
|
||||
const (
|
||||
// Test age public key for encryption
|
||||
testAgeRecipient = "age1ezrjmfpwsc95svdg0y54mums3zevgzu0x0ecq2f7tp8a05gl0sjq9q9wjg"
|
||||
)
|
||||
|
||||
func TestCleanSnapshotDBEmptySnapshot(t *testing.T) {
|
||||
// Initialize logger
|
||||
log.Initialize(log.Config{})
|
||||
|
||||
ctx := context.Background()
|
||||
|
||||
// Create a test database
|
||||
tempDir := t.TempDir()
|
||||
dbPath := filepath.Join(tempDir, "test.db")
|
||||
db, err := database.New(ctx, dbPath)
|
||||
if err != nil {
|
||||
t.Fatalf("failed to create database: %v", err)
|
||||
}
|
||||
|
||||
repos := database.NewRepositories(db)
|
||||
|
||||
// Create an empty snapshot
|
||||
snapshot := &database.Snapshot{
|
||||
ID: "empty-snapshot",
|
||||
Hostname: "test-host",
|
||||
}
|
||||
|
||||
err = repos.WithTx(ctx, func(ctx context.Context, tx *sql.Tx) error {
|
||||
return repos.Snapshots.Create(ctx, tx, snapshot)
|
||||
})
|
||||
if err != nil {
|
||||
t.Fatalf("failed to create snapshot: %v", err)
|
||||
}
|
||||
|
||||
// Create some files and chunks not associated with any snapshot
|
||||
file := &database.File{Path: "/orphan/file.txt", Size: 1000}
|
||||
chunk := &database.Chunk{ChunkHash: "orphan-chunk", Size: 500}
|
||||
|
||||
err = repos.WithTx(ctx, func(ctx context.Context, tx *sql.Tx) error {
|
||||
if err := repos.Files.Create(ctx, tx, file); err != nil {
|
||||
return err
|
||||
}
|
||||
return repos.Chunks.Create(ctx, tx, chunk)
|
||||
})
|
||||
if err != nil {
|
||||
t.Fatalf("failed to create orphan data: %v", err)
|
||||
}
|
||||
|
||||
// Close the database
|
||||
if err := db.Close(); err != nil {
|
||||
t.Fatalf("failed to close database: %v", err)
|
||||
}
|
||||
|
||||
// Copy database
|
||||
tempDBPath := filepath.Join(tempDir, "temp.db")
|
||||
if err := copyFile(dbPath, tempDBPath); err != nil {
|
||||
t.Fatalf("failed to copy database: %v", err)
|
||||
}
|
||||
|
||||
// Create a mock config for testing
|
||||
cfg := &config.Config{
|
||||
CompressionLevel: 3,
|
||||
AgeRecipients: []string{testAgeRecipient},
|
||||
}
|
||||
// Clean the database
|
||||
sm := &SnapshotManager{config: cfg}
|
||||
if _, err := sm.cleanSnapshotDB(ctx, tempDBPath, snapshot.ID); err != nil {
|
||||
t.Fatalf("failed to clean snapshot database: %v", err)
|
||||
}
|
||||
|
||||
// Verify the cleaned database
|
||||
cleanedDB, err := database.New(ctx, tempDBPath)
|
||||
if err != nil {
|
||||
t.Fatalf("failed to open cleaned database: %v", err)
|
||||
}
|
||||
defer func() {
|
||||
if err := cleanedDB.Close(); err != nil {
|
||||
t.Errorf("failed to close database: %v", err)
|
||||
}
|
||||
}()
|
||||
|
||||
cleanedRepos := database.NewRepositories(cleanedDB)
|
||||
|
||||
// Verify snapshot exists
|
||||
verifySnapshot, err := cleanedRepos.Snapshots.GetByID(ctx, snapshot.ID)
|
||||
if err != nil {
|
||||
t.Fatalf("failed to get snapshot: %v", err)
|
||||
}
|
||||
if verifySnapshot == nil {
|
||||
t.Error("snapshot should exist")
|
||||
}
|
||||
|
||||
// Verify orphan file is gone
|
||||
f, err := cleanedRepos.Files.GetByPath(ctx, file.Path)
|
||||
if err != nil {
|
||||
t.Fatalf("failed to check file: %v", err)
|
||||
}
|
||||
if f != nil {
|
||||
t.Error("orphan file should not exist")
|
||||
}
|
||||
|
||||
// Verify orphan chunk is gone
|
||||
c, err := cleanedRepos.Chunks.GetByHash(ctx, chunk.ChunkHash)
|
||||
if err != nil {
|
||||
t.Fatalf("failed to check chunk: %v", err)
|
||||
}
|
||||
if c != nil {
|
||||
t.Error("orphan chunk should not exist")
|
||||
}
|
||||
}
|
||||
|
||||
func TestCleanSnapshotDBNonExistentSnapshot(t *testing.T) {
|
||||
// Initialize logger
|
||||
log.Initialize(log.Config{})
|
||||
|
||||
ctx := context.Background()
|
||||
|
||||
// Create a test database
|
||||
tempDir := t.TempDir()
|
||||
dbPath := filepath.Join(tempDir, "test.db")
|
||||
db, err := database.New(ctx, dbPath)
|
||||
if err != nil {
|
||||
t.Fatalf("failed to create database: %v", err)
|
||||
}
|
||||
|
||||
// Close immediately
|
||||
if err := db.Close(); err != nil {
|
||||
t.Fatalf("failed to close database: %v", err)
|
||||
}
|
||||
|
||||
// Copy database
|
||||
tempDBPath := filepath.Join(tempDir, "temp.db")
|
||||
if err := copyFile(dbPath, tempDBPath); err != nil {
|
||||
t.Fatalf("failed to copy database: %v", err)
|
||||
}
|
||||
|
||||
// Create a mock config for testing
|
||||
cfg := &config.Config{
|
||||
CompressionLevel: 3,
|
||||
AgeRecipients: []string{testAgeRecipient},
|
||||
}
|
||||
// Try to clean with non-existent snapshot
|
||||
sm := &SnapshotManager{config: cfg}
|
||||
_, err = sm.cleanSnapshotDB(ctx, tempDBPath, "non-existent-snapshot")
|
||||
|
||||
// Should not error - it will just delete everything
|
||||
if err != nil {
|
||||
t.Fatalf("unexpected error: %v", err)
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user