Major refactoring: Updated manifest format and renamed backup to snapshot
- Created manifest.go with a proper Manifest structure including blob sizes - Updated manifest generation to include the compressed size for each blob - Added a TotalCompressedSize field to the manifest for quick access - Renamed the backup package to snapshot for clarity - Updated snapshot list to show all remote snapshots - For remote snapshots not in the local DB, the manifest is fetched to determine their size - Local snapshots not present in the remote are automatically deleted - Removed backwards-compatibility code (pre-1.0, no users) - Fixed the prune command to use the new manifest format - Updated all imports and references from backup to snapshot
This commit is contained in:
532
internal/snapshot/backup_test.go
Normal file
532
internal/snapshot/backup_test.go
Normal file
@@ -0,0 +1,532 @@
|
||||
package snapshot
|
||||
|
||||
import (
|
||||
"context"
|
||||
"crypto/sha256"
|
||||
"database/sql"
|
||||
"fmt"
|
||||
"io"
|
||||
"io/fs"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"testing"
|
||||
"testing/fstest"
|
||||
"time"
|
||||
|
||||
"git.eeqj.de/sneak/vaultik/internal/database"
|
||||
)
|
||||
|
||||
// MockS3Client is a mock implementation of S3 operations for testing
|
||||
type MockS3Client struct {
|
||||
storage map[string][]byte
|
||||
}
|
||||
|
||||
func NewMockS3Client() *MockS3Client {
|
||||
return &MockS3Client{
|
||||
storage: make(map[string][]byte),
|
||||
}
|
||||
}
|
||||
|
||||
func (m *MockS3Client) PutBlob(ctx context.Context, hash string, data []byte) error {
|
||||
m.storage[hash] = data
|
||||
return nil
|
||||
}
|
||||
|
||||
func (m *MockS3Client) GetBlob(ctx context.Context, hash string) ([]byte, error) {
|
||||
data, ok := m.storage[hash]
|
||||
if !ok {
|
||||
return nil, fmt.Errorf("blob not found: %s", hash)
|
||||
}
|
||||
return data, nil
|
||||
}
|
||||
|
||||
func (m *MockS3Client) BlobExists(ctx context.Context, hash string) (bool, error) {
|
||||
_, ok := m.storage[hash]
|
||||
return ok, nil
|
||||
}
|
||||
|
||||
func (m *MockS3Client) CreateBucket(ctx context.Context, bucket string) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
// TestBackupWithInMemoryFS runs a full backup of an fstest.MapFS through the
// test-local BackupEngine and verifies that files, chunks, and blobs all end
// up recorded in the database and uploaded to the mock S3 store.
func TestBackupWithInMemoryFS(t *testing.T) {
	// Create a temporary directory for the database
	tempDir := t.TempDir()
	dbPath := filepath.Join(tempDir, "test.db")

	// Create test filesystem: three small files plus one 10MB file so that
	// multi-chunk handling is exercised (defaultChunkSize is 1MB).
	testFS := fstest.MapFS{
		"file1.txt": &fstest.MapFile{
			Data:    []byte("Hello, World!"),
			Mode:    0644,
			ModTime: time.Now(),
		},
		"dir1/file2.txt": &fstest.MapFile{
			Data:    []byte("This is a test file with some content."),
			Mode:    0755,
			ModTime: time.Now(),
		},
		"dir1/subdir/file3.txt": &fstest.MapFile{
			Data:    []byte("Another file in a subdirectory."),
			Mode:    0600,
			ModTime: time.Now(),
		},
		"largefile.bin": &fstest.MapFile{
			Data:    generateLargeFileContent(10 * 1024 * 1024), // 10MB file with varied content
			Mode:    0644,
			ModTime: time.Now(),
		},
	}

	// Initialize the database
	ctx := context.Background()
	db, err := database.New(ctx, dbPath)
	if err != nil {
		t.Fatalf("Failed to create database: %v", err)
	}
	defer func() {
		if err := db.Close(); err != nil {
			t.Logf("Failed to close database: %v", err)
		}
	}()

	repos := database.NewRepositories(db)

	// Create mock S3 client
	s3Client := NewMockS3Client()

	// Run backup
	backupEngine := &BackupEngine{
		repos:    repos,
		s3Client: s3Client,
	}

	snapshotID, err := backupEngine.Backup(ctx, testFS, ".")
	if err != nil {
		t.Fatalf("Backup failed: %v", err)
	}

	// Verify snapshot was created
	snapshot, err := repos.Snapshots.GetByID(ctx, snapshotID)
	if err != nil {
		t.Fatalf("Failed to get snapshot: %v", err)
	}

	if snapshot == nil {
		t.Fatal("Snapshot not found")
	}

	if snapshot.FileCount == 0 {
		t.Error("Expected snapshot to have files")
	}

	// Verify files in database: every file walked must be present, with
	// size and mode matching the in-memory filesystem.
	files, err := repos.Files.ListByPrefix(ctx, "")
	if err != nil {
		t.Fatalf("Failed to list files: %v", err)
	}

	expectedFiles := map[string]bool{
		"file1.txt":             true,
		"dir1/file2.txt":        true,
		"dir1/subdir/file3.txt": true,
		"largefile.bin":         true,
	}

	if len(files) != len(expectedFiles) {
		t.Errorf("Expected %d files, got %d", len(expectedFiles), len(files))
	}

	for _, file := range files {
		if !expectedFiles[file.Path] {
			t.Errorf("Unexpected file in database: %s", file.Path)
		}
		// Remove matched entries so leftovers can be reported below.
		delete(expectedFiles, file.Path)

		// Verify file metadata
		fsFile := testFS[file.Path]
		if fsFile == nil {
			t.Errorf("File %s not found in test filesystem", file.Path)
			continue
		}

		if file.Size != int64(len(fsFile.Data)) {
			t.Errorf("File %s: expected size %d, got %d", file.Path, len(fsFile.Data), file.Size)
		}

		if file.Mode != uint32(fsFile.Mode) {
			t.Errorf("File %s: expected mode %o, got %o", file.Path, fsFile.Mode, file.Mode)
		}
	}

	if len(expectedFiles) > 0 {
		t.Errorf("Files not found in database: %v", expectedFiles)
	}

	// Verify chunks
	chunks, err := repos.Chunks.List(ctx)
	if err != nil {
		t.Fatalf("Failed to list chunks: %v", err)
	}

	if len(chunks) == 0 {
		t.Error("No chunks found in database")
	}

	// The large file should create 10 chunks (10MB / 1MB chunk size)
	// Plus the small files
	minExpectedChunks := 10 + 3
	if len(chunks) < minExpectedChunks {
		t.Errorf("Expected at least %d chunks, got %d", minExpectedChunks, len(chunks))
	}

	// Verify at least one blob was created and uploaded
	// We can't list blobs directly, but we can check via snapshot blobs
	blobHashes, err := repos.Snapshots.GetBlobHashes(ctx, snapshotID)
	if err != nil {
		t.Fatalf("Failed to get blob hashes: %v", err)
	}
	if len(blobHashes) == 0 {
		t.Error("Expected at least one blob to be created")
	}

	for _, blobHash := range blobHashes {
		// Check blob exists in mock S3
		exists, err := s3Client.BlobExists(ctx, blobHash)
		if err != nil {
			t.Errorf("Failed to check blob %s: %v", blobHash, err)
		}
		if !exists {
			t.Errorf("Blob %s not found in S3", blobHash)
		}
	}
}
|
||||
|
||||
// TestBackupDeduplication backs up two files with identical content and one
// unique file, and verifies that identical content yields a single shared
// chunk referenced by both files.
//
// NOTE(review): the expectation of exactly 2 chunks assumes each file is
// smaller than defaultChunkSize, so each file maps to exactly one chunk.
func TestBackupDeduplication(t *testing.T) {
	// Create a temporary directory for the database
	tempDir := t.TempDir()
	dbPath := filepath.Join(tempDir, "test.db")

	// Create test filesystem with duplicate content
	testFS := fstest.MapFS{
		"file1.txt": &fstest.MapFile{
			Data:    []byte("Duplicate content"),
			Mode:    0644,
			ModTime: time.Now(),
		},
		"file2.txt": &fstest.MapFile{
			Data:    []byte("Duplicate content"),
			Mode:    0644,
			ModTime: time.Now(),
		},
		"file3.txt": &fstest.MapFile{
			Data:    []byte("Unique content"),
			Mode:    0644,
			ModTime: time.Now(),
		},
	}

	// Initialize the database
	ctx := context.Background()
	db, err := database.New(ctx, dbPath)
	if err != nil {
		t.Fatalf("Failed to create database: %v", err)
	}
	defer func() {
		if err := db.Close(); err != nil {
			t.Logf("Failed to close database: %v", err)
		}
	}()

	repos := database.NewRepositories(db)

	// Create mock S3 client
	s3Client := NewMockS3Client()

	// Run backup
	backupEngine := &BackupEngine{
		repos:    repos,
		s3Client: s3Client,
	}

	_, err = backupEngine.Backup(ctx, testFS, ".")
	if err != nil {
		t.Fatalf("Backup failed: %v", err)
	}

	// Verify deduplication
	chunks, err := repos.Chunks.List(ctx)
	if err != nil {
		t.Fatalf("Failed to list chunks: %v", err)
	}

	// Should have only 2 unique chunks (duplicate content + unique content)
	if len(chunks) != 2 {
		t.Errorf("Expected 2 unique chunks, got %d", len(chunks))
	}

	// Verify chunk references
	for _, chunk := range chunks {
		files, err := repos.ChunkFiles.GetByChunkHash(ctx, chunk.ChunkHash)
		if err != nil {
			t.Errorf("Failed to get files for chunk %s: %v", chunk.ChunkHash, err)
		}

		// The duplicate content chunk should be referenced by 2 files
		// (identified here by its size matching the duplicated payload).
		if chunk.Size == int64(len("Duplicate content")) && len(files) != 2 {
			t.Errorf("Expected duplicate chunk to be referenced by 2 files, got %d", len(files))
		}
	}
}
|
||||
|
||||
// BackupEngine performs backup operations
//
// This is a test-local stand-in for the production engine: it walks a
// filesystem, records files/chunks in the repositories, and uploads blobs
// through the minimal S3 interface below (satisfied by MockS3Client).
type BackupEngine struct {
	// repos provides access to the database repositories used to record
	// files, chunks, blobs, and snapshots.
	repos *database.Repositories
	// s3Client is the subset of S3 operations Backup needs; keeping it an
	// inline interface lets tests substitute any compatible mock.
	s3Client interface {
		PutBlob(ctx context.Context, hash string, data []byte) error
		BlobExists(ctx context.Context, hash string) (bool, error)
	}
}
|
||||
|
||||
// Backup performs a backup of the given filesystem
//
// It walks fsys starting at root, records a File row and per-chunk rows for
// every regular file, then creates one blob per newly-seen chunk, uploads it
// via s3Client, and finally updates the snapshot's counters. It returns the
// new snapshot's ID.
//
// NOTE(review): processedChunks only collects chunks created during THIS run,
// so chunks that already existed in the database get no new blob here —
// presumably intentional for dedup, but worth confirming against the
// production engine.
func (b *BackupEngine) Backup(ctx context.Context, fsys fs.FS, root string) (string, error) {
	// Create a new snapshot. The RFC3339 timestamp doubles as the ID.
	hostname, _ := os.Hostname() // best-effort; empty hostname is acceptable in tests
	snapshotID := time.Now().Format(time.RFC3339)
	snapshot := &database.Snapshot{
		ID:             snapshotID,
		Hostname:       hostname,
		VaultikVersion: "test",
		StartedAt:      time.Now(),
		CompletedAt:    nil,
	}

	// Create initial snapshot record
	err := b.repos.WithTx(ctx, func(ctx context.Context, tx *sql.Tx) error {
		return b.repos.Snapshots.Create(ctx, tx, snapshot)
	})
	if err != nil {
		return "", err
	}

	// Track counters
	var fileCount, chunkCount, blobCount, totalSize, blobSize int64

	// Track which chunks we've seen to handle deduplication
	processedChunks := make(map[string]bool)

	// Scan the filesystem and process files
	err = fs.WalkDir(fsys, root, func(path string, d fs.DirEntry, err error) error {
		if err != nil {
			return err
		}

		// Skip directories
		if d.IsDir() {
			return nil
		}

		// Get file info
		info, err := d.Info()
		if err != nil {
			return err
		}

		// Handle symlinks
		if info.Mode()&fs.ModeSymlink != 0 {
			// For testing, we'll skip symlinks since fstest doesn't support them well
			return nil
		}

		// Create file record in a short transaction
		file := &database.File{
			Path:  path,
			Size:  info.Size(),
			Mode:  uint32(info.Mode()),
			MTime: info.ModTime(),
			CTime: info.ModTime(), // Use mtime as ctime for test
			UID:   1000,           // Default UID for test
			GID:   1000,           // Default GID for test
		}
		err = b.repos.WithTx(ctx, func(ctx context.Context, tx *sql.Tx) error {
			return b.repos.Files.Create(ctx, tx, file)
		})
		if err != nil {
			return err
		}

		fileCount++
		totalSize += info.Size()

		// Read and process file in chunks
		f, err := fsys.Open(path)
		if err != nil {
			return err
		}
		// This defer fires when the per-file walk callback returns, so each
		// file is closed before the next one is opened.
		defer func() {
			if err := f.Close(); err != nil {
				// Log but don't fail since we're already in an error path potentially
				fmt.Fprintf(os.Stderr, "Failed to close file: %v\n", err)
			}
		}()

		// Process file in chunks of fixed size (defaultChunkSize).
		chunkIndex := 0
		buffer := make([]byte, defaultChunkSize)

		for {
			n, err := f.Read(buffer)
			if err != nil && err != io.EOF {
				return err
			}
			if n == 0 {
				break
			}

			chunkData := buffer[:n]
			chunkHash := calculateHash(chunkData)

			// Check if chunk already exists (outside of transaction)
			existingChunk, _ := b.repos.Chunks.GetByHash(ctx, chunkHash)
			if existingChunk == nil {
				// Create new chunk in a short transaction
				err = b.repos.WithTx(ctx, func(ctx context.Context, tx *sql.Tx) error {
					chunk := &database.Chunk{
						ChunkHash: chunkHash,
						Size:      int64(n),
					}
					return b.repos.Chunks.Create(ctx, tx, chunk)
				})
				if err != nil {
					return err
				}
				processedChunks[chunkHash] = true
			}

			// Create file-chunk mapping in a short transaction
			err = b.repos.WithTx(ctx, func(ctx context.Context, tx *sql.Tx) error {
				fileChunk := &database.FileChunk{
					FileID:    file.ID,
					Idx:       chunkIndex,
					ChunkHash: chunkHash,
				}
				return b.repos.FileChunks.Create(ctx, tx, fileChunk)
			})
			if err != nil {
				return err
			}

			// Create chunk-file mapping in a short transaction
			err = b.repos.WithTx(ctx, func(ctx context.Context, tx *sql.Tx) error {
				chunkFile := &database.ChunkFile{
					ChunkHash: chunkHash,
					FileID:    file.ID,
					// NOTE(review): int multiplication before the int64
					// conversion could overflow on 32-bit platforms for
					// files > ~2GB; fine for these tests.
					FileOffset: int64(chunkIndex * defaultChunkSize),
					Length:     int64(n),
				}
				return b.repos.ChunkFiles.Create(ctx, tx, chunkFile)
			})
			if err != nil {
				return err
			}

			chunkIndex++
		}

		return nil
	})

	if err != nil {
		return "", err
	}

	// After all files are processed, create blobs for new chunks
	for chunkHash := range processedChunks {
		// Get chunk data (outside of transaction)
		chunk, err := b.repos.Chunks.GetByHash(ctx, chunkHash)
		if err != nil {
			return "", err
		}

		chunkCount++

		// In a real system, blobs would contain multiple chunks and be encrypted
		// For testing, we'll create a blob with a "blob-" prefix to differentiate
		blobHash := "blob-" + chunkHash

		// For the test, we'll create dummy data since we don't have the original
		dummyData := []byte(chunkHash)

		// Upload to S3 as a blob
		if err := b.s3Client.PutBlob(ctx, blobHash, dummyData); err != nil {
			return "", err
		}

		// Create blob entry in a short transaction
		err = b.repos.WithTx(ctx, func(ctx context.Context, tx *sql.Tx) error {
			blob := &database.Blob{
				ID:        "test-blob-" + blobHash[:8],
				Hash:      blobHash,
				CreatedTS: time.Now(),
			}
			return b.repos.Blobs.Create(ctx, tx, blob)
		})
		if err != nil {
			return "", err
		}

		blobCount++
		blobSize += chunk.Size

		// Create blob-chunk mapping in a short transaction
		err = b.repos.WithTx(ctx, func(ctx context.Context, tx *sql.Tx) error {
			blobChunk := &database.BlobChunk{
				BlobID:    "test-blob-" + blobHash[:8],
				ChunkHash: chunkHash,
				Offset:    0,
				Length:    chunk.Size,
			}
			return b.repos.BlobChunks.Create(ctx, tx, blobChunk)
		})
		if err != nil {
			return "", err
		}

		// Add blob to snapshot in a short transaction
		err = b.repos.WithTx(ctx, func(ctx context.Context, tx *sql.Tx) error {
			return b.repos.Snapshots.AddBlob(ctx, tx, snapshotID, "test-blob-"+blobHash[:8], blobHash)
		})
		if err != nil {
			return "", err
		}
	}

	// Update snapshot with final counts
	err = b.repos.WithTx(ctx, func(ctx context.Context, tx *sql.Tx) error {
		return b.repos.Snapshots.UpdateCounts(ctx, tx, snapshotID, fileCount, chunkCount, blobCount, totalSize, blobSize)
	})

	if err != nil {
		return "", err
	}

	return snapshotID, nil
}
|
||||
|
||||
// calculateHash returns the lowercase hex-encoded SHA-256 digest of data.
func calculateHash(data []byte) string {
	// One-shot sha256.Sum256 is the idiomatic form here and avoids
	// allocating a hash.Hash for a single Write/Sum cycle.
	return fmt.Sprintf("%x", sha256.Sum256(data))
}
|
||||
|
||||
// generateLargeFileContent builds a size-byte buffer whose byte pattern
// shifts by one at every chunk boundary, so no two 1MB chunks are identical
// and chunk-level deduplication cannot collapse the test data.
func generateLargeFileContent(size int) []byte {
	buf := make([]byte, size)
	for i := range buf {
		// Offset each byte by its chunk index: within chunk k the value
		// at position i is (i + k) mod 256.
		buf[i] = byte((i + i/defaultChunkSize) % 256)
	}
	return buf
}

// defaultChunkSize is the fixed chunk size used when splitting files (1MB).
const defaultChunkSize = 1024 * 1024
|
||||
Reference in New Issue
Block a user