Major refactoring: UUID-based storage, streaming architecture, and CLI improvements

This commit represents a significant architectural overhaul of vaultik:

Database Schema Changes:
- Switch files table to use UUID primary keys instead of path-based keys
- Add UUID primary keys to blobs table for immediate chunk association
- Update all foreign key relationships to use UUIDs
- Add comprehensive schema documentation in DATAMODEL.md
- Add SQLite busy timeout handling for concurrent operations

Streaming and Performance Improvements:
- Implement true streaming blob packing without intermediate storage
- Add streaming chunk processing to reduce memory usage
- Improve progress reporting with real-time metrics
- Add upload metrics tracking in new uploads table

CLI Refactoring:
- Restructure CLI to use subcommands: snapshot create/list/purge/verify
- Add store info command for S3 configuration display
- Add custom duration parser supporting days/weeks/months/years
- Remove old backup.go in favor of enhanced snapshot.go
- Add --cron flag for silent operation

Configuration Changes:
- Remove unused index_prefix configuration option
- Add support for snapshot pruning retention policies
- Improve configuration validation and error messages

Testing Improvements:
- Add comprehensive repository tests with edge cases
- Add cascade delete debugging tests
- Fix concurrent operation tests to use SQLite busy timeout
- Remove tolerance for SQLITE_BUSY errors in tests

Documentation:
- Add MIT LICENSE file
- Update README with new command structure
- Add comprehensive DATAMODEL.md explaining database schema
- Update DESIGN.md with UUID-based architecture

Other Changes:
- Add test-config.yml for testing
- Update Makefile with better test output formatting
- Fix various race conditions in concurrent operations
- Improve error handling throughout
This commit is contained in:
2025-07-22 14:54:37 +02:00
parent 86b533d6ee
commit 78af626759
54 changed files with 5525 additions and 1109 deletions

View File

@@ -24,7 +24,7 @@ func (r *BlobChunkRepository) Create(ctx context.Context, tx *sql.Tx, bc *BlobCh
if tx != nil {
_, err = tx.ExecContext(ctx, query, bc.BlobID, bc.ChunkHash, bc.Offset, bc.Length)
} else {
_, err = r.db.ExecWithLock(ctx, query, bc.BlobID, bc.ChunkHash, bc.Offset, bc.Length)
_, err = r.db.ExecWithLog(ctx, query, bc.BlobID, bc.ChunkHash, bc.Offset, bc.Length)
}
if err != nil {

View File

@@ -2,7 +2,9 @@ package database
import (
"context"
"strings"
"testing"
"time"
)
func TestBlobChunkRepository(t *testing.T) {
@@ -10,78 +12,112 @@ func TestBlobChunkRepository(t *testing.T) {
defer cleanup()
ctx := context.Background()
repo := NewBlobChunkRepository(db)
repos := NewRepositories(db)
// Create blob first
blob := &Blob{
ID: "blob1-uuid",
Hash: "blob1-hash",
CreatedTS: time.Now(),
}
err := repos.Blobs.Create(ctx, nil, blob)
if err != nil {
t.Fatalf("failed to create blob: %v", err)
}
// Create chunks
chunks := []string{"chunk1", "chunk2", "chunk3"}
for _, chunkHash := range chunks {
chunk := &Chunk{
ChunkHash: chunkHash,
SHA256: chunkHash + "-sha",
Size: 1024,
}
err = repos.Chunks.Create(ctx, nil, chunk)
if err != nil {
t.Fatalf("failed to create chunk %s: %v", chunkHash, err)
}
}
// Test Create
bc1 := &BlobChunk{
BlobID: "blob1-uuid",
BlobID: blob.ID,
ChunkHash: "chunk1",
Offset: 0,
Length: 1024,
}
err := repo.Create(ctx, nil, bc1)
err = repos.BlobChunks.Create(ctx, nil, bc1)
if err != nil {
t.Fatalf("failed to create blob chunk: %v", err)
}
// Add more chunks to the same blob
bc2 := &BlobChunk{
BlobID: "blob1-uuid",
BlobID: blob.ID,
ChunkHash: "chunk2",
Offset: 1024,
Length: 2048,
}
err = repo.Create(ctx, nil, bc2)
err = repos.BlobChunks.Create(ctx, nil, bc2)
if err != nil {
t.Fatalf("failed to create second blob chunk: %v", err)
}
bc3 := &BlobChunk{
BlobID: "blob1-uuid",
BlobID: blob.ID,
ChunkHash: "chunk3",
Offset: 3072,
Length: 512,
}
err = repo.Create(ctx, nil, bc3)
err = repos.BlobChunks.Create(ctx, nil, bc3)
if err != nil {
t.Fatalf("failed to create third blob chunk: %v", err)
}
// Test GetByBlobID
chunks, err := repo.GetByBlobID(ctx, "blob1-uuid")
blobChunks, err := repos.BlobChunks.GetByBlobID(ctx, blob.ID)
if err != nil {
t.Fatalf("failed to get blob chunks: %v", err)
}
if len(chunks) != 3 {
t.Errorf("expected 3 chunks, got %d", len(chunks))
if len(blobChunks) != 3 {
t.Errorf("expected 3 chunks, got %d", len(blobChunks))
}
// Verify order by offset
expectedOffsets := []int64{0, 1024, 3072}
for i, chunk := range chunks {
if chunk.Offset != expectedOffsets[i] {
t.Errorf("wrong chunk order: expected offset %d, got %d", expectedOffsets[i], chunk.Offset)
for i, bc := range blobChunks {
if bc.Offset != expectedOffsets[i] {
t.Errorf("wrong chunk order: expected offset %d, got %d", expectedOffsets[i], bc.Offset)
}
}
// Test GetByChunkHash
bc, err := repo.GetByChunkHash(ctx, "chunk2")
bc, err := repos.BlobChunks.GetByChunkHash(ctx, "chunk2")
if err != nil {
t.Fatalf("failed to get blob chunk by chunk hash: %v", err)
}
if bc == nil {
t.Fatal("expected blob chunk, got nil")
}
if bc.BlobID != "blob1-uuid" {
t.Errorf("wrong blob ID: expected blob1-uuid, got %s", bc.BlobID)
if bc.BlobID != blob.ID {
t.Errorf("wrong blob ID: expected %s, got %s", blob.ID, bc.BlobID)
}
if bc.Offset != 1024 {
t.Errorf("wrong offset: expected 1024, got %d", bc.Offset)
}
// Test duplicate insert (should fail due to primary key constraint)
err = repos.BlobChunks.Create(ctx, nil, bc1)
if err == nil {
t.Fatal("duplicate blob_chunk insert should fail due to primary key constraint")
}
if !strings.Contains(err.Error(), "UNIQUE") && !strings.Contains(err.Error(), "constraint") {
t.Fatalf("expected constraint error, got: %v", err)
}
// Test non-existent chunk
bc, err = repo.GetByChunkHash(ctx, "nonexistent")
bc, err = repos.BlobChunks.GetByChunkHash(ctx, "nonexistent")
if err != nil {
t.Fatalf("unexpected error: %v", err)
}
@@ -95,26 +131,61 @@ func TestBlobChunkRepositoryMultipleBlobs(t *testing.T) {
defer cleanup()
ctx := context.Background()
repo := NewBlobChunkRepository(db)
repos := NewRepositories(db)
// Create blobs
blob1 := &Blob{
ID: "blob1-uuid",
Hash: "blob1-hash",
CreatedTS: time.Now(),
}
blob2 := &Blob{
ID: "blob2-uuid",
Hash: "blob2-hash",
CreatedTS: time.Now(),
}
err := repos.Blobs.Create(ctx, nil, blob1)
if err != nil {
t.Fatalf("failed to create blob1: %v", err)
}
err = repos.Blobs.Create(ctx, nil, blob2)
if err != nil {
t.Fatalf("failed to create blob2: %v", err)
}
// Create chunks
chunkHashes := []string{"chunk1", "chunk2", "chunk3"}
for _, chunkHash := range chunkHashes {
chunk := &Chunk{
ChunkHash: chunkHash,
SHA256: chunkHash + "-sha",
Size: 1024,
}
err = repos.Chunks.Create(ctx, nil, chunk)
if err != nil {
t.Fatalf("failed to create chunk %s: %v", chunkHash, err)
}
}
// Create chunks across multiple blobs
// Some chunks are shared between blobs (deduplication scenario)
blobChunks := []BlobChunk{
{BlobID: "blob1-uuid", ChunkHash: "chunk1", Offset: 0, Length: 1024},
{BlobID: "blob1-uuid", ChunkHash: "chunk2", Offset: 1024, Length: 1024},
{BlobID: "blob2-uuid", ChunkHash: "chunk2", Offset: 0, Length: 1024}, // chunk2 is shared
{BlobID: "blob2-uuid", ChunkHash: "chunk3", Offset: 1024, Length: 1024},
{BlobID: blob1.ID, ChunkHash: "chunk1", Offset: 0, Length: 1024},
{BlobID: blob1.ID, ChunkHash: "chunk2", Offset: 1024, Length: 1024},
{BlobID: blob2.ID, ChunkHash: "chunk2", Offset: 0, Length: 1024}, // chunk2 is shared
{BlobID: blob2.ID, ChunkHash: "chunk3", Offset: 1024, Length: 1024},
}
for _, bc := range blobChunks {
err := repo.Create(ctx, nil, &bc)
err := repos.BlobChunks.Create(ctx, nil, &bc)
if err != nil {
t.Fatalf("failed to create blob chunk: %v", err)
}
}
// Verify blob1 chunks
chunks, err := repo.GetByBlobID(ctx, "blob1-uuid")
chunks, err := repos.BlobChunks.GetByBlobID(ctx, blob1.ID)
if err != nil {
t.Fatalf("failed to get blob1 chunks: %v", err)
}
@@ -123,7 +194,7 @@ func TestBlobChunkRepositoryMultipleBlobs(t *testing.T) {
}
// Verify blob2 chunks
chunks, err = repo.GetByBlobID(ctx, "blob2-uuid")
chunks, err = repos.BlobChunks.GetByBlobID(ctx, blob2.ID)
if err != nil {
t.Fatalf("failed to get blob2 chunks: %v", err)
}
@@ -132,7 +203,7 @@ func TestBlobChunkRepositoryMultipleBlobs(t *testing.T) {
}
// Verify shared chunk
bc, err := repo.GetByChunkHash(ctx, "chunk2")
bc, err := repos.BlobChunks.GetByChunkHash(ctx, "chunk2")
if err != nil {
t.Fatalf("failed to get shared chunk: %v", err)
}
@@ -140,7 +211,7 @@ func TestBlobChunkRepositoryMultipleBlobs(t *testing.T) {
t.Fatal("expected shared chunk, got nil")
}
// GetByChunkHash returns first match, should be blob1
if bc.BlobID != "blob1-uuid" {
t.Errorf("expected blob1-uuid for shared chunk, got %s", bc.BlobID)
if bc.BlobID != blob1.ID {
t.Errorf("expected %s for shared chunk, got %s", blob1.ID, bc.BlobID)
}
}

View File

@@ -5,6 +5,8 @@ import (
"database/sql"
"fmt"
"time"
"git.eeqj.de/sneak/vaultik/internal/log"
)
type BlobRepository struct {
@@ -36,7 +38,7 @@ func (r *BlobRepository) Create(ctx context.Context, tx *sql.Tx, blob *Blob) err
_, err = tx.ExecContext(ctx, query, blob.ID, blob.Hash, blob.CreatedTS.Unix(),
finishedTS, blob.UncompressedSize, blob.CompressedSize, uploadedTS)
} else {
_, err = r.db.ExecWithLock(ctx, query, blob.ID, blob.Hash, blob.CreatedTS.Unix(),
_, err = r.db.ExecWithLog(ctx, query, blob.ID, blob.Hash, blob.CreatedTS.Unix(),
finishedTS, blob.UncompressedSize, blob.CompressedSize, uploadedTS)
}
@@ -75,13 +77,13 @@ func (r *BlobRepository) GetByHash(ctx context.Context, hash string) (*Blob, err
return nil, fmt.Errorf("querying blob: %w", err)
}
blob.CreatedTS = time.Unix(createdTSUnix, 0)
blob.CreatedTS = time.Unix(createdTSUnix, 0).UTC()
if finishedTSUnix.Valid {
ts := time.Unix(finishedTSUnix.Int64, 0)
ts := time.Unix(finishedTSUnix.Int64, 0).UTC()
blob.FinishedTS = &ts
}
if uploadedTSUnix.Valid {
ts := time.Unix(uploadedTSUnix.Int64, 0)
ts := time.Unix(uploadedTSUnix.Int64, 0).UTC()
blob.UploadedTS = &ts
}
return &blob, nil
@@ -116,13 +118,13 @@ func (r *BlobRepository) GetByID(ctx context.Context, id string) (*Blob, error)
return nil, fmt.Errorf("querying blob: %w", err)
}
blob.CreatedTS = time.Unix(createdTSUnix, 0)
blob.CreatedTS = time.Unix(createdTSUnix, 0).UTC()
if finishedTSUnix.Valid {
ts := time.Unix(finishedTSUnix.Int64, 0)
ts := time.Unix(finishedTSUnix.Int64, 0).UTC()
blob.FinishedTS = &ts
}
if uploadedTSUnix.Valid {
ts := time.Unix(uploadedTSUnix.Int64, 0)
ts := time.Unix(uploadedTSUnix.Int64, 0).UTC()
blob.UploadedTS = &ts
}
return &blob, nil
@@ -136,12 +138,12 @@ func (r *BlobRepository) UpdateFinished(ctx context.Context, tx *sql.Tx, id stri
WHERE id = ?
`
now := time.Now().Unix()
now := time.Now().UTC().Unix()
var err error
if tx != nil {
_, err = tx.ExecContext(ctx, query, hash, now, uncompressedSize, compressedSize, id)
} else {
_, err = r.db.ExecWithLock(ctx, query, hash, now, uncompressedSize, compressedSize, id)
_, err = r.db.ExecWithLog(ctx, query, hash, now, uncompressedSize, compressedSize, id)
}
if err != nil {
@@ -159,12 +161,12 @@ func (r *BlobRepository) UpdateUploaded(ctx context.Context, tx *sql.Tx, id stri
WHERE id = ?
`
now := time.Now().Unix()
now := time.Now().UTC().Unix()
var err error
if tx != nil {
_, err = tx.ExecContext(ctx, query, now, id)
} else {
_, err = r.db.ExecWithLock(ctx, query, now, id)
_, err = r.db.ExecWithLog(ctx, query, now, id)
}
if err != nil {
@@ -173,3 +175,26 @@ func (r *BlobRepository) UpdateUploaded(ctx context.Context, tx *sql.Tx, id stri
return nil
}
// DeleteOrphaned deletes blobs that are not referenced by any snapshot.
// Orphans appear after snapshots are deleted or pruned; this reclaims
// their index rows in a single anti-join DELETE statement so no rows
// need to be loaded into Go. Safe to call when there is nothing to delete.
func (r *BlobRepository) DeleteOrphaned(ctx context.Context) error {
query := `
DELETE FROM blobs
WHERE NOT EXISTS (
SELECT 1 FROM snapshot_blobs
WHERE snapshot_blobs.blob_id = blobs.id
)
`
result, err := r.db.ExecWithLog(ctx, query)
if err != nil {
return fmt.Errorf("deleting orphaned blobs: %w", err)
}
// The RowsAffected error is deliberately ignored: the count is only
// used for debug logging and the DELETE itself already succeeded.
rowsAffected, _ := result.RowsAffected()
if rowsAffected > 0 {
log.Debug("Deleted orphaned blobs", "count", rowsAffected)
}
return nil
}

View File

@@ -0,0 +1,124 @@
package database
import (
"context"
"fmt"
"testing"
"time"
)
// TestCascadeDeleteDebug tests cascade delete with debug output.
// It creates one file with three file_chunk rows, deletes the file, and
// checks that the file_chunks rows disappear with it, logging the
// foreign-key pragma state and table DDL along the way to aid debugging
// when the cascade does not fire.
func TestCascadeDeleteDebug(t *testing.T) {
db, cleanup := setupTestDB(t)
defer cleanup()
ctx := context.Background()
repos := NewRepositories(db)
// Check if foreign keys are enabled; cascade behavior depends on
// PRAGMA foreign_keys being 1 on this connection.
var fkEnabled int
err := db.conn.QueryRow("PRAGMA foreign_keys").Scan(&fkEnabled)
if err != nil {
t.Fatal(err)
}
t.Logf("Foreign keys enabled: %d", fkEnabled)
// Create a file
file := &File{
Path: "/cascade-test.txt",
MTime: time.Now().Truncate(time.Second),
CTime: time.Now().Truncate(time.Second),
Size: 1024,
Mode: 0644,
UID: 1000,
GID: 1000,
}
err = repos.Files.Create(ctx, nil, file)
if err != nil {
t.Fatalf("failed to create file: %v", err)
}
t.Logf("Created file with ID: %s", file.ID)
// Create chunks and file-chunk mappings referencing the file by ID.
for i := 0; i < 3; i++ {
chunk := &Chunk{
ChunkHash: fmt.Sprintf("cascade-chunk-%d", i),
SHA256: fmt.Sprintf("cascade-sha-%d", i),
Size: 1024,
}
err = repos.Chunks.Create(ctx, nil, chunk)
if err != nil {
t.Fatalf("failed to create chunk: %v", err)
}
fc := &FileChunk{
FileID: file.ID,
Idx: i,
ChunkHash: chunk.ChunkHash,
}
err = repos.FileChunks.Create(ctx, nil, fc)
if err != nil {
t.Fatalf("failed to create file chunk: %v", err)
}
t.Logf("Created file chunk mapping: file_id=%s, idx=%d, chunk=%s", fc.FileID, fc.Idx, fc.ChunkHash)
}
// Verify file chunks exist before the delete so a later zero count is
// meaningful.
fileChunks, err := repos.FileChunks.GetByFileID(ctx, file.ID)
if err != nil {
t.Fatal(err)
}
t.Logf("File chunks before delete: %d", len(fileChunks))
// Dump the file_chunks table DDL from sqlite_master so the foreign-key
// clause (and whether it declares a cascade) is visible in the test log.
var fkInfo string
err = db.conn.QueryRow(`
SELECT sql FROM sqlite_master
WHERE type='table' AND name='file_chunks'
`).Scan(&fkInfo)
if err != nil {
t.Fatal(err)
}
t.Logf("file_chunks table definition:\n%s", fkInfo)
// Delete the file
t.Log("Deleting file...")
err = repos.Files.DeleteByID(ctx, nil, file.ID)
if err != nil {
t.Fatalf("failed to delete file: %v", err)
}
// Verify file is gone
deletedFile, err := repos.Files.GetByID(ctx, file.ID)
if err != nil {
t.Fatal(err)
}
if deletedFile != nil {
t.Error("file should have been deleted")
} else {
t.Log("File was successfully deleted")
}
// Check file chunks after delete via the repository layer.
fileChunks, err = repos.FileChunks.GetByFileID(ctx, file.ID)
if err != nil {
t.Fatal(err)
}
t.Logf("File chunks after delete: %d", len(fileChunks))
// Manually check the database with a raw COUNT query to rule out any
// filtering done by the repository layer.
var count int
err = db.conn.QueryRow("SELECT COUNT(*) FROM file_chunks WHERE file_id = ?", file.ID).Scan(&count)
if err != nil {
t.Fatal(err)
}
t.Logf("Manual count of file_chunks for deleted file: %d", count)
if len(fileChunks) != 0 {
t.Errorf("expected 0 file chunks after cascade delete, got %d", len(fileChunks))
// List the remaining chunks to show exactly which rows survived.
for _, fc := range fileChunks {
t.Logf("Remaining chunk: file_id=%s, idx=%d, chunk=%s", fc.FileID, fc.Idx, fc.ChunkHash)
}
}
}

View File

@@ -16,16 +16,16 @@ func NewChunkFileRepository(db *DB) *ChunkFileRepository {
func (r *ChunkFileRepository) Create(ctx context.Context, tx *sql.Tx, cf *ChunkFile) error {
query := `
INSERT INTO chunk_files (chunk_hash, file_path, file_offset, length)
INSERT INTO chunk_files (chunk_hash, file_id, file_offset, length)
VALUES (?, ?, ?, ?)
ON CONFLICT(chunk_hash, file_path) DO NOTHING
ON CONFLICT(chunk_hash, file_id) DO NOTHING
`
var err error
if tx != nil {
_, err = tx.ExecContext(ctx, query, cf.ChunkHash, cf.FilePath, cf.FileOffset, cf.Length)
_, err = tx.ExecContext(ctx, query, cf.ChunkHash, cf.FileID, cf.FileOffset, cf.Length)
} else {
_, err = r.db.ExecWithLock(ctx, query, cf.ChunkHash, cf.FilePath, cf.FileOffset, cf.Length)
_, err = r.db.ExecWithLog(ctx, query, cf.ChunkHash, cf.FileID, cf.FileOffset, cf.Length)
}
if err != nil {
@@ -37,7 +37,7 @@ func (r *ChunkFileRepository) Create(ctx context.Context, tx *sql.Tx, cf *ChunkF
func (r *ChunkFileRepository) GetByChunkHash(ctx context.Context, chunkHash string) ([]*ChunkFile, error) {
query := `
SELECT chunk_hash, file_path, file_offset, length
SELECT chunk_hash, file_id, file_offset, length
FROM chunk_files
WHERE chunk_hash = ?
`
@@ -51,7 +51,7 @@ func (r *ChunkFileRepository) GetByChunkHash(ctx context.Context, chunkHash stri
var chunkFiles []*ChunkFile
for rows.Next() {
var cf ChunkFile
err := rows.Scan(&cf.ChunkHash, &cf.FilePath, &cf.FileOffset, &cf.Length)
err := rows.Scan(&cf.ChunkHash, &cf.FileID, &cf.FileOffset, &cf.Length)
if err != nil {
return nil, fmt.Errorf("scanning chunk file: %w", err)
}
@@ -63,9 +63,10 @@ func (r *ChunkFileRepository) GetByChunkHash(ctx context.Context, chunkHash stri
func (r *ChunkFileRepository) GetByFilePath(ctx context.Context, filePath string) ([]*ChunkFile, error) {
query := `
SELECT chunk_hash, file_path, file_offset, length
FROM chunk_files
WHERE file_path = ?
SELECT cf.chunk_hash, cf.file_id, cf.file_offset, cf.length
FROM chunk_files cf
JOIN files f ON cf.file_id = f.id
WHERE f.path = ?
`
rows, err := r.db.conn.QueryContext(ctx, query, filePath)
@@ -77,7 +78,34 @@ func (r *ChunkFileRepository) GetByFilePath(ctx context.Context, filePath string
var chunkFiles []*ChunkFile
for rows.Next() {
var cf ChunkFile
err := rows.Scan(&cf.ChunkHash, &cf.FilePath, &cf.FileOffset, &cf.Length)
err := rows.Scan(&cf.ChunkHash, &cf.FileID, &cf.FileOffset, &cf.Length)
if err != nil {
return nil, fmt.Errorf("scanning chunk file: %w", err)
}
chunkFiles = append(chunkFiles, &cf)
}
return chunkFiles, rows.Err()
}
// GetByFileID retrieves chunk files by file ID
func (r *ChunkFileRepository) GetByFileID(ctx context.Context, fileID string) ([]*ChunkFile, error) {
query := `
SELECT chunk_hash, file_id, file_offset, length
FROM chunk_files
WHERE file_id = ?
`
rows, err := r.db.conn.QueryContext(ctx, query, fileID)
if err != nil {
return nil, fmt.Errorf("querying chunk files: %w", err)
}
defer CloseRows(rows)
var chunkFiles []*ChunkFile
for rows.Next() {
var cf ChunkFile
err := rows.Scan(&cf.ChunkHash, &cf.FileID, &cf.FileOffset, &cf.Length)
if err != nil {
return nil, fmt.Errorf("scanning chunk file: %w", err)
}

View File

@@ -3,6 +3,7 @@ package database
import (
"context"
"testing"
"time"
)
func TestChunkFileRepository(t *testing.T) {
@@ -11,16 +12,49 @@ func TestChunkFileRepository(t *testing.T) {
ctx := context.Background()
repo := NewChunkFileRepository(db)
fileRepo := NewFileRepository(db)
// Create test files first
testTime := time.Now().Truncate(time.Second)
file1 := &File{
Path: "/file1.txt",
MTime: testTime,
CTime: testTime,
Size: 1024,
Mode: 0644,
UID: 1000,
GID: 1000,
LinkTarget: "",
}
err := fileRepo.Create(ctx, nil, file1)
if err != nil {
t.Fatalf("failed to create file1: %v", err)
}
file2 := &File{
Path: "/file2.txt",
MTime: testTime,
CTime: testTime,
Size: 1024,
Mode: 0644,
UID: 1000,
GID: 1000,
LinkTarget: "",
}
err = fileRepo.Create(ctx, nil, file2)
if err != nil {
t.Fatalf("failed to create file2: %v", err)
}
// Test Create
cf1 := &ChunkFile{
ChunkHash: "chunk1",
FilePath: "/file1.txt",
FileID: file1.ID,
FileOffset: 0,
Length: 1024,
}
err := repo.Create(ctx, nil, cf1)
err = repo.Create(ctx, nil, cf1)
if err != nil {
t.Fatalf("failed to create chunk file: %v", err)
}
@@ -28,7 +62,7 @@ func TestChunkFileRepository(t *testing.T) {
// Add same chunk in different file (deduplication scenario)
cf2 := &ChunkFile{
ChunkHash: "chunk1",
FilePath: "/file2.txt",
FileID: file2.ID,
FileOffset: 2048,
Length: 1024,
}
@@ -50,10 +84,10 @@ func TestChunkFileRepository(t *testing.T) {
foundFile1 := false
foundFile2 := false
for _, cf := range chunkFiles {
if cf.FilePath == "/file1.txt" && cf.FileOffset == 0 {
if cf.FileID == file1.ID && cf.FileOffset == 0 {
foundFile1 = true
}
if cf.FilePath == "/file2.txt" && cf.FileOffset == 2048 {
if cf.FileID == file2.ID && cf.FileOffset == 2048 {
foundFile2 = true
}
}
@@ -61,10 +95,10 @@ func TestChunkFileRepository(t *testing.T) {
t.Error("not all expected files found")
}
// Test GetByFilePath
chunkFiles, err = repo.GetByFilePath(ctx, "/file1.txt")
// Test GetByFileID
chunkFiles, err = repo.GetByFileID(ctx, file1.ID)
if err != nil {
t.Fatalf("failed to get chunks by file path: %v", err)
t.Fatalf("failed to get chunks by file ID: %v", err)
}
if len(chunkFiles) != 1 {
t.Errorf("expected 1 chunk for file, got %d", len(chunkFiles))
@@ -86,6 +120,23 @@ func TestChunkFileRepositoryComplexDeduplication(t *testing.T) {
ctx := context.Background()
repo := NewChunkFileRepository(db)
fileRepo := NewFileRepository(db)
// Create test files
testTime := time.Now().Truncate(time.Second)
file1 := &File{Path: "/file1.txt", MTime: testTime, CTime: testTime, Size: 3072, Mode: 0644, UID: 1000, GID: 1000}
file2 := &File{Path: "/file2.txt", MTime: testTime, CTime: testTime, Size: 3072, Mode: 0644, UID: 1000, GID: 1000}
file3 := &File{Path: "/file3.txt", MTime: testTime, CTime: testTime, Size: 2048, Mode: 0644, UID: 1000, GID: 1000}
if err := fileRepo.Create(ctx, nil, file1); err != nil {
t.Fatalf("failed to create file1: %v", err)
}
if err := fileRepo.Create(ctx, nil, file2); err != nil {
t.Fatalf("failed to create file2: %v", err)
}
if err := fileRepo.Create(ctx, nil, file3); err != nil {
t.Fatalf("failed to create file3: %v", err)
}
// Simulate a scenario where multiple files share chunks
// File1: chunk1, chunk2, chunk3
@@ -94,16 +145,16 @@ func TestChunkFileRepositoryComplexDeduplication(t *testing.T) {
chunkFiles := []ChunkFile{
// File1
{ChunkHash: "chunk1", FilePath: "/file1.txt", FileOffset: 0, Length: 1024},
{ChunkHash: "chunk2", FilePath: "/file1.txt", FileOffset: 1024, Length: 1024},
{ChunkHash: "chunk3", FilePath: "/file1.txt", FileOffset: 2048, Length: 1024},
{ChunkHash: "chunk1", FileID: file1.ID, FileOffset: 0, Length: 1024},
{ChunkHash: "chunk2", FileID: file1.ID, FileOffset: 1024, Length: 1024},
{ChunkHash: "chunk3", FileID: file1.ID, FileOffset: 2048, Length: 1024},
// File2
{ChunkHash: "chunk2", FilePath: "/file2.txt", FileOffset: 0, Length: 1024},
{ChunkHash: "chunk3", FilePath: "/file2.txt", FileOffset: 1024, Length: 1024},
{ChunkHash: "chunk4", FilePath: "/file2.txt", FileOffset: 2048, Length: 1024},
{ChunkHash: "chunk2", FileID: file2.ID, FileOffset: 0, Length: 1024},
{ChunkHash: "chunk3", FileID: file2.ID, FileOffset: 1024, Length: 1024},
{ChunkHash: "chunk4", FileID: file2.ID, FileOffset: 2048, Length: 1024},
// File3
{ChunkHash: "chunk1", FilePath: "/file3.txt", FileOffset: 0, Length: 1024},
{ChunkHash: "chunk4", FilePath: "/file3.txt", FileOffset: 1024, Length: 1024},
{ChunkHash: "chunk1", FileID: file3.ID, FileOffset: 0, Length: 1024},
{ChunkHash: "chunk4", FileID: file3.ID, FileOffset: 1024, Length: 1024},
}
for _, cf := range chunkFiles {
@@ -132,7 +183,7 @@ func TestChunkFileRepositoryComplexDeduplication(t *testing.T) {
}
// Test file2 chunks
chunks, err := repo.GetByFilePath(ctx, "/file2.txt")
chunks, err := repo.GetByFileID(ctx, file2.ID)
if err != nil {
t.Fatalf("failed to get chunks for file2: %v", err)
}

View File

@@ -4,6 +4,8 @@ import (
"context"
"database/sql"
"fmt"
"git.eeqj.de/sneak/vaultik/internal/log"
)
type ChunkRepository struct {
@@ -25,7 +27,7 @@ func (r *ChunkRepository) Create(ctx context.Context, tx *sql.Tx, chunk *Chunk)
if tx != nil {
_, err = tx.ExecContext(ctx, query, chunk.ChunkHash, chunk.SHA256, chunk.Size)
} else {
_, err = r.db.ExecWithLock(ctx, query, chunk.ChunkHash, chunk.SHA256, chunk.Size)
_, err = r.db.ExecWithLog(ctx, query, chunk.ChunkHash, chunk.SHA256, chunk.Size)
}
if err != nil {
@@ -139,3 +141,26 @@ func (r *ChunkRepository) ListUnpacked(ctx context.Context, limit int) ([]*Chunk
return chunks, rows.Err()
}
// DeleteOrphaned deletes chunks that are not referenced by any file.
// Orphans appear after files are removed from the index; this reclaims
// their rows in a single anti-join DELETE statement so no rows need to
// be loaded into Go. Safe to call when there is nothing to delete.
func (r *ChunkRepository) DeleteOrphaned(ctx context.Context) error {
query := `
DELETE FROM chunks
WHERE NOT EXISTS (
SELECT 1 FROM file_chunks
WHERE file_chunks.chunk_hash = chunks.chunk_hash
)
`
result, err := r.db.ExecWithLog(ctx, query)
if err != nil {
return fmt.Errorf("deleting orphaned chunks: %w", err)
}
// The RowsAffected error is deliberately ignored: the count is only
// used for debug logging and the DELETE itself already succeeded.
rowsAffected, _ := result.RowsAffected()
if rowsAffected > 0 {
log.Debug("Deleted orphaned chunks", "count", rowsAffected)
}
return nil
}

View File

@@ -1,84 +1,158 @@
// Package database provides the local SQLite index for Vaultik backup operations.
// The database tracks files, chunks, and their associations with blobs.
//
// Blobs in Vaultik are the final storage units uploaded to S3. Each blob is a
// large (up to 10GB) file containing many compressed and encrypted chunks from
// multiple source files. Blobs are content-addressed, meaning their filename
// is derived from their SHA256 hash after compression and encryption.
//
// The database does not support migrations. If the schema changes, delete
// the local database and perform a full backup to recreate it.
package database
import (
"context"
"database/sql"
_ "embed"
"fmt"
"os"
"strings"
"sync"
"git.eeqj.de/sneak/vaultik/internal/log"
_ "modernc.org/sqlite"
)
//go:embed schema.sql
var schemaSQL string
// DB represents the Vaultik local index database connection.
// It uses SQLite to track file metadata, content-defined chunks, and blob associations.
// The database enables incremental backups by detecting changed files and
// supports deduplication by tracking which chunks are already stored in blobs.
// Write operations are synchronized through a mutex to ensure thread safety.
type DB struct {
conn *sql.DB
writeLock sync.Mutex
conn *sql.DB
path string
}
// New creates a new database connection at the specified path.
// It automatically handles database recovery, creates the schema if needed,
// and configures SQLite with appropriate settings for performance and reliability.
// The database uses WAL mode for better concurrency and sets a busy timeout
// to handle concurrent access gracefully.
//
// If the database appears locked, it will attempt recovery by removing stale
// lock files and switching temporarily to TRUNCATE journal mode.
//
// New creates a new database connection at the specified path.
// It automatically handles recovery from stale locks, creates the schema if needed,
// and configures SQLite with WAL mode for better concurrency.
// The path parameter can be a file path for persistent storage or ":memory:"
// for an in-memory database (useful for testing).
func New(ctx context.Context, path string) (*DB, error) {
log.Debug("Opening database connection", "path", path)
// First, try to recover from any stale locks
if err := recoverDatabase(ctx, path); err != nil {
log.Warn("Failed to recover database", "error", err)
}
// First attempt with standard WAL mode
conn, err := sql.Open("sqlite", path+"?_journal_mode=WAL&_synchronous=NORMAL&_busy_timeout=10000&_locking_mode=NORMAL")
log.Debug("Attempting to open database with WAL mode", "path", path)
conn, err := sql.Open(
"sqlite",
path+"?_journal_mode=WAL&_synchronous=NORMAL&_busy_timeout=10000&_locking_mode=NORMAL&_foreign_keys=ON",
)
if err == nil {
// Set connection pool settings to ensure proper cleanup
conn.SetMaxOpenConns(1) // SQLite only supports one writer
// Set connection pool settings
// SQLite can handle multiple readers but only one writer at a time.
// Setting MaxOpenConns to 1 ensures all writes are serialized through
// a single connection, preventing SQLITE_BUSY errors.
conn.SetMaxOpenConns(1)
conn.SetMaxIdleConns(1)
if err := conn.PingContext(ctx); err == nil {
// Success on first try
db := &DB{conn: conn}
log.Debug("Database opened successfully with WAL mode", "path", path)
// Enable foreign keys explicitly
if _, err := conn.ExecContext(ctx, "PRAGMA foreign_keys = ON"); err != nil {
log.Warn("Failed to enable foreign keys", "error", err)
}
db := &DB{conn: conn, path: path}
if err := db.createSchema(ctx); err != nil {
_ = conn.Close()
return nil, fmt.Errorf("creating schema: %w", err)
}
return db, nil
}
log.Debug("Failed to ping database, closing connection", "path", path, "error", err)
_ = conn.Close()
}
// If first attempt failed, try with TRUNCATE mode to clear any locks
log.Info("Database appears locked, attempting recovery with TRUNCATE mode")
conn, err = sql.Open("sqlite", path+"?_journal_mode=TRUNCATE&_synchronous=NORMAL&_busy_timeout=10000")
log.Info(
"Database appears locked, attempting recovery with TRUNCATE mode",
"path", path,
)
conn, err = sql.Open(
"sqlite",
path+"?_journal_mode=TRUNCATE&_synchronous=NORMAL&_busy_timeout=10000&_foreign_keys=ON",
)
if err != nil {
return nil, fmt.Errorf("opening database in recovery mode: %w", err)
}
// Set connection pool settings
// SQLite can handle multiple readers but only one writer at a time.
// Setting MaxOpenConns to 1 ensures all writes are serialized through
// a single connection, preventing SQLITE_BUSY errors.
conn.SetMaxOpenConns(1)
conn.SetMaxIdleConns(1)
if err := conn.PingContext(ctx); err != nil {
log.Debug("Failed to ping database in recovery mode, closing", "path", path, "error", err)
_ = conn.Close()
return nil, fmt.Errorf("database still locked after recovery attempt: %w", err)
return nil, fmt.Errorf(
"database still locked after recovery attempt: %w",
err,
)
}
log.Debug("Database opened in TRUNCATE mode", "path", path)
// Switch back to WAL mode
log.Debug("Switching database back to WAL mode", "path", path)
if _, err := conn.ExecContext(ctx, "PRAGMA journal_mode=WAL"); err != nil {
log.Warn("Failed to switch back to WAL mode", "error", err)
log.Warn("Failed to switch back to WAL mode", "path", path, "error", err)
}
db := &DB{conn: conn}
// Ensure foreign keys are enabled
if _, err := conn.ExecContext(ctx, "PRAGMA foreign_keys=ON"); err != nil {
log.Warn("Failed to enable foreign keys", "path", path, "error", err)
}
db := &DB{conn: conn, path: path}
if err := db.createSchema(ctx); err != nil {
_ = conn.Close()
return nil, fmt.Errorf("creating schema: %w", err)
}
log.Debug("Database connection established successfully", "path", path)
return db, nil
}
// Close closes the database connection.
// It ensures all pending operations are completed before closing.
// Returns an error if the database connection cannot be closed properly.
func (db *DB) Close() error {
log.Debug("Closing database connection")
log.Debug("Closing database connection", "path", db.path)
if err := db.conn.Close(); err != nil {
log.Error("Failed to close database", "error", err)
log.Error("Failed to close database", "path", db.path, "error", err)
return fmt.Errorf("failed to close database: %w", err)
}
log.Debug("Database connection closed successfully")
log.Debug("Database connection closed successfully", "path", db.path)
return nil
}
@@ -138,148 +212,79 @@ func recoverDatabase(ctx context.Context, path string) error {
return nil
}
// Conn returns the underlying *sql.DB connection.
// This should be used sparingly and primarily for read operations.
// For write operations, prefer using the ExecWithLog method so that
// statements are captured by the SQL logging.
func (db *DB) Conn() *sql.DB {
return db.conn
}
func (db *DB) BeginTx(ctx context.Context, opts *sql.TxOptions) (*sql.Tx, error) {
// BeginTx starts a new database transaction with the given options.
// The caller is responsible for committing or rolling back the transaction.
// For write transactions, consider using the Repositories.WithTx method instead,
// which handles locking and rollback automatically.
func (db *DB) BeginTx(
ctx context.Context,
opts *sql.TxOptions,
) (*sql.Tx, error) {
// Thin pass-through to sql.DB.BeginTx; no extra bookkeeping is done here.
return db.conn.BeginTx(ctx, opts)
}
// LockForWrite acquires the write lock
func (db *DB) LockForWrite() {
log.Debug("Attempting to acquire write lock")
db.writeLock.Lock()
log.Debug("Write lock acquired")
}
// UnlockWrite releases the write lock
func (db *DB) UnlockWrite() {
log.Debug("Releasing write lock")
db.writeLock.Unlock()
log.Debug("Write lock released")
}
// ExecWithLock executes a write query with the write lock held
func (db *DB) ExecWithLock(ctx context.Context, query string, args ...interface{}) (sql.Result, error) {
db.writeLock.Lock()
defer db.writeLock.Unlock()
// Note: LockForWrite and UnlockWrite methods have been removed.
// SQLite handles its own locking internally, so explicit locking is not needed.
// ExecWithLog executes a write query with SQL logging.
// SQLite handles its own locking internally, so we just pass through to ExecContext.
// The query and args parameters follow the same format as sql.DB.ExecContext.
func (db *DB) ExecWithLog(
ctx context.Context,
query string,
args ...interface{},
) (sql.Result, error) {
// Log before executing so the statement is recorded even if it fails.
LogSQL("Execute", query, args...)
return db.conn.ExecContext(ctx, query, args...)
}
// QueryRowWithLock executes a write query that returns a row with the write lock held
func (db *DB) QueryRowWithLock(ctx context.Context, query string, args ...interface{}) *sql.Row {
db.writeLock.Lock()
defer db.writeLock.Unlock()
// QueryRowWithLog runs a query expected to return at most one row, emitting
// SQL debug logging first. It is intended for statements that both modify
// data and return values (e.g. INSERT ... RETURNING). SQLite performs its own
// locking internally; the query and args parameters follow the same format as
// sql.DB.QueryRowContext.
func (db *DB) QueryRowWithLog(ctx context.Context, query string, args ...interface{}) *sql.Row {
	LogSQL("QueryRow", query, args...)
	return db.conn.QueryRowContext(ctx, query, args...)
}
// createSchema creates all database tables, indexes, and constraints if they
// do not already exist. The DDL lives in the package-level schemaSQL constant
// so the schema is defined in exactly one place; the inline copy that was
// previously duplicated here has been removed, along with the resulting
// shadowed/dead `schema` local and the conflicting second `_, err :=`
// declaration that could not compile.
func (db *DB) createSchema(ctx context.Context) error {
	_, err := db.conn.ExecContext(ctx, schemaSQL)
	return err
}
// NewTestDB creates an in-memory SQLite database for testing purposes.
// The database is automatically initialized with the schema and is ready for use.
// Each call creates a new independent database instance.
// Callers should Close the returned DB when the test finishes.
func NewTestDB() (*DB, error) {
	return New(context.Background(), ":memory:")
}
// LogSQL logs SQL queries and their arguments when debug mode is enabled.
// Debug mode is activated by setting the GODEBUG environment variable to
// include "vaultik". This is useful for troubleshooting database operations
// and understanding query patterns.
//
// The operation parameter describes the type of SQL operation (e.g.
// "Execute", "QueryRow"). The query parameter is the SQL statement being
// executed, and args contains the bound query arguments.
//
// Fix: the body previously contained two log.Debug calls (an old single-line
// form alongside the reformatted multi-line form), causing every statement to
// be logged twice; only the single call remains.
func LogSQL(operation, query string, args ...interface{}) {
	if strings.Contains(os.Getenv("GODEBUG"), "vaultik") {
		log.Debug(
			"SQL "+operation,
			"query",
			strings.TrimSpace(query),
			"args",
			fmt.Sprintf("%v", args),
		)
	}
}

View File

@@ -67,21 +67,26 @@ func TestDatabaseConcurrentAccess(t *testing.T) {
}()
// Test concurrent writes
done := make(chan bool, 10)
type result struct {
index int
err error
}
results := make(chan result, 10)
for i := 0; i < 10; i++ {
go func(i int) {
_, err := db.ExecWithLock(ctx, "INSERT INTO chunks (chunk_hash, sha256, size) VALUES (?, ?, ?)",
_, err := db.ExecWithLog(ctx, "INSERT INTO chunks (chunk_hash, sha256, size) VALUES (?, ?, ?)",
fmt.Sprintf("hash%d", i), fmt.Sprintf("sha%d", i), i*1024)
if err != nil {
t.Errorf("concurrent insert failed: %v", err)
}
done <- true
results <- result{index: i, err: err}
}(i)
}
// Wait for all goroutines
// Wait for all goroutines and check results
for i := 0; i < 10; i++ {
<-done
r := <-results
if r.err != nil {
t.Fatalf("concurrent insert %d failed: %v", r.index, r.err)
}
}
// Verify all inserts succeeded

View File

@@ -16,16 +16,16 @@ func NewFileChunkRepository(db *DB) *FileChunkRepository {
func (r *FileChunkRepository) Create(ctx context.Context, tx *sql.Tx, fc *FileChunk) error {
query := `
INSERT INTO file_chunks (path, idx, chunk_hash)
INSERT INTO file_chunks (file_id, idx, chunk_hash)
VALUES (?, ?, ?)
ON CONFLICT(path, idx) DO NOTHING
ON CONFLICT(file_id, idx) DO NOTHING
`
var err error
if tx != nil {
_, err = tx.ExecContext(ctx, query, fc.Path, fc.Idx, fc.ChunkHash)
_, err = tx.ExecContext(ctx, query, fc.FileID, fc.Idx, fc.ChunkHash)
} else {
_, err = r.db.ExecWithLock(ctx, query, fc.Path, fc.Idx, fc.ChunkHash)
_, err = r.db.ExecWithLog(ctx, query, fc.FileID, fc.Idx, fc.ChunkHash)
}
if err != nil {
@@ -37,10 +37,11 @@ func (r *FileChunkRepository) Create(ctx context.Context, tx *sql.Tx, fc *FileCh
func (r *FileChunkRepository) GetByPath(ctx context.Context, path string) ([]*FileChunk, error) {
query := `
SELECT path, idx, chunk_hash
FROM file_chunks
WHERE path = ?
ORDER BY idx
SELECT fc.file_id, fc.idx, fc.chunk_hash
FROM file_chunks fc
JOIN files f ON fc.file_id = f.id
WHERE f.path = ?
ORDER BY fc.idx
`
rows, err := r.db.conn.QueryContext(ctx, query, path)
@@ -52,7 +53,35 @@ func (r *FileChunkRepository) GetByPath(ctx context.Context, path string) ([]*Fi
var fileChunks []*FileChunk
for rows.Next() {
var fc FileChunk
err := rows.Scan(&fc.Path, &fc.Idx, &fc.ChunkHash)
err := rows.Scan(&fc.FileID, &fc.Idx, &fc.ChunkHash)
if err != nil {
return nil, fmt.Errorf("scanning file chunk: %w", err)
}
fileChunks = append(fileChunks, &fc)
}
return fileChunks, rows.Err()
}
// GetByFileID retrieves file chunks by file ID
func (r *FileChunkRepository) GetByFileID(ctx context.Context, fileID string) ([]*FileChunk, error) {
query := `
SELECT file_id, idx, chunk_hash
FROM file_chunks
WHERE file_id = ?
ORDER BY idx
`
rows, err := r.db.conn.QueryContext(ctx, query, fileID)
if err != nil {
return nil, fmt.Errorf("querying file chunks: %w", err)
}
defer CloseRows(rows)
var fileChunks []*FileChunk
for rows.Next() {
var fc FileChunk
err := rows.Scan(&fc.FileID, &fc.Idx, &fc.ChunkHash)
if err != nil {
return nil, fmt.Errorf("scanning file chunk: %w", err)
}
@@ -65,10 +94,11 @@ func (r *FileChunkRepository) GetByPath(ctx context.Context, path string) ([]*Fi
// GetByPathTx retrieves file chunks within a transaction
func (r *FileChunkRepository) GetByPathTx(ctx context.Context, tx *sql.Tx, path string) ([]*FileChunk, error) {
query := `
SELECT path, idx, chunk_hash
FROM file_chunks
WHERE path = ?
ORDER BY idx
SELECT fc.file_id, fc.idx, fc.chunk_hash
FROM file_chunks fc
JOIN files f ON fc.file_id = f.id
WHERE f.path = ?
ORDER BY fc.idx
`
LogSQL("GetByPathTx", query, path)
@@ -81,7 +111,7 @@ func (r *FileChunkRepository) GetByPathTx(ctx context.Context, tx *sql.Tx, path
var fileChunks []*FileChunk
for rows.Next() {
var fc FileChunk
err := rows.Scan(&fc.Path, &fc.Idx, &fc.ChunkHash)
err := rows.Scan(&fc.FileID, &fc.Idx, &fc.ChunkHash)
if err != nil {
return nil, fmt.Errorf("scanning file chunk: %w", err)
}
@@ -93,13 +123,31 @@ func (r *FileChunkRepository) GetByPathTx(ctx context.Context, tx *sql.Tx, path
}
func (r *FileChunkRepository) DeleteByPath(ctx context.Context, tx *sql.Tx, path string) error {
query := `DELETE FROM file_chunks WHERE path = ?`
query := `DELETE FROM file_chunks WHERE file_id = (SELECT id FROM files WHERE path = ?)`
var err error
if tx != nil {
_, err = tx.ExecContext(ctx, query, path)
} else {
_, err = r.db.ExecWithLock(ctx, query, path)
_, err = r.db.ExecWithLog(ctx, query, path)
}
if err != nil {
return fmt.Errorf("deleting file chunks: %w", err)
}
return nil
}
// DeleteByFileID deletes all chunks for a file by its UUID
func (r *FileChunkRepository) DeleteByFileID(ctx context.Context, tx *sql.Tx, fileID string) error {
query := `DELETE FROM file_chunks WHERE file_id = ?`
var err error
if tx != nil {
_, err = tx.ExecContext(ctx, query, fileID)
} else {
_, err = r.db.ExecWithLog(ctx, query, fileID)
}
if err != nil {

View File

@@ -4,6 +4,7 @@ import (
"context"
"fmt"
"testing"
"time"
)
func TestFileChunkRepository(t *testing.T) {
@@ -12,22 +13,40 @@ func TestFileChunkRepository(t *testing.T) {
ctx := context.Background()
repo := NewFileChunkRepository(db)
fileRepo := NewFileRepository(db)
// Create test file first
testTime := time.Now().Truncate(time.Second)
file := &File{
Path: "/test/file.txt",
MTime: testTime,
CTime: testTime,
Size: 3072,
Mode: 0644,
UID: 1000,
GID: 1000,
LinkTarget: "",
}
err := fileRepo.Create(ctx, nil, file)
if err != nil {
t.Fatalf("failed to create file: %v", err)
}
// Test Create
fc1 := &FileChunk{
Path: "/test/file.txt",
FileID: file.ID,
Idx: 0,
ChunkHash: "chunk1",
}
err := repo.Create(ctx, nil, fc1)
err = repo.Create(ctx, nil, fc1)
if err != nil {
t.Fatalf("failed to create file chunk: %v", err)
}
// Add more chunks for the same file
fc2 := &FileChunk{
Path: "/test/file.txt",
FileID: file.ID,
Idx: 1,
ChunkHash: "chunk2",
}
@@ -37,7 +56,7 @@ func TestFileChunkRepository(t *testing.T) {
}
fc3 := &FileChunk{
Path: "/test/file.txt",
FileID: file.ID,
Idx: 2,
ChunkHash: "chunk3",
}
@@ -46,8 +65,8 @@ func TestFileChunkRepository(t *testing.T) {
t.Fatalf("failed to create third file chunk: %v", err)
}
// Test GetByPath
chunks, err := repo.GetByPath(ctx, "/test/file.txt")
// Test GetByFile
chunks, err := repo.GetByFile(ctx, "/test/file.txt")
if err != nil {
t.Fatalf("failed to get file chunks: %v", err)
}
@@ -68,13 +87,13 @@ func TestFileChunkRepository(t *testing.T) {
t.Fatalf("failed to create duplicate file chunk: %v", err)
}
// Test DeleteByPath
err = repo.DeleteByPath(ctx, nil, "/test/file.txt")
// Test DeleteByFileID
err = repo.DeleteByFileID(ctx, nil, file.ID)
if err != nil {
t.Fatalf("failed to delete file chunks: %v", err)
}
chunks, err = repo.GetByPath(ctx, "/test/file.txt")
chunks, err = repo.GetByFileID(ctx, file.ID)
if err != nil {
t.Fatalf("failed to get deleted file chunks: %v", err)
}
@@ -89,15 +108,38 @@ func TestFileChunkRepositoryMultipleFiles(t *testing.T) {
ctx := context.Background()
repo := NewFileChunkRepository(db)
fileRepo := NewFileRepository(db)
// Create test files
testTime := time.Now().Truncate(time.Second)
filePaths := []string{"/file1.txt", "/file2.txt", "/file3.txt"}
files := make([]*File, len(filePaths))
for i, path := range filePaths {
file := &File{
Path: path,
MTime: testTime,
CTime: testTime,
Size: 2048,
Mode: 0644,
UID: 1000,
GID: 1000,
LinkTarget: "",
}
err := fileRepo.Create(ctx, nil, file)
if err != nil {
t.Fatalf("failed to create file %s: %v", path, err)
}
files[i] = file
}
// Create chunks for multiple files
files := []string{"/file1.txt", "/file2.txt", "/file3.txt"}
for _, path := range files {
for i := 0; i < 2; i++ {
for i, file := range files {
for j := 0; j < 2; j++ {
fc := &FileChunk{
Path: path,
Idx: i,
ChunkHash: fmt.Sprintf("%s_chunk%d", path, i),
FileID: file.ID,
Idx: j,
ChunkHash: fmt.Sprintf("file%d_chunk%d", i, j),
}
err := repo.Create(ctx, nil, fc)
if err != nil {
@@ -107,13 +149,13 @@ func TestFileChunkRepositoryMultipleFiles(t *testing.T) {
}
// Verify each file has correct chunks
for _, path := range files {
chunks, err := repo.GetByPath(ctx, path)
for i, file := range files {
chunks, err := repo.GetByFileID(ctx, file.ID)
if err != nil {
t.Fatalf("failed to get chunks for %s: %v", path, err)
t.Fatalf("failed to get chunks for file %d: %v", i, err)
}
if len(chunks) != 2 {
t.Errorf("expected 2 chunks for %s, got %d", path, len(chunks))
t.Errorf("expected 2 chunks for file %d, got %d", i, len(chunks))
}
}
}

View File

@@ -5,6 +5,9 @@ import (
"database/sql"
"fmt"
"time"
"git.eeqj.de/sneak/vaultik/internal/log"
"github.com/google/uuid"
)
type FileRepository struct {
@@ -16,10 +19,16 @@ func NewFileRepository(db *DB) *FileRepository {
}
func (r *FileRepository) Create(ctx context.Context, tx *sql.Tx, file *File) error {
// Generate UUID if not provided
if file.ID == "" {
file.ID = uuid.New().String()
}
query := `
INSERT INTO files (path, mtime, ctime, size, mode, uid, gid, link_target)
VALUES (?, ?, ?, ?, ?, ?, ?, ?)
INSERT INTO files (id, path, mtime, ctime, size, mode, uid, gid, link_target)
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)
ON CONFLICT(path) DO UPDATE SET
id = excluded.id,
mtime = excluded.mtime,
ctime = excluded.ctime,
size = excluded.size,
@@ -27,14 +36,15 @@ func (r *FileRepository) Create(ctx context.Context, tx *sql.Tx, file *File) err
uid = excluded.uid,
gid = excluded.gid,
link_target = excluded.link_target
RETURNING id
`
var err error
if tx != nil {
LogSQL("Execute", query, file.Path, file.MTime.Unix(), file.CTime.Unix(), file.Size, file.Mode, file.UID, file.GID, file.LinkTarget)
_, err = tx.ExecContext(ctx, query, file.Path, file.MTime.Unix(), file.CTime.Unix(), file.Size, file.Mode, file.UID, file.GID, file.LinkTarget)
LogSQL("Execute", query, file.ID, file.Path, file.MTime.Unix(), file.CTime.Unix(), file.Size, file.Mode, file.UID, file.GID, file.LinkTarget)
err = tx.QueryRowContext(ctx, query, file.ID, file.Path, file.MTime.Unix(), file.CTime.Unix(), file.Size, file.Mode, file.UID, file.GID, file.LinkTarget).Scan(&file.ID)
} else {
_, err = r.db.ExecWithLock(ctx, query, file.Path, file.MTime.Unix(), file.CTime.Unix(), file.Size, file.Mode, file.UID, file.GID, file.LinkTarget)
err = r.db.QueryRowWithLog(ctx, query, file.ID, file.Path, file.MTime.Unix(), file.CTime.Unix(), file.Size, file.Mode, file.UID, file.GID, file.LinkTarget).Scan(&file.ID)
}
if err != nil {
@@ -46,7 +56,7 @@ func (r *FileRepository) Create(ctx context.Context, tx *sql.Tx, file *File) err
func (r *FileRepository) GetByPath(ctx context.Context, path string) (*File, error) {
query := `
SELECT path, mtime, ctime, size, mode, uid, gid, link_target
SELECT id, path, mtime, ctime, size, mode, uid, gid, link_target
FROM files
WHERE path = ?
`
@@ -56,6 +66,7 @@ func (r *FileRepository) GetByPath(ctx context.Context, path string) (*File, err
var linkTarget sql.NullString
err := r.db.conn.QueryRowContext(ctx, query, path).Scan(
&file.ID,
&file.Path,
&mtimeUnix,
&ctimeUnix,
@@ -73,8 +84,48 @@ func (r *FileRepository) GetByPath(ctx context.Context, path string) (*File, err
return nil, fmt.Errorf("querying file: %w", err)
}
file.MTime = time.Unix(mtimeUnix, 0)
file.CTime = time.Unix(ctimeUnix, 0)
file.MTime = time.Unix(mtimeUnix, 0).UTC()
file.CTime = time.Unix(ctimeUnix, 0).UTC()
if linkTarget.Valid {
file.LinkTarget = linkTarget.String
}
return &file, nil
}
// GetByID retrieves a file by its UUID
func (r *FileRepository) GetByID(ctx context.Context, id string) (*File, error) {
query := `
SELECT id, path, mtime, ctime, size, mode, uid, gid, link_target
FROM files
WHERE id = ?
`
var file File
var mtimeUnix, ctimeUnix int64
var linkTarget sql.NullString
err := r.db.conn.QueryRowContext(ctx, query, id).Scan(
&file.ID,
&file.Path,
&mtimeUnix,
&ctimeUnix,
&file.Size,
&file.Mode,
&file.UID,
&file.GID,
&linkTarget,
)
if err == sql.ErrNoRows {
return nil, nil
}
if err != nil {
return nil, fmt.Errorf("querying file: %w", err)
}
file.MTime = time.Unix(mtimeUnix, 0).UTC()
file.CTime = time.Unix(ctimeUnix, 0).UTC()
if linkTarget.Valid {
file.LinkTarget = linkTarget.String
}
@@ -84,7 +135,7 @@ func (r *FileRepository) GetByPath(ctx context.Context, path string) (*File, err
func (r *FileRepository) GetByPathTx(ctx context.Context, tx *sql.Tx, path string) (*File, error) {
query := `
SELECT path, mtime, ctime, size, mode, uid, gid, link_target
SELECT id, path, mtime, ctime, size, mode, uid, gid, link_target
FROM files
WHERE path = ?
`
@@ -95,6 +146,7 @@ func (r *FileRepository) GetByPathTx(ctx context.Context, tx *sql.Tx, path strin
LogSQL("GetByPathTx QueryRowContext", query, path)
err := tx.QueryRowContext(ctx, query, path).Scan(
&file.ID,
&file.Path,
&mtimeUnix,
&ctimeUnix,
@@ -113,8 +165,8 @@ func (r *FileRepository) GetByPathTx(ctx context.Context, tx *sql.Tx, path strin
return nil, fmt.Errorf("querying file: %w", err)
}
file.MTime = time.Unix(mtimeUnix, 0)
file.CTime = time.Unix(ctimeUnix, 0)
file.MTime = time.Unix(mtimeUnix, 0).UTC()
file.CTime = time.Unix(ctimeUnix, 0).UTC()
if linkTarget.Valid {
file.LinkTarget = linkTarget.String
}
@@ -124,7 +176,7 @@ func (r *FileRepository) GetByPathTx(ctx context.Context, tx *sql.Tx, path strin
func (r *FileRepository) ListModifiedSince(ctx context.Context, since time.Time) ([]*File, error) {
query := `
SELECT path, mtime, ctime, size, mode, uid, gid, link_target
SELECT id, path, mtime, ctime, size, mode, uid, gid, link_target
FROM files
WHERE mtime >= ?
ORDER BY path
@@ -143,6 +195,7 @@ func (r *FileRepository) ListModifiedSince(ctx context.Context, since time.Time)
var linkTarget sql.NullString
err := rows.Scan(
&file.ID,
&file.Path,
&mtimeUnix,
&ctimeUnix,
@@ -175,7 +228,25 @@ func (r *FileRepository) Delete(ctx context.Context, tx *sql.Tx, path string) er
if tx != nil {
_, err = tx.ExecContext(ctx, query, path)
} else {
_, err = r.db.ExecWithLock(ctx, query, path)
_, err = r.db.ExecWithLog(ctx, query, path)
}
if err != nil {
return fmt.Errorf("deleting file: %w", err)
}
return nil
}
// DeleteByID deletes a file by its UUID
func (r *FileRepository) DeleteByID(ctx context.Context, tx *sql.Tx, id string) error {
query := `DELETE FROM files WHERE id = ?`
var err error
if tx != nil {
_, err = tx.ExecContext(ctx, query, id)
} else {
_, err = r.db.ExecWithLog(ctx, query, id)
}
if err != nil {
@@ -187,7 +258,7 @@ func (r *FileRepository) Delete(ctx context.Context, tx *sql.Tx, path string) er
func (r *FileRepository) ListByPrefix(ctx context.Context, prefix string) ([]*File, error) {
query := `
SELECT path, mtime, ctime, size, mode, uid, gid, link_target
SELECT id, path, mtime, ctime, size, mode, uid, gid, link_target
FROM files
WHERE path LIKE ? || '%'
ORDER BY path
@@ -206,6 +277,7 @@ func (r *FileRepository) ListByPrefix(ctx context.Context, prefix string) ([]*Fi
var linkTarget sql.NullString
err := rows.Scan(
&file.ID,
&file.Path,
&mtimeUnix,
&ctimeUnix,
@@ -230,3 +302,26 @@ func (r *FileRepository) ListByPrefix(ctx context.Context, prefix string) ([]*Fi
return files, rows.Err()
}
// DeleteOrphaned deletes files that are not referenced by any snapshot.
// A file is considered orphaned when no snapshot_files row points at its id.
// The number of deleted rows is logged at debug level when non-zero.
func (r *FileRepository) DeleteOrphaned(ctx context.Context) error {
	query := `
		DELETE FROM files
		WHERE NOT EXISTS (
			SELECT 1 FROM snapshot_files
			WHERE snapshot_files.file_id = files.id
		)
	`
	result, err := r.db.ExecWithLog(ctx, query)
	if err != nil {
		return fmt.Errorf("deleting orphaned files: %w", err)
	}
	// Previously the RowsAffected error was silently discarded; the count is
	// informational only, so a failure here is logged rather than returned.
	rowsAffected, raErr := result.RowsAffected()
	if raErr != nil {
		log.Debug("could not determine orphaned file delete count", "error", raErr)
	} else if rowsAffected > 0 {
		log.Debug("Deleted orphaned files", "count", rowsAffected)
	}
	return nil
}

View File

@@ -1,9 +1,15 @@
// Package database provides data models and repository interfaces for the Vaultik backup system.
// It includes types for files, chunks, blobs, snapshots, and their relationships.
package database
import "time"
// File represents a file record in the database
// File represents a file or directory in the backup system.
// It stores metadata about files including timestamps, permissions, ownership,
// and symlink targets. This information is used to restore files with their
// original attributes.
type File struct {
ID string // UUID primary key
Path string
MTime time.Time
CTime time.Time
@@ -14,37 +20,52 @@ type File struct {
LinkTarget string // empty for regular files, target path for symlinks
}
// IsSymlink returns true if this file is a symbolic link
// IsSymlink returns true if this file is a symbolic link.
// A file is considered a symlink if it has a non-empty LinkTarget.
func (f *File) IsSymlink() bool {
return f.LinkTarget != ""
}
// FileChunk represents the mapping between files and chunks
// FileChunk represents the mapping between files and their constituent chunks.
// Large files are split into multiple chunks for efficient deduplication and storage.
// The Idx field maintains the order of chunks within a file.
type FileChunk struct {
Path string
FileID string
Idx int
ChunkHash string
}
// Chunk represents a data chunk in the deduplication system.
// Files are split into chunks which are content-addressed by their hash.
// The ChunkHash is used for deduplication, while SHA256 provides
// an additional verification hash.
type Chunk struct {
	ChunkHash string // content-addressing hash; primary key used for deduplication
	SHA256    string // secondary verification hash of the chunk contents
	Size      int64  // chunk size in bytes
}
// Blob represents a blob record in the database
// Blob represents a blob record in the database.
// A blob is Vaultik's final storage unit - a large file (up to 10GB) containing
// many compressed and encrypted chunks from multiple source files.
// Blobs are content-addressed, meaning their filename in S3 is derived from
// the SHA256 hash of their compressed and encrypted content.
// The blob creation process is: chunks are accumulated -> compressed with zstd
// -> encrypted with age -> hashed -> uploaded to S3 with the hash as filename.
type Blob struct {
ID string
Hash string // Can be empty until blob is finalized
CreatedTS time.Time
FinishedTS *time.Time // nil if not yet finalized
UncompressedSize int64
CompressedSize int64
UploadedTS *time.Time // nil if not yet uploaded
ID string // UUID assigned when blob creation starts
Hash string // SHA256 of final compressed+encrypted content (empty until finalized)
CreatedTS time.Time // When blob creation started
FinishedTS *time.Time // When blob was finalized (nil if still packing)
UncompressedSize int64 // Total size of raw chunks before compression
CompressedSize int64 // Size after compression and encryption
UploadedTS *time.Time // When blob was uploaded to S3 (nil if not uploaded)
}
// BlobChunk represents the mapping between blobs and chunks
// BlobChunk represents the mapping between blobs and the chunks they contain.
// This allows tracking which chunks are stored in which blobs, along with
// their position and size within the blob. The offset and length fields
// enable extracting specific chunks from a blob without processing the entire blob.
type BlobChunk struct {
BlobID string
ChunkHash string
@@ -52,27 +73,34 @@ type BlobChunk struct {
Length int64
}
// ChunkFile represents the reverse mapping of chunks to files
// ChunkFile represents the reverse mapping showing which files contain a specific chunk.
// This is used during deduplication to identify all files that share a chunk,
// which is important for garbage collection and integrity verification.
type ChunkFile struct {
ChunkHash string
FilePath string
FileID string
FileOffset int64
Length int64
}
// Snapshot represents a snapshot record in the database
type Snapshot struct {
ID string
Hostname string
VaultikVersion string
StartedAt time.Time
CompletedAt *time.Time // nil if still in progress
FileCount int64
ChunkCount int64
BlobCount int64
TotalSize int64 // Total size of all referenced files
BlobSize int64 // Total size of all referenced blobs (compressed and encrypted)
CompressionRatio float64 // Compression ratio (BlobSize / TotalSize)
ID string
Hostname string
VaultikVersion string
VaultikGitRevision string
StartedAt time.Time
CompletedAt *time.Time // nil if still in progress
FileCount int64
ChunkCount int64
BlobCount int64
TotalSize int64 // Total size of all referenced files
BlobSize int64 // Total size of all referenced blobs (compressed and encrypted)
BlobUncompressedSize int64 // Total uncompressed size of all referenced blobs
CompressionRatio float64 // Compression ratio (BlobSize / BlobUncompressedSize)
CompressionLevel int // Compression level used for this snapshot
UploadBytes int64 // Total bytes uploaded during this snapshot
UploadDurationMs int64 // Total milliseconds spent uploading to S3
}
// IsComplete returns true if the snapshot has completed
@@ -83,7 +111,7 @@ func (s *Snapshot) IsComplete() bool {
// SnapshotFile represents the mapping between snapshots and files
type SnapshotFile struct {
SnapshotID string
FilePath string
FileID string
}
// SnapshotBlob represents the mapping between snapshots and blobs

View File

@@ -6,6 +6,9 @@ import (
"fmt"
)
// Repositories provides access to all database repositories.
// It serves as a centralized access point for all database operations
// and manages transaction coordination across repositories.
type Repositories struct {
db *DB
Files *FileRepository
@@ -18,6 +21,8 @@ type Repositories struct {
Uploads *UploadRepository
}
// NewRepositories creates a new Repositories instance with all repository types.
// Each repository shares the same database connection for coordinated transactions.
func NewRepositories(db *DB) *Repositories {
return &Repositories{
db: db,
@@ -32,17 +37,16 @@ func NewRepositories(db *DB) *Repositories {
}
}
// TxFunc is a function that executes within a database transaction.
// The transaction is automatically committed if the function returns nil,
// or rolled back if it returns an error.
type TxFunc func(ctx context.Context, tx *sql.Tx) error
// WithTx executes a function within a write transaction.
// SQLite handles its own locking internally, so no explicit locking is needed.
// The transaction is automatically committed on success or rolled back on error.
// This method should be used for all write operations to ensure atomicity.
func (r *Repositories) WithTx(ctx context.Context, fn TxFunc) error {
// Acquire write lock for the entire transaction
LogSQL("WithTx", "Acquiring write lock", "")
r.db.LockForWrite()
defer func() {
LogSQL("WithTx", "Releasing write lock", "")
r.db.UnlockWrite()
}()
LogSQL("WithTx", "Beginning transaction", "")
tx, err := r.db.BeginTx(ctx, nil)
if err != nil {
@@ -71,6 +75,10 @@ func (r *Repositories) WithTx(ctx context.Context, fn TxFunc) error {
return tx.Commit()
}
// WithReadTx executes a function within a read-only transaction.
// Read transactions can run concurrently with other read transactions
// but will be blocked by write transactions. The transaction is
// automatically committed on success or rolled back on error.
func (r *Repositories) WithReadTx(ctx context.Context, fn TxFunc) error {
opts := &sql.TxOptions{
ReadOnly: true,

View File

@@ -52,7 +52,7 @@ func TestRepositoriesTransaction(t *testing.T) {
// Map chunks to file
fc1 := &FileChunk{
Path: file.Path,
FileID: file.ID,
Idx: 0,
ChunkHash: chunk1.ChunkHash,
}
@@ -61,7 +61,7 @@ func TestRepositoriesTransaction(t *testing.T) {
}
fc2 := &FileChunk{
Path: file.Path,
FileID: file.ID,
Idx: 1,
ChunkHash: chunk2.ChunkHash,
}
@@ -116,7 +116,7 @@ func TestRepositoriesTransaction(t *testing.T) {
t.Error("expected file after transaction")
}
chunks, err := repos.FileChunks.GetByPath(ctx, "/test/tx_file.txt")
chunks, err := repos.FileChunks.GetByFile(ctx, "/test/tx_file.txt")
if err != nil {
t.Fatalf("failed to get file chunks: %v", err)
}
@@ -218,7 +218,7 @@ func TestRepositoriesReadTransaction(t *testing.T) {
var retrievedFile *File
err = repos.WithReadTx(ctx, func(ctx context.Context, tx *sql.Tx) error {
var err error
retrievedFile, err = repos.Files.GetByPath(ctx, "/test/read_file.txt")
retrievedFile, err = repos.Files.GetByPathTx(ctx, tx, "/test/read_file.txt")
if err != nil {
return err
}

View File

@@ -0,0 +1,876 @@
package database
import (
"context"
"database/sql"
"fmt"
"testing"
"time"
)
// TestFileRepositoryUUIDGeneration tests that files get unique UUIDs
func TestFileRepositoryUUIDGeneration(t *testing.T) {
	db, cleanup := setupTestDB(t)
	defer cleanup()

	ctx := context.Background()
	repo := NewFileRepository(db)

	now := time.Now().Truncate(time.Second)
	testFiles := []*File{
		{Path: "/file1.txt", MTime: now, CTime: now, Size: 1024, Mode: 0644, UID: 1000, GID: 1000},
		{Path: "/file2.txt", MTime: now, CTime: now, Size: 2048, Mode: 0644, UID: 1000, GID: 1000},
	}

	seen := make(map[string]struct{}, len(testFiles))
	for _, f := range testFiles {
		if err := repo.Create(ctx, nil, f); err != nil {
			t.Fatalf("failed to create file: %v", err)
		}

		// Every created file must receive a generated, unique UUID.
		if f.ID == "" {
			t.Error("file ID was not generated")
		}
		if _, dup := seen[f.ID]; dup {
			t.Errorf("duplicate UUID generated: %s", f.ID)
		}
		seen[f.ID] = struct{}{}
	}
}
// TestFileRepositoryGetByID tests retrieving files by UUID.
// It covers both the found case and the not-found case, where GetByID
// returns (nil, nil).
func TestFileRepositoryGetByID(t *testing.T) {
	db, cleanup := setupTestDB(t)
	defer cleanup()

	ctx := context.Background()
	repo := NewFileRepository(db)

	// Create a file
	file := &File{
		Path:  "/test.txt",
		MTime: time.Now().Truncate(time.Second),
		CTime: time.Now().Truncate(time.Second),
		Size:  1024,
		Mode:  0644,
		UID:   1000,
		GID:   1000,
	}
	err := repo.Create(ctx, nil, file)
	if err != nil {
		t.Fatalf("failed to create file: %v", err)
	}

	// Retrieve by ID
	retrieved, err := repo.GetByID(ctx, file.ID)
	if err != nil {
		t.Fatalf("failed to get file by ID: %v", err)
	}
	// GetByID returns (nil, nil) when no row matches; guard before
	// dereferencing so a missing row fails the test instead of panicking.
	if retrieved == nil {
		t.Fatal("expected file, got nil")
	}
	if retrieved.ID != file.ID {
		t.Errorf("ID mismatch: expected %s, got %s", file.ID, retrieved.ID)
	}
	if retrieved.Path != file.Path {
		t.Errorf("Path mismatch: expected %s, got %s", file.Path, retrieved.Path)
	}

	// Test non-existent ID
	nonExistent, err := repo.GetByID(ctx, "non-existent-uuid")
	if err != nil {
		t.Fatalf("GetByID should not return error for non-existent ID: %v", err)
	}
	if nonExistent != nil {
		t.Error("expected nil for non-existent ID")
	}
}
// TestOrphanedFileCleanup tests the cleanup of orphaned files
func TestOrphanedFileCleanup(t *testing.T) {
	db, cleanup := setupTestDB(t)
	defer cleanup()

	ctx := context.Background()
	repos := NewRepositories(db)

	ts := time.Now().Truncate(time.Second)
	newFile := func(path string, size int64) *File {
		return &File{
			Path:  path,
			MTime: ts,
			CTime: ts,
			Size:  size,
			Mode:  0644,
			UID:   1000,
			GID:   1000,
		}
	}

	// One file stays unreferenced; the other gets attached to a snapshot.
	orphan := newFile("/orphaned.txt", 1024)
	kept := newFile("/referenced.txt", 2048)

	if err := repos.Files.Create(ctx, nil, orphan); err != nil {
		t.Fatalf("failed to create file1: %v", err)
	}
	if err := repos.Files.Create(ctx, nil, kept); err != nil {
		t.Fatalf("failed to create file2: %v", err)
	}

	// Create a snapshot that references only the second file.
	snapshot := &Snapshot{
		ID:        "test-snapshot",
		Hostname:  "test-host",
		StartedAt: time.Now(),
	}
	if err := repos.Snapshots.Create(ctx, nil, snapshot); err != nil {
		t.Fatalf("failed to create snapshot: %v", err)
	}
	if err := repos.Snapshots.AddFileByID(ctx, nil, snapshot.ID, kept.ID); err != nil {
		t.Fatalf("failed to add file to snapshot: %v", err)
	}

	// Run the orphaned-file cleanup.
	if err := repos.Files.DeleteOrphaned(ctx); err != nil {
		t.Fatalf("failed to delete orphaned files: %v", err)
	}

	// The unreferenced file must be gone.
	got, err := repos.Files.GetByID(ctx, orphan.ID)
	if err != nil {
		t.Fatalf("error getting file: %v", err)
	}
	if got != nil {
		t.Error("orphaned file should have been deleted")
	}

	// The snapshot-referenced file must survive.
	got, err = repos.Files.GetByID(ctx, kept.ID)
	if err != nil {
		t.Fatalf("error getting file: %v", err)
	}
	if got == nil {
		t.Error("referenced file should not have been deleted")
	}
}
// TestOrphanedChunkCleanup tests the cleanup of orphaned chunks
func TestOrphanedChunkCleanup(t *testing.T) {
	db, cleanup := setupTestDB(t)
	defer cleanup()

	ctx := context.Background()
	repos := NewRepositories(db)

	// Two chunks: one stays unreferenced, the other gets a file mapping.
	orphaned := &Chunk{
		ChunkHash: "orphaned-chunk",
		SHA256:    "orphaned-chunk-sha",
		Size:      1024,
	}
	referenced := &Chunk{
		ChunkHash: "referenced-chunk",
		SHA256:    "referenced-chunk-sha",
		Size:      1024,
	}
	if err := repos.Chunks.Create(ctx, nil, orphaned); err != nil {
		t.Fatalf("failed to create chunk1: %v", err)
	}
	if err := repos.Chunks.Create(ctx, nil, referenced); err != nil {
		t.Fatalf("failed to create chunk2: %v", err)
	}

	// A file whose single mapping points at the referenced chunk only.
	file := &File{
		Path:  "/test.txt",
		MTime: time.Now().Truncate(time.Second),
		CTime: time.Now().Truncate(time.Second),
		Size:  1024,
		Mode:  0644,
		UID:   1000,
		GID:   1000,
	}
	if err := repos.Files.Create(ctx, nil, file); err != nil {
		t.Fatalf("failed to create file: %v", err)
	}
	mapping := &FileChunk{
		FileID:    file.ID,
		Idx:       0,
		ChunkHash: referenced.ChunkHash,
	}
	if err := repos.FileChunks.Create(ctx, nil, mapping); err != nil {
		t.Fatalf("failed to create file chunk: %v", err)
	}

	// Cleanup must remove only the chunk no file references.
	if err := repos.Chunks.DeleteOrphaned(ctx); err != nil {
		t.Fatalf("failed to delete orphaned chunks: %v", err)
	}

	gone, err := repos.Chunks.GetByHash(ctx, orphaned.ChunkHash)
	if err != nil {
		t.Fatalf("error getting chunk: %v", err)
	}
	if gone != nil {
		t.Error("orphaned chunk should have been deleted")
	}

	kept, err := repos.Chunks.GetByHash(ctx, referenced.ChunkHash)
	if err != nil {
		t.Fatalf("error getting chunk: %v", err)
	}
	if kept == nil {
		t.Error("referenced chunk should not have been deleted")
	}
}
// TestOrphanedBlobCleanup tests the cleanup of orphaned blobs
func TestOrphanedBlobCleanup(t *testing.T) {
	db, cleanup := setupTestDB(t)
	defer cleanup()

	ctx := context.Background()
	repos := NewRepositories(db)

	// Two blobs: one left unreferenced, one attached to a snapshot.
	orphaned := &Blob{
		ID:        "orphaned-blob-id",
		Hash:      "orphaned-blob",
		CreatedTS: time.Now().Truncate(time.Second),
	}
	referenced := &Blob{
		ID:        "referenced-blob-id",
		Hash:      "referenced-blob",
		CreatedTS: time.Now().Truncate(time.Second),
	}
	if err := repos.Blobs.Create(ctx, nil, orphaned); err != nil {
		t.Fatalf("failed to create blob1: %v", err)
	}
	if err := repos.Blobs.Create(ctx, nil, referenced); err != nil {
		t.Fatalf("failed to create blob2: %v", err)
	}

	// Only the second blob is referenced by a snapshot.
	snap := &Snapshot{
		ID:        "test-snapshot",
		Hostname:  "test-host",
		StartedAt: time.Now(),
	}
	if err := repos.Snapshots.Create(ctx, nil, snap); err != nil {
		t.Fatalf("failed to create snapshot: %v", err)
	}
	if err := repos.Snapshots.AddBlob(ctx, nil, snap.ID, referenced.ID, referenced.Hash); err != nil {
		t.Fatalf("failed to add blob to snapshot: %v", err)
	}

	// Cleanup must remove only the unreferenced blob.
	if err := repos.Blobs.DeleteOrphaned(ctx); err != nil {
		t.Fatalf("failed to delete orphaned blobs: %v", err)
	}

	gone, err := repos.Blobs.GetByID(ctx, orphaned.ID)
	if err != nil {
		t.Fatalf("error getting blob: %v", err)
	}
	if gone != nil {
		t.Error("orphaned blob should have been deleted")
	}

	kept, err := repos.Blobs.GetByID(ctx, referenced.ID)
	if err != nil {
		t.Fatalf("error getting blob: %v", err)
	}
	if kept == nil {
		t.Error("referenced blob should not have been deleted")
	}
}
// TestFileChunkRepositoryWithUUIDs tests file-chunk relationships with UUIDs
func TestFileChunkRepositoryWithUUIDs(t *testing.T) {
	db, cleanup := setupTestDB(t)
	defer cleanup()

	ctx := context.Background()
	repos := NewRepositories(db)

	// One file split into three sequential chunks.
	file := &File{
		Path:  "/test.txt",
		MTime: time.Now().Truncate(time.Second),
		CTime: time.Now().Truncate(time.Second),
		Size:  3072,
		Mode:  0644,
		UID:   1000,
		GID:   1000,
	}
	if err := repos.Files.Create(ctx, nil, file); err != nil {
		t.Fatalf("failed to create file: %v", err)
	}

	hashes := []string{"chunk1", "chunk2", "chunk3"}
	for idx, hash := range hashes {
		if err := repos.Chunks.Create(ctx, nil, &Chunk{
			ChunkHash: hash,
			SHA256:    fmt.Sprintf("sha-%s", hash),
			Size:      1024,
		}); err != nil {
			t.Fatalf("failed to create chunk: %v", err)
		}
		if err := repos.FileChunks.Create(ctx, nil, &FileChunk{
			FileID:    file.ID,
			Idx:       idx,
			ChunkHash: hash,
		}); err != nil {
			t.Fatalf("failed to create file chunk: %v", err)
		}
	}

	// All three mappings should come back for the file's UUID.
	fileChunks, err := repos.FileChunks.GetByFileID(ctx, file.ID)
	if err != nil {
		t.Fatalf("failed to get file chunks: %v", err)
	}
	if len(fileChunks) != 3 {
		t.Errorf("expected 3 chunks, got %d", len(fileChunks))
	}

	// Deleting by file UUID should remove every mapping.
	if err := repos.FileChunks.DeleteByFileID(ctx, nil, file.ID); err != nil {
		t.Fatalf("failed to delete file chunks: %v", err)
	}
	fileChunks, err = repos.FileChunks.GetByFileID(ctx, file.ID)
	if err != nil {
		t.Fatalf("failed to get file chunks after delete: %v", err)
	}
	if len(fileChunks) != 0 {
		t.Errorf("expected 0 chunks after delete, got %d", len(fileChunks))
	}
}
// TestChunkFileRepositoryWithUUIDs tests chunk-file relationships with UUIDs
func TestChunkFileRepositoryWithUUIDs(t *testing.T) {
	db, cleanup := setupTestDB(t)
	defer cleanup()

	ctx := context.Background()
	repos := NewRepositories(db)

	// Two files that will share a single deduplicated chunk.
	fileA := &File{
		Path:  "/file1.txt",
		MTime: time.Now().Truncate(time.Second),
		CTime: time.Now().Truncate(time.Second),
		Size:  1024,
		Mode:  0644,
		UID:   1000,
		GID:   1000,
	}
	fileB := &File{
		Path:  "/file2.txt",
		MTime: time.Now().Truncate(time.Second),
		CTime: time.Now().Truncate(time.Second),
		Size:  1024,
		Mode:  0644,
		UID:   1000,
		GID:   1000,
	}
	if err := repos.Files.Create(ctx, nil, fileA); err != nil {
		t.Fatalf("failed to create file1: %v", err)
	}
	if err := repos.Files.Create(ctx, nil, fileB); err != nil {
		t.Fatalf("failed to create file2: %v", err)
	}

	// A single chunk that appears in both files (deduplication).
	shared := &Chunk{
		ChunkHash: "shared-chunk",
		SHA256:    "shared-chunk-sha",
		Size:      1024,
	}
	if err := repos.Chunks.Create(ctx, nil, shared); err != nil {
		t.Fatalf("failed to create chunk: %v", err)
	}

	// Map the chunk into both files at different offsets.
	first := &ChunkFile{
		ChunkHash:  shared.ChunkHash,
		FileID:     fileA.ID,
		FileOffset: 0,
		Length:     1024,
	}
	second := &ChunkFile{
		ChunkHash:  shared.ChunkHash,
		FileID:     fileB.ID,
		FileOffset: 512,
		Length:     1024,
	}
	if err := repos.ChunkFiles.Create(ctx, nil, first); err != nil {
		t.Fatalf("failed to create chunk file 1: %v", err)
	}
	if err := repos.ChunkFiles.Create(ctx, nil, second); err != nil {
		t.Fatalf("failed to create chunk file 2: %v", err)
	}

	// Lookup by chunk hash should report both containing files.
	byChunk, err := repos.ChunkFiles.GetByChunkHash(ctx, shared.ChunkHash)
	if err != nil {
		t.Fatalf("failed to get chunk files: %v", err)
	}
	if len(byChunk) != 2 {
		t.Errorf("expected 2 files for chunk, got %d", len(byChunk))
	}

	// Lookup by file UUID should report the single shared chunk.
	byFile, err := repos.ChunkFiles.GetByFileID(ctx, fileA.ID)
	if err != nil {
		t.Fatalf("failed to get chunks by file ID: %v", err)
	}
	if len(byFile) != 1 {
		t.Errorf("expected 1 chunk for file, got %d", len(byFile))
	}
}
// TestSnapshotRepositoryExtendedFields tests snapshot with version and git revision
func TestSnapshotRepositoryExtendedFields(t *testing.T) {
	db, cleanup := setupTestDB(t)
	defer cleanup()

	ctx := context.Background()
	repo := NewSnapshotRepository(db)

	// A snapshot populated with every extended metadata field.
	want := &Snapshot{
		ID:                   "test-20250722-120000Z",
		Hostname:             "test-host",
		VaultikVersion:       "0.0.1",
		VaultikGitRevision:   "abc123def456",
		StartedAt:            time.Now(),
		CompletedAt:          nil,
		FileCount:            100,
		ChunkCount:           200,
		BlobCount:            50,
		TotalSize:            1024 * 1024,
		BlobSize:             512 * 1024,
		BlobUncompressedSize: 1024 * 1024,
		CompressionLevel:     6,
		CompressionRatio:     2.0,
		UploadDurationMs:     5000,
	}
	if err := repo.Create(ctx, nil, want); err != nil {
		t.Fatalf("failed to create snapshot: %v", err)
	}

	// Round-trip and verify the extended fields survived storage.
	got, err := repo.GetByID(ctx, want.ID)
	if err != nil {
		t.Fatalf("failed to get snapshot: %v", err)
	}
	if got.VaultikVersion != want.VaultikVersion {
		t.Errorf("version mismatch: expected %s, got %s", want.VaultikVersion, got.VaultikVersion)
	}
	if got.VaultikGitRevision != want.VaultikGitRevision {
		t.Errorf("git revision mismatch: expected %s, got %s", want.VaultikGitRevision, got.VaultikGitRevision)
	}
	if got.CompressionLevel != want.CompressionLevel {
		t.Errorf("compression level mismatch: expected %d, got %d", want.CompressionLevel, got.CompressionLevel)
	}
	if got.BlobUncompressedSize != want.BlobUncompressedSize {
		t.Errorf("uncompressed size mismatch: expected %d, got %d", want.BlobUncompressedSize, got.BlobUncompressedSize)
	}
	if got.UploadDurationMs != want.UploadDurationMs {
		t.Errorf("upload duration mismatch: expected %d, got %d", want.UploadDurationMs, got.UploadDurationMs)
	}
}
// TestComplexOrphanedDataScenario tests a complex scenario with multiple relationships
//
// Two snapshots share one file; after deleting snapshot1, DeleteOrphaned must
// remove exactly the files that no surviving snapshot references.
func TestComplexOrphanedDataScenario(t *testing.T) {
	db, cleanup := setupTestDB(t)
	defer cleanup()
	ctx := context.Background()
	repos := NewRepositories(db)
	// Create snapshots
	snapshot1 := &Snapshot{
		ID:        "snapshot1",
		Hostname:  "host1",
		StartedAt: time.Now(),
	}
	snapshot2 := &Snapshot{
		ID:        "snapshot2",
		Hostname:  "host1",
		StartedAt: time.Now(),
	}
	err := repos.Snapshots.Create(ctx, nil, snapshot1)
	if err != nil {
		t.Fatalf("failed to create snapshot1: %v", err)
	}
	err = repos.Snapshots.Create(ctx, nil, snapshot2)
	if err != nil {
		t.Fatalf("failed to create snapshot2: %v", err)
	}
	// Create files
	// IDs are assigned by Create (UUID primary keys), so files[i].ID is
	// usable immediately after each Create call below.
	files := make([]*File, 3)
	for i := range files {
		files[i] = &File{
			Path:  fmt.Sprintf("/file%d.txt", i),
			MTime: time.Now().Truncate(time.Second),
			CTime: time.Now().Truncate(time.Second),
			Size:  1024,
			Mode:  0644,
			UID:   1000,
			GID:   1000,
		}
		err = repos.Files.Create(ctx, nil, files[i])
		if err != nil {
			t.Fatalf("failed to create file%d: %v", i, err)
		}
	}
	// Add files to snapshots
	// Snapshot1: file0, file1
	// Snapshot2: file1, file2
	// file0: only in snapshot1
	// file1: in both snapshots
	// file2: only in snapshot2
	err = repos.Snapshots.AddFileByID(ctx, nil, snapshot1.ID, files[0].ID)
	if err != nil {
		t.Fatal(err)
	}
	err = repos.Snapshots.AddFileByID(ctx, nil, snapshot1.ID, files[1].ID)
	if err != nil {
		t.Fatal(err)
	}
	err = repos.Snapshots.AddFileByID(ctx, nil, snapshot2.ID, files[1].ID)
	if err != nil {
		t.Fatal(err)
	}
	err = repos.Snapshots.AddFileByID(ctx, nil, snapshot2.ID, files[2].ID)
	if err != nil {
		t.Fatal(err)
	}
	// Delete snapshot1
	// Membership rows are removed first, then the snapshot row itself.
	err = repos.Snapshots.DeleteSnapshotFiles(ctx, snapshot1.ID)
	if err != nil {
		t.Fatal(err)
	}
	err = repos.Snapshots.Delete(ctx, snapshot1.ID)
	if err != nil {
		t.Fatal(err)
	}
	// Run orphaned cleanup
	err = repos.Files.DeleteOrphaned(ctx)
	if err != nil {
		t.Fatal(err)
	}
	// Check results
	// file0 should be deleted (only in deleted snapshot)
	file0, err := repos.Files.GetByID(ctx, files[0].ID)
	if err != nil {
		t.Fatalf("error getting file0: %v", err)
	}
	if file0 != nil {
		t.Error("file0 should have been deleted")
	}
	// file1 should exist (still in snapshot2)
	file1, err := repos.Files.GetByID(ctx, files[1].ID)
	if err != nil {
		t.Fatalf("error getting file1: %v", err)
	}
	if file1 == nil {
		t.Error("file1 should still exist")
	}
	// file2 should exist (still in snapshot2)
	file2, err := repos.Files.GetByID(ctx, files[2].ID)
	if err != nil {
		t.Fatalf("error getting file2: %v", err)
	}
	if file2 == nil {
		t.Error("file2 should still exist")
	}
}
// TestCascadeDelete tests that cascade deletes work properly
func TestCascadeDelete(t *testing.T) {
	db, cleanup := setupTestDB(t)
	defer cleanup()

	ctx := context.Background()
	repos := NewRepositories(db)

	// A file with three chunk mappings hanging off it.
	file := &File{
		Path:  "/cascade-test.txt",
		MTime: time.Now().Truncate(time.Second),
		CTime: time.Now().Truncate(time.Second),
		Size:  1024,
		Mode:  0644,
		UID:   1000,
		GID:   1000,
	}
	if err := repos.Files.Create(ctx, nil, file); err != nil {
		t.Fatalf("failed to create file: %v", err)
	}
	for idx := 0; idx < 3; idx++ {
		hash := fmt.Sprintf("cascade-chunk-%d", idx)
		if err := repos.Chunks.Create(ctx, nil, &Chunk{
			ChunkHash: hash,
			SHA256:    fmt.Sprintf("cascade-sha-%d", idx),
			Size:      1024,
		}); err != nil {
			t.Fatalf("failed to create chunk: %v", err)
		}
		if err := repos.FileChunks.Create(ctx, nil, &FileChunk{
			FileID:    file.ID,
			Idx:       idx,
			ChunkHash: hash,
		}); err != nil {
			t.Fatalf("failed to create file chunk: %v", err)
		}
	}

	// Sanity check: the mappings exist before the delete.
	fileChunks, err := repos.FileChunks.GetByFileID(ctx, file.ID)
	if err != nil {
		t.Fatal(err)
	}
	if len(fileChunks) != 3 {
		t.Errorf("expected 3 file chunks, got %d", len(fileChunks))
	}

	// Deleting the file must cascade to its file_chunks rows.
	if err := repos.Files.DeleteByID(ctx, nil, file.ID); err != nil {
		t.Fatalf("failed to delete file: %v", err)
	}
	fileChunks, err = repos.FileChunks.GetByFileID(ctx, file.ID)
	if err != nil {
		t.Fatal(err)
	}
	if len(fileChunks) != 0 {
		t.Errorf("expected 0 file chunks after cascade delete, got %d", len(fileChunks))
	}
}
// TestTransactionIsolation tests that transactions properly isolate changes
func TestTransactionIsolation(t *testing.T) {
	db, cleanup := setupTestDB(t)
	defer cleanup()

	ctx := context.Background()
	repos := NewRepositories(db)

	// Create a file inside a transaction, then force a rollback by
	// returning an error from the transaction body.
	txErr := repos.WithTx(ctx, func(ctx context.Context, tx *sql.Tx) error {
		f := &File{
			Path:  "/tx-test.txt",
			MTime: time.Now().Truncate(time.Second),
			CTime: time.Now().Truncate(time.Second),
			Size:  1024,
			Mode:  0644,
			UID:   1000,
			GID:   1000,
		}
		if err := repos.Files.Create(ctx, tx, f); err != nil {
			return err
		}
		// Querying inside the tx would require a tx-aware GetByPath; for
		// now we only verify that rollback discards the insert.
		return fmt.Errorf("intentional rollback")
	})
	if txErr == nil {
		t.Fatal("expected error from transaction")
	}

	// After rollback the file must not be visible outside the transaction.
	files, err := repos.Files.ListByPrefix(ctx, "/tx-test")
	if err != nil {
		t.Fatal(err)
	}
	if len(files) != 0 {
		t.Error("file should not exist after rollback")
	}
}
// TestConcurrentOrphanedCleanup tests that concurrent cleanup operations don't interfere
//
// Creates 20 files, references only the even-numbered ones from a snapshot,
// then runs DeleteOrphaned from three goroutines at once. With the SQLite
// busy timeout set, all three calls must succeed, and exactly the
// odd-numbered (unreferenced) files must be gone afterwards.
func TestConcurrentOrphanedCleanup(t *testing.T) {
	db, cleanup := setupTestDB(t)
	defer cleanup()
	ctx := context.Background()
	repos := NewRepositories(db)
	// Set a 5-second busy timeout so concurrent writers wait for the lock
	// instead of failing immediately with SQLITE_BUSY.
	if _, err := db.conn.Exec("PRAGMA busy_timeout = 5000"); err != nil {
		t.Fatalf("failed to set busy timeout: %v", err)
	}
	// Create a snapshot
	snapshot := &Snapshot{
		ID:        "concurrent-test",
		Hostname:  "test-host",
		StartedAt: time.Now(),
	}
	err := repos.Snapshots.Create(ctx, nil, snapshot)
	if err != nil {
		t.Fatal(err)
	}
	// Create many files; only even-numbered ones are referenced by the
	// snapshot, so the odd-numbered half is orphaned.
	for i := 0; i < 20; i++ {
		file := &File{
			Path:  fmt.Sprintf("/concurrent-%d.txt", i),
			MTime: time.Now().Truncate(time.Second),
			CTime: time.Now().Truncate(time.Second),
			Size:  1024,
			Mode:  0644,
			UID:   1000,
			GID:   1000,
		}
		err = repos.Files.Create(ctx, nil, file)
		if err != nil {
			t.Fatal(err)
		}
		if i%2 == 0 {
			err = repos.Snapshots.AddFileByID(ctx, nil, snapshot.ID, file.ID)
			if err != nil {
				t.Fatal(err)
			}
		}
	}
	// Run multiple cleanup operations concurrently. With the busy timeout
	// in place every call is expected to succeed.
	done := make(chan error, 3)
	for i := 0; i < 3; i++ {
		go func() {
			done <- repos.Files.DeleteOrphaned(ctx)
		}()
	}
	// Wait for all to complete
	for i := 0; i < 3; i++ {
		if err := <-done; err != nil {
			t.Errorf("cleanup %d failed: %v", i, err)
		}
	}
	// Verify correct files were deleted: 10 even-numbered files remain.
	files, err := repos.Files.ListByPrefix(ctx, "/concurrent-")
	if err != nil {
		t.Fatal(err)
	}
	if len(files) != 10 {
		t.Errorf("expected 10 files remaining, got %d", len(files))
	}
	// Verify all remaining files are even-numbered.
	for _, file := range files {
		var num int
		// BUG FIX: a parse failure was previously only logged, leaving num
		// at its zero value (0, which is even) so a malformed path silently
		// passed the parity check. Fail the test and skip the check instead.
		if _, err := fmt.Sscanf(file.Path, "/concurrent-%d.txt", &num); err != nil {
			t.Errorf("failed to parse file number from %s: %v", file.Path, err)
			continue
		}
		if num%2 != 0 {
			t.Errorf("odd-numbered file %s should have been deleted", file.Path)
		}
	}
}

View File

@@ -0,0 +1,165 @@
package database
import (
"context"
"testing"
"time"
)
// TestOrphanedFileCleanupDebug tests orphaned file cleanup with debug output
//
// Same scenario as TestOrphanedFileCleanup, but instrumented with t.Logf and
// direct SQL queries against db.conn so the snapshot_files/files state can be
// inspected at every step when the cleanup misbehaves.
func TestOrphanedFileCleanupDebug(t *testing.T) {
	db, cleanup := setupTestDB(t)
	defer cleanup()
	ctx := context.Background()
	repos := NewRepositories(db)
	// Create files
	file1 := &File{
		Path:  "/orphaned.txt",
		MTime: time.Now().Truncate(time.Second),
		CTime: time.Now().Truncate(time.Second),
		Size:  1024,
		Mode:  0644,
		UID:   1000,
		GID:   1000,
	}
	file2 := &File{
		Path:  "/referenced.txt",
		MTime: time.Now().Truncate(time.Second),
		CTime: time.Now().Truncate(time.Second),
		Size:  2048,
		Mode:  0644,
		UID:   1000,
		GID:   1000,
	}
	err := repos.Files.Create(ctx, nil, file1)
	if err != nil {
		t.Fatalf("failed to create file1: %v", err)
	}
	t.Logf("Created file1 with ID: %s", file1.ID)
	err = repos.Files.Create(ctx, nil, file2)
	if err != nil {
		t.Fatalf("failed to create file2: %v", err)
	}
	t.Logf("Created file2 with ID: %s", file2.ID)
	// Create a snapshot and reference only file2
	snapshot := &Snapshot{
		ID:        "test-snapshot",
		Hostname:  "test-host",
		StartedAt: time.Now(),
	}
	err = repos.Snapshots.Create(ctx, nil, snapshot)
	if err != nil {
		t.Fatalf("failed to create snapshot: %v", err)
	}
	t.Logf("Created snapshot: %s", snapshot.ID)
	// Check snapshot_files before adding (expected: 0 rows)
	var count int
	err = db.conn.QueryRow("SELECT COUNT(*) FROM snapshot_files").Scan(&count)
	if err != nil {
		t.Fatal(err)
	}
	t.Logf("snapshot_files count before add: %d", count)
	// Add file2 to snapshot
	err = repos.Snapshots.AddFileByID(ctx, nil, snapshot.ID, file2.ID)
	if err != nil {
		t.Fatalf("failed to add file to snapshot: %v", err)
	}
	t.Logf("Added file2 to snapshot")
	// Check snapshot_files after adding (expected: 1 row)
	err = db.conn.QueryRow("SELECT COUNT(*) FROM snapshot_files").Scan(&count)
	if err != nil {
		t.Fatal(err)
	}
	t.Logf("snapshot_files count after add: %d", count)
	// Check which files are referenced
	rows, err := db.conn.Query("SELECT file_id FROM snapshot_files")
	if err != nil {
		t.Fatal(err)
	}
	defer func() {
		if err := rows.Close(); err != nil {
			t.Logf("failed to close rows: %v", err)
		}
	}()
	t.Log("Files in snapshot_files:")
	for rows.Next() {
		var fileID string
		if err := rows.Scan(&fileID); err != nil {
			t.Fatal(err)
		}
		t.Logf(" - %s", fileID)
	}
	// Check files before cleanup (expected: 2)
	err = db.conn.QueryRow("SELECT COUNT(*) FROM files").Scan(&count)
	if err != nil {
		t.Fatal(err)
	}
	t.Logf("Files count before cleanup: %d", count)
	// Run orphaned cleanup
	err = repos.Files.DeleteOrphaned(ctx)
	if err != nil {
		t.Fatalf("failed to delete orphaned files: %v", err)
	}
	t.Log("Ran orphaned cleanup")
	// Check files after cleanup (expected: 1)
	err = db.conn.QueryRow("SELECT COUNT(*) FROM files").Scan(&count)
	if err != nil {
		t.Fatal(err)
	}
	t.Logf("Files count after cleanup: %d", count)
	// List remaining files
	files, err := repos.Files.ListByPrefix(ctx, "/")
	if err != nil {
		t.Fatal(err)
	}
	t.Log("Remaining files:")
	for _, f := range files {
		t.Logf(" - ID: %s, Path: %s", f.ID, f.Path)
	}
	// Check that orphaned file is gone
	orphanedFile, err := repos.Files.GetByID(ctx, file1.ID)
	if err != nil {
		t.Fatalf("error getting file: %v", err)
	}
	if orphanedFile != nil {
		t.Error("orphaned file should have been deleted")
		// Let's check why it wasn't deleted: if file1 shows up in
		// snapshot_files, the AddFileByID/cleanup query is at fault.
		var exists bool
		err = db.conn.QueryRow(`
			SELECT EXISTS(
				SELECT 1 FROM snapshot_files
				WHERE file_id = ?
			)`, file1.ID).Scan(&exists)
		if err != nil {
			t.Fatal(err)
		}
		t.Logf("File1 exists in snapshot_files: %v", exists)
	} else {
		t.Log("Orphaned file was correctly deleted")
	}
	// Check that referenced file still exists
	referencedFile, err := repos.Files.GetByID(ctx, file2.ID)
	if err != nil {
		t.Fatalf("error getting file: %v", err)
	}
	if referencedFile == nil {
		t.Error("referenced file should not have been deleted")
	} else {
		t.Log("Referenced file correctly remains")
	}
}

View File

@@ -0,0 +1,543 @@
package database
import (
"context"
"fmt"
"strings"
"testing"
"time"
)
// TestFileRepositoryEdgeCases tests edge cases for file repository
//
// Table-driven creation test covering unusual but valid File values: empty
// path, very long path, non-ASCII path, zero-size file, and a symlink with a
// link target. None are expected to error; errMsg is reserved for future
// cases that should fail with a specific message (currently unused).
func TestFileRepositoryEdgeCases(t *testing.T) {
	db, cleanup := setupTestDB(t)
	defer cleanup()
	ctx := context.Background()
	repo := NewFileRepository(db)
	tests := []struct {
		name    string
		file    *File
		wantErr bool
		errMsg  string
	}{
		{
			name: "empty path",
			file: &File{
				Path:  "",
				MTime: time.Now(),
				CTime: time.Now(),
				Size:  1024,
				Mode:  0644,
				UID:   1000,
				GID:   1000,
			},
			wantErr: false, // Empty strings are allowed, only NULL is not allowed
		},
		{
			name: "very long path",
			file: &File{
				Path:  "/" + strings.Repeat("a", 4096),
				MTime: time.Now(),
				CTime: time.Now(),
				Size:  1024,
				Mode:  0644,
				UID:   1000,
				GID:   1000,
			},
			wantErr: false,
		},
		{
			name: "path with special characters",
			file: &File{
				Path:  "/test/file with spaces and 特殊文字.txt",
				MTime: time.Now(),
				CTime: time.Now(),
				Size:  1024,
				Mode:  0644,
				UID:   1000,
				GID:   1000,
			},
			wantErr: false,
		},
		{
			name: "zero size file",
			file: &File{
				Path:  "/empty.txt",
				MTime: time.Now(),
				CTime: time.Now(),
				Size:  0,
				Mode:  0644,
				UID:   1000,
				GID:   1000,
			},
			wantErr: false,
		},
		{
			name: "symlink with target",
			file: &File{
				Path:  "/link",
				MTime: time.Now(),
				CTime: time.Now(),
				Size:  0,
				Mode:  0777 | 0120000, // symlink mode
				UID:   1000,
				GID:   1000,
				LinkTarget: "/target",
			},
			wantErr: false,
		},
	}
	for i, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			// Add a unique suffix to paths to avoid UNIQUE constraint violations
			// (this mutates the shared test-case struct, which is fine because
			// each case is only run once).
			if tt.file.Path != "" {
				tt.file.Path = fmt.Sprintf("%s_%d_%d", tt.file.Path, i, time.Now().UnixNano())
			}
			err := repo.Create(ctx, nil, tt.file)
			if (err != nil) != tt.wantErr {
				t.Errorf("Create() error = %v, wantErr %v", err, tt.wantErr)
			}
			if err != nil && tt.errMsg != "" && !strings.Contains(err.Error(), tt.errMsg) {
				t.Errorf("Create() error = %v, want error containing %q", err, tt.errMsg)
			}
		})
	}
}
// TestDuplicateHandling tests handling of duplicate entries
//
// Verifies the repositories' conflict semantics: files upsert on duplicate
// path, while chunks and file-chunk mappings are idempotent on duplicate
// create (ON CONFLICT DO NOTHING per the comments below).
func TestDuplicateHandling(t *testing.T) {
	db, cleanup := setupTestDB(t)
	defer cleanup()
	ctx := context.Background()
	repos := NewRepositories(db)
	// Test duplicate file paths - Create uses UPSERT logic
	t.Run("duplicate file paths", func(t *testing.T) {
		file1 := &File{
			Path:  "/duplicate.txt",
			MTime: time.Now(),
			CTime: time.Now(),
			Size:  1024,
			Mode:  0644,
			UID:   1000,
			GID:   1000,
		}
		file2 := &File{
			Path:  "/duplicate.txt", // Same path
			MTime: time.Now().Add(time.Hour),
			CTime: time.Now().Add(time.Hour),
			Size:  2048,
			Mode:  0644,
			UID:   1000,
			GID:   1000,
		}
		err := repos.Files.Create(ctx, nil, file1)
		if err != nil {
			t.Fatalf("failed to create file1: %v", err)
		}
		originalID := file1.ID
		// Create with same path should update the existing record (UPSERT behavior)
		err = repos.Files.Create(ctx, nil, file2)
		if err != nil {
			t.Fatalf("failed to create file2: %v", err)
		}
		// Verify the file was updated, not duplicated
		retrievedFile, err := repos.Files.GetByPath(ctx, "/duplicate.txt")
		if err != nil {
			t.Fatalf("failed to retrieve file: %v", err)
		}
		// The file should have been updated with file2's data
		if retrievedFile.Size != 2048 {
			t.Errorf("expected size 2048, got %d", retrievedFile.Size)
		}
		// ID might be different due to the UPSERT
		// NOTE(review): this compares against file2.ID, assuming Create set
		// file2.ID during the upsert — confirm against the repository impl.
		if retrievedFile.ID != file2.ID {
			t.Logf("File ID changed from %s to %s during upsert", originalID, retrievedFile.ID)
		}
	})
	// Test duplicate chunk hashes
	t.Run("duplicate chunk hashes", func(t *testing.T) {
		chunk := &Chunk{
			ChunkHash: "duplicate-chunk",
			SHA256:    "duplicate-sha",
			Size:      1024,
		}
		err := repos.Chunks.Create(ctx, nil, chunk)
		if err != nil {
			t.Fatalf("failed to create chunk: %v", err)
		}
		// Creating the same chunk again should be idempotent (ON CONFLICT DO NOTHING)
		err = repos.Chunks.Create(ctx, nil, chunk)
		if err != nil {
			t.Errorf("duplicate chunk creation should be idempotent, got error: %v", err)
		}
	})
	// Test duplicate file-chunk mappings
	t.Run("duplicate file-chunk mappings", func(t *testing.T) {
		file := &File{
			Path:  "/test-dup-fc.txt",
			MTime: time.Now(),
			CTime: time.Now(),
			Size:  1024,
			Mode:  0644,
			UID:   1000,
			GID:   1000,
		}
		err := repos.Files.Create(ctx, nil, file)
		if err != nil {
			t.Fatal(err)
		}
		chunk := &Chunk{
			ChunkHash: "test-chunk-dup",
			SHA256:    "test-sha-dup",
			Size:      1024,
		}
		err = repos.Chunks.Create(ctx, nil, chunk)
		if err != nil {
			t.Fatal(err)
		}
		fc := &FileChunk{
			FileID:    file.ID,
			Idx:       0,
			ChunkHash: chunk.ChunkHash,
		}
		err = repos.FileChunks.Create(ctx, nil, fc)
		if err != nil {
			t.Fatal(err)
		}
		// Creating the same mapping again should be idempotent
		err = repos.FileChunks.Create(ctx, nil, fc)
		if err != nil {
			t.Error("file-chunk creation should be idempotent")
		}
	})
}
// TestNullHandling tests handling of NULL values
func TestNullHandling(t *testing.T) {
	db, cleanup := setupTestDB(t)
	defer cleanup()

	ctx := context.Background()
	repos := NewRepositories(db)

	// A regular file's empty link target round-trips as the empty string.
	t.Run("file without link target", func(t *testing.T) {
		f := &File{
			Path:       "/regular.txt",
			MTime:      time.Now(),
			CTime:      time.Now(),
			Size:       1024,
			Mode:       0644,
			UID:        1000,
			GID:        1000,
			LinkTarget: "", // Should be stored as NULL
		}
		if err := repos.Files.Create(ctx, nil, f); err != nil {
			t.Fatal(err)
		}
		got, err := repos.Files.GetByID(ctx, f.ID)
		if err != nil {
			t.Fatal(err)
		}
		if got.LinkTarget != "" {
			t.Errorf("expected empty link target, got %q", got.LinkTarget)
		}
	})

	// A snapshot that has not finished keeps a nil CompletedAt.
	t.Run("incomplete snapshot", func(t *testing.T) {
		snap := &Snapshot{
			ID:          "incomplete-test",
			Hostname:    "test-host",
			StartedAt:   time.Now(),
			CompletedAt: nil, // Should remain NULL until completed
		}
		if err := repos.Snapshots.Create(ctx, nil, snap); err != nil {
			t.Fatal(err)
		}
		got, err := repos.Snapshots.GetByID(ctx, snap.ID)
		if err != nil {
			t.Fatal(err)
		}
		if got.CompletedAt != nil {
			t.Error("expected nil CompletedAt for incomplete snapshot")
		}
	})

	// A blob that has not been uploaded keeps a nil UploadedTS.
	t.Run("blob not uploaded", func(t *testing.T) {
		b := &Blob{
			ID:         "not-uploaded",
			Hash:       "test-hash",
			CreatedTS:  time.Now(),
			UploadedTS: nil, // Not uploaded yet
		}
		if err := repos.Blobs.Create(ctx, nil, b); err != nil {
			t.Fatal(err)
		}
		got, err := repos.Blobs.GetByID(ctx, b.ID)
		if err != nil {
			t.Fatal(err)
		}
		if got.UploadedTS != nil {
			t.Error("expected nil UploadedTS for non-uploaded blob")
		}
	})
}
// TestLargeDatasets tests operations with large amounts of data
//
// Creates 1000 files (half referenced by a snapshot), then measures bulk
// creation, prefix listing, and orphaned-cleanup timings via t.Logf. Skipped
// under -short because of the row volume.
func TestLargeDatasets(t *testing.T) {
	if testing.Short() {
		t.Skip("skipping large dataset test in short mode")
	}
	db, cleanup := setupTestDB(t)
	defer cleanup()
	ctx := context.Background()
	repos := NewRepositories(db)
	// Create a snapshot
	snapshot := &Snapshot{
		ID:        "large-dataset-test",
		Hostname:  "test-host",
		StartedAt: time.Now(),
	}
	err := repos.Snapshots.Create(ctx, nil, snapshot)
	if err != nil {
		t.Fatal(err)
	}
	// Create many files
	const fileCount = 1000
	fileIDs := make([]string, fileCount)
	t.Run("create many files", func(t *testing.T) {
		start := time.Now()
		for i := 0; i < fileCount; i++ {
			file := &File{
				Path:  fmt.Sprintf("/large/file%05d.txt", i),
				MTime: time.Now(),
				CTime: time.Now(),
				Size:  int64(i * 1024),
				Mode:  0644,
				UID:   uint32(1000 + (i % 10)),
				GID:   uint32(1000 + (i % 10)),
			}
			err := repos.Files.Create(ctx, nil, file)
			if err != nil {
				t.Fatalf("failed to create file %d: %v", i, err)
			}
			fileIDs[i] = file.ID
			// Add half to snapshot (even indices only), so the other half
			// becomes orphaned for the cleanup subtest below.
			if i%2 == 0 {
				err = repos.Snapshots.AddFileByID(ctx, nil, snapshot.ID, file.ID)
				if err != nil {
					t.Fatal(err)
				}
			}
		}
		t.Logf("Created %d files in %v", fileCount, time.Since(start))
	})
	// Test ListByPrefix performance
	t.Run("list by prefix performance", func(t *testing.T) {
		start := time.Now()
		files, err := repos.Files.ListByPrefix(ctx, "/large/")
		if err != nil {
			t.Fatal(err)
		}
		if len(files) != fileCount {
			t.Errorf("expected %d files, got %d", fileCount, len(files))
		}
		t.Logf("Listed %d files in %v", len(files), time.Since(start))
	})
	// Test orphaned cleanup performance
	t.Run("orphaned cleanup performance", func(t *testing.T) {
		start := time.Now()
		err := repos.Files.DeleteOrphaned(ctx)
		if err != nil {
			t.Fatal(err)
		}
		t.Logf("Cleaned up orphaned files in %v", time.Since(start))
		// Verify correct number remain (only the snapshot-referenced half)
		files, err := repos.Files.ListByPrefix(ctx, "/large/")
		if err != nil {
			t.Fatal(err)
		}
		if len(files) != fileCount/2 {
			t.Errorf("expected %d files after cleanup, got %d", fileCount/2, len(files))
		}
	})
}
// TestErrorPropagation tests that errors are properly propagated
//
// Lookups for missing rows must return (nil, nil) rather than an error,
// while constraint violations must surface as inspectable errors.
func TestErrorPropagation(t *testing.T) {
	db, cleanup := setupTestDB(t)
	defer cleanup()
	ctx := context.Background()
	repos := NewRepositories(db)
	// Test GetByID with non-existent ID
	t.Run("GetByID non-existent", func(t *testing.T) {
		file, err := repos.Files.GetByID(ctx, "non-existent-uuid")
		if err != nil {
			t.Errorf("GetByID should not return error for non-existent ID, got: %v", err)
		}
		if file != nil {
			t.Error("expected nil file for non-existent ID")
		}
	})
	// Test GetByPath with non-existent path
	t.Run("GetByPath non-existent", func(t *testing.T) {
		file, err := repos.Files.GetByPath(ctx, "/non/existent/path.txt")
		if err != nil {
			t.Errorf("GetByPath should not return error for non-existent path, got: %v", err)
		}
		if file != nil {
			t.Error("expected nil file for non-existent path")
		}
	})
	// Test invalid foreign key reference
	t.Run("invalid foreign key", func(t *testing.T) {
		fc := &FileChunk{
			FileID:    "non-existent-file-id",
			Idx:       0,
			ChunkHash: "some-chunk",
		}
		err := repos.FileChunks.Create(ctx, nil, fc)
		if err == nil {
			// BUG FIX: this was t.Error, which does not stop the subtest,
			// so err.Error() below would panic with a nil dereference.
			t.Fatal("expected error for invalid foreign key")
		}
		if !strings.Contains(err.Error(), "FOREIGN KEY") {
			t.Errorf("expected foreign key error, got: %v", err)
		}
	})
}
// TestQueryInjection tests that the system is safe from SQL injection
//
// Inserts hostile strings as file paths and verifies the files table
// survives, proving the repository uses parameterized queries rather than
// string-built SQL.
func TestQueryInjection(t *testing.T) {
	db, cleanup := setupTestDB(t)
	defer cleanup()
	ctx := context.Background()
	repos := NewRepositories(db)
	// Test various injection attempts
	injectionTests := []string{
		"'; DROP TABLE files; --",
		"' OR '1'='1",
		"'; DELETE FROM files WHERE '1'='1'; --",
		`test'); DROP TABLE files; --`,
	}
	for i, injection := range injectionTests {
		// BUG FIX: all subtests previously shared the name "injection attempt",
		// which the testing package deduplicates (#01, #02, ...), making -run
		// targeting and failure attribution unreliable. Use unique names.
		t.Run(fmt.Sprintf("injection_%d", i), func(t *testing.T) {
			// Try injection in file path
			file := &File{
				Path:  injection,
				MTime: time.Now(),
				CTime: time.Now(),
				Size:  1024,
				Mode:  0644,
				UID:   1000,
				GID:   1000,
			}
			_ = repos.Files.Create(ctx, nil, file)
			// Should either succeed (treating as normal string) or fail with constraint
			// but should NOT execute the injected SQL
			// Verify tables still exist
			var count int
			err := db.conn.QueryRow("SELECT COUNT(*) FROM files").Scan(&count)
			if err != nil {
				t.Fatal("files table was damaged by injection")
			}
		})
	}
}
// TestTimezoneHandling tests that times are properly handled in UTC
func TestTimezoneHandling(t *testing.T) {
	db, cleanup := setupTestDB(t)
	defer cleanup()

	ctx := context.Background()
	repos := NewRepositories(db)

	loc, err := time.LoadLocation("America/New_York")
	if err != nil {
		t.Skip("timezone not available")
	}

	// Truncate sub-second precision: timestamps are persisted as Unix seconds.
	nyTime := time.Now().In(loc).Truncate(time.Second)
	f := &File{
		Path:  "/timezone-test.txt",
		MTime: nyTime,
		CTime: nyTime,
		Size:  1024,
		Mode:  0644,
		UID:   1000,
		GID:   1000,
	}
	if err := repos.Files.Create(ctx, nil, f); err != nil {
		t.Fatal(err)
	}

	got, err := repos.Files.GetByID(ctx, f.ID)
	if err != nil {
		t.Fatal(err)
	}
	// The stored value must denote the same instant...
	if !got.MTime.Equal(nyTime) {
		t.Error("time was not preserved correctly")
	}
	// ...and come back normalized to UTC.
	if got.MTime.Location() != time.UTC {
		t.Error("retrieved time is not in UTC")
	}
}

View File

@@ -0,0 +1,113 @@
-- Vaultik Database Schema
-- Note: This database does not support migrations. If the schema changes,
-- delete the local database and perform a full backup to recreate it.
-- All timestamp columns store Unix epoch seconds as INTEGER (the Go
-- repository layer writes time.Unix values and reads them back as UTC).

-- Files table: stores metadata about files in the filesystem
CREATE TABLE IF NOT EXISTS files (
    id TEXT PRIMARY KEY, -- UUID
    path TEXT NOT NULL UNIQUE, -- filesystem path; unique so a path maps to one row
    mtime INTEGER NOT NULL, -- modification time, Unix seconds
    ctime INTEGER NOT NULL, -- change time, Unix seconds
    size INTEGER NOT NULL, -- size in bytes
    mode INTEGER NOT NULL, -- POSIX mode bits
    uid INTEGER NOT NULL, -- owning user ID
    gid INTEGER NOT NULL, -- owning group ID
    link_target TEXT -- presumably the symlink target, NULL otherwise -- TODO confirm
);

-- Create index on path for efficient lookups
-- NOTE(review): path already has a UNIQUE constraint, which creates an
-- implicit index in SQLite; this explicit index may be redundant — confirm.
CREATE INDEX IF NOT EXISTS idx_files_path ON files(path);

-- File chunks table: maps files to their constituent chunks
CREATE TABLE IF NOT EXISTS file_chunks (
    file_id TEXT NOT NULL, -- UUID of the owning file
    idx INTEGER NOT NULL, -- ordinal position of the chunk within the file
    chunk_hash TEXT NOT NULL, -- references chunks.chunk_hash (no FK declared)
    PRIMARY KEY (file_id, idx),
    FOREIGN KEY (file_id) REFERENCES files(id) ON DELETE CASCADE
);

-- Chunks table: stores unique content-defined chunks
-- NOTE(review): chunk_hash and sha256 look redundant — confirm whether
-- chunk_hash uses a different algorithm than SHA-256.
CREATE TABLE IF NOT EXISTS chunks (
    chunk_hash TEXT PRIMARY KEY, -- content hash identifying the chunk
    sha256 TEXT NOT NULL, -- SHA-256 of the chunk content
    size INTEGER NOT NULL -- uncompressed chunk size in bytes
);
-- Blobs table: stores packed, compressed, and encrypted blob information.
-- A blob gets a UUID at creation time so chunks can be associated with it
-- while it is still being packed; blob_hash is filled in when finished.
CREATE TABLE IF NOT EXISTS blobs (
    id TEXT PRIMARY KEY, -- UUID, assigned at creation
    blob_hash TEXT UNIQUE, -- content hash; NULL until the blob is finalized
    created_ts INTEGER NOT NULL, -- creation time, Unix seconds
    finished_ts INTEGER, -- packing-finished time; NULL while in progress
    uncompressed_size INTEGER NOT NULL DEFAULT 0, -- total bytes before compression
    compressed_size INTEGER NOT NULL DEFAULT 0, -- total bytes after compression
    uploaded_ts INTEGER -- upload-completed time; NULL until uploaded
);

-- Blob chunks table: maps chunks to the blobs that contain them
-- NOTE(review): unlike the other link tables, this FK has no ON DELETE
-- CASCADE — confirm whether blob deletion is expected to fail while
-- blob_chunks rows exist, or whether cascade was omitted by mistake.
CREATE TABLE IF NOT EXISTS blob_chunks (
    blob_id TEXT NOT NULL, -- UUID of the containing blob
    chunk_hash TEXT NOT NULL, -- chunk stored in the blob
    offset INTEGER NOT NULL, -- byte offset of the chunk within the blob
    length INTEGER NOT NULL, -- byte length of the chunk within the blob
    PRIMARY KEY (blob_id, chunk_hash),
    FOREIGN KEY (blob_id) REFERENCES blobs(id)
);

-- Chunk files table: reverse mapping of chunks to files
CREATE TABLE IF NOT EXISTS chunk_files (
    chunk_hash TEXT NOT NULL, -- the shared chunk
    file_id TEXT NOT NULL, -- UUID of a file containing this chunk
    file_offset INTEGER NOT NULL, -- byte offset of the chunk within the file
    length INTEGER NOT NULL, -- byte length of the chunk within the file
    PRIMARY KEY (chunk_hash, file_id),
    FOREIGN KEY (file_id) REFERENCES files(id) ON DELETE CASCADE
);
-- Snapshots table: tracks backup snapshots.
-- A row with completed_at IS NULL is an incomplete/in-progress snapshot
-- (see GetIncompleteSnapshots in the repository layer).
CREATE TABLE IF NOT EXISTS snapshots (
    id TEXT PRIMARY KEY, -- snapshot identifier
    hostname TEXT NOT NULL, -- host that produced the snapshot
    vaultik_version TEXT NOT NULL, -- vaultik release version at snapshot time
    vaultik_git_revision TEXT NOT NULL, -- git revision of the vaultik build
    started_at INTEGER NOT NULL, -- start time, Unix seconds
    completed_at INTEGER, -- completion time; NULL while in progress
    file_count INTEGER NOT NULL DEFAULT 0, -- number of files in the snapshot
    chunk_count INTEGER NOT NULL DEFAULT 0, -- number of chunks referenced
    blob_count INTEGER NOT NULL DEFAULT 0, -- number of blobs referenced
    total_size INTEGER NOT NULL DEFAULT 0, -- total logical size of files, bytes
    blob_size INTEGER NOT NULL DEFAULT 0, -- total compressed blob size, bytes
    blob_uncompressed_size INTEGER NOT NULL DEFAULT 0, -- total blob size before compression
    compression_ratio REAL NOT NULL DEFAULT 1.0, -- blob_size / blob_uncompressed_size (see UpdateExtendedStats)
    compression_level INTEGER NOT NULL DEFAULT 3, -- compression level used
    upload_bytes INTEGER NOT NULL DEFAULT 0, -- bytes uploaded for this snapshot
    upload_duration_ms INTEGER NOT NULL DEFAULT 0 -- total upload wall time, milliseconds
);

-- Snapshot files table: maps snapshots to files
CREATE TABLE IF NOT EXISTS snapshot_files (
    snapshot_id TEXT NOT NULL, -- owning snapshot
    file_id TEXT NOT NULL, -- UUID of a file included in the snapshot
    PRIMARY KEY (snapshot_id, file_id),
    FOREIGN KEY (snapshot_id) REFERENCES snapshots(id) ON DELETE CASCADE,
    FOREIGN KEY (file_id) REFERENCES files(id) ON DELETE CASCADE
);

-- Snapshot blobs table: maps snapshots to blobs.
-- blob_hash is denormalized here alongside blob_id (it also lives in
-- blobs.blob_hash), presumably so snapshot manifests can be built without
-- joining blobs — confirm.
CREATE TABLE IF NOT EXISTS snapshot_blobs (
    snapshot_id TEXT NOT NULL, -- owning snapshot
    blob_id TEXT NOT NULL, -- UUID of a blob referenced by the snapshot
    blob_hash TEXT NOT NULL, -- content hash of that blob (denormalized)
    PRIMARY KEY (snapshot_id, blob_id),
    FOREIGN KEY (snapshot_id) REFERENCES snapshots(id) ON DELETE CASCADE,
    FOREIGN KEY (blob_id) REFERENCES blobs(id) ON DELETE CASCADE
);

-- Uploads table: tracks blob upload metrics
CREATE TABLE IF NOT EXISTS uploads (
    blob_hash TEXT PRIMARY KEY, -- uploaded blob, keyed by content hash
    uploaded_at INTEGER NOT NULL, -- upload time, Unix seconds
    size INTEGER NOT NULL, -- bytes transferred
    duration_ms INTEGER NOT NULL -- upload duration, milliseconds
);

View File

@@ -17,8 +17,10 @@ func NewSnapshotRepository(db *DB) *SnapshotRepository {
func (r *SnapshotRepository) Create(ctx context.Context, tx *sql.Tx, snapshot *Snapshot) error {
query := `
INSERT INTO snapshots (id, hostname, vaultik_version, started_at, completed_at, file_count, chunk_count, blob_count, total_size, blob_size, compression_ratio)
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
INSERT INTO snapshots (id, hostname, vaultik_version, vaultik_git_revision, started_at, completed_at,
file_count, chunk_count, blob_count, total_size, blob_size, blob_uncompressed_size,
compression_ratio, compression_level, upload_bytes, upload_duration_ms)
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
`
var completedAt *int64
@@ -29,11 +31,13 @@ func (r *SnapshotRepository) Create(ctx context.Context, tx *sql.Tx, snapshot *S
var err error
if tx != nil {
_, err = tx.ExecContext(ctx, query, snapshot.ID, snapshot.Hostname, snapshot.VaultikVersion, snapshot.StartedAt.Unix(),
completedAt, snapshot.FileCount, snapshot.ChunkCount, snapshot.BlobCount, snapshot.TotalSize, snapshot.BlobSize, snapshot.CompressionRatio)
_, err = tx.ExecContext(ctx, query, snapshot.ID, snapshot.Hostname, snapshot.VaultikVersion, snapshot.VaultikGitRevision, snapshot.StartedAt.Unix(),
completedAt, snapshot.FileCount, snapshot.ChunkCount, snapshot.BlobCount, snapshot.TotalSize, snapshot.BlobSize, snapshot.BlobUncompressedSize,
snapshot.CompressionRatio, snapshot.CompressionLevel, snapshot.UploadBytes, snapshot.UploadDurationMs)
} else {
_, err = r.db.ExecWithLock(ctx, query, snapshot.ID, snapshot.Hostname, snapshot.VaultikVersion, snapshot.StartedAt.Unix(),
completedAt, snapshot.FileCount, snapshot.ChunkCount, snapshot.BlobCount, snapshot.TotalSize, snapshot.BlobSize, snapshot.CompressionRatio)
_, err = r.db.ExecWithLog(ctx, query, snapshot.ID, snapshot.Hostname, snapshot.VaultikVersion, snapshot.VaultikGitRevision, snapshot.StartedAt.Unix(),
completedAt, snapshot.FileCount, snapshot.ChunkCount, snapshot.BlobCount, snapshot.TotalSize, snapshot.BlobSize, snapshot.BlobUncompressedSize,
snapshot.CompressionRatio, snapshot.CompressionLevel, snapshot.UploadBytes, snapshot.UploadDurationMs)
}
if err != nil {
@@ -64,7 +68,7 @@ func (r *SnapshotRepository) UpdateCounts(ctx context.Context, tx *sql.Tx, snaps
if tx != nil {
_, err = tx.ExecContext(ctx, query, fileCount, chunkCount, blobCount, totalSize, blobSize, compressionRatio, snapshotID)
} else {
_, err = r.db.ExecWithLock(ctx, query, fileCount, chunkCount, blobCount, totalSize, blobSize, compressionRatio, snapshotID)
_, err = r.db.ExecWithLog(ctx, query, fileCount, chunkCount, blobCount, totalSize, blobSize, compressionRatio, snapshotID)
}
if err != nil {
@@ -74,9 +78,58 @@ func (r *SnapshotRepository) UpdateCounts(ctx context.Context, tx *sql.Tx, snaps
return nil
}
// UpdateExtendedStats updates extended statistics for a snapshot.
//
// It recomputes compression_ratio as the row's current blob_size divided by
// blobUncompressedSize (so values below 1.0 indicate effective compression),
// then writes the uncompressed size, ratio, compression level, and upload
// duration onto the snapshot row. upload_bytes is set from the row's current
// blob_size inside the UPDATE itself. When tx is non-nil both the read and
// the write run inside that transaction; otherwise they run on the shared
// connection (the write going through ExecWithLog).
func (r *SnapshotRepository) UpdateExtendedStats(ctx context.Context, tx *sql.Tx, snapshotID string, blobUncompressedSize int64, compressionLevel int, uploadDurationMs int64) error {
	// Calculate compression ratio based on uncompressed vs compressed sizes
	var compressionRatio float64
	if blobUncompressedSize > 0 {
		// Get current blob_size from DB to calculate ratio
		var blobSize int64
		queryGet := `SELECT blob_size FROM snapshots WHERE id = ?`
		if tx != nil {
			err := tx.QueryRowContext(ctx, queryGet, snapshotID).Scan(&blobSize)
			if err != nil {
				return fmt.Errorf("getting blob size: %w", err)
			}
		} else {
			err := r.db.conn.QueryRowContext(ctx, queryGet, snapshotID).Scan(&blobSize)
			if err != nil {
				return fmt.Errorf("getting blob size: %w", err)
			}
		}
		// ratio = compressed / uncompressed, e.g. 0.5 means 2:1 compression.
		compressionRatio = float64(blobSize) / float64(blobUncompressedSize)
	} else {
		// No uncompressed bytes recorded: report a neutral ratio of 1.0
		// rather than dividing by zero.
		compressionRatio = 1.0
	}

	query := `
		UPDATE snapshots
		SET blob_uncompressed_size = ?,
		    compression_ratio = ?,
		    compression_level = ?,
		    upload_bytes = blob_size,
		    upload_duration_ms = ?
		WHERE id = ?
	`

	var err error
	if tx != nil {
		_, err = tx.ExecContext(ctx, query, blobUncompressedSize, compressionRatio, compressionLevel, uploadDurationMs, snapshotID)
	} else {
		_, err = r.db.ExecWithLog(ctx, query, blobUncompressedSize, compressionRatio, compressionLevel, uploadDurationMs, snapshotID)
	}
	if err != nil {
		return fmt.Errorf("updating extended stats: %w", err)
	}
	return nil
}
func (r *SnapshotRepository) GetByID(ctx context.Context, snapshotID string) (*Snapshot, error) {
query := `
SELECT id, hostname, vaultik_version, started_at, completed_at, file_count, chunk_count, blob_count, total_size, blob_size, compression_ratio
SELECT id, hostname, vaultik_version, vaultik_git_revision, started_at, completed_at,
file_count, chunk_count, blob_count, total_size, blob_size, blob_uncompressed_size,
compression_ratio, compression_level, upload_bytes, upload_duration_ms
FROM snapshots
WHERE id = ?
`
@@ -89,6 +142,7 @@ func (r *SnapshotRepository) GetByID(ctx context.Context, snapshotID string) (*S
&snapshot.ID,
&snapshot.Hostname,
&snapshot.VaultikVersion,
&snapshot.VaultikGitRevision,
&startedAtUnix,
&completedAtUnix,
&snapshot.FileCount,
@@ -96,7 +150,11 @@ func (r *SnapshotRepository) GetByID(ctx context.Context, snapshotID string) (*S
&snapshot.BlobCount,
&snapshot.TotalSize,
&snapshot.BlobSize,
&snapshot.BlobUncompressedSize,
&snapshot.CompressionRatio,
&snapshot.CompressionLevel,
&snapshot.UploadBytes,
&snapshot.UploadDurationMs,
)
if err == sql.ErrNoRows {
@@ -106,9 +164,9 @@ func (r *SnapshotRepository) GetByID(ctx context.Context, snapshotID string) (*S
return nil, fmt.Errorf("querying snapshot: %w", err)
}
snapshot.StartedAt = time.Unix(startedAtUnix, 0)
snapshot.StartedAt = time.Unix(startedAtUnix, 0).UTC()
if completedAtUnix != nil {
t := time.Unix(*completedAtUnix, 0)
t := time.Unix(*completedAtUnix, 0).UTC()
snapshot.CompletedAt = &t
}
@@ -117,7 +175,7 @@ func (r *SnapshotRepository) GetByID(ctx context.Context, snapshotID string) (*S
func (r *SnapshotRepository) ListRecent(ctx context.Context, limit int) ([]*Snapshot, error) {
query := `
SELECT id, hostname, vaultik_version, started_at, completed_at, file_count, chunk_count, blob_count, total_size, blob_size, compression_ratio
SELECT id, hostname, vaultik_version, vaultik_git_revision, started_at, completed_at, file_count, chunk_count, blob_count, total_size, blob_size, compression_ratio
FROM snapshots
ORDER BY started_at DESC
LIMIT ?
@@ -139,6 +197,7 @@ func (r *SnapshotRepository) ListRecent(ctx context.Context, limit int) ([]*Snap
&snapshot.ID,
&snapshot.Hostname,
&snapshot.VaultikVersion,
&snapshot.VaultikGitRevision,
&startedAtUnix,
&completedAtUnix,
&snapshot.FileCount,
@@ -172,13 +231,13 @@ func (r *SnapshotRepository) MarkComplete(ctx context.Context, tx *sql.Tx, snaps
WHERE id = ?
`
completedAt := time.Now().Unix()
completedAt := time.Now().UTC().Unix()
var err error
if tx != nil {
_, err = tx.ExecContext(ctx, query, completedAt, snapshotID)
} else {
_, err = r.db.ExecWithLock(ctx, query, completedAt, snapshotID)
_, err = r.db.ExecWithLog(ctx, query, completedAt, snapshotID)
}
if err != nil {
@@ -191,15 +250,36 @@ func (r *SnapshotRepository) MarkComplete(ctx context.Context, tx *sql.Tx, snaps
// AddFile adds a file to a snapshot
func (r *SnapshotRepository) AddFile(ctx context.Context, tx *sql.Tx, snapshotID string, filePath string) error {
query := `
INSERT OR IGNORE INTO snapshot_files (snapshot_id, file_path)
VALUES (?, ?)
INSERT OR IGNORE INTO snapshot_files (snapshot_id, file_id)
SELECT ?, id FROM files WHERE path = ?
`
var err error
if tx != nil {
_, err = tx.ExecContext(ctx, query, snapshotID, filePath)
} else {
_, err = r.db.ExecWithLock(ctx, query, snapshotID, filePath)
_, err = r.db.ExecWithLog(ctx, query, snapshotID, filePath)
}
if err != nil {
return fmt.Errorf("adding file to snapshot: %w", err)
}
return nil
}
// AddFileByID adds a file to a snapshot by file ID
func (r *SnapshotRepository) AddFileByID(ctx context.Context, tx *sql.Tx, snapshotID string, fileID string) error {
query := `
INSERT OR IGNORE INTO snapshot_files (snapshot_id, file_id)
VALUES (?, ?)
`
var err error
if tx != nil {
_, err = tx.ExecContext(ctx, query, snapshotID, fileID)
} else {
_, err = r.db.ExecWithLog(ctx, query, snapshotID, fileID)
}
if err != nil {
@@ -220,7 +300,7 @@ func (r *SnapshotRepository) AddBlob(ctx context.Context, tx *sql.Tx, snapshotID
if tx != nil {
_, err = tx.ExecContext(ctx, query, snapshotID, blobID, blobHash)
} else {
_, err = r.db.ExecWithLock(ctx, query, snapshotID, blobID, blobHash)
_, err = r.db.ExecWithLog(ctx, query, snapshotID, blobID, blobHash)
}
if err != nil {
@@ -260,7 +340,7 @@ func (r *SnapshotRepository) GetBlobHashes(ctx context.Context, snapshotID strin
// GetIncompleteSnapshots returns all snapshots that haven't been completed
func (r *SnapshotRepository) GetIncompleteSnapshots(ctx context.Context) ([]*Snapshot, error) {
query := `
SELECT id, hostname, vaultik_version, started_at, completed_at, file_count, chunk_count, blob_count, total_size, blob_size, compression_ratio
SELECT id, hostname, vaultik_version, vaultik_git_revision, started_at, completed_at, file_count, chunk_count, blob_count, total_size, blob_size, compression_ratio
FROM snapshots
WHERE completed_at IS NULL
ORDER BY started_at DESC
@@ -282,6 +362,7 @@ func (r *SnapshotRepository) GetIncompleteSnapshots(ctx context.Context) ([]*Sna
&snapshot.ID,
&snapshot.Hostname,
&snapshot.VaultikVersion,
&snapshot.VaultikGitRevision,
&startedAtUnix,
&completedAtUnix,
&snapshot.FileCount,
@@ -306,3 +387,90 @@ func (r *SnapshotRepository) GetIncompleteSnapshots(ctx context.Context) ([]*Sna
return snapshots, rows.Err()
}
// GetIncompleteByHostname returns every snapshot belonging to the given
// hostname whose completed_at is still NULL, ordered newest-first.
// Timestamps are stored as Unix seconds and normalized to UTC on read.
func (r *SnapshotRepository) GetIncompleteByHostname(ctx context.Context, hostname string) ([]*Snapshot, error) {
	query := `
		SELECT id, hostname, vaultik_version, vaultik_git_revision, started_at, completed_at, file_count, chunk_count, blob_count, total_size, blob_size, compression_ratio
		FROM snapshots
		WHERE completed_at IS NULL AND hostname = ?
		ORDER BY started_at DESC
	`

	rows, err := r.db.conn.QueryContext(ctx, query, hostname)
	if err != nil {
		return nil, fmt.Errorf("querying incomplete snapshots: %w", err)
	}
	defer CloseRows(rows)

	var result []*Snapshot
	for rows.Next() {
		s := &Snapshot{}
		var started int64
		var completed *int64
		if scanErr := rows.Scan(
			&s.ID,
			&s.Hostname,
			&s.VaultikVersion,
			&s.VaultikGitRevision,
			&started,
			&completed,
			&s.FileCount,
			&s.ChunkCount,
			&s.BlobCount,
			&s.TotalSize,
			&s.BlobSize,
			&s.CompressionRatio,
		); scanErr != nil {
			return nil, fmt.Errorf("scanning snapshot: %w", scanErr)
		}

		// Convert stored Unix seconds to UTC time values.
		s.StartedAt = time.Unix(started, 0).UTC()
		if completed != nil {
			done := time.Unix(*completed, 0).UTC()
			s.CompletedAt = &done
		}
		result = append(result, s)
	}
	return result, rows.Err()
}
// Delete removes a snapshot record. Related snapshot_files and
// snapshot_blobs rows cascade via the schema's ON DELETE CASCADE.
func (r *SnapshotRepository) Delete(ctx context.Context, snapshotID string) error {
	if _, err := r.db.ExecWithLog(ctx, `DELETE FROM snapshots WHERE id = ?`, snapshotID); err != nil {
		return fmt.Errorf("deleting snapshot: %w", err)
	}
	return nil
}
// DeleteSnapshotFiles removes every snapshot_files row that belongs to the
// given snapshot, leaving the files rows themselves untouched.
func (r *SnapshotRepository) DeleteSnapshotFiles(ctx context.Context, snapshotID string) error {
	if _, err := r.db.ExecWithLog(ctx, `DELETE FROM snapshot_files WHERE snapshot_id = ?`, snapshotID); err != nil {
		return fmt.Errorf("deleting snapshot files: %w", err)
	}
	return nil
}
// DeleteSnapshotBlobs removes every snapshot_blobs row that belongs to the
// given snapshot, leaving the blobs rows themselves untouched.
func (r *SnapshotRepository) DeleteSnapshotBlobs(ctx context.Context, snapshotID string) error {
	if _, err := r.db.ExecWithLog(ctx, `DELETE FROM snapshot_blobs WHERE snapshot_id = ?`, snapshotID); err != nil {
		return fmt.Errorf("deleting snapshot blobs: %w", err)
	}
	return nil
}