Major refactoring: UUID-based storage, streaming architecture, and CLI improvements

This commit represents a significant architectural overhaul of vaultik:

Database Schema Changes:
- Switch files table to use UUID primary keys instead of path-based keys
- Add UUID primary keys to blobs table for immediate chunk association
- Update all foreign key relationships to use UUIDs
- Add comprehensive schema documentation in DATAMODEL.md
- Add SQLite busy timeout handling for concurrent operations

Streaming and Performance Improvements:
- Implement true streaming blob packing without intermediate storage
- Add streaming chunk processing to reduce memory usage
- Improve progress reporting with real-time metrics
- Add upload metrics tracking in new uploads table

CLI Refactoring:
- Restructure CLI to use subcommands: snapshot create/list/purge/verify
- Add store info command for S3 configuration display
- Add custom duration parser supporting days/weeks/months/years
- Remove old backup.go in favor of enhanced snapshot.go
- Add --cron flag for silent operation

Configuration Changes:
- Remove unused index_prefix configuration option
- Add support for snapshot pruning retention policies
- Improve configuration validation and error messages

Testing Improvements:
- Add comprehensive repository tests with edge cases
- Add cascade delete debugging tests
- Fix concurrent operation tests to use SQLite busy timeout
- Remove tolerance for SQLITE_BUSY errors in tests

Documentation:
- Add MIT LICENSE file
- Update README with new command structure
- Add comprehensive DATAMODEL.md explaining database schema
- Update DESIGN.md with UUID-based architecture

Other Changes:
- Add test-config.yml for testing
- Update Makefile with better test output formatting
- Fix various race conditions in concurrent operations
- Improve error handling throughout
This commit is contained in:
2025-07-22 14:54:37 +02:00
parent 86b533d6ee
commit 78af626759
54 changed files with 5525 additions and 1109 deletions

View File

@@ -5,6 +5,8 @@ import (
"database/sql"
"fmt"
"time"
"git.eeqj.de/sneak/vaultik/internal/log"
)
type BlobRepository struct {
@@ -36,7 +38,7 @@ func (r *BlobRepository) Create(ctx context.Context, tx *sql.Tx, blob *Blob) err
_, err = tx.ExecContext(ctx, query, blob.ID, blob.Hash, blob.CreatedTS.Unix(),
finishedTS, blob.UncompressedSize, blob.CompressedSize, uploadedTS)
} else {
_, err = r.db.ExecWithLock(ctx, query, blob.ID, blob.Hash, blob.CreatedTS.Unix(),
_, err = r.db.ExecWithLog(ctx, query, blob.ID, blob.Hash, blob.CreatedTS.Unix(),
finishedTS, blob.UncompressedSize, blob.CompressedSize, uploadedTS)
}
@@ -75,13 +77,13 @@ func (r *BlobRepository) GetByHash(ctx context.Context, hash string) (*Blob, err
return nil, fmt.Errorf("querying blob: %w", err)
}
blob.CreatedTS = time.Unix(createdTSUnix, 0)
blob.CreatedTS = time.Unix(createdTSUnix, 0).UTC()
if finishedTSUnix.Valid {
ts := time.Unix(finishedTSUnix.Int64, 0)
ts := time.Unix(finishedTSUnix.Int64, 0).UTC()
blob.FinishedTS = &ts
}
if uploadedTSUnix.Valid {
ts := time.Unix(uploadedTSUnix.Int64, 0)
ts := time.Unix(uploadedTSUnix.Int64, 0).UTC()
blob.UploadedTS = &ts
}
return &blob, nil
@@ -116,13 +118,13 @@ func (r *BlobRepository) GetByID(ctx context.Context, id string) (*Blob, error)
return nil, fmt.Errorf("querying blob: %w", err)
}
blob.CreatedTS = time.Unix(createdTSUnix, 0)
blob.CreatedTS = time.Unix(createdTSUnix, 0).UTC()
if finishedTSUnix.Valid {
ts := time.Unix(finishedTSUnix.Int64, 0)
ts := time.Unix(finishedTSUnix.Int64, 0).UTC()
blob.FinishedTS = &ts
}
if uploadedTSUnix.Valid {
ts := time.Unix(uploadedTSUnix.Int64, 0)
ts := time.Unix(uploadedTSUnix.Int64, 0).UTC()
blob.UploadedTS = &ts
}
return &blob, nil
@@ -136,12 +138,12 @@ func (r *BlobRepository) UpdateFinished(ctx context.Context, tx *sql.Tx, id stri
WHERE id = ?
`
now := time.Now().Unix()
now := time.Now().UTC().Unix()
var err error
if tx != nil {
_, err = tx.ExecContext(ctx, query, hash, now, uncompressedSize, compressedSize, id)
} else {
_, err = r.db.ExecWithLock(ctx, query, hash, now, uncompressedSize, compressedSize, id)
_, err = r.db.ExecWithLog(ctx, query, hash, now, uncompressedSize, compressedSize, id)
}
if err != nil {
@@ -159,12 +161,12 @@ func (r *BlobRepository) UpdateUploaded(ctx context.Context, tx *sql.Tx, id stri
WHERE id = ?
`
now := time.Now().Unix()
now := time.Now().UTC().Unix()
var err error
if tx != nil {
_, err = tx.ExecContext(ctx, query, now, id)
} else {
_, err = r.db.ExecWithLock(ctx, query, now, id)
_, err = r.db.ExecWithLog(ctx, query, now, id)
}
if err != nil {
@@ -173,3 +175,26 @@ func (r *BlobRepository) UpdateUploaded(ctx context.Context, tx *sql.Tx, id stri
return nil
}
// DeleteOrphaned deletes blobs that are not referenced by any snapshot
func (r *BlobRepository) DeleteOrphaned(ctx context.Context) error {
query := `
DELETE FROM blobs
WHERE NOT EXISTS (
SELECT 1 FROM snapshot_blobs
WHERE snapshot_blobs.blob_id = blobs.id
)
`
result, err := r.db.ExecWithLog(ctx, query)
if err != nil {
return fmt.Errorf("deleting orphaned blobs: %w", err)
}
rowsAffected, _ := result.RowsAffected()
if rowsAffected > 0 {
log.Debug("Deleted orphaned blobs", "count", rowsAffected)
}
return nil
}