Major refactoring: UUID-based storage, streaming architecture, and CLI improvements

This commit represents a significant architectural overhaul of vaultik:

Database Schema Changes:
- Switch files table to use UUID primary keys instead of path-based keys
- Add UUID primary keys to blobs table for immediate chunk association
- Update all foreign key relationships to use UUIDs
- Add comprehensive schema documentation in DATAMODEL.md
- Add SQLite busy timeout handling for concurrent operations

Streaming and Performance Improvements:
- Implement true streaming blob packing without intermediate storage
- Add streaming chunk processing to reduce memory usage
- Improve progress reporting with real-time metrics
- Add upload metrics tracking in new uploads table

CLI Refactoring:
- Restructure CLI to use subcommands: snapshot create/list/purge/verify
- Add store info command for S3 configuration display
- Add custom duration parser supporting days/weeks/months/years
- Remove old backup.go in favor of enhanced snapshot.go
- Add --cron flag for silent operation

Configuration Changes:
- Remove unused index_prefix configuration option
- Add support for snapshot pruning retention policies
- Improve configuration validation and error messages

Testing Improvements:
- Add comprehensive repository tests with edge cases
- Add cascade delete debugging tests
- Fix concurrent operation tests to use SQLite busy timeout
- Remove tolerance for SQLITE_BUSY errors in tests

Documentation:
- Add MIT LICENSE file
- Update README with new command structure
- Add comprehensive DATAMODEL.md explaining database schema
- Update DESIGN.md with UUID-based architecture

Other Changes:
- Add test-config.yml for testing
- Update Makefile with better test output formatting
- Fix various race conditions in concurrent operations
- Improve error handling throughout
This commit is contained in:
2025-07-22 14:54:37 +02:00
parent 86b533d6ee
commit 78af626759
54 changed files with 5525 additions and 1109 deletions

View File

@@ -17,8 +17,10 @@ func NewSnapshotRepository(db *DB) *SnapshotRepository {
func (r *SnapshotRepository) Create(ctx context.Context, tx *sql.Tx, snapshot *Snapshot) error {
query := `
INSERT INTO snapshots (id, hostname, vaultik_version, started_at, completed_at, file_count, chunk_count, blob_count, total_size, blob_size, compression_ratio)
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
INSERT INTO snapshots (id, hostname, vaultik_version, vaultik_git_revision, started_at, completed_at,
file_count, chunk_count, blob_count, total_size, blob_size, blob_uncompressed_size,
compression_ratio, compression_level, upload_bytes, upload_duration_ms)
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
`
var completedAt *int64
@@ -29,11 +31,13 @@ func (r *SnapshotRepository) Create(ctx context.Context, tx *sql.Tx, snapshot *S
var err error
if tx != nil {
_, err = tx.ExecContext(ctx, query, snapshot.ID, snapshot.Hostname, snapshot.VaultikVersion, snapshot.StartedAt.Unix(),
completedAt, snapshot.FileCount, snapshot.ChunkCount, snapshot.BlobCount, snapshot.TotalSize, snapshot.BlobSize, snapshot.CompressionRatio)
_, err = tx.ExecContext(ctx, query, snapshot.ID, snapshot.Hostname, snapshot.VaultikVersion, snapshot.VaultikGitRevision, snapshot.StartedAt.Unix(),
completedAt, snapshot.FileCount, snapshot.ChunkCount, snapshot.BlobCount, snapshot.TotalSize, snapshot.BlobSize, snapshot.BlobUncompressedSize,
snapshot.CompressionRatio, snapshot.CompressionLevel, snapshot.UploadBytes, snapshot.UploadDurationMs)
} else {
_, err = r.db.ExecWithLock(ctx, query, snapshot.ID, snapshot.Hostname, snapshot.VaultikVersion, snapshot.StartedAt.Unix(),
completedAt, snapshot.FileCount, snapshot.ChunkCount, snapshot.BlobCount, snapshot.TotalSize, snapshot.BlobSize, snapshot.CompressionRatio)
_, err = r.db.ExecWithLog(ctx, query, snapshot.ID, snapshot.Hostname, snapshot.VaultikVersion, snapshot.VaultikGitRevision, snapshot.StartedAt.Unix(),
completedAt, snapshot.FileCount, snapshot.ChunkCount, snapshot.BlobCount, snapshot.TotalSize, snapshot.BlobSize, snapshot.BlobUncompressedSize,
snapshot.CompressionRatio, snapshot.CompressionLevel, snapshot.UploadBytes, snapshot.UploadDurationMs)
}
if err != nil {
@@ -64,7 +68,7 @@ func (r *SnapshotRepository) UpdateCounts(ctx context.Context, tx *sql.Tx, snaps
if tx != nil {
_, err = tx.ExecContext(ctx, query, fileCount, chunkCount, blobCount, totalSize, blobSize, compressionRatio, snapshotID)
} else {
_, err = r.db.ExecWithLock(ctx, query, fileCount, chunkCount, blobCount, totalSize, blobSize, compressionRatio, snapshotID)
_, err = r.db.ExecWithLog(ctx, query, fileCount, chunkCount, blobCount, totalSize, blobSize, compressionRatio, snapshotID)
}
if err != nil {
@@ -74,9 +78,58 @@ func (r *SnapshotRepository) UpdateCounts(ctx context.Context, tx *sql.Tx, snaps
return nil
}
// UpdateExtendedStats updates extended statistics for a snapshot
func (r *SnapshotRepository) UpdateExtendedStats(ctx context.Context, tx *sql.Tx, snapshotID string, blobUncompressedSize int64, compressionLevel int, uploadDurationMs int64) error {
// Calculate compression ratio based on uncompressed vs compressed sizes
var compressionRatio float64
if blobUncompressedSize > 0 {
// Get current blob_size from DB to calculate ratio
var blobSize int64
queryGet := `SELECT blob_size FROM snapshots WHERE id = ?`
if tx != nil {
err := tx.QueryRowContext(ctx, queryGet, snapshotID).Scan(&blobSize)
if err != nil {
return fmt.Errorf("getting blob size: %w", err)
}
} else {
err := r.db.conn.QueryRowContext(ctx, queryGet, snapshotID).Scan(&blobSize)
if err != nil {
return fmt.Errorf("getting blob size: %w", err)
}
}
compressionRatio = float64(blobSize) / float64(blobUncompressedSize)
} else {
compressionRatio = 1.0
}
query := `
UPDATE snapshots
SET blob_uncompressed_size = ?,
compression_ratio = ?,
compression_level = ?,
upload_bytes = blob_size,
upload_duration_ms = ?
WHERE id = ?
`
var err error
if tx != nil {
_, err = tx.ExecContext(ctx, query, blobUncompressedSize, compressionRatio, compressionLevel, uploadDurationMs, snapshotID)
} else {
_, err = r.db.ExecWithLog(ctx, query, blobUncompressedSize, compressionRatio, compressionLevel, uploadDurationMs, snapshotID)
}
if err != nil {
return fmt.Errorf("updating extended stats: %w", err)
}
return nil
}
func (r *SnapshotRepository) GetByID(ctx context.Context, snapshotID string) (*Snapshot, error) {
query := `
SELECT id, hostname, vaultik_version, started_at, completed_at, file_count, chunk_count, blob_count, total_size, blob_size, compression_ratio
SELECT id, hostname, vaultik_version, vaultik_git_revision, started_at, completed_at,
file_count, chunk_count, blob_count, total_size, blob_size, blob_uncompressed_size,
compression_ratio, compression_level, upload_bytes, upload_duration_ms
FROM snapshots
WHERE id = ?
`
@@ -89,6 +142,7 @@ func (r *SnapshotRepository) GetByID(ctx context.Context, snapshotID string) (*S
&snapshot.ID,
&snapshot.Hostname,
&snapshot.VaultikVersion,
&snapshot.VaultikGitRevision,
&startedAtUnix,
&completedAtUnix,
&snapshot.FileCount,
@@ -96,7 +150,11 @@ func (r *SnapshotRepository) GetByID(ctx context.Context, snapshotID string) (*S
&snapshot.BlobCount,
&snapshot.TotalSize,
&snapshot.BlobSize,
&snapshot.BlobUncompressedSize,
&snapshot.CompressionRatio,
&snapshot.CompressionLevel,
&snapshot.UploadBytes,
&snapshot.UploadDurationMs,
)
if err == sql.ErrNoRows {
@@ -106,9 +164,9 @@ func (r *SnapshotRepository) GetByID(ctx context.Context, snapshotID string) (*S
return nil, fmt.Errorf("querying snapshot: %w", err)
}
snapshot.StartedAt = time.Unix(startedAtUnix, 0)
snapshot.StartedAt = time.Unix(startedAtUnix, 0).UTC()
if completedAtUnix != nil {
t := time.Unix(*completedAtUnix, 0)
t := time.Unix(*completedAtUnix, 0).UTC()
snapshot.CompletedAt = &t
}
@@ -117,7 +175,7 @@ func (r *SnapshotRepository) GetByID(ctx context.Context, snapshotID string) (*S
func (r *SnapshotRepository) ListRecent(ctx context.Context, limit int) ([]*Snapshot, error) {
query := `
SELECT id, hostname, vaultik_version, started_at, completed_at, file_count, chunk_count, blob_count, total_size, blob_size, compression_ratio
SELECT id, hostname, vaultik_version, vaultik_git_revision, started_at, completed_at, file_count, chunk_count, blob_count, total_size, blob_size, compression_ratio
FROM snapshots
ORDER BY started_at DESC
LIMIT ?
@@ -139,6 +197,7 @@ func (r *SnapshotRepository) ListRecent(ctx context.Context, limit int) ([]*Snap
&snapshot.ID,
&snapshot.Hostname,
&snapshot.VaultikVersion,
&snapshot.VaultikGitRevision,
&startedAtUnix,
&completedAtUnix,
&snapshot.FileCount,
@@ -172,13 +231,13 @@ func (r *SnapshotRepository) MarkComplete(ctx context.Context, tx *sql.Tx, snaps
WHERE id = ?
`
completedAt := time.Now().Unix()
completedAt := time.Now().UTC().Unix()
var err error
if tx != nil {
_, err = tx.ExecContext(ctx, query, completedAt, snapshotID)
} else {
_, err = r.db.ExecWithLock(ctx, query, completedAt, snapshotID)
_, err = r.db.ExecWithLog(ctx, query, completedAt, snapshotID)
}
if err != nil {
@@ -191,15 +250,36 @@ func (r *SnapshotRepository) MarkComplete(ctx context.Context, tx *sql.Tx, snaps
// AddFile adds a file to a snapshot
func (r *SnapshotRepository) AddFile(ctx context.Context, tx *sql.Tx, snapshotID string, filePath string) error {
query := `
INSERT OR IGNORE INTO snapshot_files (snapshot_id, file_path)
VALUES (?, ?)
INSERT OR IGNORE INTO snapshot_files (snapshot_id, file_id)
SELECT ?, id FROM files WHERE path = ?
`
var err error
if tx != nil {
_, err = tx.ExecContext(ctx, query, snapshotID, filePath)
} else {
_, err = r.db.ExecWithLock(ctx, query, snapshotID, filePath)
_, err = r.db.ExecWithLog(ctx, query, snapshotID, filePath)
}
if err != nil {
return fmt.Errorf("adding file to snapshot: %w", err)
}
return nil
}
// AddFileByID adds a file to a snapshot by file ID
func (r *SnapshotRepository) AddFileByID(ctx context.Context, tx *sql.Tx, snapshotID string, fileID string) error {
query := `
INSERT OR IGNORE INTO snapshot_files (snapshot_id, file_id)
VALUES (?, ?)
`
var err error
if tx != nil {
_, err = tx.ExecContext(ctx, query, snapshotID, fileID)
} else {
_, err = r.db.ExecWithLog(ctx, query, snapshotID, fileID)
}
if err != nil {
@@ -220,7 +300,7 @@ func (r *SnapshotRepository) AddBlob(ctx context.Context, tx *sql.Tx, snapshotID
if tx != nil {
_, err = tx.ExecContext(ctx, query, snapshotID, blobID, blobHash)
} else {
_, err = r.db.ExecWithLock(ctx, query, snapshotID, blobID, blobHash)
_, err = r.db.ExecWithLog(ctx, query, snapshotID, blobID, blobHash)
}
if err != nil {
@@ -260,7 +340,7 @@ func (r *SnapshotRepository) GetBlobHashes(ctx context.Context, snapshotID strin
// GetIncompleteSnapshots returns all snapshots that haven't been completed
func (r *SnapshotRepository) GetIncompleteSnapshots(ctx context.Context) ([]*Snapshot, error) {
query := `
SELECT id, hostname, vaultik_version, started_at, completed_at, file_count, chunk_count, blob_count, total_size, blob_size, compression_ratio
SELECT id, hostname, vaultik_version, vaultik_git_revision, started_at, completed_at, file_count, chunk_count, blob_count, total_size, blob_size, compression_ratio
FROM snapshots
WHERE completed_at IS NULL
ORDER BY started_at DESC
@@ -282,6 +362,7 @@ func (r *SnapshotRepository) GetIncompleteSnapshots(ctx context.Context) ([]*Sna
&snapshot.ID,
&snapshot.Hostname,
&snapshot.VaultikVersion,
&snapshot.VaultikGitRevision,
&startedAtUnix,
&completedAtUnix,
&snapshot.FileCount,
@@ -306,3 +387,90 @@ func (r *SnapshotRepository) GetIncompleteSnapshots(ctx context.Context) ([]*Sna
return snapshots, rows.Err()
}
// GetIncompleteByHostname returns all incomplete snapshots for a specific hostname
func (r *SnapshotRepository) GetIncompleteByHostname(ctx context.Context, hostname string) ([]*Snapshot, error) {
query := `
SELECT id, hostname, vaultik_version, vaultik_git_revision, started_at, completed_at, file_count, chunk_count, blob_count, total_size, blob_size, compression_ratio
FROM snapshots
WHERE completed_at IS NULL AND hostname = ?
ORDER BY started_at DESC
`
rows, err := r.db.conn.QueryContext(ctx, query, hostname)
if err != nil {
return nil, fmt.Errorf("querying incomplete snapshots: %w", err)
}
defer CloseRows(rows)
var snapshots []*Snapshot
for rows.Next() {
var snapshot Snapshot
var startedAtUnix int64
var completedAtUnix *int64
err := rows.Scan(
&snapshot.ID,
&snapshot.Hostname,
&snapshot.VaultikVersion,
&snapshot.VaultikGitRevision,
&startedAtUnix,
&completedAtUnix,
&snapshot.FileCount,
&snapshot.ChunkCount,
&snapshot.BlobCount,
&snapshot.TotalSize,
&snapshot.BlobSize,
&snapshot.CompressionRatio,
)
if err != nil {
return nil, fmt.Errorf("scanning snapshot: %w", err)
}
snapshot.StartedAt = time.Unix(startedAtUnix, 0).UTC()
if completedAtUnix != nil {
t := time.Unix(*completedAtUnix, 0).UTC()
snapshot.CompletedAt = &t
}
snapshots = append(snapshots, &snapshot)
}
return snapshots, rows.Err()
}
// Delete removes a snapshot record
func (r *SnapshotRepository) Delete(ctx context.Context, snapshotID string) error {
query := `DELETE FROM snapshots WHERE id = ?`
_, err := r.db.ExecWithLog(ctx, query, snapshotID)
if err != nil {
return fmt.Errorf("deleting snapshot: %w", err)
}
return nil
}
// DeleteSnapshotFiles removes all snapshot_files entries for a snapshot
func (r *SnapshotRepository) DeleteSnapshotFiles(ctx context.Context, snapshotID string) error {
query := `DELETE FROM snapshot_files WHERE snapshot_id = ?`
_, err := r.db.ExecWithLog(ctx, query, snapshotID)
if err != nil {
return fmt.Errorf("deleting snapshot files: %w", err)
}
return nil
}
// DeleteSnapshotBlobs removes all snapshot_blobs entries for a snapshot
func (r *SnapshotRepository) DeleteSnapshotBlobs(ctx context.Context, snapshotID string) error {
query := `DELETE FROM snapshot_blobs WHERE snapshot_id = ?`
_, err := r.db.ExecWithLog(ctx, query, snapshotID)
if err != nil {
return fmt.Errorf("deleting snapshot blobs: %w", err)
}
return nil
}