Major refactoring: UUID-based storage, streaming architecture, and CLI improvements
This commit represents a significant architectural overhaul of vaultik.

Database Schema Changes:
- Switch files table to use UUID primary keys instead of path-based keys (see the schema sketch below)
- Add UUID primary keys to blobs table for immediate chunk association
- Update all foreign key relationships to use UUIDs
- Add comprehensive schema documentation in DATAMODEL.md
- Add SQLite busy timeout handling for concurrent operations (see the busy-timeout sketch below)

Streaming and Performance Improvements:
- Implement true streaming blob packing without intermediate storage
- Add streaming chunk processing to reduce memory usage
- Improve progress reporting with real-time metrics
- Add upload metrics tracking in new uploads table

CLI Refactoring:
- Restructure CLI to use subcommands: snapshot create/list/purge/verify
- Add store info command for S3 configuration display
- Add custom duration parser supporting days/weeks/months/years (see the parser sketch below)
- Remove old backup.go in favor of enhanced snapshot.go
- Add --cron flag for silent operation

Configuration Changes:
- Remove unused index_prefix configuration option
- Add support for snapshot pruning retention policies
- Improve configuration validation and error messages

Testing Improvements:
- Add comprehensive repository tests with edge cases
- Add cascade delete debugging tests
- Fix concurrent operation tests to use SQLite busy timeout
- Remove tolerance for SQLITE_BUSY errors in tests

Documentation:
- Add MIT LICENSE file
- Update README with new command structure
- Add comprehensive DATAMODEL.md explaining database schema
- Update DESIGN.md with UUID-based architecture

Other Changes:
- Add test-config.yml for testing
- Update Makefile with better test output formatting
- Fix various race conditions in concurrent operations
- Improve error handling throughout
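For orientation, a sketch of the UUID-keyed tables this commit moves to. The chunk_files columns come from the diff below; the files table shape, the cascade rule, and the composite key are inferences from the new JOIN and ON CONFLICT(chunk_hash, file_id) clauses, so treat this as hypothetical rather than the shipped migration (DATAMODEL.md holds the authoritative schema):

// Hypothetical schema sketch, not the actual migration DDL.
const schemaSketch = `
CREATE TABLE files (
    id   TEXT PRIMARY KEY, -- UUID, replaces the old path-based key
    path TEXT NOT NULL     -- still queryable, but no longer the key
);

CREATE TABLE chunk_files (
    chunk_hash  TEXT NOT NULL,
    file_id     TEXT NOT NULL REFERENCES files(id) ON DELETE CASCADE, -- cascade assumed from the cascade-delete tests
    file_offset INTEGER NOT NULL,
    length      INTEGER NOT NULL,
    PRIMARY KEY (chunk_hash, file_id) -- matches ON CONFLICT(chunk_hash, file_id)
);
`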
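The busy-timeout change is easiest to see in isolation. A minimal sketch, assuming the mattn/go-sqlite3 driver; vaultik's actual driver and wiring may differ:

package main

import (
	"database/sql"
	"log"

	_ "github.com/mattn/go-sqlite3" // driver choice is an assumption
)

func main() {
	// _busy_timeout tells SQLite to retry for up to 5s when another
	// connection holds the write lock, instead of returning SQLITE_BUSY
	// immediately -- the failure mode the concurrent-operation tests hit.
	db, err := sql.Open("sqlite3", "file:index.db?_busy_timeout=5000")
	if err != nil {
		log.Fatal(err)
	}
	defer db.Close()

	// Equivalent PRAGMA form, independent of driver DSN syntax.
	if _, err := db.Exec("PRAGMA busy_timeout = 5000"); err != nil {
		log.Fatal(err)
	}
}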
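The custom duration parser extends Go's time.ParseDuration, which stops at hours. A sketch under assumptions: the suffix spellings and the 30-day month / 365-day year conversions are illustrative guesses, not vaultik's actual rules:

package main

import (
	"fmt"
	"strconv"
	"strings"
	"time"
)

// Suffixes are checked longest-first so "mo" wins before single letters.
var units = []struct {
	suffix string
	unit   time.Duration
}{
	{"mo", 30 * 24 * time.Hour},
	{"y", 365 * 24 * time.Hour},
	{"w", 7 * 24 * time.Hour},
	{"d", 24 * time.Hour},
}

// parseRetention handles d/w/mo/y on top of standard h/m/s parsing.
func parseRetention(s string) (time.Duration, error) {
	for _, u := range units {
		if n, ok := strings.CutSuffix(s, u.suffix); ok {
			count, err := strconv.Atoi(n)
			if err != nil {
				return 0, fmt.Errorf("invalid duration %q: %w", s, err)
			}
			return time.Duration(count) * u.unit, nil
		}
	}
	return time.ParseDuration(s) // fall back to standard parsing
}

func main() {
	for _, s := range []string{"7d", "4w", "6mo", "1y", "12h"} {
		d, err := parseRetention(s)
		fmt.Println(s, d, err)
	}
}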
@@ -16,16 +16,16 @@ func NewChunkFileRepository(db *DB) *ChunkFileRepository {
 
 func (r *ChunkFileRepository) Create(ctx context.Context, tx *sql.Tx, cf *ChunkFile) error {
 	query := `
-		INSERT INTO chunk_files (chunk_hash, file_path, file_offset, length)
+		INSERT INTO chunk_files (chunk_hash, file_id, file_offset, length)
 		VALUES (?, ?, ?, ?)
-		ON CONFLICT(chunk_hash, file_path) DO NOTHING
+		ON CONFLICT(chunk_hash, file_id) DO NOTHING
 	`
 
 	var err error
 	if tx != nil {
-		_, err = tx.ExecContext(ctx, query, cf.ChunkHash, cf.FilePath, cf.FileOffset, cf.Length)
+		_, err = tx.ExecContext(ctx, query, cf.ChunkHash, cf.FileID, cf.FileOffset, cf.Length)
 	} else {
-		_, err = r.db.ExecWithLock(ctx, query, cf.ChunkHash, cf.FilePath, cf.FileOffset, cf.Length)
+		_, err = r.db.ExecWithLock(ctx, query, cf.ChunkHash, cf.FileID, cf.FileOffset, cf.Length)
 	}
 
 	if err != nil {
@@ -37,7 +37,7 @@ func (r *ChunkFileRepository) Create(ctx context.Context, tx *sql.Tx, cf *ChunkF
 
 func (r *ChunkFileRepository) GetByChunkHash(ctx context.Context, chunkHash string) ([]*ChunkFile, error) {
 	query := `
-		SELECT chunk_hash, file_path, file_offset, length
+		SELECT chunk_hash, file_id, file_offset, length
 		FROM chunk_files
 		WHERE chunk_hash = ?
 	`
@@ -51,7 +51,7 @@ func (r *ChunkFileRepository) GetByChunkHash(ctx context.Context, chunkHash stri
 	var chunkFiles []*ChunkFile
 	for rows.Next() {
 		var cf ChunkFile
-		err := rows.Scan(&cf.ChunkHash, &cf.FilePath, &cf.FileOffset, &cf.Length)
+		err := rows.Scan(&cf.ChunkHash, &cf.FileID, &cf.FileOffset, &cf.Length)
 		if err != nil {
 			return nil, fmt.Errorf("scanning chunk file: %w", err)
 		}
@@ -63,9 +63,10 @@ func (r *ChunkFileRepository) GetByChunkHash(ctx context.Context, chunkHash stri
 
 func (r *ChunkFileRepository) GetByFilePath(ctx context.Context, filePath string) ([]*ChunkFile, error) {
 	query := `
-		SELECT chunk_hash, file_path, file_offset, length
-		FROM chunk_files
-		WHERE file_path = ?
+		SELECT cf.chunk_hash, cf.file_id, cf.file_offset, cf.length
+		FROM chunk_files cf
+		JOIN files f ON cf.file_id = f.id
+		WHERE f.path = ?
 	`
 
 	rows, err := r.db.conn.QueryContext(ctx, query, filePath)
@@ -77,7 +78,34 @@ func (r *ChunkFileRepository) GetByFilePath(ctx context.Context, filePath string
 	var chunkFiles []*ChunkFile
 	for rows.Next() {
 		var cf ChunkFile
-		err := rows.Scan(&cf.ChunkHash, &cf.FilePath, &cf.FileOffset, &cf.Length)
+		err := rows.Scan(&cf.ChunkHash, &cf.FileID, &cf.FileOffset, &cf.Length)
 		if err != nil {
 			return nil, fmt.Errorf("scanning chunk file: %w", err)
 		}
 		chunkFiles = append(chunkFiles, &cf)
 	}
 
 	return chunkFiles, rows.Err()
 }
 
+// GetByFileID retrieves chunk files by file ID
+func (r *ChunkFileRepository) GetByFileID(ctx context.Context, fileID string) ([]*ChunkFile, error) {
+	query := `
+		SELECT chunk_hash, file_id, file_offset, length
+		FROM chunk_files
+		WHERE file_id = ?
+	`
+
+	rows, err := r.db.conn.QueryContext(ctx, query, fileID)
+	if err != nil {
+		return nil, fmt.Errorf("querying chunk files: %w", err)
+	}
+	defer CloseRows(rows)
+
+	var chunkFiles []*ChunkFile
+	for rows.Next() {
+		var cf ChunkFile
+		err := rows.Scan(&cf.ChunkHash, &cf.FileID, &cf.FileOffset, &cf.Length)
+		if err != nil {
+			return nil, fmt.Errorf("scanning chunk file: %w", err)
+		}
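For context on the new accessor added at the end of this diff, a minimal caller sketch. Only the GetByFileID signature and the ChunkFile fields come from the diff; the function name and surrounding wiring are hypothetical:

// Hypothetical caller, assumed to live alongside the repository code.
func printChunksForFile(ctx context.Context, repo *ChunkFileRepository, fileID string) error {
	chunkFiles, err := repo.GetByFileID(ctx, fileID)
	if err != nil {
		return err
	}
	for _, cf := range chunkFiles {
		fmt.Printf("chunk %s at offset %d (%d bytes)\n", cf.ChunkHash, cf.FileOffset, cf.Length)
	}
	return nil
}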