Major refactoring: UUID-based storage, streaming architecture, and CLI improvements

This commit represents a significant architectural overhaul of vaultik:

Database Schema Changes:
- Switch files table to use UUID primary keys instead of path-based keys
- Add UUID primary keys to blobs table for immediate chunk association
- Update all foreign key relationships to use UUIDs
- Add comprehensive schema documentation in DATAMODEL.md
- Add SQLite busy timeout handling for concurrent operations

Streaming and Performance Improvements:
- Implement true streaming blob packing without intermediate storage
- Add streaming chunk processing to reduce memory usage
- Improve progress reporting with real-time metrics
- Add upload metrics tracking in new uploads table

CLI Refactoring:
- Restructure CLI to use subcommands: snapshot create/list/purge/verify
- Add store info command for S3 configuration display
- Add custom duration parser supporting days/weeks/months/years
- Remove old backup.go in favor of enhanced snapshot.go
- Add --cron flag for silent operation

Configuration Changes:
- Remove unused index_prefix configuration option
- Add support for snapshot pruning retention policies
- Improve configuration validation and error messages

Testing Improvements:
- Add comprehensive repository tests with edge cases
- Add cascade delete debugging tests
- Fix concurrent operation tests to use SQLite busy timeout
- Remove tolerance for SQLITE_BUSY errors in tests

Documentation:
- Add MIT LICENSE file
- Update README with new command structure
- Add comprehensive DATAMODEL.md explaining database schema
- Update DESIGN.md with UUID-based architecture

Other Changes:
- Add test-config.yml for testing
- Update Makefile with better test output formatting
- Fix various race conditions in concurrent operations
- Improve error handling throughout
2025-07-22 14:54:37 +02:00
parent 86b533d6ee
commit 78af626759
54 changed files with 5525 additions and 1109 deletions
--- a/internal/database/file_chunks.go
+++ b/internal/database/file_chunks.go
@@ -16,16 +16,16 @@ func NewFileChunkRepository(db *DB) *FileChunkRepository {

 func (r *FileChunkRepository) Create(ctx context.Context, tx *sql.Tx, fc *FileChunk) error {
 	query := `
-		INSERT INTO file_chunks (path, idx, chunk_hash)
+		INSERT INTO file_chunks (file_id, idx, chunk_hash)
 		VALUES (?, ?, ?)
-		ON CONFLICT(path, idx) DO NOTHING
+		ON CONFLICT(file_id, idx) DO NOTHING
 	`

 	var err error
 	if tx != nil {
-		_, err = tx.ExecContext(ctx, query, fc.Path, fc.Idx, fc.ChunkHash)
+		_, err = tx.ExecContext(ctx, query, fc.FileID, fc.Idx, fc.ChunkHash)
 	} else {
-		_, err = r.db.ExecWithLock(ctx, query, fc.Path, fc.Idx, fc.ChunkHash)
+		_, err = r.db.ExecWithLog(ctx, query, fc.FileID, fc.Idx, fc.ChunkHash)
 	}

 	if err != nil {
@@ -37,10 +37,11 @@ func (r *FileChunkRepository) Create(ctx context.Context, tx *sql.Tx, fc *FileCh

 func (r *FileChunkRepository) GetByPath(ctx context.Context, path string) ([]*FileChunk, error) {
 	query := `
-		SELECT path, idx, chunk_hash
-		FROM file_chunks
-		WHERE path = ?
-		ORDER BY idx
+		SELECT fc.file_id, fc.idx, fc.chunk_hash
+		FROM file_chunks fc
+		JOIN files f ON fc.file_id = f.id
+		WHERE f.path = ?
+		ORDER BY fc.idx
 	`

 	rows, err := r.db.conn.QueryContext(ctx, query, path)
@@ -52,7 +53,35 @@ func (r *FileChunkRepository) GetByPath(ctx context.Context, path string) ([]*Fi
 	var fileChunks []*FileChunk
 	for rows.Next() {
 		var fc FileChunk
-		err := rows.Scan(&fc.Path, &fc.Idx, &fc.ChunkHash)
+		err := rows.Scan(&fc.FileID, &fc.Idx, &fc.ChunkHash)
+		if err != nil {
+			return nil, fmt.Errorf("scanning file chunk: %w", err)
+		}
+		fileChunks = append(fileChunks, &fc)
+	}
+
+	return fileChunks, rows.Err()
+}
+
+// GetByFileID retrieves file chunks by file ID
+func (r *FileChunkRepository) GetByFileID(ctx context.Context, fileID string) ([]*FileChunk, error) {
+	query := `
+		SELECT file_id, idx, chunk_hash
+		FROM file_chunks
+		WHERE file_id = ?
+		ORDER BY idx
+	`
+
+	rows, err := r.db.conn.QueryContext(ctx, query, fileID)
+	if err != nil {
+		return nil, fmt.Errorf("querying file chunks: %w", err)
+	}
+	defer CloseRows(rows)
+
+	var fileChunks []*FileChunk
+	for rows.Next() {
+		var fc FileChunk
+		err := rows.Scan(&fc.FileID, &fc.Idx, &fc.ChunkHash)
 		if err != nil {
 			return nil, fmt.Errorf("scanning file chunk: %w", err)
 		}
@@ -65,10 +94,11 @@ func (r *FileChunkRepository) GetByPath(ctx context.Context, path string) ([]*Fi
 // GetByPathTx retrieves file chunks within a transaction
 func (r *FileChunkRepository) GetByPathTx(ctx context.Context, tx *sql.Tx, path string) ([]*FileChunk, error) {
 	query := `
-		SELECT path, idx, chunk_hash
-		FROM file_chunks
-		WHERE path = ?
-		ORDER BY idx
+		SELECT fc.file_id, fc.idx, fc.chunk_hash
+		FROM file_chunks fc
+		JOIN files f ON fc.file_id = f.id
+		WHERE f.path = ?
+		ORDER BY fc.idx
 	`

 	LogSQL("GetByPathTx", query, path)
@@ -81,7 +111,7 @@ func (r *FileChunkRepository) GetByPathTx(ctx context.Context, tx *sql.Tx, path
 	var fileChunks []*FileChunk
 	for rows.Next() {
 		var fc FileChunk
-		err := rows.Scan(&fc.Path, &fc.Idx, &fc.ChunkHash)
+		err := rows.Scan(&fc.FileID, &fc.Idx, &fc.ChunkHash)
 		if err != nil {
 			return nil, fmt.Errorf("scanning file chunk: %w", err)
 		}
@@ -93,13 +123,31 @@ func (r *FileChunkRepository) GetByPathTx(ctx context.Context, tx *sql.Tx, path
 }

 func (r *FileChunkRepository) DeleteByPath(ctx context.Context, tx *sql.Tx, path string) error {
-	query := `DELETE FROM file_chunks WHERE path = ?`
+	query := `DELETE FROM file_chunks WHERE file_id = (SELECT id FROM files WHERE path = ?)`

 	var err error
 	if tx != nil {
 		_, err = tx.ExecContext(ctx, query, path)
 	} else {
-		_, err = r.db.ExecWithLock(ctx, query, path)
+		_, err = r.db.ExecWithLog(ctx, query, path)
+	}
+
+	if err != nil {
+		return fmt.Errorf("deleting file chunks: %w", err)
+	}
+
+	return nil
+}
+
+// DeleteByFileID deletes all chunks for a file by its UUID
+func (r *FileChunkRepository) DeleteByFileID(ctx context.Context, tx *sql.Tx, fileID string) error {
+	query := `DELETE FROM file_chunks WHERE file_id = ?`
+
+	var err error
+	if tx != nil {
+		_, err = tx.ExecContext(ctx, query, fileID)
+	} else {
+		_, err = r.db.ExecWithLog(ctx, query, fileID)
 	}

 	if err != nil {