This commit represents a significant architectural overhaul of vaultik: Database Schema Changes: - Switch files table to use UUID primary keys instead of path-based keys - Add UUID primary keys to blobs table for immediate chunk association - Update all foreign key relationships to use UUIDs - Add comprehensive schema documentation in DATAMODEL.md - Add SQLite busy timeout handling for concurrent operations Streaming and Performance Improvements: - Implement true streaming blob packing without intermediate storage - Add streaming chunk processing to reduce memory usage - Improve progress reporting with real-time metrics - Add upload metrics tracking in new uploads table CLI Refactoring: - Restructure CLI to use subcommands: snapshot create/list/purge/verify - Add store info command for S3 configuration display - Add custom duration parser supporting days/weeks/months/years - Remove old backup.go in favor of enhanced snapshot.go - Add --cron flag for silent operation Configuration Changes: - Remove unused index_prefix configuration option - Add support for snapshot pruning retention policies - Improve configuration validation and error messages Testing Improvements: - Add comprehensive repository tests with edge cases - Add cascade delete debugging tests - Fix concurrent operation tests to use SQLite busy timeout - Remove tolerance for SQLITE_BUSY errors in tests Documentation: - Add MIT LICENSE file - Update README with new command structure - Add comprehensive DATAMODEL.md explaining database schema - Update DESIGN.md with UUID-based architecture Other Changes: - Add test-config.yml for testing - Update Makefile with better test output formatting - Fix various race conditions in concurrent operations - Improve error handling throughout
175 lines
4.4 KiB
Go
175 lines
4.4 KiB
Go
package database
|
|
|
|
import (
|
|
"context"
|
|
"database/sql"
|
|
"fmt"
|
|
)
|
|
|
|
// FileChunkRepository provides access to the file_chunks table, which maps
// each file (keyed by its UUID file_id) to the ordered sequence of chunk
// hashes that make up its content.
type FileChunkRepository struct {
	db *DB // shared database handle; provides conn and ExecWithLog
}
|
|
|
|
// NewFileChunkRepository returns a FileChunkRepository backed by db.
func NewFileChunkRepository(db *DB) *FileChunkRepository {
	return &FileChunkRepository{db: db}
}
|
|
|
|
func (r *FileChunkRepository) Create(ctx context.Context, tx *sql.Tx, fc *FileChunk) error {
|
|
query := `
|
|
INSERT INTO file_chunks (file_id, idx, chunk_hash)
|
|
VALUES (?, ?, ?)
|
|
ON CONFLICT(file_id, idx) DO NOTHING
|
|
`
|
|
|
|
var err error
|
|
if tx != nil {
|
|
_, err = tx.ExecContext(ctx, query, fc.FileID, fc.Idx, fc.ChunkHash)
|
|
} else {
|
|
_, err = r.db.ExecWithLog(ctx, query, fc.FileID, fc.Idx, fc.ChunkHash)
|
|
}
|
|
|
|
if err != nil {
|
|
return fmt.Errorf("inserting file_chunk: %w", err)
|
|
}
|
|
|
|
return nil
|
|
}
|
|
|
|
func (r *FileChunkRepository) GetByPath(ctx context.Context, path string) ([]*FileChunk, error) {
|
|
query := `
|
|
SELECT fc.file_id, fc.idx, fc.chunk_hash
|
|
FROM file_chunks fc
|
|
JOIN files f ON fc.file_id = f.id
|
|
WHERE f.path = ?
|
|
ORDER BY fc.idx
|
|
`
|
|
|
|
rows, err := r.db.conn.QueryContext(ctx, query, path)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("querying file chunks: %w", err)
|
|
}
|
|
defer CloseRows(rows)
|
|
|
|
var fileChunks []*FileChunk
|
|
for rows.Next() {
|
|
var fc FileChunk
|
|
err := rows.Scan(&fc.FileID, &fc.Idx, &fc.ChunkHash)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("scanning file chunk: %w", err)
|
|
}
|
|
fileChunks = append(fileChunks, &fc)
|
|
}
|
|
|
|
return fileChunks, rows.Err()
|
|
}
|
|
|
|
// GetByFileID retrieves file chunks by file ID
|
|
func (r *FileChunkRepository) GetByFileID(ctx context.Context, fileID string) ([]*FileChunk, error) {
|
|
query := `
|
|
SELECT file_id, idx, chunk_hash
|
|
FROM file_chunks
|
|
WHERE file_id = ?
|
|
ORDER BY idx
|
|
`
|
|
|
|
rows, err := r.db.conn.QueryContext(ctx, query, fileID)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("querying file chunks: %w", err)
|
|
}
|
|
defer CloseRows(rows)
|
|
|
|
var fileChunks []*FileChunk
|
|
for rows.Next() {
|
|
var fc FileChunk
|
|
err := rows.Scan(&fc.FileID, &fc.Idx, &fc.ChunkHash)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("scanning file chunk: %w", err)
|
|
}
|
|
fileChunks = append(fileChunks, &fc)
|
|
}
|
|
|
|
return fileChunks, rows.Err()
|
|
}
|
|
|
|
// GetByPathTx retrieves file chunks within a transaction
|
|
func (r *FileChunkRepository) GetByPathTx(ctx context.Context, tx *sql.Tx, path string) ([]*FileChunk, error) {
|
|
query := `
|
|
SELECT fc.file_id, fc.idx, fc.chunk_hash
|
|
FROM file_chunks fc
|
|
JOIN files f ON fc.file_id = f.id
|
|
WHERE f.path = ?
|
|
ORDER BY fc.idx
|
|
`
|
|
|
|
LogSQL("GetByPathTx", query, path)
|
|
rows, err := tx.QueryContext(ctx, query, path)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("querying file chunks: %w", err)
|
|
}
|
|
defer CloseRows(rows)
|
|
|
|
var fileChunks []*FileChunk
|
|
for rows.Next() {
|
|
var fc FileChunk
|
|
err := rows.Scan(&fc.FileID, &fc.Idx, &fc.ChunkHash)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("scanning file chunk: %w", err)
|
|
}
|
|
fileChunks = append(fileChunks, &fc)
|
|
}
|
|
LogSQL("GetByPathTx", "Complete", path, "count", len(fileChunks))
|
|
|
|
return fileChunks, rows.Err()
|
|
}
|
|
|
|
func (r *FileChunkRepository) DeleteByPath(ctx context.Context, tx *sql.Tx, path string) error {
|
|
query := `DELETE FROM file_chunks WHERE file_id = (SELECT id FROM files WHERE path = ?)`
|
|
|
|
var err error
|
|
if tx != nil {
|
|
_, err = tx.ExecContext(ctx, query, path)
|
|
} else {
|
|
_, err = r.db.ExecWithLog(ctx, query, path)
|
|
}
|
|
|
|
if err != nil {
|
|
return fmt.Errorf("deleting file chunks: %w", err)
|
|
}
|
|
|
|
return nil
|
|
}
|
|
|
|
// DeleteByFileID deletes all chunks for a file by its UUID
|
|
func (r *FileChunkRepository) DeleteByFileID(ctx context.Context, tx *sql.Tx, fileID string) error {
|
|
query := `DELETE FROM file_chunks WHERE file_id = ?`
|
|
|
|
var err error
|
|
if tx != nil {
|
|
_, err = tx.ExecContext(ctx, query, fileID)
|
|
} else {
|
|
_, err = r.db.ExecWithLog(ctx, query, fileID)
|
|
}
|
|
|
|
if err != nil {
|
|
return fmt.Errorf("deleting file chunks: %w", err)
|
|
}
|
|
|
|
return nil
|
|
}
|
|
|
|
// GetByFile is an alias for GetByPath for compatibility
|
|
func (r *FileChunkRepository) GetByFile(ctx context.Context, path string) ([]*FileChunk, error) {
|
|
LogSQL("GetByFile", "Starting", path)
|
|
result, err := r.GetByPath(ctx, path)
|
|
LogSQL("GetByFile", "Complete", path, "count", len(result))
|
|
return result, err
|
|
}
|
|
|
|
// GetByFileTx retrieves file chunks within a transaction
|
|
func (r *FileChunkRepository) GetByFileTx(ctx context.Context, tx *sql.Tx, path string) ([]*FileChunk, error) {
|
|
LogSQL("GetByFileTx", "Starting", path)
|
|
result, err := r.GetByPathTx(ctx, tx, path)
|
|
LogSQL("GetByFileTx", "Complete", path, "count", len(result))
|
|
return result, err
|
|
}
|