vaultik/internal/database/file_chunks.go
sneak 78af626759 Major refactoring: UUID-based storage, streaming architecture, and CLI improvements
This commit represents a significant architectural overhaul of vaultik:

Database Schema Changes:
- Switch files table to use UUID primary keys instead of path-based keys
- Add UUID primary keys to blobs table for immediate chunk association
- Update all foreign key relationships to use UUIDs
- Add comprehensive schema documentation in DATAMODEL.md
- Add SQLite busy timeout handling for concurrent operations

Streaming and Performance Improvements:
- Implement true streaming blob packing without intermediate storage
- Add streaming chunk processing to reduce memory usage
- Improve progress reporting with real-time metrics
- Add upload metrics tracking in new uploads table

CLI Refactoring:
- Restructure CLI to use subcommands: snapshot create/list/purge/verify
- Add store info command for S3 configuration display
- Add custom duration parser supporting days/weeks/months/years
- Remove old backup.go in favor of enhanced snapshot.go
- Add --cron flag for silent operation

Configuration Changes:
- Remove unused index_prefix configuration option
- Add support for snapshot pruning retention policies
- Improve configuration validation and error messages

Testing Improvements:
- Add comprehensive repository tests with edge cases
- Add cascade delete debugging tests
- Fix concurrent operation tests to use SQLite busy timeout
- Remove tolerance for SQLITE_BUSY errors in tests

Documentation:
- Add MIT LICENSE file
- Update README with new command structure
- Add comprehensive DATAMODEL.md explaining database schema
- Update DESIGN.md with UUID-based architecture

Other Changes:
- Add test-config.yml for testing
- Update Makefile with better test output formatting
- Fix various race conditions in concurrent operations
- Improve error handling throughout
2025-07-22 14:56:44 +02:00

175 lines
4.4 KiB
Go

package database
import (
"context"
"database/sql"
"fmt"
)
type FileChunkRepository struct {
db *DB
}
func NewFileChunkRepository(db *DB) *FileChunkRepository {
return &FileChunkRepository{db: db}
}
func (r *FileChunkRepository) Create(ctx context.Context, tx *sql.Tx, fc *FileChunk) error {
query := `
INSERT INTO file_chunks (file_id, idx, chunk_hash)
VALUES (?, ?, ?)
ON CONFLICT(file_id, idx) DO NOTHING
`
var err error
if tx != nil {
_, err = tx.ExecContext(ctx, query, fc.FileID, fc.Idx, fc.ChunkHash)
} else {
_, err = r.db.ExecWithLog(ctx, query, fc.FileID, fc.Idx, fc.ChunkHash)
}
if err != nil {
return fmt.Errorf("inserting file_chunk: %w", err)
}
return nil
}
func (r *FileChunkRepository) GetByPath(ctx context.Context, path string) ([]*FileChunk, error) {
query := `
SELECT fc.file_id, fc.idx, fc.chunk_hash
FROM file_chunks fc
JOIN files f ON fc.file_id = f.id
WHERE f.path = ?
ORDER BY fc.idx
`
rows, err := r.db.conn.QueryContext(ctx, query, path)
if err != nil {
return nil, fmt.Errorf("querying file chunks: %w", err)
}
defer CloseRows(rows)
var fileChunks []*FileChunk
for rows.Next() {
var fc FileChunk
err := rows.Scan(&fc.FileID, &fc.Idx, &fc.ChunkHash)
if err != nil {
return nil, fmt.Errorf("scanning file chunk: %w", err)
}
fileChunks = append(fileChunks, &fc)
}
return fileChunks, rows.Err()
}
// GetByFileID retrieves file chunks by file ID
func (r *FileChunkRepository) GetByFileID(ctx context.Context, fileID string) ([]*FileChunk, error) {
query := `
SELECT file_id, idx, chunk_hash
FROM file_chunks
WHERE file_id = ?
ORDER BY idx
`
rows, err := r.db.conn.QueryContext(ctx, query, fileID)
if err != nil {
return nil, fmt.Errorf("querying file chunks: %w", err)
}
defer CloseRows(rows)
var fileChunks []*FileChunk
for rows.Next() {
var fc FileChunk
err := rows.Scan(&fc.FileID, &fc.Idx, &fc.ChunkHash)
if err != nil {
return nil, fmt.Errorf("scanning file chunk: %w", err)
}
fileChunks = append(fileChunks, &fc)
}
return fileChunks, rows.Err()
}
// GetByPathTx retrieves file chunks within a transaction
func (r *FileChunkRepository) GetByPathTx(ctx context.Context, tx *sql.Tx, path string) ([]*FileChunk, error) {
query := `
SELECT fc.file_id, fc.idx, fc.chunk_hash
FROM file_chunks fc
JOIN files f ON fc.file_id = f.id
WHERE f.path = ?
ORDER BY fc.idx
`
LogSQL("GetByPathTx", query, path)
rows, err := tx.QueryContext(ctx, query, path)
if err != nil {
return nil, fmt.Errorf("querying file chunks: %w", err)
}
defer CloseRows(rows)
var fileChunks []*FileChunk
for rows.Next() {
var fc FileChunk
err := rows.Scan(&fc.FileID, &fc.Idx, &fc.ChunkHash)
if err != nil {
return nil, fmt.Errorf("scanning file chunk: %w", err)
}
fileChunks = append(fileChunks, &fc)
}
LogSQL("GetByPathTx", "Complete", path, "count", len(fileChunks))
return fileChunks, rows.Err()
}
func (r *FileChunkRepository) DeleteByPath(ctx context.Context, tx *sql.Tx, path string) error {
query := `DELETE FROM file_chunks WHERE file_id = (SELECT id FROM files WHERE path = ?)`
var err error
if tx != nil {
_, err = tx.ExecContext(ctx, query, path)
} else {
_, err = r.db.ExecWithLog(ctx, query, path)
}
if err != nil {
return fmt.Errorf("deleting file chunks: %w", err)
}
return nil
}
// DeleteByFileID deletes all chunks for a file by its UUID
func (r *FileChunkRepository) DeleteByFileID(ctx context.Context, tx *sql.Tx, fileID string) error {
query := `DELETE FROM file_chunks WHERE file_id = ?`
var err error
if tx != nil {
_, err = tx.ExecContext(ctx, query, fileID)
} else {
_, err = r.db.ExecWithLog(ctx, query, fileID)
}
if err != nil {
return fmt.Errorf("deleting file chunks: %w", err)
}
return nil
}
// GetByFile is an alias for GetByPath for compatibility
func (r *FileChunkRepository) GetByFile(ctx context.Context, path string) ([]*FileChunk, error) {
LogSQL("GetByFile", "Starting", path)
result, err := r.GetByPath(ctx, path)
LogSQL("GetByFile", "Complete", path, "count", len(result))
return result, err
}
// GetByFileTx retrieves file chunks within a transaction
func (r *FileChunkRepository) GetByFileTx(ctx context.Context, tx *sql.Tx, path string) ([]*FileChunk, error) {
LogSQL("GetByFileTx", "Starting", path)
result, err := r.GetByPathTx(ctx, tx, path)
LogSQL("GetByFileTx", "Complete", path, "count", len(result))
return result, err
}