vaultik/internal/database/blob_chunks.go
sneak 78af626759 Major refactoring: UUID-based storage, streaming architecture, and CLI improvements
This commit represents a significant architectural overhaul of vaultik:

Database Schema Changes:
- Switch files table to use UUID primary keys instead of path-based keys
- Add UUID primary keys to blobs table for immediate chunk association
- Update all foreign key relationships to use UUIDs
- Add comprehensive schema documentation in DATAMODEL.md
- Add SQLite busy timeout handling for concurrent operations
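A rough sketch of the busy-timeout handling just mentioned, assuming a mattn/go-sqlite3-style driver (the actual driver wiring in vaultik may differ): the timeout makes concurrent writers wait for the lock instead of failing immediately with SQLITE_BUSY.

package main

import (
    "database/sql"
    "log"

    _ "github.com/mattn/go-sqlite3" // assumed driver; vaultik's may differ
)

func main() {
    // _busy_timeout makes a writer wait up to 5s for a lock instead of
    // returning SQLITE_BUSY immediately; PRAGMA busy_timeout works too.
    db, err := sql.Open("sqlite3", "file:state.db?_busy_timeout=5000")
    if err != nil {
        log.Fatal(err)
    }
    defer func() { _ = db.Close() }()

    if err := db.Ping(); err != nil { // force the database file to open
        log.Fatal(err)
    }
}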

Streaming and Performance Improvements:
- Implement true streaming blob packing without intermediate storage (see the sketch after this list)
- Add streaming chunk processing to reduce memory usage
- Improve progress reporting with real-time metrics
- Add upload metrics tracking in new uploads table
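A minimal sketch of the streaming packing idea above, using io.Pipe (the names and gzip compression here are assumptions, not vaultik's actual packer API): chunks are compressed on one goroutine while the uploader consumes the other end, so the packed blob never exists as an intermediate file.

package main

import (
    "compress/gzip"
    "io"
    "log"
)

// packBlob streams chunks through a compressor into w, so the packed
// blob is never buffered in memory or staged on disk.
func packBlob(chunks [][]byte, w io.Writer) error {
    zw := gzip.NewWriter(w)
    for _, c := range chunks {
        if _, err := zw.Write(c); err != nil {
            return err
        }
    }
    return zw.Close()
}

// uploadBlob connects the packer to an uploader with io.Pipe: the packer
// writes on one goroutine while the uploader reads on the other.
func uploadBlob(chunks [][]byte, put func(io.Reader) error) error {
    pr, pw := io.Pipe()
    go func() {
        pw.CloseWithError(packBlob(chunks, pw))
    }()
    return put(pr)
}

func main() {
    chunks := [][]byte{[]byte("hello "), []byte("world")}
    // The put func stands in for an S3 upload; here it just counts bytes.
    err := uploadBlob(chunks, func(r io.Reader) error {
        n, err := io.Copy(io.Discard, r)
        log.Printf("streamed %d packed bytes", n)
        return err
    })
    if err != nil {
        log.Fatal(err)
    }
}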

CLI Refactoring:
- Restructure CLI to use subcommands: snapshot create/list/purge/verify
- Add store info command for S3 configuration display
- Add custom duration parser supporting days/weeks/months/years (sketched after this list)
- Remove old backup.go in favor of enhanced snapshot.go
- Add --cron flag for silent operation
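Go's time.ParseDuration stops at hours, so a parser like the one described above typically maps day/week/month/year suffixes onto fixed durations. A hedged sketch follows; the suffix spellings and the month/year lengths are assumptions, not necessarily vaultik's conventions.

package main

import (
    "fmt"
    "log"
    "strconv"
    "strings"
    "time"
)

// extendedUnits maps the extra suffixes onto fixed durations. Month and
// year lengths are approximations, and "mo" is chosen here to avoid
// clashing with the standard minutes suffix.
var extendedUnits = []struct {
    suffix string
    unit   time.Duration
}{
    {"mo", 30 * 24 * time.Hour},
    {"y", 365 * 24 * time.Hour},
    {"w", 7 * 24 * time.Hour},
    {"d", 24 * time.Hour},
}

// parseRetention accepts durations like "90d", "4w", "6mo", "1y", and
// falls back to Go's standard h/m/s syntax for everything else.
func parseRetention(s string) (time.Duration, error) {
    for _, u := range extendedUnits {
        if strings.HasSuffix(s, u.suffix) {
            n, err := strconv.Atoi(strings.TrimSuffix(s, u.suffix))
            if err != nil {
                return 0, fmt.Errorf("parsing %q: %w", s, err)
            }
            return time.Duration(n) * u.unit, nil
        }
    }
    return time.ParseDuration(s)
}

func main() {
    for _, s := range []string{"90d", "4w", "1y", "36h"} {
        d, err := parseRetention(s)
        if err != nil {
            log.Fatal(err)
        }
        fmt.Printf("%s = %s\n", s, d)
    }
}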

Configuration Changes:
- Remove unused index_prefix configuration option
- Add support for snapshot pruning retention policies (see the sketch after this list)
- Improve configuration validation and error messages
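As a purely illustrative sketch of what a pruning retention policy can look like (the type and field names below are hypothetical, not vaultik's actual configuration schema):

package config

import "time"

// RetentionPolicy is a hypothetical shape for snapshot pruning rules;
// vaultik's real configuration keys may differ.
type RetentionPolicy struct {
    KeepLast    int           `yaml:"keep_last"`    // newest N snapshots always kept
    KeepDaily   int           `yaml:"keep_daily"`   // newest snapshot per day, for N days
    KeepWeekly  int           `yaml:"keep_weekly"`  // newest snapshot per week, for N weeks
    KeepMonthly int           `yaml:"keep_monthly"` // newest snapshot per month, for N months
    MaxAge      time.Duration `yaml:"max_age"`      // e.g. parsed from "1y" as above
}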

Testing Improvements:
- Add comprehensive repository tests with edge cases
- Add cascade delete debugging tests
- Fix concurrent operation tests to use SQLite busy timeout
- Remove tolerance for SQLITE_BUSY errors in tests

Documentation:
- Add MIT LICENSE file
- Update README with new command structure
- Add comprehensive DATAMODEL.md explaining database schema
- Update DESIGN.md with UUID-based architecture

Other Changes:
- Add test-config.yml for testing
- Update Makefile with better test output formatting
- Fix various race conditions in concurrent operations
- Improve error handling throughout
2025-07-22 14:56:44 +02:00


package database

import (
    "context"
    "database/sql"
    "errors"
    "fmt"
)

// BlobChunkRepository provides access to the blob_chunks table, which maps
// each chunk to its position (offset and length) within a packed blob.
type BlobChunkRepository struct {
    db *DB
}

// NewBlobChunkRepository returns a repository backed by the given database.
func NewBlobChunkRepository(db *DB) *BlobChunkRepository {
    return &BlobChunkRepository{db: db}
}
// Create inserts a blob-to-chunk mapping. If tx is non-nil the insert runs
// inside that transaction; otherwise it runs directly against the database.
func (r *BlobChunkRepository) Create(ctx context.Context, tx *sql.Tx, bc *BlobChunk) error {
    query := `
        INSERT INTO blob_chunks (blob_id, chunk_hash, offset, length)
        VALUES (?, ?, ?, ?)
    `
    var err error
    if tx != nil {
        _, err = tx.ExecContext(ctx, query, bc.BlobID, bc.ChunkHash, bc.Offset, bc.Length)
    } else {
        _, err = r.db.ExecWithLog(ctx, query, bc.BlobID, bc.ChunkHash, bc.Offset, bc.Length)
    }
    if err != nil {
        return fmt.Errorf("inserting blob_chunk: %w", err)
    }
    return nil
}
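// Hypothetical usage sketch (not part of the original file): the packer is
// expected to call Create with its open transaction so that the mapping
// rows commit atomically with the blob row itself:
//
//	err = blobChunks.Create(ctx, tx, &BlobChunk{
//	    BlobID:    blobID,
//	    ChunkHash: chunkHash,
//	    Offset:    offset,
//	    Length:    length,
//	})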
// GetByBlobID returns all chunk mappings for a blob, ordered by their
// offset within the blob.
func (r *BlobChunkRepository) GetByBlobID(ctx context.Context, blobID string) ([]*BlobChunk, error) {
    query := `
        SELECT blob_id, chunk_hash, offset, length
        FROM blob_chunks
        WHERE blob_id = ?
        ORDER BY offset
    `
    rows, err := r.db.conn.QueryContext(ctx, query, blobID)
    if err != nil {
        return nil, fmt.Errorf("querying blob chunks: %w", err)
    }
    defer CloseRows(rows)

    var blobChunks []*BlobChunk
    for rows.Next() {
        var bc BlobChunk
        if err := rows.Scan(&bc.BlobID, &bc.ChunkHash, &bc.Offset, &bc.Length); err != nil {
            return nil, fmt.Errorf("scanning blob chunk: %w", err)
        }
        blobChunks = append(blobChunks, &bc)
    }
    return blobChunks, rows.Err()
}
// GetByChunkHash returns the first blob mapping that contains the given
// chunk, or (nil, nil) if the chunk is not packed into any blob yet.
func (r *BlobChunkRepository) GetByChunkHash(ctx context.Context, chunkHash string) (*BlobChunk, error) {
    query := `
        SELECT blob_id, chunk_hash, offset, length
        FROM blob_chunks
        WHERE chunk_hash = ?
        LIMIT 1
    `
    LogSQL("GetByChunkHash", query, chunkHash)

    var bc BlobChunk
    err := r.db.conn.QueryRowContext(ctx, query, chunkHash).Scan(
        &bc.BlobID,
        &bc.ChunkHash,
        &bc.Offset,
        &bc.Length,
    )
    if errors.Is(err, sql.ErrNoRows) {
        LogSQL("GetByChunkHash", "No rows found", chunkHash)
        return nil, nil
    }
    if err != nil {
        LogSQL("GetByChunkHash", "Error", chunkHash, err)
        return nil, fmt.Errorf("querying blob chunk: %w", err)
    }
    LogSQL("GetByChunkHash", "Found blob", chunkHash, "blob", bc.BlobID)
    return &bc, nil
}
// GetByChunkHashTx retrieves the first blob mapping for a chunk within an
// existing transaction, returning (nil, nil) if no mapping exists.
func (r *BlobChunkRepository) GetByChunkHashTx(ctx context.Context, tx *sql.Tx, chunkHash string) (*BlobChunk, error) {
    query := `
        SELECT blob_id, chunk_hash, offset, length
        FROM blob_chunks
        WHERE chunk_hash = ?
        LIMIT 1
    `
    LogSQL("GetByChunkHashTx", query, chunkHash)

    var bc BlobChunk
    err := tx.QueryRowContext(ctx, query, chunkHash).Scan(
        &bc.BlobID,
        &bc.ChunkHash,
        &bc.Offset,
        &bc.Length,
    )
    if errors.Is(err, sql.ErrNoRows) {
        LogSQL("GetByChunkHashTx", "No rows found", chunkHash)
        return nil, nil
    }
    if err != nil {
        LogSQL("GetByChunkHashTx", "Error", chunkHash, err)
        return nil, fmt.Errorf("querying blob chunk: %w", err)
    }
    LogSQL("GetByChunkHashTx", "Found blob", chunkHash, "blob", bc.BlobID)
    return &bc, nil
}
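// Hypothetical usage sketch (not part of the original file): callers use
// the (nil, nil) contract above as a deduplication check while packing:
//
//	existing, err := blobChunks.GetByChunkHash(ctx, chunkHash)
//	if err != nil {
//	    return err
//	}
//	if existing == nil {
//	    // chunk not packed into any blob yet; pack it now
//	} else {
//	    // chunk already stored in blob existing.BlobID; reuse it
//	}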