Refactor blob storage to use UUID primary keys and implement streaming chunking

- Changed blob table to use ID (UUID) as primary key instead of hash
- Blob records are now created at packing start, enabling immediate chunk associations
- Implemented streaming chunking to process large files without memory exhaustion
- Fixed blob manifest generation to include all referenced blobs
- Updated all foreign key references from blob_hash to blob_id
- Added progress reporting and improved error handling
- Enforced encryption requirement for all blob packing
- Updated tests to use test encryption keys
- Added Cyrillic transliteration to README
This commit is contained in:
2025-07-22 07:43:39 +02:00
parent 26db096913
commit 86b533d6ee
49 changed files with 5709 additions and 324 deletions

View File

@@ -16,15 +16,15 @@ func NewBlobChunkRepository(db *DB) *BlobChunkRepository {
func (r *BlobChunkRepository) Create(ctx context.Context, tx *sql.Tx, bc *BlobChunk) error {
query := `
INSERT INTO blob_chunks (blob_hash, chunk_hash, offset, length)
INSERT INTO blob_chunks (blob_id, chunk_hash, offset, length)
VALUES (?, ?, ?, ?)
`
var err error
if tx != nil {
_, err = tx.ExecContext(ctx, query, bc.BlobHash, bc.ChunkHash, bc.Offset, bc.Length)
_, err = tx.ExecContext(ctx, query, bc.BlobID, bc.ChunkHash, bc.Offset, bc.Length)
} else {
_, err = r.db.ExecWithLock(ctx, query, bc.BlobHash, bc.ChunkHash, bc.Offset, bc.Length)
_, err = r.db.ExecWithLock(ctx, query, bc.BlobID, bc.ChunkHash, bc.Offset, bc.Length)
}
if err != nil {
@@ -34,15 +34,15 @@ func (r *BlobChunkRepository) Create(ctx context.Context, tx *sql.Tx, bc *BlobCh
return nil
}
func (r *BlobChunkRepository) GetByBlobHash(ctx context.Context, blobHash string) ([]*BlobChunk, error) {
func (r *BlobChunkRepository) GetByBlobID(ctx context.Context, blobID string) ([]*BlobChunk, error) {
query := `
SELECT blob_hash, chunk_hash, offset, length
SELECT blob_id, chunk_hash, offset, length
FROM blob_chunks
WHERE blob_hash = ?
WHERE blob_id = ?
ORDER BY offset
`
rows, err := r.db.conn.QueryContext(ctx, query, blobHash)
rows, err := r.db.conn.QueryContext(ctx, query, blobID)
if err != nil {
return nil, fmt.Errorf("querying blob chunks: %w", err)
}
@@ -51,7 +51,7 @@ func (r *BlobChunkRepository) GetByBlobHash(ctx context.Context, blobHash string
var blobChunks []*BlobChunk
for rows.Next() {
var bc BlobChunk
err := rows.Scan(&bc.BlobHash, &bc.ChunkHash, &bc.Offset, &bc.Length)
err := rows.Scan(&bc.BlobID, &bc.ChunkHash, &bc.Offset, &bc.Length)
if err != nil {
return nil, fmt.Errorf("scanning blob chunk: %w", err)
}
@@ -63,26 +63,61 @@ func (r *BlobChunkRepository) GetByBlobHash(ctx context.Context, blobHash string
func (r *BlobChunkRepository) GetByChunkHash(ctx context.Context, chunkHash string) (*BlobChunk, error) {
query := `
SELECT blob_hash, chunk_hash, offset, length
SELECT blob_id, chunk_hash, offset, length
FROM blob_chunks
WHERE chunk_hash = ?
LIMIT 1
`
LogSQL("GetByChunkHash", query, chunkHash)
var bc BlobChunk
err := r.db.conn.QueryRowContext(ctx, query, chunkHash).Scan(
&bc.BlobHash,
&bc.BlobID,
&bc.ChunkHash,
&bc.Offset,
&bc.Length,
)
if err == sql.ErrNoRows {
LogSQL("GetByChunkHash", "No rows found", chunkHash)
return nil, nil
}
if err != nil {
LogSQL("GetByChunkHash", "Error", chunkHash, err)
return nil, fmt.Errorf("querying blob chunk: %w", err)
}
LogSQL("GetByChunkHash", "Found blob", chunkHash, "blob", bc.BlobID)
return &bc, nil
}
// GetByChunkHashTx retrieves a blob chunk within a transaction
func (r *BlobChunkRepository) GetByChunkHashTx(ctx context.Context, tx *sql.Tx, chunkHash string) (*BlobChunk, error) {
query := `
SELECT blob_id, chunk_hash, offset, length
FROM blob_chunks
WHERE chunk_hash = ?
LIMIT 1
`
LogSQL("GetByChunkHashTx", query, chunkHash)
var bc BlobChunk
err := tx.QueryRowContext(ctx, query, chunkHash).Scan(
&bc.BlobID,
&bc.ChunkHash,
&bc.Offset,
&bc.Length,
)
if err == sql.ErrNoRows {
LogSQL("GetByChunkHashTx", "No rows found", chunkHash)
return nil, nil
}
if err != nil {
LogSQL("GetByChunkHashTx", "Error", chunkHash, err)
return nil, fmt.Errorf("querying blob chunk: %w", err)
}
LogSQL("GetByChunkHashTx", "Found blob", chunkHash, "blob", bc.BlobID)
return &bc, nil
}

View File

@@ -14,7 +14,7 @@ func TestBlobChunkRepository(t *testing.T) {
// Test Create
bc1 := &BlobChunk{
BlobHash: "blob1",
BlobID: "blob1-uuid",
ChunkHash: "chunk1",
Offset: 0,
Length: 1024,
@@ -27,7 +27,7 @@ func TestBlobChunkRepository(t *testing.T) {
// Add more chunks to the same blob
bc2 := &BlobChunk{
BlobHash: "blob1",
BlobID: "blob1-uuid",
ChunkHash: "chunk2",
Offset: 1024,
Length: 2048,
@@ -38,7 +38,7 @@ func TestBlobChunkRepository(t *testing.T) {
}
bc3 := &BlobChunk{
BlobHash: "blob1",
BlobID: "blob1-uuid",
ChunkHash: "chunk3",
Offset: 3072,
Length: 512,
@@ -48,8 +48,8 @@ func TestBlobChunkRepository(t *testing.T) {
t.Fatalf("failed to create third blob chunk: %v", err)
}
// Test GetByBlobHash
chunks, err := repo.GetByBlobHash(ctx, "blob1")
// Test GetByBlobID
chunks, err := repo.GetByBlobID(ctx, "blob1-uuid")
if err != nil {
t.Fatalf("failed to get blob chunks: %v", err)
}
@@ -73,8 +73,8 @@ func TestBlobChunkRepository(t *testing.T) {
if bc == nil {
t.Fatal("expected blob chunk, got nil")
}
if bc.BlobHash != "blob1" {
t.Errorf("wrong blob hash: expected blob1, got %s", bc.BlobHash)
if bc.BlobID != "blob1-uuid" {
t.Errorf("wrong blob ID: expected blob1-uuid, got %s", bc.BlobID)
}
if bc.Offset != 1024 {
t.Errorf("wrong offset: expected 1024, got %d", bc.Offset)
@@ -100,10 +100,10 @@ func TestBlobChunkRepositoryMultipleBlobs(t *testing.T) {
// Create chunks across multiple blobs
// Some chunks are shared between blobs (deduplication scenario)
blobChunks := []BlobChunk{
{BlobHash: "blob1", ChunkHash: "chunk1", Offset: 0, Length: 1024},
{BlobHash: "blob1", ChunkHash: "chunk2", Offset: 1024, Length: 1024},
{BlobHash: "blob2", ChunkHash: "chunk2", Offset: 0, Length: 1024}, // chunk2 is shared
{BlobHash: "blob2", ChunkHash: "chunk3", Offset: 1024, Length: 1024},
{BlobID: "blob1-uuid", ChunkHash: "chunk1", Offset: 0, Length: 1024},
{BlobID: "blob1-uuid", ChunkHash: "chunk2", Offset: 1024, Length: 1024},
{BlobID: "blob2-uuid", ChunkHash: "chunk2", Offset: 0, Length: 1024}, // chunk2 is shared
{BlobID: "blob2-uuid", ChunkHash: "chunk3", Offset: 1024, Length: 1024},
}
for _, bc := range blobChunks {
@@ -114,7 +114,7 @@ func TestBlobChunkRepositoryMultipleBlobs(t *testing.T) {
}
// Verify blob1 chunks
chunks, err := repo.GetByBlobHash(ctx, "blob1")
chunks, err := repo.GetByBlobID(ctx, "blob1-uuid")
if err != nil {
t.Fatalf("failed to get blob1 chunks: %v", err)
}
@@ -123,7 +123,7 @@ func TestBlobChunkRepositoryMultipleBlobs(t *testing.T) {
}
// Verify blob2 chunks
chunks, err = repo.GetByBlobHash(ctx, "blob2")
chunks, err = repo.GetByBlobID(ctx, "blob2-uuid")
if err != nil {
t.Fatalf("failed to get blob2 chunks: %v", err)
}
@@ -140,7 +140,7 @@ func TestBlobChunkRepositoryMultipleBlobs(t *testing.T) {
t.Fatal("expected shared chunk, got nil")
}
// GetByChunkHash returns first match, should be blob1
if bc.BlobHash != "blob1" {
t.Errorf("expected blob1 for shared chunk, got %s", bc.BlobHash)
if bc.BlobID != "blob1-uuid" {
t.Errorf("expected blob1-uuid for shared chunk, got %s", bc.BlobID)
}
}

View File

@@ -17,15 +17,27 @@ func NewBlobRepository(db *DB) *BlobRepository {
func (r *BlobRepository) Create(ctx context.Context, tx *sql.Tx, blob *Blob) error {
query := `
INSERT INTO blobs (blob_hash, created_ts)
VALUES (?, ?)
INSERT INTO blobs (id, blob_hash, created_ts, finished_ts, uncompressed_size, compressed_size, uploaded_ts)
VALUES (?, ?, ?, ?, ?, ?, ?)
`
var finishedTS, uploadedTS *int64
if blob.FinishedTS != nil {
ts := blob.FinishedTS.Unix()
finishedTS = &ts
}
if blob.UploadedTS != nil {
ts := blob.UploadedTS.Unix()
uploadedTS = &ts
}
var err error
if tx != nil {
_, err = tx.ExecContext(ctx, query, blob.BlobHash, blob.CreatedTS.Unix())
_, err = tx.ExecContext(ctx, query, blob.ID, blob.Hash, blob.CreatedTS.Unix(),
finishedTS, blob.UncompressedSize, blob.CompressedSize, uploadedTS)
} else {
_, err = r.db.ExecWithLock(ctx, query, blob.BlobHash, blob.CreatedTS.Unix())
_, err = r.db.ExecWithLock(ctx, query, blob.ID, blob.Hash, blob.CreatedTS.Unix(),
finishedTS, blob.UncompressedSize, blob.CompressedSize, uploadedTS)
}
if err != nil {
@@ -37,17 +49,23 @@ func (r *BlobRepository) Create(ctx context.Context, tx *sql.Tx, blob *Blob) err
func (r *BlobRepository) GetByHash(ctx context.Context, hash string) (*Blob, error) {
query := `
SELECT blob_hash, created_ts
SELECT id, blob_hash, created_ts, finished_ts, uncompressed_size, compressed_size, uploaded_ts
FROM blobs
WHERE blob_hash = ?
`
var blob Blob
var createdTSUnix int64
var finishedTSUnix, uploadedTSUnix sql.NullInt64
err := r.db.conn.QueryRowContext(ctx, query, hash).Scan(
&blob.BlobHash,
&blob.ID,
&blob.Hash,
&createdTSUnix,
&finishedTSUnix,
&blob.UncompressedSize,
&blob.CompressedSize,
&uploadedTSUnix,
)
if err == sql.ErrNoRows {
@@ -58,39 +76,100 @@ func (r *BlobRepository) GetByHash(ctx context.Context, hash string) (*Blob, err
}
blob.CreatedTS = time.Unix(createdTSUnix, 0)
if finishedTSUnix.Valid {
ts := time.Unix(finishedTSUnix.Int64, 0)
blob.FinishedTS = &ts
}
if uploadedTSUnix.Valid {
ts := time.Unix(uploadedTSUnix.Int64, 0)
blob.UploadedTS = &ts
}
return &blob, nil
}
func (r *BlobRepository) List(ctx context.Context, limit, offset int) ([]*Blob, error) {
// GetByID retrieves a blob by its ID
func (r *BlobRepository) GetByID(ctx context.Context, id string) (*Blob, error) {
query := `
SELECT blob_hash, created_ts
SELECT id, blob_hash, created_ts, finished_ts, uncompressed_size, compressed_size, uploaded_ts
FROM blobs
ORDER BY blob_hash
LIMIT ? OFFSET ?
WHERE id = ?
`
rows, err := r.db.conn.QueryContext(ctx, query, limit, offset)
var blob Blob
var createdTSUnix int64
var finishedTSUnix, uploadedTSUnix sql.NullInt64
err := r.db.conn.QueryRowContext(ctx, query, id).Scan(
&blob.ID,
&blob.Hash,
&createdTSUnix,
&finishedTSUnix,
&blob.UncompressedSize,
&blob.CompressedSize,
&uploadedTSUnix,
)
if err == sql.ErrNoRows {
return nil, nil
}
if err != nil {
return nil, fmt.Errorf("querying blobs: %w", err)
}
defer CloseRows(rows)
var blobs []*Blob
for rows.Next() {
var blob Blob
var createdTSUnix int64
err := rows.Scan(
&blob.BlobHash,
&createdTSUnix,
)
if err != nil {
return nil, fmt.Errorf("scanning blob: %w", err)
}
blob.CreatedTS = time.Unix(createdTSUnix, 0)
blobs = append(blobs, &blob)
return nil, fmt.Errorf("querying blob: %w", err)
}
return blobs, rows.Err()
blob.CreatedTS = time.Unix(createdTSUnix, 0)
if finishedTSUnix.Valid {
ts := time.Unix(finishedTSUnix.Int64, 0)
blob.FinishedTS = &ts
}
if uploadedTSUnix.Valid {
ts := time.Unix(uploadedTSUnix.Int64, 0)
blob.UploadedTS = &ts
}
return &blob, nil
}
// UpdateFinished updates a blob when it's finalized
func (r *BlobRepository) UpdateFinished(ctx context.Context, tx *sql.Tx, id string, hash string, uncompressedSize, compressedSize int64) error {
query := `
UPDATE blobs
SET blob_hash = ?, finished_ts = ?, uncompressed_size = ?, compressed_size = ?
WHERE id = ?
`
now := time.Now().Unix()
var err error
if tx != nil {
_, err = tx.ExecContext(ctx, query, hash, now, uncompressedSize, compressedSize, id)
} else {
_, err = r.db.ExecWithLock(ctx, query, hash, now, uncompressedSize, compressedSize, id)
}
if err != nil {
return fmt.Errorf("updating blob: %w", err)
}
return nil
}
// UpdateUploaded marks a blob as uploaded
func (r *BlobRepository) UpdateUploaded(ctx context.Context, tx *sql.Tx, id string) error {
query := `
UPDATE blobs
SET uploaded_ts = ?
WHERE id = ?
`
now := time.Now().Unix()
var err error
if tx != nil {
_, err = tx.ExecContext(ctx, query, now, id)
} else {
_, err = r.db.ExecWithLock(ctx, query, now, id)
}
if err != nil {
return fmt.Errorf("marking blob as uploaded: %w", err)
}
return nil
}

View File

@@ -15,7 +15,8 @@ func TestBlobRepository(t *testing.T) {
// Test Create
blob := &Blob{
BlobHash: "blobhash123",
ID: "test-blob-id-123",
Hash: "blobhash123",
CreatedTS: time.Now().Truncate(time.Second),
}
@@ -25,23 +26,36 @@ func TestBlobRepository(t *testing.T) {
}
// Test GetByHash
retrieved, err := repo.GetByHash(ctx, blob.BlobHash)
retrieved, err := repo.GetByHash(ctx, blob.Hash)
if err != nil {
t.Fatalf("failed to get blob: %v", err)
}
if retrieved == nil {
t.Fatal("expected blob, got nil")
}
if retrieved.BlobHash != blob.BlobHash {
t.Errorf("blob hash mismatch: got %s, want %s", retrieved.BlobHash, blob.BlobHash)
if retrieved.Hash != blob.Hash {
t.Errorf("blob hash mismatch: got %s, want %s", retrieved.Hash, blob.Hash)
}
if !retrieved.CreatedTS.Equal(blob.CreatedTS) {
t.Errorf("created timestamp mismatch: got %v, want %v", retrieved.CreatedTS, blob.CreatedTS)
}
// Test List
// Test GetByID
retrievedByID, err := repo.GetByID(ctx, blob.ID)
if err != nil {
t.Fatalf("failed to get blob by ID: %v", err)
}
if retrievedByID == nil {
t.Fatal("expected blob, got nil")
}
if retrievedByID.ID != blob.ID {
t.Errorf("blob ID mismatch: got %s, want %s", retrievedByID.ID, blob.ID)
}
// Test with second blob
blob2 := &Blob{
BlobHash: "blobhash456",
ID: "test-blob-id-456",
Hash: "blobhash456",
CreatedTS: time.Now().Truncate(time.Second),
}
err = repo.Create(ctx, nil, blob2)
@@ -49,29 +63,45 @@ func TestBlobRepository(t *testing.T) {
t.Fatalf("failed to create second blob: %v", err)
}
blobs, err := repo.List(ctx, 10, 0)
// Test UpdateFinished
now := time.Now()
err = repo.UpdateFinished(ctx, nil, blob.ID, blob.Hash, 1000, 500)
if err != nil {
t.Fatalf("failed to list blobs: %v", err)
}
if len(blobs) != 2 {
t.Errorf("expected 2 blobs, got %d", len(blobs))
t.Fatalf("failed to update blob as finished: %v", err)
}
// Test pagination
blobs, err = repo.List(ctx, 1, 0)
// Verify update
updated, err := repo.GetByID(ctx, blob.ID)
if err != nil {
t.Fatalf("failed to list blobs with limit: %v", err)
t.Fatalf("failed to get updated blob: %v", err)
}
if len(blobs) != 1 {
t.Errorf("expected 1 blob with limit, got %d", len(blobs))
if updated.FinishedTS == nil {
t.Fatal("expected finished timestamp to be set")
}
if updated.UncompressedSize != 1000 {
t.Errorf("expected uncompressed size 1000, got %d", updated.UncompressedSize)
}
if updated.CompressedSize != 500 {
t.Errorf("expected compressed size 500, got %d", updated.CompressedSize)
}
blobs, err = repo.List(ctx, 1, 1)
// Test UpdateUploaded
err = repo.UpdateUploaded(ctx, nil, blob.ID)
if err != nil {
t.Fatalf("failed to list blobs with offset: %v", err)
t.Fatalf("failed to update blob as uploaded: %v", err)
}
if len(blobs) != 1 {
t.Errorf("expected 1 blob with offset, got %d", len(blobs))
// Verify upload update
uploaded, err := repo.GetByID(ctx, blob.ID)
if err != nil {
t.Fatalf("failed to get uploaded blob: %v", err)
}
if uploaded.UploadedTS == nil {
t.Fatal("expected uploaded timestamp to be set")
}
// Allow 1 second tolerance for timestamp comparison
if uploaded.UploadedTS.Before(now.Add(-1 * time.Second)) {
t.Error("uploaded timestamp should be around test time")
}
}
@@ -83,7 +113,8 @@ func TestBlobRepositoryDuplicate(t *testing.T) {
repo := NewBlobRepository(db)
blob := &Blob{
BlobHash: "duplicate_blob",
ID: "duplicate-test-id",
Hash: "duplicate_blob",
CreatedTS: time.Now().Truncate(time.Second),
}

View File

@@ -4,8 +4,11 @@ import (
"context"
"database/sql"
"fmt"
"os"
"strings"
"sync"
"git.eeqj.de/sneak/vaultik/internal/log"
_ "modernc.org/sqlite"
)
@@ -15,23 +18,54 @@ type DB struct {
}
func New(ctx context.Context, path string) (*DB, error) {
conn, err := sql.Open("sqlite", path+"?_journal_mode=WAL&_synchronous=NORMAL&_busy_timeout=5000")
if err != nil {
return nil, fmt.Errorf("opening database: %w", err)
// First, try to recover from any stale locks
if err := recoverDatabase(ctx, path); err != nil {
log.Warn("Failed to recover database", "error", err)
}
if err := conn.PingContext(ctx); err != nil {
if closeErr := conn.Close(); closeErr != nil {
Fatal("failed to close database connection: %v", closeErr)
// First attempt with standard WAL mode
conn, err := sql.Open("sqlite", path+"?_journal_mode=WAL&_synchronous=NORMAL&_busy_timeout=10000&_locking_mode=NORMAL")
if err == nil {
// Set connection pool settings to ensure proper cleanup
conn.SetMaxOpenConns(1) // SQLite only supports one writer
conn.SetMaxIdleConns(1)
if err := conn.PingContext(ctx); err == nil {
// Success on first try
db := &DB{conn: conn}
if err := db.createSchema(ctx); err != nil {
_ = conn.Close()
return nil, fmt.Errorf("creating schema: %w", err)
}
return db, nil
}
return nil, fmt.Errorf("pinging database: %w", err)
_ = conn.Close()
}
// If first attempt failed, try with TRUNCATE mode to clear any locks
log.Info("Database appears locked, attempting recovery with TRUNCATE mode")
conn, err = sql.Open("sqlite", path+"?_journal_mode=TRUNCATE&_synchronous=NORMAL&_busy_timeout=10000")
if err != nil {
return nil, fmt.Errorf("opening database in recovery mode: %w", err)
}
// Set connection pool settings
conn.SetMaxOpenConns(1)
conn.SetMaxIdleConns(1)
if err := conn.PingContext(ctx); err != nil {
_ = conn.Close()
return nil, fmt.Errorf("database still locked after recovery attempt: %w", err)
}
// Switch back to WAL mode
if _, err := conn.ExecContext(ctx, "PRAGMA journal_mode=WAL"); err != nil {
log.Warn("Failed to switch back to WAL mode", "error", err)
}
db := &DB{conn: conn}
if err := db.createSchema(ctx); err != nil {
if closeErr := conn.Close(); closeErr != nil {
Fatal("failed to close database connection: %v", closeErr)
}
_ = conn.Close()
return nil, fmt.Errorf("creating schema: %w", err)
}
@@ -39,9 +73,68 @@ func New(ctx context.Context, path string) (*DB, error) {
}
func (db *DB) Close() error {
log.Debug("Closing database connection")
if err := db.conn.Close(); err != nil {
Fatal("failed to close database: %v", err)
log.Error("Failed to close database", "error", err)
return fmt.Errorf("failed to close database: %w", err)
}
log.Debug("Database connection closed successfully")
return nil
}
// recoverDatabase attempts to recover a locked database
func recoverDatabase(ctx context.Context, path string) error {
// Check if database file exists
if _, err := os.Stat(path); os.IsNotExist(err) {
// No database file, nothing to recover
return nil
}
// Remove stale lock files
// SQLite creates -wal and -shm files for WAL mode
walPath := path + "-wal"
shmPath := path + "-shm"
journalPath := path + "-journal"
log.Info("Attempting database recovery", "path", path)
// Always remove lock files on startup to ensure clean state
removed := false
// Check for and remove journal file (from non-WAL mode)
if _, err := os.Stat(journalPath); err == nil {
log.Info("Found journal file, removing", "path", journalPath)
if err := os.Remove(journalPath); err != nil {
log.Warn("Failed to remove journal file", "error", err)
} else {
removed = true
}
}
// Remove WAL file
if _, err := os.Stat(walPath); err == nil {
log.Info("Found WAL file, removing", "path", walPath)
if err := os.Remove(walPath); err != nil {
log.Warn("Failed to remove WAL file", "error", err)
} else {
removed = true
}
}
// Remove SHM file
if _, err := os.Stat(shmPath); err == nil {
log.Info("Found shared memory file, removing", "path", shmPath)
if err := os.Remove(shmPath); err != nil {
log.Warn("Failed to remove shared memory file", "error", err)
} else {
removed = true
}
}
if removed {
log.Info("Database lock files removed")
}
return nil
}
@@ -55,18 +148,24 @@ func (db *DB) BeginTx(ctx context.Context, opts *sql.TxOptions) (*sql.Tx, error)
// LockForWrite acquires the write lock
func (db *DB) LockForWrite() {
log.Debug("Attempting to acquire write lock")
db.writeLock.Lock()
log.Debug("Write lock acquired")
}
// UnlockWrite releases the write lock
func (db *DB) UnlockWrite() {
log.Debug("Releasing write lock")
db.writeLock.Unlock()
log.Debug("Write lock released")
}
// ExecWithLock executes a write query with the write lock held
func (db *DB) ExecWithLock(ctx context.Context, query string, args ...interface{}) (sql.Result, error) {
db.writeLock.Lock()
defer db.writeLock.Unlock()
LogSQL("Execute", query, args...)
return db.conn.ExecContext(ctx, query, args...)
}
@@ -104,16 +203,22 @@ func (db *DB) createSchema(ctx context.Context) error {
);
CREATE TABLE IF NOT EXISTS blobs (
blob_hash TEXT PRIMARY KEY,
created_ts INTEGER NOT NULL
id TEXT PRIMARY KEY,
blob_hash TEXT UNIQUE,
created_ts INTEGER NOT NULL,
finished_ts INTEGER,
uncompressed_size INTEGER NOT NULL DEFAULT 0,
compressed_size INTEGER NOT NULL DEFAULT 0,
uploaded_ts INTEGER
);
CREATE TABLE IF NOT EXISTS blob_chunks (
blob_hash TEXT NOT NULL,
blob_id TEXT NOT NULL,
chunk_hash TEXT NOT NULL,
offset INTEGER NOT NULL,
length INTEGER NOT NULL,
PRIMARY KEY (blob_hash, chunk_hash)
PRIMARY KEY (blob_id, chunk_hash),
FOREIGN KEY (blob_id) REFERENCES blobs(id)
);
CREATE TABLE IF NOT EXISTS chunk_files (
@@ -128,13 +233,38 @@ func (db *DB) createSchema(ctx context.Context) error {
id TEXT PRIMARY KEY,
hostname TEXT NOT NULL,
vaultik_version TEXT NOT NULL,
created_ts INTEGER NOT NULL,
file_count INTEGER NOT NULL,
chunk_count INTEGER NOT NULL,
blob_count INTEGER NOT NULL,
total_size INTEGER NOT NULL,
blob_size INTEGER NOT NULL,
compression_ratio REAL NOT NULL
started_at INTEGER NOT NULL,
completed_at INTEGER,
file_count INTEGER NOT NULL DEFAULT 0,
chunk_count INTEGER NOT NULL DEFAULT 0,
blob_count INTEGER NOT NULL DEFAULT 0,
total_size INTEGER NOT NULL DEFAULT 0,
blob_size INTEGER NOT NULL DEFAULT 0,
compression_ratio REAL NOT NULL DEFAULT 1.0
);
CREATE TABLE IF NOT EXISTS snapshot_files (
snapshot_id TEXT NOT NULL,
file_path TEXT NOT NULL,
PRIMARY KEY (snapshot_id, file_path),
FOREIGN KEY (snapshot_id) REFERENCES snapshots(id) ON DELETE CASCADE,
FOREIGN KEY (file_path) REFERENCES files(path) ON DELETE CASCADE
);
CREATE TABLE IF NOT EXISTS snapshot_blobs (
snapshot_id TEXT NOT NULL,
blob_id TEXT NOT NULL,
blob_hash TEXT NOT NULL,
PRIMARY KEY (snapshot_id, blob_id),
FOREIGN KEY (snapshot_id) REFERENCES snapshots(id) ON DELETE CASCADE,
FOREIGN KEY (blob_id) REFERENCES blobs(id) ON DELETE CASCADE
);
CREATE TABLE IF NOT EXISTS uploads (
blob_hash TEXT PRIMARY KEY,
uploaded_at INTEGER NOT NULL,
size INTEGER NOT NULL,
duration_ms INTEGER NOT NULL
);
`
@@ -146,3 +276,10 @@ func (db *DB) createSchema(ctx context.Context) error {
func NewTestDB() (*DB, error) {
return New(context.Background(), ":memory:")
}
// LogSQL logs SQL queries if debug mode is enabled
func LogSQL(operation, query string, args ...interface{}) {
if strings.Contains(os.Getenv("GODEBUG"), "vaultik") {
log.Debug("SQL "+operation, "query", strings.TrimSpace(query), "args", fmt.Sprintf("%v", args))
}
}

View File

@@ -62,6 +62,36 @@ func (r *FileChunkRepository) GetByPath(ctx context.Context, path string) ([]*Fi
return fileChunks, rows.Err()
}
// GetByPathTx retrieves file chunks within a transaction
func (r *FileChunkRepository) GetByPathTx(ctx context.Context, tx *sql.Tx, path string) ([]*FileChunk, error) {
query := `
SELECT path, idx, chunk_hash
FROM file_chunks
WHERE path = ?
ORDER BY idx
`
LogSQL("GetByPathTx", query, path)
rows, err := tx.QueryContext(ctx, query, path)
if err != nil {
return nil, fmt.Errorf("querying file chunks: %w", err)
}
defer CloseRows(rows)
var fileChunks []*FileChunk
for rows.Next() {
var fc FileChunk
err := rows.Scan(&fc.Path, &fc.Idx, &fc.ChunkHash)
if err != nil {
return nil, fmt.Errorf("scanning file chunk: %w", err)
}
fileChunks = append(fileChunks, &fc)
}
LogSQL("GetByPathTx", "Complete", path, "count", len(fileChunks))
return fileChunks, rows.Err()
}
func (r *FileChunkRepository) DeleteByPath(ctx context.Context, tx *sql.Tx, path string) error {
query := `DELETE FROM file_chunks WHERE path = ?`
@@ -81,5 +111,16 @@ func (r *FileChunkRepository) DeleteByPath(ctx context.Context, tx *sql.Tx, path
// GetByFile is an alias for GetByPath for compatibility
func (r *FileChunkRepository) GetByFile(ctx context.Context, path string) ([]*FileChunk, error) {
return r.GetByPath(ctx, path)
LogSQL("GetByFile", "Starting", path)
result, err := r.GetByPath(ctx, path)
LogSQL("GetByFile", "Complete", path, "count", len(result))
return result, err
}
// GetByFileTx retrieves file chunks within a transaction
func (r *FileChunkRepository) GetByFileTx(ctx context.Context, tx *sql.Tx, path string) ([]*FileChunk, error) {
LogSQL("GetByFileTx", "Starting", path)
result, err := r.GetByPathTx(ctx, tx, path)
LogSQL("GetByFileTx", "Complete", path, "count", len(result))
return result, err
}

View File

@@ -31,6 +31,7 @@ func (r *FileRepository) Create(ctx context.Context, tx *sql.Tx, file *File) err
var err error
if tx != nil {
LogSQL("Execute", query, file.Path, file.MTime.Unix(), file.CTime.Unix(), file.Size, file.Mode, file.UID, file.GID, file.LinkTarget)
_, err = tx.ExecContext(ctx, query, file.Path, file.MTime.Unix(), file.CTime.Unix(), file.Size, file.Mode, file.UID, file.GID, file.LinkTarget)
} else {
_, err = r.db.ExecWithLock(ctx, query, file.Path, file.MTime.Unix(), file.CTime.Unix(), file.Size, file.Mode, file.UID, file.GID, file.LinkTarget)
@@ -81,6 +82,46 @@ func (r *FileRepository) GetByPath(ctx context.Context, path string) (*File, err
return &file, nil
}
func (r *FileRepository) GetByPathTx(ctx context.Context, tx *sql.Tx, path string) (*File, error) {
query := `
SELECT path, mtime, ctime, size, mode, uid, gid, link_target
FROM files
WHERE path = ?
`
var file File
var mtimeUnix, ctimeUnix int64
var linkTarget sql.NullString
LogSQL("GetByPathTx QueryRowContext", query, path)
err := tx.QueryRowContext(ctx, query, path).Scan(
&file.Path,
&mtimeUnix,
&ctimeUnix,
&file.Size,
&file.Mode,
&file.UID,
&file.GID,
&linkTarget,
)
LogSQL("GetByPathTx Scan complete", query, path)
if err == sql.ErrNoRows {
return nil, nil
}
if err != nil {
return nil, fmt.Errorf("querying file: %w", err)
}
file.MTime = time.Unix(mtimeUnix, 0)
file.CTime = time.Unix(ctimeUnix, 0)
if linkTarget.Valid {
file.LinkTarget = linkTarget.String
}
return &file, nil
}
func (r *FileRepository) ListModifiedSince(ctx context.Context, since time.Time) ([]*File, error) {
query := `
SELECT path, mtime, ctime, size, mode, uid, gid, link_target

View File

@@ -35,13 +35,18 @@ type Chunk struct {
// Blob represents a blob record in the database
type Blob struct {
BlobHash string
CreatedTS time.Time
ID string
Hash string // Can be empty until blob is finalized
CreatedTS time.Time
FinishedTS *time.Time // nil if not yet finalized
UncompressedSize int64
CompressedSize int64
UploadedTS *time.Time // nil if not yet uploaded
}
// BlobChunk represents the mapping between blobs and chunks
type BlobChunk struct {
BlobHash string
BlobID string
ChunkHash string
Offset int64
Length int64
@@ -60,7 +65,8 @@ type Snapshot struct {
ID string
Hostname string
VaultikVersion string
CreatedTS time.Time
StartedAt time.Time
CompletedAt *time.Time // nil if still in progress
FileCount int64
ChunkCount int64
BlobCount int64
@@ -68,3 +74,21 @@ type Snapshot struct {
BlobSize int64 // Total size of all referenced blobs (compressed and encrypted)
CompressionRatio float64 // Compression ratio (BlobSize / TotalSize)
}
// IsComplete returns true if the snapshot has completed
func (s *Snapshot) IsComplete() bool {
return s.CompletedAt != nil
}
// SnapshotFile represents the mapping between snapshots and files
type SnapshotFile struct {
SnapshotID string
FilePath string
}
// SnapshotBlob represents the mapping between snapshots and blobs
type SnapshotBlob struct {
SnapshotID string
BlobID string
BlobHash string // Denormalized for easier manifest generation
}

View File

@@ -7,6 +7,7 @@ import (
"path/filepath"
"git.eeqj.de/sneak/vaultik/internal/config"
"git.eeqj.de/sneak/vaultik/internal/log"
"go.uber.org/fx"
)
@@ -32,7 +33,13 @@ func provideDatabase(lc fx.Lifecycle, cfg *config.Config) (*DB, error) {
lc.Append(fx.Hook{
OnStop: func(ctx context.Context) error {
return db.Close()
log.Debug("Database module OnStop hook called")
if err := db.Close(); err != nil {
log.Error("Failed to close database in OnStop hook", "error", err)
return err
}
log.Debug("Database closed successfully in OnStop hook")
return nil
},
})

View File

@@ -15,6 +15,7 @@ type Repositories struct {
BlobChunks *BlobChunkRepository
ChunkFiles *ChunkFileRepository
Snapshots *SnapshotRepository
Uploads *UploadRepository
}
func NewRepositories(db *DB) *Repositories {
@@ -27,6 +28,7 @@ func NewRepositories(db *DB) *Repositories {
BlobChunks: NewBlobChunkRepository(db),
ChunkFiles: NewChunkFileRepository(db),
Snapshots: NewSnapshotRepository(db),
Uploads: NewUploadRepository(db.conn),
}
}
@@ -34,13 +36,19 @@ type TxFunc func(ctx context.Context, tx *sql.Tx) error
func (r *Repositories) WithTx(ctx context.Context, fn TxFunc) error {
// Acquire write lock for the entire transaction
LogSQL("WithTx", "Acquiring write lock", "")
r.db.LockForWrite()
defer r.db.UnlockWrite()
defer func() {
LogSQL("WithTx", "Releasing write lock", "")
r.db.UnlockWrite()
}()
LogSQL("WithTx", "Beginning transaction", "")
tx, err := r.db.BeginTx(ctx, nil)
if err != nil {
return fmt.Errorf("beginning transaction: %w", err)
}
LogSQL("WithTx", "Transaction started", "")
defer func() {
if p := recover(); p != nil {

View File

@@ -71,7 +71,8 @@ func TestRepositoriesTransaction(t *testing.T) {
// Create blob
blob := &Blob{
BlobHash: "tx_blob1",
ID: "tx-blob-id-1",
Hash: "tx_blob1",
CreatedTS: time.Now().Truncate(time.Second),
}
if err := repos.Blobs.Create(ctx, tx, blob); err != nil {
@@ -80,7 +81,7 @@ func TestRepositoriesTransaction(t *testing.T) {
// Map chunks to blob
bc1 := &BlobChunk{
BlobHash: blob.BlobHash,
BlobID: blob.ID,
ChunkHash: chunk1.ChunkHash,
Offset: 0,
Length: 512,
@@ -90,7 +91,7 @@ func TestRepositoriesTransaction(t *testing.T) {
}
bc2 := &BlobChunk{
BlobHash: blob.BlobHash,
BlobID: blob.ID,
ChunkHash: chunk2.ChunkHash,
Offset: 512,
Length: 512,

View File

@@ -0,0 +1,11 @@
-- Track blob upload metrics
CREATE TABLE IF NOT EXISTS uploads (
blob_hash TEXT PRIMARY KEY,
uploaded_at TIMESTAMP NOT NULL,
size INTEGER NOT NULL,
duration_ms INTEGER NOT NULL,
FOREIGN KEY (blob_hash) REFERENCES blobs(blob_hash)
);
CREATE INDEX idx_uploads_uploaded_at ON uploads(uploaded_at);
CREATE INDEX idx_uploads_duration ON uploads(duration_ms);

View File

@@ -17,17 +17,23 @@ func NewSnapshotRepository(db *DB) *SnapshotRepository {
func (r *SnapshotRepository) Create(ctx context.Context, tx *sql.Tx, snapshot *Snapshot) error {
query := `
INSERT INTO snapshots (id, hostname, vaultik_version, created_ts, file_count, chunk_count, blob_count, total_size, blob_size, compression_ratio)
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
INSERT INTO snapshots (id, hostname, vaultik_version, started_at, completed_at, file_count, chunk_count, blob_count, total_size, blob_size, compression_ratio)
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
`
var completedAt *int64
if snapshot.CompletedAt != nil {
ts := snapshot.CompletedAt.Unix()
completedAt = &ts
}
var err error
if tx != nil {
_, err = tx.ExecContext(ctx, query, snapshot.ID, snapshot.Hostname, snapshot.VaultikVersion, snapshot.CreatedTS.Unix(),
snapshot.FileCount, snapshot.ChunkCount, snapshot.BlobCount, snapshot.TotalSize, snapshot.BlobSize, snapshot.CompressionRatio)
_, err = tx.ExecContext(ctx, query, snapshot.ID, snapshot.Hostname, snapshot.VaultikVersion, snapshot.StartedAt.Unix(),
completedAt, snapshot.FileCount, snapshot.ChunkCount, snapshot.BlobCount, snapshot.TotalSize, snapshot.BlobSize, snapshot.CompressionRatio)
} else {
_, err = r.db.ExecWithLock(ctx, query, snapshot.ID, snapshot.Hostname, snapshot.VaultikVersion, snapshot.CreatedTS.Unix(),
snapshot.FileCount, snapshot.ChunkCount, snapshot.BlobCount, snapshot.TotalSize, snapshot.BlobSize, snapshot.CompressionRatio)
_, err = r.db.ExecWithLock(ctx, query, snapshot.ID, snapshot.Hostname, snapshot.VaultikVersion, snapshot.StartedAt.Unix(),
completedAt, snapshot.FileCount, snapshot.ChunkCount, snapshot.BlobCount, snapshot.TotalSize, snapshot.BlobSize, snapshot.CompressionRatio)
}
if err != nil {
@@ -70,19 +76,21 @@ func (r *SnapshotRepository) UpdateCounts(ctx context.Context, tx *sql.Tx, snaps
func (r *SnapshotRepository) GetByID(ctx context.Context, snapshotID string) (*Snapshot, error) {
query := `
SELECT id, hostname, vaultik_version, created_ts, file_count, chunk_count, blob_count, total_size, blob_size, compression_ratio
SELECT id, hostname, vaultik_version, started_at, completed_at, file_count, chunk_count, blob_count, total_size, blob_size, compression_ratio
FROM snapshots
WHERE id = ?
`
var snapshot Snapshot
var createdTSUnix int64
var startedAtUnix int64
var completedAtUnix *int64
err := r.db.conn.QueryRowContext(ctx, query, snapshotID).Scan(
&snapshot.ID,
&snapshot.Hostname,
&snapshot.VaultikVersion,
&createdTSUnix,
&startedAtUnix,
&completedAtUnix,
&snapshot.FileCount,
&snapshot.ChunkCount,
&snapshot.BlobCount,
@@ -98,16 +106,20 @@ func (r *SnapshotRepository) GetByID(ctx context.Context, snapshotID string) (*S
return nil, fmt.Errorf("querying snapshot: %w", err)
}
snapshot.CreatedTS = time.Unix(createdTSUnix, 0)
snapshot.StartedAt = time.Unix(startedAtUnix, 0)
if completedAtUnix != nil {
t := time.Unix(*completedAtUnix, 0)
snapshot.CompletedAt = &t
}
return &snapshot, nil
}
func (r *SnapshotRepository) ListRecent(ctx context.Context, limit int) ([]*Snapshot, error) {
query := `
SELECT id, hostname, vaultik_version, created_ts, file_count, chunk_count, blob_count, total_size, blob_size, compression_ratio
SELECT id, hostname, vaultik_version, started_at, completed_at, file_count, chunk_count, blob_count, total_size, blob_size, compression_ratio
FROM snapshots
ORDER BY created_ts DESC
ORDER BY started_at DESC
LIMIT ?
`
@@ -120,13 +132,15 @@ func (r *SnapshotRepository) ListRecent(ctx context.Context, limit int) ([]*Snap
var snapshots []*Snapshot
for rows.Next() {
var snapshot Snapshot
var createdTSUnix int64
var startedAtUnix int64
var completedAtUnix *int64
err := rows.Scan(
&snapshot.ID,
&snapshot.Hostname,
&snapshot.VaultikVersion,
&createdTSUnix,
&startedAtUnix,
&completedAtUnix,
&snapshot.FileCount,
&snapshot.ChunkCount,
&snapshot.BlobCount,
@@ -138,7 +152,154 @@ func (r *SnapshotRepository) ListRecent(ctx context.Context, limit int) ([]*Snap
return nil, fmt.Errorf("scanning snapshot: %w", err)
}
snapshot.CreatedTS = time.Unix(createdTSUnix, 0)
snapshot.StartedAt = time.Unix(startedAtUnix, 0)
if completedAtUnix != nil {
t := time.Unix(*completedAtUnix, 0)
snapshot.CompletedAt = &t
}
snapshots = append(snapshots, &snapshot)
}
return snapshots, rows.Err()
}
// MarkComplete marks a snapshot as completed with the current timestamp
func (r *SnapshotRepository) MarkComplete(ctx context.Context, tx *sql.Tx, snapshotID string) error {
query := `
UPDATE snapshots
SET completed_at = ?
WHERE id = ?
`
completedAt := time.Now().Unix()
var err error
if tx != nil {
_, err = tx.ExecContext(ctx, query, completedAt, snapshotID)
} else {
_, err = r.db.ExecWithLock(ctx, query, completedAt, snapshotID)
}
if err != nil {
return fmt.Errorf("marking snapshot complete: %w", err)
}
return nil
}
// AddFile adds a file to a snapshot
func (r *SnapshotRepository) AddFile(ctx context.Context, tx *sql.Tx, snapshotID string, filePath string) error {
query := `
INSERT OR IGNORE INTO snapshot_files (snapshot_id, file_path)
VALUES (?, ?)
`
var err error
if tx != nil {
_, err = tx.ExecContext(ctx, query, snapshotID, filePath)
} else {
_, err = r.db.ExecWithLock(ctx, query, snapshotID, filePath)
}
if err != nil {
return fmt.Errorf("adding file to snapshot: %w", err)
}
return nil
}
// AddBlob adds a blob to a snapshot
func (r *SnapshotRepository) AddBlob(ctx context.Context, tx *sql.Tx, snapshotID string, blobID string, blobHash string) error {
query := `
INSERT OR IGNORE INTO snapshot_blobs (snapshot_id, blob_id, blob_hash)
VALUES (?, ?, ?)
`
var err error
if tx != nil {
_, err = tx.ExecContext(ctx, query, snapshotID, blobID, blobHash)
} else {
_, err = r.db.ExecWithLock(ctx, query, snapshotID, blobID, blobHash)
}
if err != nil {
return fmt.Errorf("adding blob to snapshot: %w", err)
}
return nil
}
// GetBlobHashes returns all blob hashes for a snapshot
func (r *SnapshotRepository) GetBlobHashes(ctx context.Context, snapshotID string) ([]string, error) {
query := `
SELECT sb.blob_hash
FROM snapshot_blobs sb
WHERE sb.snapshot_id = ?
ORDER BY sb.blob_hash
`
rows, err := r.db.conn.QueryContext(ctx, query, snapshotID)
if err != nil {
return nil, fmt.Errorf("querying blob hashes: %w", err)
}
defer CloseRows(rows)
var blobs []string
for rows.Next() {
var blobHash string
if err := rows.Scan(&blobHash); err != nil {
return nil, fmt.Errorf("scanning blob hash: %w", err)
}
blobs = append(blobs, blobHash)
}
return blobs, rows.Err()
}
// GetIncompleteSnapshots returns all snapshots that haven't been completed
func (r *SnapshotRepository) GetIncompleteSnapshots(ctx context.Context) ([]*Snapshot, error) {
query := `
SELECT id, hostname, vaultik_version, started_at, completed_at, file_count, chunk_count, blob_count, total_size, blob_size, compression_ratio
FROM snapshots
WHERE completed_at IS NULL
ORDER BY started_at DESC
`
rows, err := r.db.conn.QueryContext(ctx, query)
if err != nil {
return nil, fmt.Errorf("querying incomplete snapshots: %w", err)
}
defer CloseRows(rows)
var snapshots []*Snapshot
for rows.Next() {
var snapshot Snapshot
var startedAtUnix int64
var completedAtUnix *int64
err := rows.Scan(
&snapshot.ID,
&snapshot.Hostname,
&snapshot.VaultikVersion,
&startedAtUnix,
&completedAtUnix,
&snapshot.FileCount,
&snapshot.ChunkCount,
&snapshot.BlobCount,
&snapshot.TotalSize,
&snapshot.BlobSize,
&snapshot.CompressionRatio,
)
if err != nil {
return nil, fmt.Errorf("scanning snapshot: %w", err)
}
snapshot.StartedAt = time.Unix(startedAtUnix, 0)
if completedAtUnix != nil {
t := time.Unix(*completedAtUnix, 0)
snapshot.CompletedAt = &t
}
snapshots = append(snapshots, &snapshot)
}

View File

@@ -30,7 +30,8 @@ func TestSnapshotRepository(t *testing.T) {
ID: "2024-01-01T12:00:00Z",
Hostname: "test-host",
VaultikVersion: "1.0.0",
CreatedTS: time.Now().Truncate(time.Second),
StartedAt: time.Now().Truncate(time.Second),
CompletedAt: nil,
FileCount: 100,
ChunkCount: 500,
BlobCount: 10,
@@ -99,7 +100,8 @@ func TestSnapshotRepository(t *testing.T) {
ID: fmt.Sprintf("2024-01-0%dT12:00:00Z", i),
Hostname: "test-host",
VaultikVersion: "1.0.0",
CreatedTS: time.Now().Add(time.Duration(i) * time.Hour).Truncate(time.Second),
StartedAt: time.Now().Add(time.Duration(i) * time.Hour).Truncate(time.Second),
CompletedAt: nil,
FileCount: int64(100 * i),
ChunkCount: int64(500 * i),
BlobCount: int64(10 * i),
@@ -121,7 +123,7 @@ func TestSnapshotRepository(t *testing.T) {
// Verify order (most recent first)
for i := 0; i < len(recent)-1; i++ {
if recent[i].CreatedTS.Before(recent[i+1].CreatedTS) {
if recent[i].StartedAt.Before(recent[i+1].StartedAt) {
t.Error("snapshots not in descending order")
}
}
@@ -162,7 +164,8 @@ func TestSnapshotRepositoryDuplicate(t *testing.T) {
ID: "2024-01-01T12:00:00Z",
Hostname: "test-host",
VaultikVersion: "1.0.0",
CreatedTS: time.Now().Truncate(time.Second),
StartedAt: time.Now().Truncate(time.Second),
CompletedAt: nil,
FileCount: 100,
ChunkCount: 500,
BlobCount: 10,

View File

@@ -0,0 +1,135 @@
package database
import (
"context"
"database/sql"
"time"
"git.eeqj.de/sneak/vaultik/internal/log"
)
// Upload represents a blob upload record
type Upload struct {
BlobHash string
UploadedAt time.Time
Size int64
DurationMs int64
}
// UploadRepository handles upload records
type UploadRepository struct {
conn *sql.DB
}
// NewUploadRepository creates a new upload repository
func NewUploadRepository(conn *sql.DB) *UploadRepository {
return &UploadRepository{conn: conn}
}
// Create inserts a new upload record
func (r *UploadRepository) Create(ctx context.Context, tx *sql.Tx, upload *Upload) error {
query := `
INSERT INTO uploads (blob_hash, uploaded_at, size, duration_ms)
VALUES (?, ?, ?, ?)
`
var err error
if tx != nil {
_, err = tx.ExecContext(ctx, query, upload.BlobHash, upload.UploadedAt, upload.Size, upload.DurationMs)
} else {
_, err = r.conn.ExecContext(ctx, query, upload.BlobHash, upload.UploadedAt, upload.Size, upload.DurationMs)
}
return err
}
// GetByBlobHash retrieves an upload record by blob hash
func (r *UploadRepository) GetByBlobHash(ctx context.Context, blobHash string) (*Upload, error) {
query := `
SELECT blob_hash, uploaded_at, size, duration_ms
FROM uploads
WHERE blob_hash = ?
`
var upload Upload
err := r.conn.QueryRowContext(ctx, query, blobHash).Scan(
&upload.BlobHash,
&upload.UploadedAt,
&upload.Size,
&upload.DurationMs,
)
if err == sql.ErrNoRows {
return nil, nil
}
if err != nil {
return nil, err
}
return &upload, nil
}
// GetRecentUploads retrieves recent uploads ordered by upload time
func (r *UploadRepository) GetRecentUploads(ctx context.Context, limit int) ([]*Upload, error) {
query := `
SELECT blob_hash, uploaded_at, size, duration_ms
FROM uploads
ORDER BY uploaded_at DESC
LIMIT ?
`
rows, err := r.conn.QueryContext(ctx, query, limit)
if err != nil {
return nil, err
}
defer func() {
if err := rows.Close(); err != nil {
log.Error("failed to close rows", "error", err)
}
}()
var uploads []*Upload
for rows.Next() {
var upload Upload
if err := rows.Scan(&upload.BlobHash, &upload.UploadedAt, &upload.Size, &upload.DurationMs); err != nil {
return nil, err
}
uploads = append(uploads, &upload)
}
return uploads, rows.Err()
}
// GetUploadStats returns aggregate statistics for uploads
func (r *UploadRepository) GetUploadStats(ctx context.Context, since time.Time) (*UploadStats, error) {
query := `
SELECT
COUNT(*) as count,
COALESCE(SUM(size), 0) as total_size,
COALESCE(AVG(duration_ms), 0) as avg_duration_ms,
COALESCE(MIN(duration_ms), 0) as min_duration_ms,
COALESCE(MAX(duration_ms), 0) as max_duration_ms
FROM uploads
WHERE uploaded_at >= ?
`
var stats UploadStats
err := r.conn.QueryRowContext(ctx, query, since).Scan(
&stats.Count,
&stats.TotalSize,
&stats.AvgDurationMs,
&stats.MinDurationMs,
&stats.MaxDurationMs,
)
return &stats, err
}
// UploadStats contains aggregate upload statistics
type UploadStats struct {
Count int64
TotalSize int64
AvgDurationMs float64
MinDurationMs int64
MaxDurationMs int64
}