Refactor blob storage to use UUID primary keys and implement streaming chunking
- Changed blob table to use ID (UUID) as primary key instead of hash
- Blob records are now created at packing start, enabling immediate chunk associations
- Implemented streaming chunking to process large files without memory exhaustion
- Fixed blob manifest generation to include all referenced blobs
- Updated all foreign key references from blob_hash to blob_id
- Added progress reporting and improved error handling
- Enforced encryption requirement for all blob packing
- Updated tests to use test encryption keys
- Added Cyrillic transliteration to README
This commit is contained in:
@@ -16,15 +16,15 @@ func NewBlobChunkRepository(db *DB) *BlobChunkRepository {
|
||||
|
||||
func (r *BlobChunkRepository) Create(ctx context.Context, tx *sql.Tx, bc *BlobChunk) error {
|
||||
query := `
|
||||
INSERT INTO blob_chunks (blob_hash, chunk_hash, offset, length)
|
||||
INSERT INTO blob_chunks (blob_id, chunk_hash, offset, length)
|
||||
VALUES (?, ?, ?, ?)
|
||||
`
|
||||
|
||||
var err error
|
||||
if tx != nil {
|
||||
_, err = tx.ExecContext(ctx, query, bc.BlobHash, bc.ChunkHash, bc.Offset, bc.Length)
|
||||
_, err = tx.ExecContext(ctx, query, bc.BlobID, bc.ChunkHash, bc.Offset, bc.Length)
|
||||
} else {
|
||||
_, err = r.db.ExecWithLock(ctx, query, bc.BlobHash, bc.ChunkHash, bc.Offset, bc.Length)
|
||||
_, err = r.db.ExecWithLock(ctx, query, bc.BlobID, bc.ChunkHash, bc.Offset, bc.Length)
|
||||
}
|
||||
|
||||
if err != nil {
|
||||
@@ -34,15 +34,15 @@ func (r *BlobChunkRepository) Create(ctx context.Context, tx *sql.Tx, bc *BlobCh
|
||||
return nil
|
||||
}
|
||||
|
||||
func (r *BlobChunkRepository) GetByBlobHash(ctx context.Context, blobHash string) ([]*BlobChunk, error) {
|
||||
func (r *BlobChunkRepository) GetByBlobID(ctx context.Context, blobID string) ([]*BlobChunk, error) {
|
||||
query := `
|
||||
SELECT blob_hash, chunk_hash, offset, length
|
||||
SELECT blob_id, chunk_hash, offset, length
|
||||
FROM blob_chunks
|
||||
WHERE blob_hash = ?
|
||||
WHERE blob_id = ?
|
||||
ORDER BY offset
|
||||
`
|
||||
|
||||
rows, err := r.db.conn.QueryContext(ctx, query, blobHash)
|
||||
rows, err := r.db.conn.QueryContext(ctx, query, blobID)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("querying blob chunks: %w", err)
|
||||
}
|
||||
@@ -51,7 +51,7 @@ func (r *BlobChunkRepository) GetByBlobHash(ctx context.Context, blobHash string
|
||||
var blobChunks []*BlobChunk
|
||||
for rows.Next() {
|
||||
var bc BlobChunk
|
||||
err := rows.Scan(&bc.BlobHash, &bc.ChunkHash, &bc.Offset, &bc.Length)
|
||||
err := rows.Scan(&bc.BlobID, &bc.ChunkHash, &bc.Offset, &bc.Length)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("scanning blob chunk: %w", err)
|
||||
}
|
||||
@@ -63,26 +63,61 @@ func (r *BlobChunkRepository) GetByBlobHash(ctx context.Context, blobHash string
|
||||
|
||||
func (r *BlobChunkRepository) GetByChunkHash(ctx context.Context, chunkHash string) (*BlobChunk, error) {
|
||||
query := `
|
||||
SELECT blob_hash, chunk_hash, offset, length
|
||||
SELECT blob_id, chunk_hash, offset, length
|
||||
FROM blob_chunks
|
||||
WHERE chunk_hash = ?
|
||||
LIMIT 1
|
||||
`
|
||||
|
||||
LogSQL("GetByChunkHash", query, chunkHash)
|
||||
var bc BlobChunk
|
||||
err := r.db.conn.QueryRowContext(ctx, query, chunkHash).Scan(
|
||||
&bc.BlobHash,
|
||||
&bc.BlobID,
|
||||
&bc.ChunkHash,
|
||||
&bc.Offset,
|
||||
&bc.Length,
|
||||
)
|
||||
|
||||
if err == sql.ErrNoRows {
|
||||
LogSQL("GetByChunkHash", "No rows found", chunkHash)
|
||||
return nil, nil
|
||||
}
|
||||
if err != nil {
|
||||
LogSQL("GetByChunkHash", "Error", chunkHash, err)
|
||||
return nil, fmt.Errorf("querying blob chunk: %w", err)
|
||||
}
|
||||
|
||||
LogSQL("GetByChunkHash", "Found blob", chunkHash, "blob", bc.BlobID)
|
||||
return &bc, nil
|
||||
}
|
||||
|
||||
// GetByChunkHashTx retrieves a blob chunk within a transaction
|
||||
func (r *BlobChunkRepository) GetByChunkHashTx(ctx context.Context, tx *sql.Tx, chunkHash string) (*BlobChunk, error) {
|
||||
query := `
|
||||
SELECT blob_id, chunk_hash, offset, length
|
||||
FROM blob_chunks
|
||||
WHERE chunk_hash = ?
|
||||
LIMIT 1
|
||||
`
|
||||
|
||||
LogSQL("GetByChunkHashTx", query, chunkHash)
|
||||
var bc BlobChunk
|
||||
err := tx.QueryRowContext(ctx, query, chunkHash).Scan(
|
||||
&bc.BlobID,
|
||||
&bc.ChunkHash,
|
||||
&bc.Offset,
|
||||
&bc.Length,
|
||||
)
|
||||
|
||||
if err == sql.ErrNoRows {
|
||||
LogSQL("GetByChunkHashTx", "No rows found", chunkHash)
|
||||
return nil, nil
|
||||
}
|
||||
if err != nil {
|
||||
LogSQL("GetByChunkHashTx", "Error", chunkHash, err)
|
||||
return nil, fmt.Errorf("querying blob chunk: %w", err)
|
||||
}
|
||||
|
||||
LogSQL("GetByChunkHashTx", "Found blob", chunkHash, "blob", bc.BlobID)
|
||||
return &bc, nil
|
||||
}
|
||||
|
||||
@@ -14,7 +14,7 @@ func TestBlobChunkRepository(t *testing.T) {
|
||||
|
||||
// Test Create
|
||||
bc1 := &BlobChunk{
|
||||
BlobHash: "blob1",
|
||||
BlobID: "blob1-uuid",
|
||||
ChunkHash: "chunk1",
|
||||
Offset: 0,
|
||||
Length: 1024,
|
||||
@@ -27,7 +27,7 @@ func TestBlobChunkRepository(t *testing.T) {
|
||||
|
||||
// Add more chunks to the same blob
|
||||
bc2 := &BlobChunk{
|
||||
BlobHash: "blob1",
|
||||
BlobID: "blob1-uuid",
|
||||
ChunkHash: "chunk2",
|
||||
Offset: 1024,
|
||||
Length: 2048,
|
||||
@@ -38,7 +38,7 @@ func TestBlobChunkRepository(t *testing.T) {
|
||||
}
|
||||
|
||||
bc3 := &BlobChunk{
|
||||
BlobHash: "blob1",
|
||||
BlobID: "blob1-uuid",
|
||||
ChunkHash: "chunk3",
|
||||
Offset: 3072,
|
||||
Length: 512,
|
||||
@@ -48,8 +48,8 @@ func TestBlobChunkRepository(t *testing.T) {
|
||||
t.Fatalf("failed to create third blob chunk: %v", err)
|
||||
}
|
||||
|
||||
// Test GetByBlobHash
|
||||
chunks, err := repo.GetByBlobHash(ctx, "blob1")
|
||||
// Test GetByBlobID
|
||||
chunks, err := repo.GetByBlobID(ctx, "blob1-uuid")
|
||||
if err != nil {
|
||||
t.Fatalf("failed to get blob chunks: %v", err)
|
||||
}
|
||||
@@ -73,8 +73,8 @@ func TestBlobChunkRepository(t *testing.T) {
|
||||
if bc == nil {
|
||||
t.Fatal("expected blob chunk, got nil")
|
||||
}
|
||||
if bc.BlobHash != "blob1" {
|
||||
t.Errorf("wrong blob hash: expected blob1, got %s", bc.BlobHash)
|
||||
if bc.BlobID != "blob1-uuid" {
|
||||
t.Errorf("wrong blob ID: expected blob1-uuid, got %s", bc.BlobID)
|
||||
}
|
||||
if bc.Offset != 1024 {
|
||||
t.Errorf("wrong offset: expected 1024, got %d", bc.Offset)
|
||||
@@ -100,10 +100,10 @@ func TestBlobChunkRepositoryMultipleBlobs(t *testing.T) {
|
||||
// Create chunks across multiple blobs
|
||||
// Some chunks are shared between blobs (deduplication scenario)
|
||||
blobChunks := []BlobChunk{
|
||||
{BlobHash: "blob1", ChunkHash: "chunk1", Offset: 0, Length: 1024},
|
||||
{BlobHash: "blob1", ChunkHash: "chunk2", Offset: 1024, Length: 1024},
|
||||
{BlobHash: "blob2", ChunkHash: "chunk2", Offset: 0, Length: 1024}, // chunk2 is shared
|
||||
{BlobHash: "blob2", ChunkHash: "chunk3", Offset: 1024, Length: 1024},
|
||||
{BlobID: "blob1-uuid", ChunkHash: "chunk1", Offset: 0, Length: 1024},
|
||||
{BlobID: "blob1-uuid", ChunkHash: "chunk2", Offset: 1024, Length: 1024},
|
||||
{BlobID: "blob2-uuid", ChunkHash: "chunk2", Offset: 0, Length: 1024}, // chunk2 is shared
|
||||
{BlobID: "blob2-uuid", ChunkHash: "chunk3", Offset: 1024, Length: 1024},
|
||||
}
|
||||
|
||||
for _, bc := range blobChunks {
|
||||
@@ -114,7 +114,7 @@ func TestBlobChunkRepositoryMultipleBlobs(t *testing.T) {
|
||||
}
|
||||
|
||||
// Verify blob1 chunks
|
||||
chunks, err := repo.GetByBlobHash(ctx, "blob1")
|
||||
chunks, err := repo.GetByBlobID(ctx, "blob1-uuid")
|
||||
if err != nil {
|
||||
t.Fatalf("failed to get blob1 chunks: %v", err)
|
||||
}
|
||||
@@ -123,7 +123,7 @@ func TestBlobChunkRepositoryMultipleBlobs(t *testing.T) {
|
||||
}
|
||||
|
||||
// Verify blob2 chunks
|
||||
chunks, err = repo.GetByBlobHash(ctx, "blob2")
|
||||
chunks, err = repo.GetByBlobID(ctx, "blob2-uuid")
|
||||
if err != nil {
|
||||
t.Fatalf("failed to get blob2 chunks: %v", err)
|
||||
}
|
||||
@@ -140,7 +140,7 @@ func TestBlobChunkRepositoryMultipleBlobs(t *testing.T) {
|
||||
t.Fatal("expected shared chunk, got nil")
|
||||
}
|
||||
// GetByChunkHash returns first match, should be blob1
|
||||
if bc.BlobHash != "blob1" {
|
||||
t.Errorf("expected blob1 for shared chunk, got %s", bc.BlobHash)
|
||||
if bc.BlobID != "blob1-uuid" {
|
||||
t.Errorf("expected blob1-uuid for shared chunk, got %s", bc.BlobID)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -17,15 +17,27 @@ func NewBlobRepository(db *DB) *BlobRepository {
|
||||
|
||||
func (r *BlobRepository) Create(ctx context.Context, tx *sql.Tx, blob *Blob) error {
|
||||
query := `
|
||||
INSERT INTO blobs (blob_hash, created_ts)
|
||||
VALUES (?, ?)
|
||||
INSERT INTO blobs (id, blob_hash, created_ts, finished_ts, uncompressed_size, compressed_size, uploaded_ts)
|
||||
VALUES (?, ?, ?, ?, ?, ?, ?)
|
||||
`
|
||||
|
||||
var finishedTS, uploadedTS *int64
|
||||
if blob.FinishedTS != nil {
|
||||
ts := blob.FinishedTS.Unix()
|
||||
finishedTS = &ts
|
||||
}
|
||||
if blob.UploadedTS != nil {
|
||||
ts := blob.UploadedTS.Unix()
|
||||
uploadedTS = &ts
|
||||
}
|
||||
|
||||
var err error
|
||||
if tx != nil {
|
||||
_, err = tx.ExecContext(ctx, query, blob.BlobHash, blob.CreatedTS.Unix())
|
||||
_, err = tx.ExecContext(ctx, query, blob.ID, blob.Hash, blob.CreatedTS.Unix(),
|
||||
finishedTS, blob.UncompressedSize, blob.CompressedSize, uploadedTS)
|
||||
} else {
|
||||
_, err = r.db.ExecWithLock(ctx, query, blob.BlobHash, blob.CreatedTS.Unix())
|
||||
_, err = r.db.ExecWithLock(ctx, query, blob.ID, blob.Hash, blob.CreatedTS.Unix(),
|
||||
finishedTS, blob.UncompressedSize, blob.CompressedSize, uploadedTS)
|
||||
}
|
||||
|
||||
if err != nil {
|
||||
@@ -37,17 +49,23 @@ func (r *BlobRepository) Create(ctx context.Context, tx *sql.Tx, blob *Blob) err
|
||||
|
||||
func (r *BlobRepository) GetByHash(ctx context.Context, hash string) (*Blob, error) {
|
||||
query := `
|
||||
SELECT blob_hash, created_ts
|
||||
SELECT id, blob_hash, created_ts, finished_ts, uncompressed_size, compressed_size, uploaded_ts
|
||||
FROM blobs
|
||||
WHERE blob_hash = ?
|
||||
`
|
||||
|
||||
var blob Blob
|
||||
var createdTSUnix int64
|
||||
var finishedTSUnix, uploadedTSUnix sql.NullInt64
|
||||
|
||||
err := r.db.conn.QueryRowContext(ctx, query, hash).Scan(
|
||||
&blob.BlobHash,
|
||||
&blob.ID,
|
||||
&blob.Hash,
|
||||
&createdTSUnix,
|
||||
&finishedTSUnix,
|
||||
&blob.UncompressedSize,
|
||||
&blob.CompressedSize,
|
||||
&uploadedTSUnix,
|
||||
)
|
||||
|
||||
if err == sql.ErrNoRows {
|
||||
@@ -58,39 +76,100 @@ func (r *BlobRepository) GetByHash(ctx context.Context, hash string) (*Blob, err
|
||||
}
|
||||
|
||||
blob.CreatedTS = time.Unix(createdTSUnix, 0)
|
||||
if finishedTSUnix.Valid {
|
||||
ts := time.Unix(finishedTSUnix.Int64, 0)
|
||||
blob.FinishedTS = &ts
|
||||
}
|
||||
if uploadedTSUnix.Valid {
|
||||
ts := time.Unix(uploadedTSUnix.Int64, 0)
|
||||
blob.UploadedTS = &ts
|
||||
}
|
||||
return &blob, nil
|
||||
}
|
||||
|
||||
func (r *BlobRepository) List(ctx context.Context, limit, offset int) ([]*Blob, error) {
|
||||
// GetByID retrieves a blob by its ID
|
||||
func (r *BlobRepository) GetByID(ctx context.Context, id string) (*Blob, error) {
|
||||
query := `
|
||||
SELECT blob_hash, created_ts
|
||||
SELECT id, blob_hash, created_ts, finished_ts, uncompressed_size, compressed_size, uploaded_ts
|
||||
FROM blobs
|
||||
ORDER BY blob_hash
|
||||
LIMIT ? OFFSET ?
|
||||
WHERE id = ?
|
||||
`
|
||||
|
||||
rows, err := r.db.conn.QueryContext(ctx, query, limit, offset)
|
||||
var blob Blob
|
||||
var createdTSUnix int64
|
||||
var finishedTSUnix, uploadedTSUnix sql.NullInt64
|
||||
|
||||
err := r.db.conn.QueryRowContext(ctx, query, id).Scan(
|
||||
&blob.ID,
|
||||
&blob.Hash,
|
||||
&createdTSUnix,
|
||||
&finishedTSUnix,
|
||||
&blob.UncompressedSize,
|
||||
&blob.CompressedSize,
|
||||
&uploadedTSUnix,
|
||||
)
|
||||
|
||||
if err == sql.ErrNoRows {
|
||||
return nil, nil
|
||||
}
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("querying blobs: %w", err)
|
||||
}
|
||||
defer CloseRows(rows)
|
||||
|
||||
var blobs []*Blob
|
||||
for rows.Next() {
|
||||
var blob Blob
|
||||
var createdTSUnix int64
|
||||
|
||||
err := rows.Scan(
|
||||
&blob.BlobHash,
|
||||
&createdTSUnix,
|
||||
)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("scanning blob: %w", err)
|
||||
}
|
||||
|
||||
blob.CreatedTS = time.Unix(createdTSUnix, 0)
|
||||
blobs = append(blobs, &blob)
|
||||
return nil, fmt.Errorf("querying blob: %w", err)
|
||||
}
|
||||
|
||||
return blobs, rows.Err()
|
||||
blob.CreatedTS = time.Unix(createdTSUnix, 0)
|
||||
if finishedTSUnix.Valid {
|
||||
ts := time.Unix(finishedTSUnix.Int64, 0)
|
||||
blob.FinishedTS = &ts
|
||||
}
|
||||
if uploadedTSUnix.Valid {
|
||||
ts := time.Unix(uploadedTSUnix.Int64, 0)
|
||||
blob.UploadedTS = &ts
|
||||
}
|
||||
return &blob, nil
|
||||
}
|
||||
|
||||
// UpdateFinished updates a blob when it's finalized
|
||||
func (r *BlobRepository) UpdateFinished(ctx context.Context, tx *sql.Tx, id string, hash string, uncompressedSize, compressedSize int64) error {
|
||||
query := `
|
||||
UPDATE blobs
|
||||
SET blob_hash = ?, finished_ts = ?, uncompressed_size = ?, compressed_size = ?
|
||||
WHERE id = ?
|
||||
`
|
||||
|
||||
now := time.Now().Unix()
|
||||
var err error
|
||||
if tx != nil {
|
||||
_, err = tx.ExecContext(ctx, query, hash, now, uncompressedSize, compressedSize, id)
|
||||
} else {
|
||||
_, err = r.db.ExecWithLock(ctx, query, hash, now, uncompressedSize, compressedSize, id)
|
||||
}
|
||||
|
||||
if err != nil {
|
||||
return fmt.Errorf("updating blob: %w", err)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// UpdateUploaded marks a blob as uploaded
|
||||
func (r *BlobRepository) UpdateUploaded(ctx context.Context, tx *sql.Tx, id string) error {
|
||||
query := `
|
||||
UPDATE blobs
|
||||
SET uploaded_ts = ?
|
||||
WHERE id = ?
|
||||
`
|
||||
|
||||
now := time.Now().Unix()
|
||||
var err error
|
||||
if tx != nil {
|
||||
_, err = tx.ExecContext(ctx, query, now, id)
|
||||
} else {
|
||||
_, err = r.db.ExecWithLock(ctx, query, now, id)
|
||||
}
|
||||
|
||||
if err != nil {
|
||||
return fmt.Errorf("marking blob as uploaded: %w", err)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
@@ -15,7 +15,8 @@ func TestBlobRepository(t *testing.T) {
|
||||
|
||||
// Test Create
|
||||
blob := &Blob{
|
||||
BlobHash: "blobhash123",
|
||||
ID: "test-blob-id-123",
|
||||
Hash: "blobhash123",
|
||||
CreatedTS: time.Now().Truncate(time.Second),
|
||||
}
|
||||
|
||||
@@ -25,23 +26,36 @@ func TestBlobRepository(t *testing.T) {
|
||||
}
|
||||
|
||||
// Test GetByHash
|
||||
retrieved, err := repo.GetByHash(ctx, blob.BlobHash)
|
||||
retrieved, err := repo.GetByHash(ctx, blob.Hash)
|
||||
if err != nil {
|
||||
t.Fatalf("failed to get blob: %v", err)
|
||||
}
|
||||
if retrieved == nil {
|
||||
t.Fatal("expected blob, got nil")
|
||||
}
|
||||
if retrieved.BlobHash != blob.BlobHash {
|
||||
t.Errorf("blob hash mismatch: got %s, want %s", retrieved.BlobHash, blob.BlobHash)
|
||||
if retrieved.Hash != blob.Hash {
|
||||
t.Errorf("blob hash mismatch: got %s, want %s", retrieved.Hash, blob.Hash)
|
||||
}
|
||||
if !retrieved.CreatedTS.Equal(blob.CreatedTS) {
|
||||
t.Errorf("created timestamp mismatch: got %v, want %v", retrieved.CreatedTS, blob.CreatedTS)
|
||||
}
|
||||
|
||||
// Test List
|
||||
// Test GetByID
|
||||
retrievedByID, err := repo.GetByID(ctx, blob.ID)
|
||||
if err != nil {
|
||||
t.Fatalf("failed to get blob by ID: %v", err)
|
||||
}
|
||||
if retrievedByID == nil {
|
||||
t.Fatal("expected blob, got nil")
|
||||
}
|
||||
if retrievedByID.ID != blob.ID {
|
||||
t.Errorf("blob ID mismatch: got %s, want %s", retrievedByID.ID, blob.ID)
|
||||
}
|
||||
|
||||
// Test with second blob
|
||||
blob2 := &Blob{
|
||||
BlobHash: "blobhash456",
|
||||
ID: "test-blob-id-456",
|
||||
Hash: "blobhash456",
|
||||
CreatedTS: time.Now().Truncate(time.Second),
|
||||
}
|
||||
err = repo.Create(ctx, nil, blob2)
|
||||
@@ -49,29 +63,45 @@ func TestBlobRepository(t *testing.T) {
|
||||
t.Fatalf("failed to create second blob: %v", err)
|
||||
}
|
||||
|
||||
blobs, err := repo.List(ctx, 10, 0)
|
||||
// Test UpdateFinished
|
||||
now := time.Now()
|
||||
err = repo.UpdateFinished(ctx, nil, blob.ID, blob.Hash, 1000, 500)
|
||||
if err != nil {
|
||||
t.Fatalf("failed to list blobs: %v", err)
|
||||
}
|
||||
if len(blobs) != 2 {
|
||||
t.Errorf("expected 2 blobs, got %d", len(blobs))
|
||||
t.Fatalf("failed to update blob as finished: %v", err)
|
||||
}
|
||||
|
||||
// Test pagination
|
||||
blobs, err = repo.List(ctx, 1, 0)
|
||||
// Verify update
|
||||
updated, err := repo.GetByID(ctx, blob.ID)
|
||||
if err != nil {
|
||||
t.Fatalf("failed to list blobs with limit: %v", err)
|
||||
t.Fatalf("failed to get updated blob: %v", err)
|
||||
}
|
||||
if len(blobs) != 1 {
|
||||
t.Errorf("expected 1 blob with limit, got %d", len(blobs))
|
||||
if updated.FinishedTS == nil {
|
||||
t.Fatal("expected finished timestamp to be set")
|
||||
}
|
||||
if updated.UncompressedSize != 1000 {
|
||||
t.Errorf("expected uncompressed size 1000, got %d", updated.UncompressedSize)
|
||||
}
|
||||
if updated.CompressedSize != 500 {
|
||||
t.Errorf("expected compressed size 500, got %d", updated.CompressedSize)
|
||||
}
|
||||
|
||||
blobs, err = repo.List(ctx, 1, 1)
|
||||
// Test UpdateUploaded
|
||||
err = repo.UpdateUploaded(ctx, nil, blob.ID)
|
||||
if err != nil {
|
||||
t.Fatalf("failed to list blobs with offset: %v", err)
|
||||
t.Fatalf("failed to update blob as uploaded: %v", err)
|
||||
}
|
||||
if len(blobs) != 1 {
|
||||
t.Errorf("expected 1 blob with offset, got %d", len(blobs))
|
||||
|
||||
// Verify upload update
|
||||
uploaded, err := repo.GetByID(ctx, blob.ID)
|
||||
if err != nil {
|
||||
t.Fatalf("failed to get uploaded blob: %v", err)
|
||||
}
|
||||
if uploaded.UploadedTS == nil {
|
||||
t.Fatal("expected uploaded timestamp to be set")
|
||||
}
|
||||
// Allow 1 second tolerance for timestamp comparison
|
||||
if uploaded.UploadedTS.Before(now.Add(-1 * time.Second)) {
|
||||
t.Error("uploaded timestamp should be around test time")
|
||||
}
|
||||
}
|
||||
|
||||
@@ -83,7 +113,8 @@ func TestBlobRepositoryDuplicate(t *testing.T) {
|
||||
repo := NewBlobRepository(db)
|
||||
|
||||
blob := &Blob{
|
||||
BlobHash: "duplicate_blob",
|
||||
ID: "duplicate-test-id",
|
||||
Hash: "duplicate_blob",
|
||||
CreatedTS: time.Now().Truncate(time.Second),
|
||||
}
|
||||
|
||||
|
||||
@@ -4,8 +4,11 @@ import (
|
||||
"context"
|
||||
"database/sql"
|
||||
"fmt"
|
||||
"os"
|
||||
"strings"
|
||||
"sync"
|
||||
|
||||
"git.eeqj.de/sneak/vaultik/internal/log"
|
||||
_ "modernc.org/sqlite"
|
||||
)
|
||||
|
||||
@@ -15,23 +18,54 @@ type DB struct {
|
||||
}
|
||||
|
||||
func New(ctx context.Context, path string) (*DB, error) {
|
||||
conn, err := sql.Open("sqlite", path+"?_journal_mode=WAL&_synchronous=NORMAL&_busy_timeout=5000")
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("opening database: %w", err)
|
||||
// First, try to recover from any stale locks
|
||||
if err := recoverDatabase(ctx, path); err != nil {
|
||||
log.Warn("Failed to recover database", "error", err)
|
||||
}
|
||||
|
||||
if err := conn.PingContext(ctx); err != nil {
|
||||
if closeErr := conn.Close(); closeErr != nil {
|
||||
Fatal("failed to close database connection: %v", closeErr)
|
||||
// First attempt with standard WAL mode
|
||||
conn, err := sql.Open("sqlite", path+"?_journal_mode=WAL&_synchronous=NORMAL&_busy_timeout=10000&_locking_mode=NORMAL")
|
||||
if err == nil {
|
||||
// Set connection pool settings to ensure proper cleanup
|
||||
conn.SetMaxOpenConns(1) // SQLite only supports one writer
|
||||
conn.SetMaxIdleConns(1)
|
||||
|
||||
if err := conn.PingContext(ctx); err == nil {
|
||||
// Success on first try
|
||||
db := &DB{conn: conn}
|
||||
if err := db.createSchema(ctx); err != nil {
|
||||
_ = conn.Close()
|
||||
return nil, fmt.Errorf("creating schema: %w", err)
|
||||
}
|
||||
return db, nil
|
||||
}
|
||||
return nil, fmt.Errorf("pinging database: %w", err)
|
||||
_ = conn.Close()
|
||||
}
|
||||
|
||||
// If first attempt failed, try with TRUNCATE mode to clear any locks
|
||||
log.Info("Database appears locked, attempting recovery with TRUNCATE mode")
|
||||
conn, err = sql.Open("sqlite", path+"?_journal_mode=TRUNCATE&_synchronous=NORMAL&_busy_timeout=10000")
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("opening database in recovery mode: %w", err)
|
||||
}
|
||||
|
||||
// Set connection pool settings
|
||||
conn.SetMaxOpenConns(1)
|
||||
conn.SetMaxIdleConns(1)
|
||||
|
||||
if err := conn.PingContext(ctx); err != nil {
|
||||
_ = conn.Close()
|
||||
return nil, fmt.Errorf("database still locked after recovery attempt: %w", err)
|
||||
}
|
||||
|
||||
// Switch back to WAL mode
|
||||
if _, err := conn.ExecContext(ctx, "PRAGMA journal_mode=WAL"); err != nil {
|
||||
log.Warn("Failed to switch back to WAL mode", "error", err)
|
||||
}
|
||||
|
||||
db := &DB{conn: conn}
|
||||
if err := db.createSchema(ctx); err != nil {
|
||||
if closeErr := conn.Close(); closeErr != nil {
|
||||
Fatal("failed to close database connection: %v", closeErr)
|
||||
}
|
||||
_ = conn.Close()
|
||||
return nil, fmt.Errorf("creating schema: %w", err)
|
||||
}
|
||||
|
||||
@@ -39,9 +73,68 @@ func New(ctx context.Context, path string) (*DB, error) {
|
||||
}
|
||||
|
||||
func (db *DB) Close() error {
|
||||
log.Debug("Closing database connection")
|
||||
if err := db.conn.Close(); err != nil {
|
||||
Fatal("failed to close database: %v", err)
|
||||
log.Error("Failed to close database", "error", err)
|
||||
return fmt.Errorf("failed to close database: %w", err)
|
||||
}
|
||||
log.Debug("Database connection closed successfully")
|
||||
return nil
|
||||
}
|
||||
|
||||
// recoverDatabase attempts to recover a locked database
|
||||
func recoverDatabase(ctx context.Context, path string) error {
|
||||
// Check if database file exists
|
||||
if _, err := os.Stat(path); os.IsNotExist(err) {
|
||||
// No database file, nothing to recover
|
||||
return nil
|
||||
}
|
||||
|
||||
// Remove stale lock files
|
||||
// SQLite creates -wal and -shm files for WAL mode
|
||||
walPath := path + "-wal"
|
||||
shmPath := path + "-shm"
|
||||
journalPath := path + "-journal"
|
||||
|
||||
log.Info("Attempting database recovery", "path", path)
|
||||
|
||||
// Always remove lock files on startup to ensure clean state
|
||||
removed := false
|
||||
|
||||
// Check for and remove journal file (from non-WAL mode)
|
||||
if _, err := os.Stat(journalPath); err == nil {
|
||||
log.Info("Found journal file, removing", "path", journalPath)
|
||||
if err := os.Remove(journalPath); err != nil {
|
||||
log.Warn("Failed to remove journal file", "error", err)
|
||||
} else {
|
||||
removed = true
|
||||
}
|
||||
}
|
||||
|
||||
// Remove WAL file
|
||||
if _, err := os.Stat(walPath); err == nil {
|
||||
log.Info("Found WAL file, removing", "path", walPath)
|
||||
if err := os.Remove(walPath); err != nil {
|
||||
log.Warn("Failed to remove WAL file", "error", err)
|
||||
} else {
|
||||
removed = true
|
||||
}
|
||||
}
|
||||
|
||||
// Remove SHM file
|
||||
if _, err := os.Stat(shmPath); err == nil {
|
||||
log.Info("Found shared memory file, removing", "path", shmPath)
|
||||
if err := os.Remove(shmPath); err != nil {
|
||||
log.Warn("Failed to remove shared memory file", "error", err)
|
||||
} else {
|
||||
removed = true
|
||||
}
|
||||
}
|
||||
|
||||
if removed {
|
||||
log.Info("Database lock files removed")
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
@@ -55,18 +148,24 @@ func (db *DB) BeginTx(ctx context.Context, opts *sql.TxOptions) (*sql.Tx, error)
|
||||
|
||||
// LockForWrite acquires the write lock
|
||||
func (db *DB) LockForWrite() {
|
||||
log.Debug("Attempting to acquire write lock")
|
||||
db.writeLock.Lock()
|
||||
log.Debug("Write lock acquired")
|
||||
}
|
||||
|
||||
// UnlockWrite releases the write lock
|
||||
func (db *DB) UnlockWrite() {
|
||||
log.Debug("Releasing write lock")
|
||||
db.writeLock.Unlock()
|
||||
log.Debug("Write lock released")
|
||||
}
|
||||
|
||||
// ExecWithLock executes a write query with the write lock held
|
||||
func (db *DB) ExecWithLock(ctx context.Context, query string, args ...interface{}) (sql.Result, error) {
|
||||
db.writeLock.Lock()
|
||||
defer db.writeLock.Unlock()
|
||||
|
||||
LogSQL("Execute", query, args...)
|
||||
return db.conn.ExecContext(ctx, query, args...)
|
||||
}
|
||||
|
||||
@@ -104,16 +203,22 @@ func (db *DB) createSchema(ctx context.Context) error {
|
||||
);
|
||||
|
||||
CREATE TABLE IF NOT EXISTS blobs (
|
||||
blob_hash TEXT PRIMARY KEY,
|
||||
created_ts INTEGER NOT NULL
|
||||
id TEXT PRIMARY KEY,
|
||||
blob_hash TEXT UNIQUE,
|
||||
created_ts INTEGER NOT NULL,
|
||||
finished_ts INTEGER,
|
||||
uncompressed_size INTEGER NOT NULL DEFAULT 0,
|
||||
compressed_size INTEGER NOT NULL DEFAULT 0,
|
||||
uploaded_ts INTEGER
|
||||
);
|
||||
|
||||
CREATE TABLE IF NOT EXISTS blob_chunks (
|
||||
blob_hash TEXT NOT NULL,
|
||||
blob_id TEXT NOT NULL,
|
||||
chunk_hash TEXT NOT NULL,
|
||||
offset INTEGER NOT NULL,
|
||||
length INTEGER NOT NULL,
|
||||
PRIMARY KEY (blob_hash, chunk_hash)
|
||||
PRIMARY KEY (blob_id, chunk_hash),
|
||||
FOREIGN KEY (blob_id) REFERENCES blobs(id)
|
||||
);
|
||||
|
||||
CREATE TABLE IF NOT EXISTS chunk_files (
|
||||
@@ -128,13 +233,38 @@ func (db *DB) createSchema(ctx context.Context) error {
|
||||
id TEXT PRIMARY KEY,
|
||||
hostname TEXT NOT NULL,
|
||||
vaultik_version TEXT NOT NULL,
|
||||
created_ts INTEGER NOT NULL,
|
||||
file_count INTEGER NOT NULL,
|
||||
chunk_count INTEGER NOT NULL,
|
||||
blob_count INTEGER NOT NULL,
|
||||
total_size INTEGER NOT NULL,
|
||||
blob_size INTEGER NOT NULL,
|
||||
compression_ratio REAL NOT NULL
|
||||
started_at INTEGER NOT NULL,
|
||||
completed_at INTEGER,
|
||||
file_count INTEGER NOT NULL DEFAULT 0,
|
||||
chunk_count INTEGER NOT NULL DEFAULT 0,
|
||||
blob_count INTEGER NOT NULL DEFAULT 0,
|
||||
total_size INTEGER NOT NULL DEFAULT 0,
|
||||
blob_size INTEGER NOT NULL DEFAULT 0,
|
||||
compression_ratio REAL NOT NULL DEFAULT 1.0
|
||||
);
|
||||
|
||||
CREATE TABLE IF NOT EXISTS snapshot_files (
|
||||
snapshot_id TEXT NOT NULL,
|
||||
file_path TEXT NOT NULL,
|
||||
PRIMARY KEY (snapshot_id, file_path),
|
||||
FOREIGN KEY (snapshot_id) REFERENCES snapshots(id) ON DELETE CASCADE,
|
||||
FOREIGN KEY (file_path) REFERENCES files(path) ON DELETE CASCADE
|
||||
);
|
||||
|
||||
CREATE TABLE IF NOT EXISTS snapshot_blobs (
|
||||
snapshot_id TEXT NOT NULL,
|
||||
blob_id TEXT NOT NULL,
|
||||
blob_hash TEXT NOT NULL,
|
||||
PRIMARY KEY (snapshot_id, blob_id),
|
||||
FOREIGN KEY (snapshot_id) REFERENCES snapshots(id) ON DELETE CASCADE,
|
||||
FOREIGN KEY (blob_id) REFERENCES blobs(id) ON DELETE CASCADE
|
||||
);
|
||||
|
||||
CREATE TABLE IF NOT EXISTS uploads (
|
||||
blob_hash TEXT PRIMARY KEY,
|
||||
uploaded_at INTEGER NOT NULL,
|
||||
size INTEGER NOT NULL,
|
||||
duration_ms INTEGER NOT NULL
|
||||
);
|
||||
`
|
||||
|
||||
@@ -146,3 +276,10 @@ func (db *DB) createSchema(ctx context.Context) error {
|
||||
func NewTestDB() (*DB, error) {
|
||||
return New(context.Background(), ":memory:")
|
||||
}
|
||||
|
||||
// LogSQL logs SQL queries if debug mode is enabled
|
||||
func LogSQL(operation, query string, args ...interface{}) {
|
||||
if strings.Contains(os.Getenv("GODEBUG"), "vaultik") {
|
||||
log.Debug("SQL "+operation, "query", strings.TrimSpace(query), "args", fmt.Sprintf("%v", args))
|
||||
}
|
||||
}
|
||||
|
||||
@@ -62,6 +62,36 @@ func (r *FileChunkRepository) GetByPath(ctx context.Context, path string) ([]*Fi
|
||||
return fileChunks, rows.Err()
|
||||
}
|
||||
|
||||
// GetByPathTx retrieves file chunks within a transaction
|
||||
func (r *FileChunkRepository) GetByPathTx(ctx context.Context, tx *sql.Tx, path string) ([]*FileChunk, error) {
|
||||
query := `
|
||||
SELECT path, idx, chunk_hash
|
||||
FROM file_chunks
|
||||
WHERE path = ?
|
||||
ORDER BY idx
|
||||
`
|
||||
|
||||
LogSQL("GetByPathTx", query, path)
|
||||
rows, err := tx.QueryContext(ctx, query, path)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("querying file chunks: %w", err)
|
||||
}
|
||||
defer CloseRows(rows)
|
||||
|
||||
var fileChunks []*FileChunk
|
||||
for rows.Next() {
|
||||
var fc FileChunk
|
||||
err := rows.Scan(&fc.Path, &fc.Idx, &fc.ChunkHash)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("scanning file chunk: %w", err)
|
||||
}
|
||||
fileChunks = append(fileChunks, &fc)
|
||||
}
|
||||
LogSQL("GetByPathTx", "Complete", path, "count", len(fileChunks))
|
||||
|
||||
return fileChunks, rows.Err()
|
||||
}
|
||||
|
||||
func (r *FileChunkRepository) DeleteByPath(ctx context.Context, tx *sql.Tx, path string) error {
|
||||
query := `DELETE FROM file_chunks WHERE path = ?`
|
||||
|
||||
@@ -81,5 +111,16 @@ func (r *FileChunkRepository) DeleteByPath(ctx context.Context, tx *sql.Tx, path
|
||||
|
||||
// GetByFile is an alias for GetByPath for compatibility
|
||||
func (r *FileChunkRepository) GetByFile(ctx context.Context, path string) ([]*FileChunk, error) {
|
||||
return r.GetByPath(ctx, path)
|
||||
LogSQL("GetByFile", "Starting", path)
|
||||
result, err := r.GetByPath(ctx, path)
|
||||
LogSQL("GetByFile", "Complete", path, "count", len(result))
|
||||
return result, err
|
||||
}
|
||||
|
||||
// GetByFileTx retrieves file chunks within a transaction
|
||||
func (r *FileChunkRepository) GetByFileTx(ctx context.Context, tx *sql.Tx, path string) ([]*FileChunk, error) {
|
||||
LogSQL("GetByFileTx", "Starting", path)
|
||||
result, err := r.GetByPathTx(ctx, tx, path)
|
||||
LogSQL("GetByFileTx", "Complete", path, "count", len(result))
|
||||
return result, err
|
||||
}
|
||||
|
||||
@@ -31,6 +31,7 @@ func (r *FileRepository) Create(ctx context.Context, tx *sql.Tx, file *File) err
|
||||
|
||||
var err error
|
||||
if tx != nil {
|
||||
LogSQL("Execute", query, file.Path, file.MTime.Unix(), file.CTime.Unix(), file.Size, file.Mode, file.UID, file.GID, file.LinkTarget)
|
||||
_, err = tx.ExecContext(ctx, query, file.Path, file.MTime.Unix(), file.CTime.Unix(), file.Size, file.Mode, file.UID, file.GID, file.LinkTarget)
|
||||
} else {
|
||||
_, err = r.db.ExecWithLock(ctx, query, file.Path, file.MTime.Unix(), file.CTime.Unix(), file.Size, file.Mode, file.UID, file.GID, file.LinkTarget)
|
||||
@@ -81,6 +82,46 @@ func (r *FileRepository) GetByPath(ctx context.Context, path string) (*File, err
|
||||
return &file, nil
|
||||
}
|
||||
|
||||
func (r *FileRepository) GetByPathTx(ctx context.Context, tx *sql.Tx, path string) (*File, error) {
|
||||
query := `
|
||||
SELECT path, mtime, ctime, size, mode, uid, gid, link_target
|
||||
FROM files
|
||||
WHERE path = ?
|
||||
`
|
||||
|
||||
var file File
|
||||
var mtimeUnix, ctimeUnix int64
|
||||
var linkTarget sql.NullString
|
||||
|
||||
LogSQL("GetByPathTx QueryRowContext", query, path)
|
||||
err := tx.QueryRowContext(ctx, query, path).Scan(
|
||||
&file.Path,
|
||||
&mtimeUnix,
|
||||
&ctimeUnix,
|
||||
&file.Size,
|
||||
&file.Mode,
|
||||
&file.UID,
|
||||
&file.GID,
|
||||
&linkTarget,
|
||||
)
|
||||
LogSQL("GetByPathTx Scan complete", query, path)
|
||||
|
||||
if err == sql.ErrNoRows {
|
||||
return nil, nil
|
||||
}
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("querying file: %w", err)
|
||||
}
|
||||
|
||||
file.MTime = time.Unix(mtimeUnix, 0)
|
||||
file.CTime = time.Unix(ctimeUnix, 0)
|
||||
if linkTarget.Valid {
|
||||
file.LinkTarget = linkTarget.String
|
||||
}
|
||||
|
||||
return &file, nil
|
||||
}
|
||||
|
||||
func (r *FileRepository) ListModifiedSince(ctx context.Context, since time.Time) ([]*File, error) {
|
||||
query := `
|
||||
SELECT path, mtime, ctime, size, mode, uid, gid, link_target
|
||||
|
||||
@@ -35,13 +35,18 @@ type Chunk struct {
|
||||
|
||||
// Blob represents a blob record in the database
type Blob struct {
	ID               string     // UUID primary key, assigned when packing starts
	Hash             string     // Content hash; can be empty until blob is finalized
	CreatedTS        time.Time  // When the blob record was created
	FinishedTS       *time.Time // nil if not yet finalized
	UncompressedSize int64      // Size of the contained chunk data before compression
	CompressedSize   int64      // Size of the packed blob after compression
	UploadedTS       *time.Time // nil if not yet uploaded
}
|
||||
|
||||
// BlobChunk represents the mapping between blobs and chunks
type BlobChunk struct {
	BlobID    string // UUID of the containing blob (blobs.id)
	ChunkHash string // Hash of the chunk stored inside the blob
	Offset    int64  // Byte offset of the chunk within the blob (presumably the uncompressed stream — confirm)
	Length    int64  // Byte length of the chunk data
}
|
||||
@@ -60,7 +65,8 @@ type Snapshot struct {
|
||||
ID string
|
||||
Hostname string
|
||||
VaultikVersion string
|
||||
CreatedTS time.Time
|
||||
StartedAt time.Time
|
||||
CompletedAt *time.Time // nil if still in progress
|
||||
FileCount int64
|
||||
ChunkCount int64
|
||||
BlobCount int64
|
||||
@@ -68,3 +74,21 @@ type Snapshot struct {
|
||||
BlobSize int64 // Total size of all referenced blobs (compressed and encrypted)
|
||||
CompressionRatio float64 // Compression ratio (BlobSize / TotalSize)
|
||||
}
|
||||
|
||||
// IsComplete returns true if the snapshot has completed
|
||||
func (s *Snapshot) IsComplete() bool {
|
||||
return s.CompletedAt != nil
|
||||
}
|
||||
|
||||
// SnapshotFile represents the mapping between snapshots and files
type SnapshotFile struct {
	SnapshotID string // ID of the snapshot (snapshots.id)
	FilePath   string // Path of the file included in the snapshot
}
|
||||
|
||||
// SnapshotBlob represents the mapping between snapshots and blobs
type SnapshotBlob struct {
	SnapshotID string // ID of the snapshot (snapshots.id)
	BlobID     string // UUID of the referenced blob (blobs.id)
	BlobHash   string // Denormalized for easier manifest generation
}
|
||||
|
||||
@@ -7,6 +7,7 @@ import (
|
||||
"path/filepath"
|
||||
|
||||
"git.eeqj.de/sneak/vaultik/internal/config"
|
||||
"git.eeqj.de/sneak/vaultik/internal/log"
|
||||
"go.uber.org/fx"
|
||||
)
|
||||
|
||||
@@ -32,7 +33,13 @@ func provideDatabase(lc fx.Lifecycle, cfg *config.Config) (*DB, error) {
|
||||
|
||||
lc.Append(fx.Hook{
|
||||
OnStop: func(ctx context.Context) error {
|
||||
return db.Close()
|
||||
log.Debug("Database module OnStop hook called")
|
||||
if err := db.Close(); err != nil {
|
||||
log.Error("Failed to close database in OnStop hook", "error", err)
|
||||
return err
|
||||
}
|
||||
log.Debug("Database closed successfully in OnStop hook")
|
||||
return nil
|
||||
},
|
||||
})
|
||||
|
||||
|
||||
@@ -15,6 +15,7 @@ type Repositories struct {
|
||||
BlobChunks *BlobChunkRepository
|
||||
ChunkFiles *ChunkFileRepository
|
||||
Snapshots *SnapshotRepository
|
||||
Uploads *UploadRepository
|
||||
}
|
||||
|
||||
func NewRepositories(db *DB) *Repositories {
|
||||
@@ -27,6 +28,7 @@ func NewRepositories(db *DB) *Repositories {
|
||||
BlobChunks: NewBlobChunkRepository(db),
|
||||
ChunkFiles: NewChunkFileRepository(db),
|
||||
Snapshots: NewSnapshotRepository(db),
|
||||
Uploads: NewUploadRepository(db.conn),
|
||||
}
|
||||
}
|
||||
|
||||
@@ -34,13 +36,19 @@ type TxFunc func(ctx context.Context, tx *sql.Tx) error
|
||||
|
||||
func (r *Repositories) WithTx(ctx context.Context, fn TxFunc) error {
|
||||
// Acquire write lock for the entire transaction
|
||||
LogSQL("WithTx", "Acquiring write lock", "")
|
||||
r.db.LockForWrite()
|
||||
defer r.db.UnlockWrite()
|
||||
defer func() {
|
||||
LogSQL("WithTx", "Releasing write lock", "")
|
||||
r.db.UnlockWrite()
|
||||
}()
|
||||
|
||||
LogSQL("WithTx", "Beginning transaction", "")
|
||||
tx, err := r.db.BeginTx(ctx, nil)
|
||||
if err != nil {
|
||||
return fmt.Errorf("beginning transaction: %w", err)
|
||||
}
|
||||
LogSQL("WithTx", "Transaction started", "")
|
||||
|
||||
defer func() {
|
||||
if p := recover(); p != nil {
|
||||
|
||||
@@ -71,7 +71,8 @@ func TestRepositoriesTransaction(t *testing.T) {
|
||||
|
||||
// Create blob
|
||||
blob := &Blob{
|
||||
BlobHash: "tx_blob1",
|
||||
ID: "tx-blob-id-1",
|
||||
Hash: "tx_blob1",
|
||||
CreatedTS: time.Now().Truncate(time.Second),
|
||||
}
|
||||
if err := repos.Blobs.Create(ctx, tx, blob); err != nil {
|
||||
@@ -80,7 +81,7 @@ func TestRepositoriesTransaction(t *testing.T) {
|
||||
|
||||
// Map chunks to blob
|
||||
bc1 := &BlobChunk{
|
||||
BlobHash: blob.BlobHash,
|
||||
BlobID: blob.ID,
|
||||
ChunkHash: chunk1.ChunkHash,
|
||||
Offset: 0,
|
||||
Length: 512,
|
||||
@@ -90,7 +91,7 @@ func TestRepositoriesTransaction(t *testing.T) {
|
||||
}
|
||||
|
||||
bc2 := &BlobChunk{
|
||||
BlobHash: blob.BlobHash,
|
||||
BlobID: blob.ID,
|
||||
ChunkHash: chunk2.ChunkHash,
|
||||
Offset: 512,
|
||||
Length: 512,
|
||||
|
||||
11
internal/database/schema/008_uploads.sql
Normal file
11
internal/database/schema/008_uploads.sql
Normal file
@@ -0,0 +1,11 @@
|
||||
-- Track blob upload metrics
CREATE TABLE IF NOT EXISTS uploads (
    blob_hash TEXT PRIMARY KEY,
    uploaded_at TIMESTAMP NOT NULL,
    size INTEGER NOT NULL,
    duration_ms INTEGER NOT NULL,
    -- NOTE(review): blobs now uses id (UUID) as its primary key; confirm
    -- blob_hash is still declared UNIQUE on blobs, or reference blobs(id).
    FOREIGN KEY (blob_hash) REFERENCES blobs(blob_hash)
);

-- IF NOT EXISTS keeps the migration idempotent, matching the table above.
CREATE INDEX IF NOT EXISTS idx_uploads_uploaded_at ON uploads(uploaded_at);
CREATE INDEX IF NOT EXISTS idx_uploads_duration ON uploads(duration_ms);
|
||||
@@ -17,17 +17,23 @@ func NewSnapshotRepository(db *DB) *SnapshotRepository {
|
||||
|
||||
func (r *SnapshotRepository) Create(ctx context.Context, tx *sql.Tx, snapshot *Snapshot) error {
|
||||
query := `
|
||||
INSERT INTO snapshots (id, hostname, vaultik_version, created_ts, file_count, chunk_count, blob_count, total_size, blob_size, compression_ratio)
|
||||
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
|
||||
INSERT INTO snapshots (id, hostname, vaultik_version, started_at, completed_at, file_count, chunk_count, blob_count, total_size, blob_size, compression_ratio)
|
||||
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
|
||||
`
|
||||
|
||||
var completedAt *int64
|
||||
if snapshot.CompletedAt != nil {
|
||||
ts := snapshot.CompletedAt.Unix()
|
||||
completedAt = &ts
|
||||
}
|
||||
|
||||
var err error
|
||||
if tx != nil {
|
||||
_, err = tx.ExecContext(ctx, query, snapshot.ID, snapshot.Hostname, snapshot.VaultikVersion, snapshot.CreatedTS.Unix(),
|
||||
snapshot.FileCount, snapshot.ChunkCount, snapshot.BlobCount, snapshot.TotalSize, snapshot.BlobSize, snapshot.CompressionRatio)
|
||||
_, err = tx.ExecContext(ctx, query, snapshot.ID, snapshot.Hostname, snapshot.VaultikVersion, snapshot.StartedAt.Unix(),
|
||||
completedAt, snapshot.FileCount, snapshot.ChunkCount, snapshot.BlobCount, snapshot.TotalSize, snapshot.BlobSize, snapshot.CompressionRatio)
|
||||
} else {
|
||||
_, err = r.db.ExecWithLock(ctx, query, snapshot.ID, snapshot.Hostname, snapshot.VaultikVersion, snapshot.CreatedTS.Unix(),
|
||||
snapshot.FileCount, snapshot.ChunkCount, snapshot.BlobCount, snapshot.TotalSize, snapshot.BlobSize, snapshot.CompressionRatio)
|
||||
_, err = r.db.ExecWithLock(ctx, query, snapshot.ID, snapshot.Hostname, snapshot.VaultikVersion, snapshot.StartedAt.Unix(),
|
||||
completedAt, snapshot.FileCount, snapshot.ChunkCount, snapshot.BlobCount, snapshot.TotalSize, snapshot.BlobSize, snapshot.CompressionRatio)
|
||||
}
|
||||
|
||||
if err != nil {
|
||||
@@ -70,19 +76,21 @@ func (r *SnapshotRepository) UpdateCounts(ctx context.Context, tx *sql.Tx, snaps
|
||||
|
||||
func (r *SnapshotRepository) GetByID(ctx context.Context, snapshotID string) (*Snapshot, error) {
|
||||
query := `
|
||||
SELECT id, hostname, vaultik_version, created_ts, file_count, chunk_count, blob_count, total_size, blob_size, compression_ratio
|
||||
SELECT id, hostname, vaultik_version, started_at, completed_at, file_count, chunk_count, blob_count, total_size, blob_size, compression_ratio
|
||||
FROM snapshots
|
||||
WHERE id = ?
|
||||
`
|
||||
|
||||
var snapshot Snapshot
|
||||
var createdTSUnix int64
|
||||
var startedAtUnix int64
|
||||
var completedAtUnix *int64
|
||||
|
||||
err := r.db.conn.QueryRowContext(ctx, query, snapshotID).Scan(
|
||||
&snapshot.ID,
|
||||
&snapshot.Hostname,
|
||||
&snapshot.VaultikVersion,
|
||||
&createdTSUnix,
|
||||
&startedAtUnix,
|
||||
&completedAtUnix,
|
||||
&snapshot.FileCount,
|
||||
&snapshot.ChunkCount,
|
||||
&snapshot.BlobCount,
|
||||
@@ -98,16 +106,20 @@ func (r *SnapshotRepository) GetByID(ctx context.Context, snapshotID string) (*S
|
||||
return nil, fmt.Errorf("querying snapshot: %w", err)
|
||||
}
|
||||
|
||||
snapshot.CreatedTS = time.Unix(createdTSUnix, 0)
|
||||
snapshot.StartedAt = time.Unix(startedAtUnix, 0)
|
||||
if completedAtUnix != nil {
|
||||
t := time.Unix(*completedAtUnix, 0)
|
||||
snapshot.CompletedAt = &t
|
||||
}
|
||||
|
||||
return &snapshot, nil
|
||||
}
|
||||
|
||||
func (r *SnapshotRepository) ListRecent(ctx context.Context, limit int) ([]*Snapshot, error) {
|
||||
query := `
|
||||
SELECT id, hostname, vaultik_version, created_ts, file_count, chunk_count, blob_count, total_size, blob_size, compression_ratio
|
||||
SELECT id, hostname, vaultik_version, started_at, completed_at, file_count, chunk_count, blob_count, total_size, blob_size, compression_ratio
|
||||
FROM snapshots
|
||||
ORDER BY created_ts DESC
|
||||
ORDER BY started_at DESC
|
||||
LIMIT ?
|
||||
`
|
||||
|
||||
@@ -120,13 +132,15 @@ func (r *SnapshotRepository) ListRecent(ctx context.Context, limit int) ([]*Snap
|
||||
var snapshots []*Snapshot
|
||||
for rows.Next() {
|
||||
var snapshot Snapshot
|
||||
var createdTSUnix int64
|
||||
var startedAtUnix int64
|
||||
var completedAtUnix *int64
|
||||
|
||||
err := rows.Scan(
|
||||
&snapshot.ID,
|
||||
&snapshot.Hostname,
|
||||
&snapshot.VaultikVersion,
|
||||
&createdTSUnix,
|
||||
&startedAtUnix,
|
||||
&completedAtUnix,
|
||||
&snapshot.FileCount,
|
||||
&snapshot.ChunkCount,
|
||||
&snapshot.BlobCount,
|
||||
@@ -138,7 +152,154 @@ func (r *SnapshotRepository) ListRecent(ctx context.Context, limit int) ([]*Snap
|
||||
return nil, fmt.Errorf("scanning snapshot: %w", err)
|
||||
}
|
||||
|
||||
snapshot.CreatedTS = time.Unix(createdTSUnix, 0)
|
||||
snapshot.StartedAt = time.Unix(startedAtUnix, 0)
|
||||
if completedAtUnix != nil {
|
||||
t := time.Unix(*completedAtUnix, 0)
|
||||
snapshot.CompletedAt = &t
|
||||
}
|
||||
|
||||
snapshots = append(snapshots, &snapshot)
|
||||
}
|
||||
|
||||
return snapshots, rows.Err()
|
||||
}
|
||||
|
||||
// MarkComplete marks a snapshot as completed with the current timestamp
|
||||
func (r *SnapshotRepository) MarkComplete(ctx context.Context, tx *sql.Tx, snapshotID string) error {
|
||||
query := `
|
||||
UPDATE snapshots
|
||||
SET completed_at = ?
|
||||
WHERE id = ?
|
||||
`
|
||||
|
||||
completedAt := time.Now().Unix()
|
||||
|
||||
var err error
|
||||
if tx != nil {
|
||||
_, err = tx.ExecContext(ctx, query, completedAt, snapshotID)
|
||||
} else {
|
||||
_, err = r.db.ExecWithLock(ctx, query, completedAt, snapshotID)
|
||||
}
|
||||
|
||||
if err != nil {
|
||||
return fmt.Errorf("marking snapshot complete: %w", err)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// AddFile adds a file to a snapshot
|
||||
func (r *SnapshotRepository) AddFile(ctx context.Context, tx *sql.Tx, snapshotID string, filePath string) error {
|
||||
query := `
|
||||
INSERT OR IGNORE INTO snapshot_files (snapshot_id, file_path)
|
||||
VALUES (?, ?)
|
||||
`
|
||||
|
||||
var err error
|
||||
if tx != nil {
|
||||
_, err = tx.ExecContext(ctx, query, snapshotID, filePath)
|
||||
} else {
|
||||
_, err = r.db.ExecWithLock(ctx, query, snapshotID, filePath)
|
||||
}
|
||||
|
||||
if err != nil {
|
||||
return fmt.Errorf("adding file to snapshot: %w", err)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// AddBlob adds a blob to a snapshot
|
||||
func (r *SnapshotRepository) AddBlob(ctx context.Context, tx *sql.Tx, snapshotID string, blobID string, blobHash string) error {
|
||||
query := `
|
||||
INSERT OR IGNORE INTO snapshot_blobs (snapshot_id, blob_id, blob_hash)
|
||||
VALUES (?, ?, ?)
|
||||
`
|
||||
|
||||
var err error
|
||||
if tx != nil {
|
||||
_, err = tx.ExecContext(ctx, query, snapshotID, blobID, blobHash)
|
||||
} else {
|
||||
_, err = r.db.ExecWithLock(ctx, query, snapshotID, blobID, blobHash)
|
||||
}
|
||||
|
||||
if err != nil {
|
||||
return fmt.Errorf("adding blob to snapshot: %w", err)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// GetBlobHashes returns all blob hashes for a snapshot
|
||||
func (r *SnapshotRepository) GetBlobHashes(ctx context.Context, snapshotID string) ([]string, error) {
|
||||
query := `
|
||||
SELECT sb.blob_hash
|
||||
FROM snapshot_blobs sb
|
||||
WHERE sb.snapshot_id = ?
|
||||
ORDER BY sb.blob_hash
|
||||
`
|
||||
|
||||
rows, err := r.db.conn.QueryContext(ctx, query, snapshotID)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("querying blob hashes: %w", err)
|
||||
}
|
||||
defer CloseRows(rows)
|
||||
|
||||
var blobs []string
|
||||
for rows.Next() {
|
||||
var blobHash string
|
||||
if err := rows.Scan(&blobHash); err != nil {
|
||||
return nil, fmt.Errorf("scanning blob hash: %w", err)
|
||||
}
|
||||
blobs = append(blobs, blobHash)
|
||||
}
|
||||
|
||||
return blobs, rows.Err()
|
||||
}
|
||||
|
||||
// GetIncompleteSnapshots returns all snapshots that haven't been completed
|
||||
func (r *SnapshotRepository) GetIncompleteSnapshots(ctx context.Context) ([]*Snapshot, error) {
|
||||
query := `
|
||||
SELECT id, hostname, vaultik_version, started_at, completed_at, file_count, chunk_count, blob_count, total_size, blob_size, compression_ratio
|
||||
FROM snapshots
|
||||
WHERE completed_at IS NULL
|
||||
ORDER BY started_at DESC
|
||||
`
|
||||
|
||||
rows, err := r.db.conn.QueryContext(ctx, query)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("querying incomplete snapshots: %w", err)
|
||||
}
|
||||
defer CloseRows(rows)
|
||||
|
||||
var snapshots []*Snapshot
|
||||
for rows.Next() {
|
||||
var snapshot Snapshot
|
||||
var startedAtUnix int64
|
||||
var completedAtUnix *int64
|
||||
|
||||
err := rows.Scan(
|
||||
&snapshot.ID,
|
||||
&snapshot.Hostname,
|
||||
&snapshot.VaultikVersion,
|
||||
&startedAtUnix,
|
||||
&completedAtUnix,
|
||||
&snapshot.FileCount,
|
||||
&snapshot.ChunkCount,
|
||||
&snapshot.BlobCount,
|
||||
&snapshot.TotalSize,
|
||||
&snapshot.BlobSize,
|
||||
&snapshot.CompressionRatio,
|
||||
)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("scanning snapshot: %w", err)
|
||||
}
|
||||
|
||||
snapshot.StartedAt = time.Unix(startedAtUnix, 0)
|
||||
if completedAtUnix != nil {
|
||||
t := time.Unix(*completedAtUnix, 0)
|
||||
snapshot.CompletedAt = &t
|
||||
}
|
||||
|
||||
snapshots = append(snapshots, &snapshot)
|
||||
}
|
||||
|
||||
@@ -30,7 +30,8 @@ func TestSnapshotRepository(t *testing.T) {
|
||||
ID: "2024-01-01T12:00:00Z",
|
||||
Hostname: "test-host",
|
||||
VaultikVersion: "1.0.0",
|
||||
CreatedTS: time.Now().Truncate(time.Second),
|
||||
StartedAt: time.Now().Truncate(time.Second),
|
||||
CompletedAt: nil,
|
||||
FileCount: 100,
|
||||
ChunkCount: 500,
|
||||
BlobCount: 10,
|
||||
@@ -99,7 +100,8 @@ func TestSnapshotRepository(t *testing.T) {
|
||||
ID: fmt.Sprintf("2024-01-0%dT12:00:00Z", i),
|
||||
Hostname: "test-host",
|
||||
VaultikVersion: "1.0.0",
|
||||
CreatedTS: time.Now().Add(time.Duration(i) * time.Hour).Truncate(time.Second),
|
||||
StartedAt: time.Now().Add(time.Duration(i) * time.Hour).Truncate(time.Second),
|
||||
CompletedAt: nil,
|
||||
FileCount: int64(100 * i),
|
||||
ChunkCount: int64(500 * i),
|
||||
BlobCount: int64(10 * i),
|
||||
@@ -121,7 +123,7 @@ func TestSnapshotRepository(t *testing.T) {
|
||||
|
||||
// Verify order (most recent first)
|
||||
for i := 0; i < len(recent)-1; i++ {
|
||||
if recent[i].CreatedTS.Before(recent[i+1].CreatedTS) {
|
||||
if recent[i].StartedAt.Before(recent[i+1].StartedAt) {
|
||||
t.Error("snapshots not in descending order")
|
||||
}
|
||||
}
|
||||
@@ -162,7 +164,8 @@ func TestSnapshotRepositoryDuplicate(t *testing.T) {
|
||||
ID: "2024-01-01T12:00:00Z",
|
||||
Hostname: "test-host",
|
||||
VaultikVersion: "1.0.0",
|
||||
CreatedTS: time.Now().Truncate(time.Second),
|
||||
StartedAt: time.Now().Truncate(time.Second),
|
||||
CompletedAt: nil,
|
||||
FileCount: 100,
|
||||
ChunkCount: 500,
|
||||
BlobCount: 10,
|
||||
|
||||
135
internal/database/uploads.go
Normal file
135
internal/database/uploads.go
Normal file
@@ -0,0 +1,135 @@
|
||||
package database
|
||||
|
||||
import (
	"context"
	"database/sql"
	"errors"
	"fmt"
	"time"

	"git.eeqj.de/sneak/vaultik/internal/log"
)
|
||||
|
||||
// Upload represents a blob upload record
type Upload struct {
	BlobHash   string    // Hash identifying the uploaded blob
	UploadedAt time.Time // Timestamp recorded for the upload
	Size       int64     // Uploaded size in bytes
	DurationMs int64     // Upload duration in milliseconds
}
|
||||
|
||||
// UploadRepository handles upload records
type UploadRepository struct {
	conn *sql.DB // Database handle used for all queries
}

// NewUploadRepository creates a new upload repository
func NewUploadRepository(conn *sql.DB) *UploadRepository {
	repo := &UploadRepository{}
	repo.conn = conn
	return repo
}
|
||||
|
||||
// Create inserts a new upload record
|
||||
func (r *UploadRepository) Create(ctx context.Context, tx *sql.Tx, upload *Upload) error {
|
||||
query := `
|
||||
INSERT INTO uploads (blob_hash, uploaded_at, size, duration_ms)
|
||||
VALUES (?, ?, ?, ?)
|
||||
`
|
||||
|
||||
var err error
|
||||
if tx != nil {
|
||||
_, err = tx.ExecContext(ctx, query, upload.BlobHash, upload.UploadedAt, upload.Size, upload.DurationMs)
|
||||
} else {
|
||||
_, err = r.conn.ExecContext(ctx, query, upload.BlobHash, upload.UploadedAt, upload.Size, upload.DurationMs)
|
||||
}
|
||||
|
||||
return err
|
||||
}
|
||||
|
||||
// GetByBlobHash retrieves an upload record by blob hash
|
||||
func (r *UploadRepository) GetByBlobHash(ctx context.Context, blobHash string) (*Upload, error) {
|
||||
query := `
|
||||
SELECT blob_hash, uploaded_at, size, duration_ms
|
||||
FROM uploads
|
||||
WHERE blob_hash = ?
|
||||
`
|
||||
|
||||
var upload Upload
|
||||
err := r.conn.QueryRowContext(ctx, query, blobHash).Scan(
|
||||
&upload.BlobHash,
|
||||
&upload.UploadedAt,
|
||||
&upload.Size,
|
||||
&upload.DurationMs,
|
||||
)
|
||||
|
||||
if err == sql.ErrNoRows {
|
||||
return nil, nil
|
||||
}
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return &upload, nil
|
||||
}
|
||||
|
||||
// GetRecentUploads retrieves recent uploads ordered by upload time
|
||||
func (r *UploadRepository) GetRecentUploads(ctx context.Context, limit int) ([]*Upload, error) {
|
||||
query := `
|
||||
SELECT blob_hash, uploaded_at, size, duration_ms
|
||||
FROM uploads
|
||||
ORDER BY uploaded_at DESC
|
||||
LIMIT ?
|
||||
`
|
||||
|
||||
rows, err := r.conn.QueryContext(ctx, query, limit)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
defer func() {
|
||||
if err := rows.Close(); err != nil {
|
||||
log.Error("failed to close rows", "error", err)
|
||||
}
|
||||
}()
|
||||
|
||||
var uploads []*Upload
|
||||
for rows.Next() {
|
||||
var upload Upload
|
||||
if err := rows.Scan(&upload.BlobHash, &upload.UploadedAt, &upload.Size, &upload.DurationMs); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
uploads = append(uploads, &upload)
|
||||
}
|
||||
|
||||
return uploads, rows.Err()
|
||||
}
|
||||
|
||||
// GetUploadStats returns aggregate statistics for uploads
|
||||
func (r *UploadRepository) GetUploadStats(ctx context.Context, since time.Time) (*UploadStats, error) {
|
||||
query := `
|
||||
SELECT
|
||||
COUNT(*) as count,
|
||||
COALESCE(SUM(size), 0) as total_size,
|
||||
COALESCE(AVG(duration_ms), 0) as avg_duration_ms,
|
||||
COALESCE(MIN(duration_ms), 0) as min_duration_ms,
|
||||
COALESCE(MAX(duration_ms), 0) as max_duration_ms
|
||||
FROM uploads
|
||||
WHERE uploaded_at >= ?
|
||||
`
|
||||
|
||||
var stats UploadStats
|
||||
err := r.conn.QueryRowContext(ctx, query, since).Scan(
|
||||
&stats.Count,
|
||||
&stats.TotalSize,
|
||||
&stats.AvgDurationMs,
|
||||
&stats.MinDurationMs,
|
||||
&stats.MaxDurationMs,
|
||||
)
|
||||
|
||||
return &stats, err
|
||||
}
|
||||
|
||||
// UploadStats contains aggregate upload statistics
type UploadStats struct {
	Count         int64   // Number of uploads in the window
	TotalSize     int64   // Sum of uploaded bytes
	AvgDurationMs float64 // Mean upload duration in milliseconds
	MinDurationMs int64   // Fastest upload duration in milliseconds
	MaxDurationMs int64   // Slowest upload duration in milliseconds
}
|
||||
Reference in New Issue
Block a user