Refactor blob storage to use UUID primary keys and implement streaming chunking

- Changed blob table to use ID (UUID) as primary key instead of hash
- Blob records are now created at packing start, enabling immediate chunk associations
- Implemented streaming chunking to process large files without memory exhaustion
- Fixed blob manifest generation to include all referenced blobs
- Updated all foreign key references from blob_hash to blob_id
- Added progress reporting and improved error handling
- Enforced encryption requirement for all blob packing
- Updated tests to use test encryption keys
- Added Cyrillic transliteration to README
This commit is contained in:
2025-07-22 07:43:39 +02:00
parent 26db096913
commit 86b533d6ee
49 changed files with 5709 additions and 324 deletions

View File

@@ -16,15 +16,15 @@ func NewBlobChunkRepository(db *DB) *BlobChunkRepository {
func (r *BlobChunkRepository) Create(ctx context.Context, tx *sql.Tx, bc *BlobChunk) error {
query := `
INSERT INTO blob_chunks (blob_hash, chunk_hash, offset, length)
INSERT INTO blob_chunks (blob_id, chunk_hash, offset, length)
VALUES (?, ?, ?, ?)
`
var err error
if tx != nil {
_, err = tx.ExecContext(ctx, query, bc.BlobHash, bc.ChunkHash, bc.Offset, bc.Length)
_, err = tx.ExecContext(ctx, query, bc.BlobID, bc.ChunkHash, bc.Offset, bc.Length)
} else {
_, err = r.db.ExecWithLock(ctx, query, bc.BlobHash, bc.ChunkHash, bc.Offset, bc.Length)
_, err = r.db.ExecWithLock(ctx, query, bc.BlobID, bc.ChunkHash, bc.Offset, bc.Length)
}
if err != nil {
@@ -34,15 +34,15 @@ func (r *BlobChunkRepository) Create(ctx context.Context, tx *sql.Tx, bc *BlobCh
return nil
}
func (r *BlobChunkRepository) GetByBlobHash(ctx context.Context, blobHash string) ([]*BlobChunk, error) {
func (r *BlobChunkRepository) GetByBlobID(ctx context.Context, blobID string) ([]*BlobChunk, error) {
query := `
SELECT blob_hash, chunk_hash, offset, length
SELECT blob_id, chunk_hash, offset, length
FROM blob_chunks
WHERE blob_hash = ?
WHERE blob_id = ?
ORDER BY offset
`
rows, err := r.db.conn.QueryContext(ctx, query, blobHash)
rows, err := r.db.conn.QueryContext(ctx, query, blobID)
if err != nil {
return nil, fmt.Errorf("querying blob chunks: %w", err)
}
@@ -51,7 +51,7 @@ func (r *BlobChunkRepository) GetByBlobHash(ctx context.Context, blobHash string
var blobChunks []*BlobChunk
for rows.Next() {
var bc BlobChunk
err := rows.Scan(&bc.BlobHash, &bc.ChunkHash, &bc.Offset, &bc.Length)
err := rows.Scan(&bc.BlobID, &bc.ChunkHash, &bc.Offset, &bc.Length)
if err != nil {
return nil, fmt.Errorf("scanning blob chunk: %w", err)
}
@@ -63,26 +63,61 @@ func (r *BlobChunkRepository) GetByBlobHash(ctx context.Context, blobHash string
// GetByChunkHash returns one blob-chunk mapping for the given chunk hash,
// or (nil, nil) when the chunk is not packed into any blob yet.
// Because a chunk may appear in several blobs, LIMIT 1 picks an arbitrary one.
func (r *BlobChunkRepository) GetByChunkHash(ctx context.Context, chunkHash string) (*BlobChunk, error) {
	query := `
		SELECT blob_id, chunk_hash, offset, length
		FROM blob_chunks
		WHERE chunk_hash = ?
		LIMIT 1
	`
	LogSQL("GetByChunkHash", query, chunkHash)

	var bc BlobChunk
	err := r.db.conn.QueryRowContext(ctx, query, chunkHash).Scan(
		&bc.BlobID,
		&bc.ChunkHash,
		&bc.Offset,
		&bc.Length,
	)
	// Absence is not an error for callers: they check for a nil result.
	if err == sql.ErrNoRows {
		LogSQL("GetByChunkHash", "No rows found", chunkHash)
		return nil, nil
	}
	if err != nil {
		LogSQL("GetByChunkHash", "Error", chunkHash, err)
		return nil, fmt.Errorf("querying blob chunk: %w", err)
	}
	LogSQL("GetByChunkHash", "Found blob", chunkHash, "blob", bc.BlobID)
	return &bc, nil
}
// GetByChunkHashTx retrieves a blob chunk within a transaction.
// It mirrors GetByChunkHash but reads through tx so the lookup observes
// rows written earlier in the same transaction; returns (nil, nil) when
// no mapping exists for the chunk hash.
func (r *BlobChunkRepository) GetByChunkHashTx(ctx context.Context, tx *sql.Tx, chunkHash string) (*BlobChunk, error) {
	query := `
		SELECT blob_id, chunk_hash, offset, length
		FROM blob_chunks
		WHERE chunk_hash = ?
		LIMIT 1
	`
	LogSQL("GetByChunkHashTx", query, chunkHash)

	result := &BlobChunk{}
	scanErr := tx.QueryRowContext(ctx, query, chunkHash).Scan(
		&result.BlobID,
		&result.ChunkHash,
		&result.Offset,
		&result.Length,
	)
	switch {
	case scanErr == sql.ErrNoRows:
		LogSQL("GetByChunkHashTx", "No rows found", chunkHash)
		return nil, nil
	case scanErr != nil:
		LogSQL("GetByChunkHashTx", "Error", chunkHash, scanErr)
		return nil, fmt.Errorf("querying blob chunk: %w", scanErr)
	}
	LogSQL("GetByChunkHashTx", "Found blob", chunkHash, "blob", result.BlobID)
	return result, nil
}