Refactor blob storage to use UUID primary keys and implement streaming chunking
- Changed blob table to use ID (UUID) as primary key instead of hash
- Blob records are now created at packing start, enabling immediate chunk associations
- Implemented streaming chunking to process large files without memory exhaustion
- Fixed blob manifest generation to include all referenced blobs
- Updated all foreign key references from blob_hash to blob_id
- Added progress reporting and improved error handling
- Enforced encryption requirement for all blob packing
- Updated tests to use test encryption keys
- Added Cyrillic transliteration to README
This commit is contained in:
@@ -16,15 +16,15 @@ func NewBlobChunkRepository(db *DB) *BlobChunkRepository {
|
||||
|
||||
func (r *BlobChunkRepository) Create(ctx context.Context, tx *sql.Tx, bc *BlobChunk) error {
|
||||
query := `
|
||||
INSERT INTO blob_chunks (blob_hash, chunk_hash, offset, length)
|
||||
INSERT INTO blob_chunks (blob_id, chunk_hash, offset, length)
|
||||
VALUES (?, ?, ?, ?)
|
||||
`
|
||||
|
||||
var err error
|
||||
if tx != nil {
|
||||
_, err = tx.ExecContext(ctx, query, bc.BlobHash, bc.ChunkHash, bc.Offset, bc.Length)
|
||||
_, err = tx.ExecContext(ctx, query, bc.BlobID, bc.ChunkHash, bc.Offset, bc.Length)
|
||||
} else {
|
||||
_, err = r.db.ExecWithLock(ctx, query, bc.BlobHash, bc.ChunkHash, bc.Offset, bc.Length)
|
||||
_, err = r.db.ExecWithLock(ctx, query, bc.BlobID, bc.ChunkHash, bc.Offset, bc.Length)
|
||||
}
|
||||
|
||||
if err != nil {
|
||||
@@ -34,15 +34,15 @@ func (r *BlobChunkRepository) Create(ctx context.Context, tx *sql.Tx, bc *BlobCh
|
||||
return nil
|
||||
}
|
||||
|
||||
func (r *BlobChunkRepository) GetByBlobHash(ctx context.Context, blobHash string) ([]*BlobChunk, error) {
|
||||
func (r *BlobChunkRepository) GetByBlobID(ctx context.Context, blobID string) ([]*BlobChunk, error) {
|
||||
query := `
|
||||
SELECT blob_hash, chunk_hash, offset, length
|
||||
SELECT blob_id, chunk_hash, offset, length
|
||||
FROM blob_chunks
|
||||
WHERE blob_hash = ?
|
||||
WHERE blob_id = ?
|
||||
ORDER BY offset
|
||||
`
|
||||
|
||||
rows, err := r.db.conn.QueryContext(ctx, query, blobHash)
|
||||
rows, err := r.db.conn.QueryContext(ctx, query, blobID)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("querying blob chunks: %w", err)
|
||||
}
|
||||
@@ -51,7 +51,7 @@ func (r *BlobChunkRepository) GetByBlobHash(ctx context.Context, blobHash string
|
||||
var blobChunks []*BlobChunk
|
||||
for rows.Next() {
|
||||
var bc BlobChunk
|
||||
err := rows.Scan(&bc.BlobHash, &bc.ChunkHash, &bc.Offset, &bc.Length)
|
||||
err := rows.Scan(&bc.BlobID, &bc.ChunkHash, &bc.Offset, &bc.Length)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("scanning blob chunk: %w", err)
|
||||
}
|
||||
@@ -63,26 +63,61 @@ func (r *BlobChunkRepository) GetByBlobHash(ctx context.Context, blobHash string
|
||||
|
||||
func (r *BlobChunkRepository) GetByChunkHash(ctx context.Context, chunkHash string) (*BlobChunk, error) {
|
||||
query := `
|
||||
SELECT blob_hash, chunk_hash, offset, length
|
||||
SELECT blob_id, chunk_hash, offset, length
|
||||
FROM blob_chunks
|
||||
WHERE chunk_hash = ?
|
||||
LIMIT 1
|
||||
`
|
||||
|
||||
LogSQL("GetByChunkHash", query, chunkHash)
|
||||
var bc BlobChunk
|
||||
err := r.db.conn.QueryRowContext(ctx, query, chunkHash).Scan(
|
||||
&bc.BlobHash,
|
||||
&bc.BlobID,
|
||||
&bc.ChunkHash,
|
||||
&bc.Offset,
|
||||
&bc.Length,
|
||||
)
|
||||
|
||||
if err == sql.ErrNoRows {
|
||||
LogSQL("GetByChunkHash", "No rows found", chunkHash)
|
||||
return nil, nil
|
||||
}
|
||||
if err != nil {
|
||||
LogSQL("GetByChunkHash", "Error", chunkHash, err)
|
||||
return nil, fmt.Errorf("querying blob chunk: %w", err)
|
||||
}
|
||||
|
||||
LogSQL("GetByChunkHash", "Found blob", chunkHash, "blob", bc.BlobID)
|
||||
return &bc, nil
|
||||
}
|
||||
|
||||
// GetByChunkHashTx retrieves a blob chunk within a transaction
|
||||
func (r *BlobChunkRepository) GetByChunkHashTx(ctx context.Context, tx *sql.Tx, chunkHash string) (*BlobChunk, error) {
|
||||
query := `
|
||||
SELECT blob_id, chunk_hash, offset, length
|
||||
FROM blob_chunks
|
||||
WHERE chunk_hash = ?
|
||||
LIMIT 1
|
||||
`
|
||||
|
||||
LogSQL("GetByChunkHashTx", query, chunkHash)
|
||||
var bc BlobChunk
|
||||
err := tx.QueryRowContext(ctx, query, chunkHash).Scan(
|
||||
&bc.BlobID,
|
||||
&bc.ChunkHash,
|
||||
&bc.Offset,
|
||||
&bc.Length,
|
||||
)
|
||||
|
||||
if err == sql.ErrNoRows {
|
||||
LogSQL("GetByChunkHashTx", "No rows found", chunkHash)
|
||||
return nil, nil
|
||||
}
|
||||
if err != nil {
|
||||
LogSQL("GetByChunkHashTx", "Error", chunkHash, err)
|
||||
return nil, fmt.Errorf("querying blob chunk: %w", err)
|
||||
}
|
||||
|
||||
LogSQL("GetByChunkHashTx", "Found blob", chunkHash, "blob", bc.BlobID)
|
||||
return &bc, nil
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user