Refactor blob storage to use UUID primary keys and implement streaming chunking

- Changed the blobs table to use a UUID id as its primary key instead of blob_hash
- Blob records are now created at packing start, enabling immediate chunk associations
- Implemented streaming chunking to process large files without memory exhaustion (see the sketch after this list)
- Fixed blob manifest generation to include all referenced blobs
- Updated all foreign key references from blob_hash to blob_id
- Added progress reporting and improved error handling
- Enforced encryption requirement for all blob packing
- Updated tests to use test encryption keys
- Added Cyrillic transliteration to README
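
For context on the streaming chunking item above, a minimal sketch of the approach; the function name, fixed chunk size, and callback signature are illustrative assumptions, not the API added by this commit:

package main

import (
	"crypto/sha256"
	"encoding/hex"
	"io"
	"os"
)

// chunkFile reads a file through a fixed-size buffer, hashing and handing
// off one chunk at a time, so memory stays bounded by chunkSize regardless
// of file size. The real code may use content-defined chunking instead.
func chunkFile(path string, chunkSize int, emit func(hash string, data []byte) error) error {
	f, err := os.Open(path)
	if err != nil {
		return err
	}
	defer f.Close()

	buf := make([]byte, chunkSize)
	for {
		n, readErr := io.ReadFull(f, buf)
		if n > 0 {
			sum := sha256.Sum256(buf[:n])
			// emit must not retain buf; it is reused on the next iteration.
			if err := emit(hex.EncodeToString(sum[:]), buf[:n]); err != nil {
				return err
			}
		}
		if readErr == io.EOF || readErr == io.ErrUnexpectedEOF {
			return nil // clean end of file
		}
		if readErr != nil {
			return readErr
		}
	}
}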
2025-07-22 07:43:39 +02:00
parent 26db096913
commit 86b533d6ee
49 changed files with 5709 additions and 324 deletions


@@ -17,15 +17,27 @@ func NewBlobRepository(db *DB) *BlobRepository {
 func (r *BlobRepository) Create(ctx context.Context, tx *sql.Tx, blob *Blob) error {
 	query := `
-		INSERT INTO blobs (blob_hash, created_ts)
-		VALUES (?, ?)
+		INSERT INTO blobs (id, blob_hash, created_ts, finished_ts, uncompressed_size, compressed_size, uploaded_ts)
+		VALUES (?, ?, ?, ?, ?, ?, ?)
 	`
+	var finishedTS, uploadedTS *int64
+	if blob.FinishedTS != nil {
+		ts := blob.FinishedTS.Unix()
+		finishedTS = &ts
+	}
+	if blob.UploadedTS != nil {
+		ts := blob.UploadedTS.Unix()
+		uploadedTS = &ts
+	}
 	var err error
 	if tx != nil {
-		_, err = tx.ExecContext(ctx, query, blob.BlobHash, blob.CreatedTS.Unix())
+		_, err = tx.ExecContext(ctx, query, blob.ID, blob.Hash, blob.CreatedTS.Unix(),
+			finishedTS, blob.UncompressedSize, blob.CompressedSize, uploadedTS)
 	} else {
-		_, err = r.db.ExecWithLock(ctx, query, blob.BlobHash, blob.CreatedTS.Unix())
+		_, err = r.db.ExecWithLock(ctx, query, blob.ID, blob.Hash, blob.CreatedTS.Unix(),
+			finishedTS, blob.UncompressedSize, blob.CompressedSize, uploadedTS)
 	}
 	if err != nil {
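
With this change, Create runs at packing start: the caller supplies a UUID id, the content hash may still be empty, and finished_ts/uploaded_ts are stored as NULL until packing and upload complete. A hypothetical caller sketch, assuming it lives alongside BlobRepository and that the blob ID comes from github.com/google/uuid:

// registerBlob inserts the pending blob row so chunks can reference
// blob_id immediately (hypothetical helper, not part of this diff).
func registerBlob(ctx context.Context, repo *BlobRepository) (*Blob, error) {
	blob := &Blob{
		ID:        uuid.New().String(), // assumed UUID source
		CreatedTS: time.Now(),
		// Hash is empty and FinishedTS/UploadedTS are nil here, so
		// finished_ts and uploaded_ts land in the table as NULL.
	}
	// Passing a nil tx takes the ExecWithLock path above.
	if err := repo.Create(ctx, nil, blob); err != nil {
		return nil, err
	}
	return blob, nil
}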
@@ -37,17 +49,23 @@ func (r *BlobRepository) Create(ctx context.Context, tx *sql.Tx, blob *Blob) err
 func (r *BlobRepository) GetByHash(ctx context.Context, hash string) (*Blob, error) {
 	query := `
-		SELECT blob_hash, created_ts
+		SELECT id, blob_hash, created_ts, finished_ts, uncompressed_size, compressed_size, uploaded_ts
 		FROM blobs
 		WHERE blob_hash = ?
 	`
 	var blob Blob
 	var createdTSUnix int64
+	var finishedTSUnix, uploadedTSUnix sql.NullInt64
 	err := r.db.conn.QueryRowContext(ctx, query, hash).Scan(
-		&blob.BlobHash,
+		&blob.ID,
+		&blob.Hash,
 		&createdTSUnix,
+		&finishedTSUnix,
+		&blob.UncompressedSize,
+		&blob.CompressedSize,
+		&uploadedTSUnix,
 	)
 	if err == sql.ErrNoRows {
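
Note the lookup convention here (and in GetByID below): a missing row is reported as a nil blob with a nil error rather than surfacing sql.ErrNoRows. A caller sketch, assuming a repo variable of type *BlobRepository:

blob, err := repo.GetByHash(ctx, hash)
if err != nil {
	return err // the query itself failed
}
if blob == nil {
	// no blob with that hash exists; a miss, not an error
}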
@@ -58,39 +76,100 @@ func (r *BlobRepository) GetByHash(ctx context.Context, hash string) (*Blob, err
 	}
 	blob.CreatedTS = time.Unix(createdTSUnix, 0)
+	if finishedTSUnix.Valid {
+		ts := time.Unix(finishedTSUnix.Int64, 0)
+		blob.FinishedTS = &ts
+	}
+	if uploadedTSUnix.Valid {
+		ts := time.Unix(uploadedTSUnix.Int64, 0)
+		blob.UploadedTS = &ts
+	}
 	return &blob, nil
 }
 
-func (r *BlobRepository) List(ctx context.Context, limit, offset int) ([]*Blob, error) {
+// GetByID retrieves a blob by its ID
+func (r *BlobRepository) GetByID(ctx context.Context, id string) (*Blob, error) {
 	query := `
-		SELECT blob_hash, created_ts
+		SELECT id, blob_hash, created_ts, finished_ts, uncompressed_size, compressed_size, uploaded_ts
 		FROM blobs
-		ORDER BY blob_hash
-		LIMIT ? OFFSET ?
+		WHERE id = ?
 	`
-	rows, err := r.db.conn.QueryContext(ctx, query, limit, offset)
+	var blob Blob
+	var createdTSUnix int64
+	var finishedTSUnix, uploadedTSUnix sql.NullInt64
+	err := r.db.conn.QueryRowContext(ctx, query, id).Scan(
+		&blob.ID,
+		&blob.Hash,
+		&createdTSUnix,
+		&finishedTSUnix,
+		&blob.UncompressedSize,
+		&blob.CompressedSize,
+		&uploadedTSUnix,
+	)
+	if err == sql.ErrNoRows {
+		return nil, nil
+	}
 	if err != nil {
-		return nil, fmt.Errorf("querying blobs: %w", err)
-	}
-	defer CloseRows(rows)
-
-	var blobs []*Blob
-	for rows.Next() {
-		var blob Blob
-		var createdTSUnix int64
-		err := rows.Scan(
-			&blob.BlobHash,
-			&createdTSUnix,
-		)
-		if err != nil {
-			return nil, fmt.Errorf("scanning blob: %w", err)
-		}
-		blob.CreatedTS = time.Unix(createdTSUnix, 0)
-		blobs = append(blobs, &blob)
+		return nil, fmt.Errorf("querying blob: %w", err)
 	}
-	return blobs, rows.Err()
+	blob.CreatedTS = time.Unix(createdTSUnix, 0)
+	if finishedTSUnix.Valid {
+		ts := time.Unix(finishedTSUnix.Int64, 0)
+		blob.FinishedTS = &ts
+	}
+	if uploadedTSUnix.Valid {
+		ts := time.Unix(uploadedTSUnix.Int64, 0)
+		blob.UploadedTS = &ts
+	}
+	return &blob, nil
+}
+
+// UpdateFinished updates a blob when it's finalized
+func (r *BlobRepository) UpdateFinished(ctx context.Context, tx *sql.Tx, id string, hash string, uncompressedSize, compressedSize int64) error {
+	query := `
+		UPDATE blobs
+		SET blob_hash = ?, finished_ts = ?, uncompressed_size = ?, compressed_size = ?
+		WHERE id = ?
+	`
+	now := time.Now().Unix()
+	var err error
+	if tx != nil {
+		_, err = tx.ExecContext(ctx, query, hash, now, uncompressedSize, compressedSize, id)
+	} else {
+		_, err = r.db.ExecWithLock(ctx, query, hash, now, uncompressedSize, compressedSize, id)
+	}
+	if err != nil {
+		return fmt.Errorf("updating blob: %w", err)
+	}
+	return nil
+}
+
+// UpdateUploaded marks a blob as uploaded
+func (r *BlobRepository) UpdateUploaded(ctx context.Context, tx *sql.Tx, id string) error {
+	query := `
+		UPDATE blobs
+		SET uploaded_ts = ?
+		WHERE id = ?
+	`
+	now := time.Now().Unix()
+	var err error
+	if tx != nil {
+		_, err = tx.ExecContext(ctx, query, now, id)
+	} else {
+		_, err = r.db.ExecWithLock(ctx, query, now, id)
+	}
+	if err != nil {
+		return fmt.Errorf("marking blob as uploaded: %w", err)
+	}
+	return nil
+}
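
Taken together, the new methods imply a three-step blob lifecycle. A hypothetical end-to-end sketch, assuming it lives in the same package as BlobRepository, with the packing and upload work elided:

func packAndUpload(ctx context.Context, repo *BlobRepository) error {
	// 1. Register the blob before packing so chunks can reference its ID.
	blob := &Blob{ID: uuid.New().String(), CreatedTS: time.Now()} // uuid package assumed
	if err := repo.Create(ctx, nil, blob); err != nil {
		return err
	}

	// ... stream chunks into the blob, accumulating sizes and the content hash ...
	var finalHash string
	var uncompressedSize, compressedSize int64

	// 2. Finalize: record the hash and sizes, which sets finished_ts.
	if err := repo.UpdateFinished(ctx, nil, blob.ID, finalHash, uncompressedSize, compressedSize); err != nil {
		return err
	}

	// ... upload the packed blob to remote storage ...

	// 3. Record the completed upload, which sets uploaded_ts.
	return repo.UpdateUploaded(ctx, nil, blob.ID)
}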