Refactor blob storage to use UUID primary keys and implement streaming chunking

- Changed blob table to use ID (UUID) as primary key instead of hash
- Blob records are now created at packing start, enabling immediate chunk associations
- Implemented streaming chunking to process large files without memory exhaustion
- Fixed blob manifest generation to include all referenced blobs
- Updated all foreign key references from blob_hash to blob_id
- Added progress reporting and improved error handling
- Enforced encryption requirement for all blob packing
- Updated tests to use test encryption keys
- Added Cyrillic transliteration to README
This commit is contained in:
2025-07-22 07:43:39 +02:00
parent 26db096913
commit 86b533d6ee
49 changed files with 5709 additions and 324 deletions

View File

@@ -15,7 +15,8 @@ func TestBlobRepository(t *testing.T) {
 // Test Create
 blob := &Blob{
-BlobHash: "blobhash123",
+ID: "test-blob-id-123",
+Hash: "blobhash123",
 CreatedTS: time.Now().Truncate(time.Second),
 }
@@ -25,23 +26,36 @@ func TestBlobRepository(t *testing.T) {
 }
 // Test GetByHash
-retrieved, err := repo.GetByHash(ctx, blob.BlobHash)
+retrieved, err := repo.GetByHash(ctx, blob.Hash)
 if err != nil {
 t.Fatalf("failed to get blob: %v", err)
 }
 if retrieved == nil {
 t.Fatal("expected blob, got nil")
 }
-if retrieved.BlobHash != blob.BlobHash {
-t.Errorf("blob hash mismatch: got %s, want %s", retrieved.BlobHash, blob.BlobHash)
+if retrieved.Hash != blob.Hash {
+t.Errorf("blob hash mismatch: got %s, want %s", retrieved.Hash, blob.Hash)
 }
 if !retrieved.CreatedTS.Equal(blob.CreatedTS) {
 t.Errorf("created timestamp mismatch: got %v, want %v", retrieved.CreatedTS, blob.CreatedTS)
 }
-// Test List
+// Test GetByID
+retrievedByID, err := repo.GetByID(ctx, blob.ID)
+if err != nil {
+t.Fatalf("failed to get blob by ID: %v", err)
+}
+if retrievedByID == nil {
+t.Fatal("expected blob, got nil")
+}
+if retrievedByID.ID != blob.ID {
+t.Errorf("blob ID mismatch: got %s, want %s", retrievedByID.ID, blob.ID)
+}
+// Test with second blob
 blob2 := &Blob{
-BlobHash: "blobhash456",
+ID: "test-blob-id-456",
+Hash: "blobhash456",
 CreatedTS: time.Now().Truncate(time.Second),
 }
 err = repo.Create(ctx, nil, blob2)
@@ -49,29 +63,45 @@ func TestBlobRepository(t *testing.T) {
 t.Fatalf("failed to create second blob: %v", err)
 }
-blobs, err := repo.List(ctx, 10, 0)
+// Test UpdateFinished
+now := time.Now()
+err = repo.UpdateFinished(ctx, nil, blob.ID, blob.Hash, 1000, 500)
 if err != nil {
-t.Fatalf("failed to list blobs: %v", err)
-}
-if len(blobs) != 2 {
-t.Errorf("expected 2 blobs, got %d", len(blobs))
+t.Fatalf("failed to update blob as finished: %v", err)
 }
-// Test pagination
-blobs, err = repo.List(ctx, 1, 0)
+// Verify update
+updated, err := repo.GetByID(ctx, blob.ID)
 if err != nil {
-t.Fatalf("failed to list blobs with limit: %v", err)
+t.Fatalf("failed to get updated blob: %v", err)
 }
-if len(blobs) != 1 {
-t.Errorf("expected 1 blob with limit, got %d", len(blobs))
+if updated.FinishedTS == nil {
+t.Fatal("expected finished timestamp to be set")
 }
+if updated.UncompressedSize != 1000 {
+t.Errorf("expected uncompressed size 1000, got %d", updated.UncompressedSize)
+}
+if updated.CompressedSize != 500 {
+t.Errorf("expected compressed size 500, got %d", updated.CompressedSize)
+}
-blobs, err = repo.List(ctx, 1, 1)
+// Test UpdateUploaded
+err = repo.UpdateUploaded(ctx, nil, blob.ID)
 if err != nil {
-t.Fatalf("failed to list blobs with offset: %v", err)
+t.Fatalf("failed to update blob as uploaded: %v", err)
 }
-if len(blobs) != 1 {
-t.Errorf("expected 1 blob with offset, got %d", len(blobs))
+// Verify upload update
+uploaded, err := repo.GetByID(ctx, blob.ID)
+if err != nil {
+t.Fatalf("failed to get uploaded blob: %v", err)
+}
+if uploaded.UploadedTS == nil {
+t.Fatal("expected uploaded timestamp to be set")
+}
+// Allow 1 second tolerance for timestamp comparison
+if uploaded.UploadedTS.Before(now.Add(-1 * time.Second)) {
+t.Error("uploaded timestamp should be around test time")
 }
 }
@@ -83,7 +113,8 @@ func TestBlobRepositoryDuplicate(t *testing.T) {
 repo := NewBlobRepository(db)
 blob := &Blob{
-BlobHash: "duplicate_blob",
+ID: "duplicate-test-id",
+Hash: "duplicate_blob",
 CreatedTS: time.Now().Truncate(time.Second),
 }