Refactor blob storage to use UUID primary keys and implement streaming chunking

- Changed blob table to use ID (UUID) as primary key instead of hash
- Blob records are now created when packing starts, so chunks can be associated with a blob immediately
- Implemented streaming chunking to process large files without memory exhaustion
- Fixed blob manifest generation to include all referenced blobs
- Updated all foreign key references from blob_hash to blob_id
- Added progress reporting and improved error handling
- Enforced encryption requirement for all blob packing
- Updated tests to use test encryption keys
- Added Cyrillic transliteration to README
This commit is contained in:
2025-07-22 07:43:39 +02:00
parent 26db096913
commit 86b533d6ee
49 changed files with 5709 additions and 324 deletions

View File

@@ -14,7 +14,7 @@ func TestBlobChunkRepository(t *testing.T) {
// Test Create
bc1 := &BlobChunk{
BlobHash: "blob1",
BlobID: "blob1-uuid",
ChunkHash: "chunk1",
Offset: 0,
Length: 1024,
@@ -27,7 +27,7 @@ func TestBlobChunkRepository(t *testing.T) {
// Add more chunks to the same blob
bc2 := &BlobChunk{
BlobHash: "blob1",
BlobID: "blob1-uuid",
ChunkHash: "chunk2",
Offset: 1024,
Length: 2048,
@@ -38,7 +38,7 @@ func TestBlobChunkRepository(t *testing.T) {
}
bc3 := &BlobChunk{
BlobHash: "blob1",
BlobID: "blob1-uuid",
ChunkHash: "chunk3",
Offset: 3072,
Length: 512,
@@ -48,8 +48,8 @@ func TestBlobChunkRepository(t *testing.T) {
t.Fatalf("failed to create third blob chunk: %v", err)
}
// Test GetByBlobHash
chunks, err := repo.GetByBlobHash(ctx, "blob1")
// Test GetByBlobID
chunks, err := repo.GetByBlobID(ctx, "blob1-uuid")
if err != nil {
t.Fatalf("failed to get blob chunks: %v", err)
}
@@ -73,8 +73,8 @@ func TestBlobChunkRepository(t *testing.T) {
if bc == nil {
t.Fatal("expected blob chunk, got nil")
}
if bc.BlobHash != "blob1" {
t.Errorf("wrong blob hash: expected blob1, got %s", bc.BlobHash)
if bc.BlobID != "blob1-uuid" {
t.Errorf("wrong blob ID: expected blob1-uuid, got %s", bc.BlobID)
}
if bc.Offset != 1024 {
t.Errorf("wrong offset: expected 1024, got %d", bc.Offset)
@@ -100,10 +100,10 @@ func TestBlobChunkRepositoryMultipleBlobs(t *testing.T) {
// Create chunks across multiple blobs
// Some chunks are shared between blobs (deduplication scenario)
blobChunks := []BlobChunk{
{BlobHash: "blob1", ChunkHash: "chunk1", Offset: 0, Length: 1024},
{BlobHash: "blob1", ChunkHash: "chunk2", Offset: 1024, Length: 1024},
{BlobHash: "blob2", ChunkHash: "chunk2", Offset: 0, Length: 1024}, // chunk2 is shared
{BlobHash: "blob2", ChunkHash: "chunk3", Offset: 1024, Length: 1024},
{BlobID: "blob1-uuid", ChunkHash: "chunk1", Offset: 0, Length: 1024},
{BlobID: "blob1-uuid", ChunkHash: "chunk2", Offset: 1024, Length: 1024},
{BlobID: "blob2-uuid", ChunkHash: "chunk2", Offset: 0, Length: 1024}, // chunk2 is shared
{BlobID: "blob2-uuid", ChunkHash: "chunk3", Offset: 1024, Length: 1024},
}
for _, bc := range blobChunks {
@@ -114,7 +114,7 @@ func TestBlobChunkRepositoryMultipleBlobs(t *testing.T) {
}
// Verify blob1 chunks
chunks, err := repo.GetByBlobHash(ctx, "blob1")
chunks, err := repo.GetByBlobID(ctx, "blob1-uuid")
if err != nil {
t.Fatalf("failed to get blob1 chunks: %v", err)
}
@@ -123,7 +123,7 @@ func TestBlobChunkRepositoryMultipleBlobs(t *testing.T) {
}
// Verify blob2 chunks
chunks, err = repo.GetByBlobHash(ctx, "blob2")
chunks, err = repo.GetByBlobID(ctx, "blob2-uuid")
if err != nil {
t.Fatalf("failed to get blob2 chunks: %v", err)
}
@@ -140,7 +140,7 @@ func TestBlobChunkRepositoryMultipleBlobs(t *testing.T) {
t.Fatal("expected shared chunk, got nil")
}
// GetByChunkHash returns the first match, which should be blob1-uuid
if bc.BlobHash != "blob1" {
t.Errorf("expected blob1 for shared chunk, got %s", bc.BlobHash)
if bc.BlobID != "blob1-uuid" {
t.Errorf("expected blob1-uuid for shared chunk, got %s", bc.BlobID)
}
}