Refactor blob storage to use UUID primary keys and implement streaming chunking
- Changed blob table to use ID (UUID) as primary key instead of hash
- Blob records are now created at packing start, enabling immediate chunk associations
- Implemented streaming chunking to process large files without memory exhaustion (sketched below)
- Fixed blob manifest generation to include all referenced blobs
- Updated all foreign key references from blob_hash to blob_id
- Added progress reporting and improved error handling
- Enforced encryption requirement for all blob packing
- Updated tests to use test encryption keys
- Added Cyrillic transliteration to README
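The streaming chunking change is exercised by the new test file below through NewChunker and ChunkReader. As a rough illustration of the idea, here is a minimal sketch of a streaming content-defined chunker with the same ChunkReader shape; the boundary rule (a toy running hash against a power-of-two mask) and the 4x-average hard ceiling are illustrative stand-ins, not the repository's actual FastCDC implementation:

    // Minimal sketch of a streaming content-defined chunker. NewChunker,
    // ChunkReader, and Chunk.Data mirror the interface used by the test
    // below; the boundary rule is a toy running hash, not real FastCDC.
    package main

    import (
    	"bufio"
    	"bytes"
    	"fmt"
    	"io"
    )

    type Chunk struct {
    	Data []byte
    }

    type Chunker struct {
    	avgSize int64 // target average chunk size; assumed a power of two here
    }

    func NewChunker(avgSize int64) *Chunker { return &Chunker{avgSize: avgSize} }

    // ChunkReader consumes r through a small buffered reader, so memory
    // use is bounded by the current chunk rather than the whole input.
    func (c *Chunker) ChunkReader(r io.Reader) ([]Chunk, error) {
    	br := bufio.NewReader(r)
    	mask := uint32(c.avgSize - 1) // boundary fires ~once per avgSize bytes
    	var (
    		chunks []Chunk
    		cur    bytes.Buffer
    		hash   uint32
    	)
    	for {
    		b, err := br.ReadByte()
    		if err == io.EOF {
    			break
    		}
    		if err != nil {
    			return nil, err
    		}
    		cur.WriteByte(b)
    		hash = hash*31 + uint32(b)
    		// Cut when the hash hits the mask, or at a 4x-average hard
    		// ceiling so no chunk can grow without bound.
    		if hash&mask == 0 || int64(cur.Len()) >= 4*c.avgSize {
    			chunks = append(chunks, Chunk{Data: append([]byte(nil), cur.Bytes()...)})
    			cur.Reset()
    			hash = 0
    		}
    	}
    	if cur.Len() > 0 {
    		chunks = append(chunks, Chunk{Data: append([]byte(nil), cur.Bytes()...)})
    	}
    	return chunks, nil
    }

    func main() {
    	// Same data pattern as the test: varied bytes that trigger boundaries.
    	data := make([]byte, 1024*1024)
    	for i := range data {
    		data[i] = byte((i * 17) ^ (i >> 5))
    	}
    	chunks, err := NewChunker(64 * 1024).ChunkReader(bytes.NewReader(data))
    	if err != nil {
    		panic(err)
    	}
    	fmt.Printf("%d bytes -> %d chunks\n", len(data), len(chunks))
    }

A production packer would typically hand each chunk to a callback or channel as it is cut rather than accumulating a slice, so memory stays bounded by the largest chunk; the slice return here simply mirrors the interface the test relies on.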
internal/chunker/chunker_isolated_test.go (new file, 77 lines)
@@ -0,0 +1,77 @@
package chunker

import (
	"bytes"
	"testing"
)

func TestChunkerExpectedChunkCount(t *testing.T) {
	tests := []struct {
		name         string
		fileSize     int
		avgChunkSize int64
		minExpected  int
		maxExpected  int
	}{
		{
			name:         "1MB file with 64KB average",
			fileSize:     1024 * 1024,
			avgChunkSize: 64 * 1024,
			minExpected:  8,  // At least half the expected count
			maxExpected:  32, // At most double the expected count
		},
		{
			name:         "10MB file with 256KB average",
			fileSize:     10 * 1024 * 1024,
			avgChunkSize: 256 * 1024,
			minExpected:  10, // FastCDC may produce larger chunks
			maxExpected:  80,
		},
		{
			name:         "512KB file with 64KB average",
			fileSize:     512 * 1024,
			avgChunkSize: 64 * 1024,
			minExpected:  4, // ~8 expected
			maxExpected:  16,
		},
	}

	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			chunker := NewChunker(tt.avgChunkSize)

			// Create data with some variation to trigger chunk boundaries
			data := make([]byte, tt.fileSize)
			for i := 0; i < len(data); i++ {
				// Use a pattern that should create boundaries
				data[i] = byte((i * 17) ^ (i >> 5))
			}

			chunks, err := chunker.ChunkReader(bytes.NewReader(data))
			if err != nil {
				t.Fatalf("chunking failed: %v", err)
			}

			t.Logf("Created %d chunks for %d bytes with %d average chunk size",
				len(chunks), tt.fileSize, tt.avgChunkSize)

			if len(chunks) < tt.minExpected {
				t.Errorf("too few chunks: got %d, expected at least %d",
					len(chunks), tt.minExpected)
			}
			if len(chunks) > tt.maxExpected {
				t.Errorf("too many chunks: got %d, expected at most %d",
					len(chunks), tt.maxExpected)
			}

			// Verify chunks reconstruct to original
			var reconstructed []byte
			for _, chunk := range chunks {
				reconstructed = append(reconstructed, chunk.Data...)
			}
			if !bytes.Equal(data, reconstructed) {
				t.Error("reconstructed data doesn't match original")
			}
		})
	}
}
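To run just this test with verbose output, the standard Go tooling invocation (assuming the module layout implied by the file path) is:

    go test ./internal/chunker -run TestChunkerExpectedChunkCount -v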