- Changed blob table to use ID (UUID) as primary key instead of hash
- Blob records are now created at packing start, enabling immediate chunk associations
- Implemented streaming chunking to process large files without memory exhaustion (see the sketch below)
- Fixed blob manifest generation to include all referenced blobs
- Updated all foreign key references from blob_hash to blob_id
- Added progress reporting and improved error handling
- Enforced encryption requirement for all blob packing
- Updated tests to use test encryption keys
- Added Cyrillic transliteration to README
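As a rough illustration of the streaming change, the sketch below passes a file handle straight to ChunkReader (the same API the test below exercises), rather than buffering the whole file first. The import path, input file name, and chunk size are placeholders, not the project's actual values.

package main

import (
	"fmt"
	"log"
	"os"

	"example.com/yourmodule/chunker" // hypothetical import path
)

func main() {
	// Open the input as an io.Reader instead of reading it fully into
	// memory; ChunkReader consumes the reader directly.
	f, err := os.Open("large-file.bin") // placeholder file name
	if err != nil {
		log.Fatal(err)
	}
	defer f.Close()

	c := chunker.NewChunker(64 * 1024) // 64 KiB average chunk size
	chunks, err := c.ChunkReader(f)
	if err != nil {
		log.Fatal(err)
	}
	fmt.Printf("produced %d chunks\n", len(chunks))
}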
package chunker

import (
	"bytes"
	"testing"
)

func TestChunkerExpectedChunkCount(t *testing.T) {
	tests := []struct {
		name         string
		fileSize     int
		avgChunkSize int64
		minExpected  int
		maxExpected  int
	}{
		{
			name:         "1MB file with 64KB average",
			fileSize:     1024 * 1024,
			avgChunkSize: 64 * 1024,
			minExpected:  8,  // At least half the expected count
			maxExpected:  32, // At most double the expected count
		},
		{
			name:         "10MB file with 256KB average",
			fileSize:     10 * 1024 * 1024,
			avgChunkSize: 256 * 1024,
			minExpected:  10, // FastCDC may produce larger chunks
			maxExpected:  80,
		},
		{
			name:         "512KB file with 64KB average",
			fileSize:     512 * 1024,
			avgChunkSize: 64 * 1024,
			minExpected:  4, // ~8 expected
			maxExpected:  16,
		},
	}

	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			chunker := NewChunker(tt.avgChunkSize)

			// Create data with some variation to trigger chunk boundaries
			data := make([]byte, tt.fileSize)
			for i := 0; i < len(data); i++ {
				// Use a pattern that should create boundaries
				data[i] = byte((i * 17) ^ (i >> 5))
			}

			chunks, err := chunker.ChunkReader(bytes.NewReader(data))
			if err != nil {
				t.Fatalf("chunking failed: %v", err)
			}

			t.Logf("Created %d chunks for %d bytes with %d average chunk size",
				len(chunks), tt.fileSize, tt.avgChunkSize)

			if len(chunks) < tt.minExpected {
				t.Errorf("too few chunks: got %d, expected at least %d",
					len(chunks), tt.minExpected)
			}
			if len(chunks) > tt.maxExpected {
				t.Errorf("too many chunks: got %d, expected at most %d",
					len(chunks), tt.maxExpected)
			}

			// Verify chunks reconstruct to original
			var reconstructed []byte
			for _, chunk := range chunks {
				reconstructed = append(reconstructed, chunk.Data...)
			}
			if !bytes.Equal(data, reconstructed) {
				t.Error("reconstructed data doesn't match original")
			}
		})
	}
}
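To run just this test with verbose output, assuming the package lives at ./chunker inside the module:

go test -v -run TestChunkerExpectedChunkCount ./chunker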