- Changed blob table to use ID (UUID) as primary key instead of hash
- Blob records are now created at packing start, enabling immediate chunk associations
- Implemented streaming chunking to process large files without memory exhaustion
- Fixed blob manifest generation to include all referenced blobs
- Updated all foreign key references from blob_hash to blob_id
- Added progress reporting and improved error handling
- Enforced encryption requirement for all blob packing
- Updated tests to use test encryption keys
- Added Cyrillic transliteration to README
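The chunker test file below exercises the streaming entry point mentioned in the commit notes. As a rough illustration of the memory-exhaustion fix, a caller can hand `ChunkReader` an `*os.File` directly, so input is consumed incrementally rather than read into memory up front. This is a minimal sketch assuming only the `NewChunker`/`ChunkReader` API seen in the tests; the module path and file name are placeholders, and the real packer presumably hands chunks off to blob storage as they are produced instead of collecting the whole slice.

```go
package main

import (
	"fmt"
	"log"
	"os"

	"example.com/backup/chunker" // hypothetical module path
)

func main() {
	// Pass the open file straight to the chunker instead of
	// slurping it into memory first with os.ReadFile.
	f, err := os.Open("large-file.bin")
	if err != nil {
		log.Fatal(err)
	}
	defer f.Close()

	c := chunker.NewChunker(256 * 1024) // 256KB average chunk size
	chunks, err := c.ChunkReader(f)
	if err != nil {
		log.Fatal(err)
	}
	for _, ch := range chunks {
		fmt.Printf("offset=%d size=%d hash=%v\n", ch.Offset, ch.Size, ch.Hash)
	}
}
```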
package chunker

import (
	"bytes"
	"crypto/rand"
	"testing"
)

func TestChunker(t *testing.T) {
	t.Run("small file produces single chunk", func(t *testing.T) {
		chunker := NewChunker(1024 * 1024)          // 1MB average
		data := bytes.Repeat([]byte("hello"), 100) // 500 bytes

		chunks, err := chunker.ChunkReader(bytes.NewReader(data))
		if err != nil {
			t.Fatalf("chunking failed: %v", err)
		}

		if len(chunks) != 1 {
			t.Errorf("expected 1 chunk, got %d", len(chunks))
		}

		if chunks[0].Size != int64(len(data)) {
			t.Errorf("expected chunk size %d, got %d", len(data), chunks[0].Size)
		}
	})

	t.Run("large file produces multiple chunks", func(t *testing.T) {
		chunker := NewChunker(256 * 1024) // 256KB average chunk size

		// Generate 2MB of random data
		data := make([]byte, 2*1024*1024)
		if _, err := rand.Read(data); err != nil {
			t.Fatalf("failed to generate random data: %v", err)
		}

		chunks, err := chunker.ChunkReader(bytes.NewReader(data))
		if err != nil {
			t.Fatalf("chunking failed: %v", err)
		}

		// Should produce multiple chunks - with FastCDC we expect around 8 chunks for 2MB with 256KB average
		if len(chunks) < 4 || len(chunks) > 16 {
			t.Errorf("expected 4-16 chunks, got %d", len(chunks))
		}

		// Verify chunks reconstruct original data
		var reconstructed []byte
		for _, chunk := range chunks {
			reconstructed = append(reconstructed, chunk.Data...)
		}

		if !bytes.Equal(data, reconstructed) {
			t.Error("reconstructed data doesn't match original")
		}

		// Verify offsets
		var expectedOffset int64
		for i, chunk := range chunks {
			if chunk.Offset != expectedOffset {
				t.Errorf("chunk %d: expected offset %d, got %d", i, expectedOffset, chunk.Offset)
			}
			expectedOffset += chunk.Size
		}
	})

	t.Run("deterministic chunking", func(t *testing.T) {
		chunker1 := NewChunker(256 * 1024)
		chunker2 := NewChunker(256 * 1024)

		// Use deterministic data
		data := bytes.Repeat([]byte("abcdefghijklmnopqrstuvwxyz"), 20000) // ~520KB

		chunks1, err := chunker1.ChunkReader(bytes.NewReader(data))
		if err != nil {
			t.Fatalf("chunking failed: %v", err)
		}

		chunks2, err := chunker2.ChunkReader(bytes.NewReader(data))
		if err != nil {
			t.Fatalf("chunking failed: %v", err)
		}

		// Should produce same chunks
		if len(chunks1) != len(chunks2) {
			t.Fatalf("different number of chunks: %d vs %d", len(chunks1), len(chunks2))
		}

		for i := range chunks1 {
			if chunks1[i].Hash != chunks2[i].Hash {
				t.Errorf("chunk %d: different hashes", i)
			}
			if chunks1[i].Size != chunks2[i].Size {
				t.Errorf("chunk %d: different sizes", i)
			}
		}
	})
}

func TestChunkBoundaries(t *testing.T) {
	chunker := NewChunker(256 * 1024) // 256KB average

	// FastCDC uses avg/4 for min and avg*4 for max
	avgSize := int64(256 * 1024)
	minSize := avgSize / 4
	maxSize := avgSize * 4

	// Test that minimum chunk size is respected
	data := make([]byte, minSize+1024)
	if _, err := rand.Read(data); err != nil {
		t.Fatalf("failed to generate random data: %v", err)
	}

	chunks, err := chunker.ChunkReader(bytes.NewReader(data))
	if err != nil {
		t.Fatalf("chunking failed: %v", err)
	}

	for i, chunk := range chunks {
		// Last chunk can be smaller than minimum
		if i < len(chunks)-1 && chunk.Size < minSize {
			t.Errorf("chunk %d size %d is below minimum %d", i, chunk.Size, minSize)
		}
		if chunk.Size > maxSize {
			t.Errorf("chunk %d size %d exceeds maximum %d", i, chunk.Size, maxSize)
		}
	}
}
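For reference, the declarations these tests assume look roughly like the following. This is a hypothetical sketch inferred from the assertions above and from the FastCDC comment in TestChunkBoundaries (minimum avg/4, maximum avg*4); the actual types in the chunker package may differ.

```go
package chunker

// Chunk is one content-defined chunk of the input stream
// (hypothetical shape, inferred from the tests).
type Chunk struct {
	Hash   string // content hash of Data; a comparable type, e.g. hex-encoded
	Data   []byte // chunk payload
	Offset int64  // byte offset of the chunk within the input
	Size   int64  // len(Data)
}

// Chunker splits a stream into content-defined chunks via FastCDC.
type Chunker struct {
	avgSize int64
	minSize int64 // avgSize / 4, per the comment in TestChunkBoundaries
	maxSize int64 // avgSize * 4
}

// NewChunker returns a chunker targeting the given average chunk size.
func NewChunker(avgSize int64) *Chunker {
	return &Chunker{
		avgSize: avgSize,
		minSize: avgSize / 4,
		maxSize: avgSize * 4,
	}
}

// ChunkReader would carry the signature the tests call:
//
//	func (c *Chunker) ChunkReader(r io.Reader) ([]Chunk, error)
```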