package chunker

import (
	"bytes"
	"crypto/rand"
	"testing"
)

func TestChunker(t *testing.T) {
	t.Run("small file produces single chunk", func(t *testing.T) {
		chunker := NewChunker(1024 * 1024)         // 1MB average
		data := bytes.Repeat([]byte("hello"), 100) // 500 bytes

		chunks, err := chunker.ChunkReader(bytes.NewReader(data))
		if err != nil {
			t.Fatalf("chunking failed: %v", err)
		}
		if len(chunks) != 1 {
			t.Errorf("expected 1 chunk, got %d", len(chunks))
		}
		if chunks[0].Size != int64(len(data)) {
			t.Errorf("expected chunk size %d, got %d", len(data), chunks[0].Size)
		}
	})

	t.Run("large file produces multiple chunks", func(t *testing.T) {
		chunker := NewChunker(256 * 1024) // 256KB average chunk size

		// Generate 2MB of random data
		data := make([]byte, 2*1024*1024)
		if _, err := rand.Read(data); err != nil {
			t.Fatalf("failed to generate random data: %v", err)
		}

		chunks, err := chunker.ChunkReader(bytes.NewReader(data))
		if err != nil {
			t.Fatalf("chunking failed: %v", err)
		}

		// Should produce multiple chunks - with FastCDC we expect
		// around 8 chunks for 2MB with a 256KB average
		if len(chunks) < 4 || len(chunks) > 16 {
			t.Errorf("expected 4-16 chunks, got %d", len(chunks))
		}

		// Verify chunks reconstruct the original data
		var reconstructed []byte
		for _, chunk := range chunks {
			reconstructed = append(reconstructed, chunk.Data...)
		}
		if !bytes.Equal(data, reconstructed) {
			t.Error("reconstructed data doesn't match original")
		}

		// Verify offsets
		var expectedOffset int64
		for i, chunk := range chunks {
			if chunk.Offset != expectedOffset {
				t.Errorf("chunk %d: expected offset %d, got %d", i, expectedOffset, chunk.Offset)
			}
			expectedOffset += chunk.Size
		}
	})

	t.Run("deterministic chunking", func(t *testing.T) {
		chunker1 := NewChunker(256 * 1024)
		chunker2 := NewChunker(256 * 1024)

		// Use deterministic data
		data := bytes.Repeat([]byte("abcdefghijklmnopqrstuvwxyz"), 20000) // ~520KB

		chunks1, err := chunker1.ChunkReader(bytes.NewReader(data))
		if err != nil {
			t.Fatalf("chunking failed: %v", err)
		}
		chunks2, err := chunker2.ChunkReader(bytes.NewReader(data))
		if err != nil {
			t.Fatalf("chunking failed: %v", err)
		}

		// Should produce the same chunks
		if len(chunks1) != len(chunks2) {
			t.Fatalf("different number of chunks: %d vs %d", len(chunks1), len(chunks2))
		}
		for i := range chunks1 {
			if chunks1[i].Hash != chunks2[i].Hash {
				t.Errorf("chunk %d: different hashes", i)
			}
			if chunks1[i].Size != chunks2[i].Size {
				t.Errorf("chunk %d: different sizes", i)
			}
		}
	})
}

func TestChunkBoundaries(t *testing.T) {
	chunker := NewChunker(256 * 1024) // 256KB average

	// FastCDC uses avg/4 for min and avg*4 for max
	avgSize := int64(256 * 1024)
	minSize := avgSize / 4
	maxSize := avgSize * 4

	// Test that the minimum chunk size is respected
	data := make([]byte, minSize+1024)
	if _, err := rand.Read(data); err != nil {
		t.Fatalf("failed to generate random data: %v", err)
	}

	chunks, err := chunker.ChunkReader(bytes.NewReader(data))
	if err != nil {
		t.Fatalf("chunking failed: %v", err)
	}

	for i, chunk := range chunks {
		// The last chunk may be smaller than the minimum
		if i < len(chunks)-1 && chunk.Size < minSize {
			t.Errorf("chunk %d size %d is below minimum %d", i, chunk.Size, minSize)
		}
		if chunk.Size > maxSize {
			t.Errorf("chunk %d size %d exceeds maximum %d", i, chunk.Size, maxSize)
		}
	}
}
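
// Note: the tests above assume a Chunker API along these lines. This is a
// sketch inferred from the test code, not the actual implementation — the
// Hash type in particular is a guess; the tests only require that it be
// comparable with != (an array or string, not a slice):
//
//	type Chunk struct {
//		Data   []byte   // chunk payload
//		Hash   [32]byte // content hash of Data, e.g. SHA-256 (assumed)
//		Offset int64    // byte offset of the chunk within the input stream
//		Size   int64    // length of Data in bytes
//	}
//
//	// NewChunker returns a FastCDC chunker with the given average chunk
//	// size; the boundary tests assume min = avg/4 and max = avg*4.
//	func NewChunker(avgSize int) *Chunker
//
//	// ChunkReader reads r to EOF and returns its content-defined chunks.
//	func (c *Chunker) ChunkReader(r io.Reader) ([]Chunk, error)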