package chunker

import (
	"bytes"
	"testing"
)

func TestChunkerExpectedChunkCount(t *testing.T) {
	tests := []struct {
		name         string
		fileSize     int
		avgChunkSize int64
		minExpected  int
		maxExpected  int
	}{
		{
			name:         "1MB file with 64KB average",
			fileSize:     1024 * 1024,
			avgChunkSize: 64 * 1024,
			minExpected:  8,  // at least half the expected ~16 chunks
			maxExpected:  32, // at most double the expected count
		},
		{
			name:         "10MB file with 256KB average",
			fileSize:     10 * 1024 * 1024,
			avgChunkSize: 256 * 1024,
			minExpected:  10, // ~40 expected, but FastCDC may produce larger chunks
			maxExpected:  80,
		},
		{
			name:         "512KB file with 64KB average",
			fileSize:     512 * 1024,
			avgChunkSize: 64 * 1024,
			minExpected:  4, // ~8 expected
			maxExpected:  16,
		},
	}

	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			chunker := NewChunker(tt.avgChunkSize)

			// Fill the buffer with varied data so the rolling hash can find
			// chunk boundaries; uniform bytes would defeat content-defined
			// chunking.
			data := make([]byte, tt.fileSize)
			for i := 0; i < len(data); i++ {
				data[i] = byte((i * 17) ^ (i >> 5))
			}

			chunks, err := chunker.ChunkReader(bytes.NewReader(data))
			if err != nil {
				t.Fatalf("chunking failed: %v", err)
			}

			t.Logf("created %d chunks for %d bytes with %d average chunk size",
				len(chunks), tt.fileSize, tt.avgChunkSize)

			if len(chunks) < tt.minExpected {
				t.Errorf("too few chunks: got %d, expected at least %d", len(chunks), tt.minExpected)
			}
			if len(chunks) > tt.maxExpected {
				t.Errorf("too many chunks: got %d, expected at most %d", len(chunks), tt.maxExpected)
			}

			// Concatenating the chunks in order must reproduce the input exactly.
			var reconstructed []byte
			for _, chunk := range chunks {
				reconstructed = append(reconstructed, chunk.Data...)
			}
			if !bytes.Equal(data, reconstructed) {
				t.Error("reconstructed data doesn't match original")
			}
		})
	}
}
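// The test above compiles against a content-defined chunking API that this
// package is assumed to provide elsewhere. As a hypothetical orientation
// sketch (not the package's actual declarations), the minimal surface
// exercised here is:
//
//	type Chunk struct {
//		Data []byte // raw bytes of one content-defined chunk
//	}
//
//	func NewChunker(avgChunkSize int64) *Chunker
//	func (c *Chunker) ChunkReader(r io.Reader) ([]Chunk, error)
//
// NewChunker takes the target average chunk size in bytes, and ChunkReader
// consumes the stream and returns chunks whose in-order concatenation must
// equal the input. To run only this test:
//
//	go test -run TestChunkerExpectedChunkCount .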