- Changed blob table to use ID (UUID) as primary key instead of hash
- Blob records are now created at packing start, enabling immediate chunk associations
- Implemented streaming chunking to process large files without memory exhaustion (see the sketch below)
- Fixed blob manifest generation to include all referenced blobs
- Updated all foreign key references from blob_hash to blob_id
- Added progress reporting and improved error handling
- Enforced encryption requirement for all blob packing
- Updated tests to use test encryption keys
- Added Cyrillic transliteration to README
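As a rough illustration of the streaming change, the sketch below passes a file handle straight to ChunkReader (the same API the test below exercises), rather than buffering the whole file first. The import path, input file name, and chunk size are placeholders, not the project's actual values.

package main

import (
	"fmt"
	"log"
	"os"

	"example.com/yourmodule/chunker" // hypothetical import path
)

func main() {
	// Open the input as an io.Reader instead of reading it fully into
	// memory; ChunkReader consumes the reader directly.
	f, err := os.Open("large-file.bin") // placeholder file name
	if err != nil {
		log.Fatal(err)
	}
	defer f.Close()

	c := chunker.NewChunker(64 * 1024) // 64 KiB average chunk size
	chunks, err := c.ChunkReader(f)
	if err != nil {
		log.Fatal(err)
	}
	fmt.Printf("produced %d chunks\n", len(chunks))
}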
package chunker

import (
	"bytes"
	"testing"
)

func TestChunkerExpectedChunkCount(t *testing.T) {
	tests := []struct {
		name         string
		fileSize     int
		avgChunkSize int64
		minExpected  int
		maxExpected  int
	}{
		{
			name:         "1MB file with 64KB average",
			fileSize:     1024 * 1024,
			avgChunkSize: 64 * 1024,
			minExpected:  8,  // At least half the expected count
			maxExpected:  32, // At most double the expected count
		},
		{
			name:         "10MB file with 256KB average",
			fileSize:     10 * 1024 * 1024,
			avgChunkSize: 256 * 1024,
			minExpected:  10, // FastCDC may produce larger chunks
			maxExpected:  80,
		},
		{
			name:         "512KB file with 64KB average",
			fileSize:     512 * 1024,
			avgChunkSize: 64 * 1024,
			minExpected:  4, // ~8 expected
			maxExpected:  16,
		},
	}

	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			chunker := NewChunker(tt.avgChunkSize)

			// Create data with some variation to trigger chunk boundaries
			data := make([]byte, tt.fileSize)
			for i := 0; i < len(data); i++ {
				// Use a pattern that should create boundaries
				data[i] = byte((i * 17) ^ (i >> 5))
			}

			chunks, err := chunker.ChunkReader(bytes.NewReader(data))
			if err != nil {
				t.Fatalf("chunking failed: %v", err)
			}

			t.Logf("Created %d chunks for %d bytes with %d average chunk size",
				len(chunks), tt.fileSize, tt.avgChunkSize)

			if len(chunks) < tt.minExpected {
				t.Errorf("too few chunks: got %d, expected at least %d",
					len(chunks), tt.minExpected)
			}
			if len(chunks) > tt.maxExpected {
				t.Errorf("too many chunks: got %d, expected at most %d",
					len(chunks), tt.maxExpected)
			}

			// Verify chunks reconstruct to original
			var reconstructed []byte
			for _, chunk := range chunks {
				reconstructed = append(reconstructed, chunk.Data...)
			}
			if !bytes.Equal(data, reconstructed) {
				t.Error("reconstructed data doesn't match original")
			}
		})
	}
}
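To run just this test with verbose output, assuming the package lives at ./chunker inside the module:

go test -v -run TestChunkerExpectedChunkCount ./chunker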