vaultik/internal/chunker/chunker_isolated_test.go
sneak 86b533d6ee Refactor blob storage to use UUID primary keys and implement streaming chunking
- Changed blob table to use ID (UUID) as primary key instead of hash
- Blob records are now created at packing start, enabling immediate chunk associations
- Implemented streaming chunking to process large files without memory exhaustion (sketched below)
- Fixed blob manifest generation to include all referenced blobs
- Updated all foreign key references from blob_hash to blob_id
- Added progress reporting and improved error handling
- Enforced encryption requirement for all blob packing
- Updated tests to use test encryption keys
- Added Cyrillic transliteration to README
2025-07-22 07:43:39 +02:00
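
A minimal sketch of what streaming chunking means here, not the vaultik implementation: memory use stays bounded by the chunk size rather than the file size, because each chunk is handed off for hashing/packing as soon as its boundary is found. Fixed-size splitting stands in for FastCDC boundary detection, and the names chunkStream and emit are hypothetical.

package main

import (
	"bytes"
	"fmt"
	"io"
)

// chunkStream reads from r and hands each chunk to emit as soon as it is
// complete, so the whole input is never held in memory at once.
// (Hypothetical sketch: a real chunker would find content-defined boundaries
// instead of fixed-size ones.)
func chunkStream(r io.Reader, chunkSize int, emit func(chunk []byte) error) error {
	buf := make([]byte, chunkSize)
	for {
		n, err := io.ReadFull(r, buf)
		if n > 0 {
			// Copy because buf is reused on the next iteration.
			chunk := make([]byte, n)
			copy(chunk, buf[:n])
			if cbErr := emit(chunk); cbErr != nil {
				return cbErr
			}
		}
		if err == io.EOF || err == io.ErrUnexpectedEOF {
			return nil // input exhausted
		}
		if err != nil {
			return err
		}
	}
}

func main() {
	data := bytes.Repeat([]byte("example"), 100000) // ~700 KB of input
	count := 0
	err := chunkStream(bytes.NewReader(data), 64*1024, func(chunk []byte) error {
		count++ // a real caller would hash, encrypt, and pack the chunk here
		return nil
	})
	if err != nil {
		panic(err)
	}
	fmt.Printf("emitted %d chunks\n", count)
}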

package chunker

import (
	"bytes"
	"testing"
)

func TestChunkerExpectedChunkCount(t *testing.T) {
	tests := []struct {
		name         string
		fileSize     int
		avgChunkSize int64
		minExpected  int
		maxExpected  int
	}{
		{
			name:         "1MB file with 64KB average",
			fileSize:     1024 * 1024,
			avgChunkSize: 64 * 1024,
			minExpected:  8,  // At least half the expected count
			maxExpected:  32, // At most double the expected count
		},
		{
			name:         "10MB file with 256KB average",
			fileSize:     10 * 1024 * 1024,
			avgChunkSize: 256 * 1024,
			minExpected:  10, // FastCDC may produce larger chunks
			maxExpected:  80,
		},
		{
			name:         "512KB file with 64KB average",
			fileSize:     512 * 1024,
			avgChunkSize: 64 * 1024,
			minExpected:  4, // ~8 expected
			maxExpected:  16,
		},
	}

	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			chunker := NewChunker(tt.avgChunkSize)

			// Create data with some variation to trigger chunk boundaries.
			data := make([]byte, tt.fileSize)
			for i := 0; i < len(data); i++ {
				// Use a pattern that should create boundaries.
				data[i] = byte((i * 17) ^ (i >> 5))
			}

			chunks, err := chunker.ChunkReader(bytes.NewReader(data))
			if err != nil {
				t.Fatalf("chunking failed: %v", err)
			}

			t.Logf("Created %d chunks for %d bytes with %d average chunk size",
				len(chunks), tt.fileSize, tt.avgChunkSize)

			if len(chunks) < tt.minExpected {
				t.Errorf("too few chunks: got %d, expected at least %d",
					len(chunks), tt.minExpected)
			}
			if len(chunks) > tt.maxExpected {
				t.Errorf("too many chunks: got %d, expected at most %d",
					len(chunks), tt.maxExpected)
			}

			// Verify that the chunks reconstruct the original data.
			var reconstructed []byte
			for _, chunk := range chunks {
				reconstructed = append(reconstructed, chunk.Data...)
			}
			if !bytes.Equal(data, reconstructed) {
				t.Error("reconstructed data doesn't match original")
			}
		})
	}
}