Refactor blob storage to use UUID primary keys and implement streaming chunking
- Changed blob table to use ID (UUID) as primary key instead of hash
- Blob records are now created at packing start, enabling immediate chunk associations
- Implemented streaming chunking to process large files without memory exhaustion
- Fixed blob manifest generation to include all referenced blobs
- Updated all foreign key references from blob_hash to blob_id
- Added progress reporting and improved error handling
- Enforced encryption requirement for all blob packing
- Updated tests to use test encryption keys
- Added Cyrillic transliteration to README
This commit is contained in:
6
internal/blob/errors.go
Normal file
6
internal/blob/errors.go
Normal file
@@ -0,0 +1,6 @@
|
||||
package blob
|
||||
|
||||
import "errors"
|
||||
|
||||
// ErrBlobSizeLimitExceeded is the sentinel error reported when adding a chunk
// would push the blob past its size limit.
var ErrBlobSizeLimitExceeded = errors.New("adding chunk would exceed blob size limit")
|
||||
517
internal/blob/packer.go
Normal file
517
internal/blob/packer.go
Normal file
@@ -0,0 +1,517 @@
|
||||
package blob
|
||||
|
||||
import (
|
||||
"context"
|
||||
"crypto/sha256"
|
||||
"database/sql"
|
||||
"encoding/hex"
|
||||
"fmt"
|
||||
"hash"
|
||||
"io"
|
||||
"math/bits"
|
||||
"os"
|
||||
"runtime"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"git.eeqj.de/sneak/vaultik/internal/database"
|
||||
"git.eeqj.de/sneak/vaultik/internal/log"
|
||||
"github.com/google/uuid"
|
||||
"github.com/klauspost/compress/zstd"
|
||||
)
|
||||
|
||||
// BlobHandler is called when a blob is finalized
|
||||
type BlobHandler func(blob *BlobWithReader) error
|
||||
|
||||
// PackerConfig holds configuration for creating a Packer
|
||||
type PackerConfig struct {
|
||||
MaxBlobSize int64
|
||||
CompressionLevel int
|
||||
Encryptor Encryptor // Required - blobs are always encrypted
|
||||
Repositories *database.Repositories // For creating blob records
|
||||
BlobHandler BlobHandler // Optional - called when blob is ready
|
||||
}
|
||||
|
||||
// Packer combines chunks into blobs with compression and encryption
|
||||
type Packer struct {
|
||||
maxBlobSize int64
|
||||
compressionLevel int
|
||||
encryptor Encryptor // Required - blobs are always encrypted
|
||||
blobHandler BlobHandler // Called when blob is ready
|
||||
repos *database.Repositories // For creating blob records
|
||||
|
||||
// Mutex for thread-safe blob creation
|
||||
mu sync.Mutex
|
||||
|
||||
// Current blob being packed
|
||||
currentBlob *blobInProgress
|
||||
finishedBlobs []*FinishedBlob // Only used if no handler provided
|
||||
}
|
||||
|
||||
// Encryptor is the encryption capability the packer depends on.
type Encryptor interface {
	// Encrypt takes a byte slice and returns a byte slice or an error.
	Encrypt(data []byte) ([]byte, error)
	// EncryptWriter takes a destination writer and returns a WriteCloser
	// or an error.
	EncryptWriter(dst io.Writer) (io.WriteCloser, error)
}
|
||||
|
||||
// blobInProgress represents a blob being assembled
|
||||
type blobInProgress struct {
|
||||
id string // UUID of the blob
|
||||
chunks []*chunkInfo // Track chunk metadata
|
||||
chunkSet map[string]bool // Track unique chunks in this blob
|
||||
tempFile *os.File // Temporary file for encrypted compressed data
|
||||
hasher hash.Hash // For computing hash of final encrypted data
|
||||
compressor io.WriteCloser // Compression writer
|
||||
encryptor io.WriteCloser // Encryption writer (if encryption enabled)
|
||||
finalWriter io.Writer // The final writer in the chain
|
||||
startTime time.Time
|
||||
size int64 // Current uncompressed size
|
||||
compressedSize int64 // Current compressed size (estimated)
|
||||
}
|
||||
|
||||
// ChunkRef is a chunk that is to be added to a blob: its hash plus the
// chunk's bytes.
type ChunkRef struct {
	Hash string
	Data []byte
}
|
||||
|
||||
// chunkInfo records the metadata of one chunk inside a blob.
type chunkInfo struct {
	// Hash identifies the chunk.
	Hash string
	// Offset is the chunk's offset within the blob.
	Offset int64
	// Size is the chunk's size.
	Size int64
}
|
||||
|
||||
// FinishedBlob represents a completed blob ready for storage
|
||||
type FinishedBlob struct {
|
||||
ID string
|
||||
Hash string
|
||||
Data []byte // Compressed data
|
||||
Chunks []*BlobChunkRef
|
||||
CreatedTS time.Time
|
||||
Uncompressed int64
|
||||
Compressed int64
|
||||
}
|
||||
|
||||
// BlobChunkRef locates a chunk within a blob.
type BlobChunkRef struct {
	// ChunkHash identifies the chunk.
	ChunkHash string
	// Offset is the chunk's position within the blob.
	Offset int64
	// Length is the chunk's length.
	Length int64
}
|
||||
|
||||
// BlobWithReader wraps a FinishedBlob with its data reader
|
||||
type BlobWithReader struct {
|
||||
*FinishedBlob
|
||||
Reader io.ReadSeeker
|
||||
TempFile *os.File // Optional, only set for disk-based blobs
|
||||
}
|
||||
|
||||
// NewPacker creates a new blob packer
|
||||
func NewPacker(cfg PackerConfig) (*Packer, error) {
|
||||
if cfg.Encryptor == nil {
|
||||
return nil, fmt.Errorf("encryptor is required - blobs must be encrypted")
|
||||
}
|
||||
if cfg.MaxBlobSize <= 0 {
|
||||
return nil, fmt.Errorf("max blob size must be positive")
|
||||
}
|
||||
return &Packer{
|
||||
maxBlobSize: cfg.MaxBlobSize,
|
||||
compressionLevel: cfg.CompressionLevel,
|
||||
encryptor: cfg.Encryptor,
|
||||
blobHandler: cfg.BlobHandler,
|
||||
repos: cfg.Repositories,
|
||||
finishedBlobs: make([]*FinishedBlob, 0),
|
||||
}, nil
|
||||
}
|
||||
|
||||
// SetBlobHandler sets the handler to be called when a blob is finalized
|
||||
func (p *Packer) SetBlobHandler(handler BlobHandler) {
|
||||
p.mu.Lock()
|
||||
defer p.mu.Unlock()
|
||||
p.blobHandler = handler
|
||||
}
|
||||
|
||||
// AddChunk adds a chunk to the current blob
|
||||
// Returns ErrBlobSizeLimitExceeded if adding the chunk would exceed the size limit
|
||||
func (p *Packer) AddChunk(chunk *ChunkRef) error {
|
||||
p.mu.Lock()
|
||||
defer p.mu.Unlock()
|
||||
|
||||
// Initialize new blob if needed
|
||||
if p.currentBlob == nil {
|
||||
if err := p.startNewBlob(); err != nil {
|
||||
return fmt.Errorf("starting new blob: %w", err)
|
||||
}
|
||||
}
|
||||
|
||||
// Check if adding this chunk would exceed blob size limit
|
||||
// Use conservative estimate: assume no compression
|
||||
// Skip size check if chunk already exists in blob
|
||||
if !p.currentBlob.chunkSet[chunk.Hash] {
|
||||
currentSize := p.currentBlob.size
|
||||
newSize := currentSize + int64(len(chunk.Data))
|
||||
|
||||
if newSize > p.maxBlobSize && len(p.currentBlob.chunks) > 0 {
|
||||
// Return error indicating size limit would be exceeded
|
||||
return ErrBlobSizeLimitExceeded
|
||||
}
|
||||
}
|
||||
|
||||
// Add chunk to current blob
|
||||
if err := p.addChunkToCurrentBlob(chunk); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// Flush finalizes any pending blob
|
||||
func (p *Packer) Flush() error {
|
||||
p.mu.Lock()
|
||||
defer p.mu.Unlock()
|
||||
|
||||
if p.currentBlob != nil && len(p.currentBlob.chunks) > 0 {
|
||||
if err := p.finalizeCurrentBlob(); err != nil {
|
||||
return fmt.Errorf("finalizing blob: %w", err)
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// FinalizeBlob finalizes the current blob being assembled
|
||||
// Caller must handle retrying the chunk that triggered size limit
|
||||
func (p *Packer) FinalizeBlob() error {
|
||||
p.mu.Lock()
|
||||
defer p.mu.Unlock()
|
||||
|
||||
if p.currentBlob == nil {
|
||||
return nil
|
||||
}
|
||||
|
||||
return p.finalizeCurrentBlob()
|
||||
}
|
||||
|
||||
// GetFinishedBlobs returns all completed blobs and clears the list
|
||||
func (p *Packer) GetFinishedBlobs() []*FinishedBlob {
|
||||
p.mu.Lock()
|
||||
defer p.mu.Unlock()
|
||||
|
||||
blobs := p.finishedBlobs
|
||||
p.finishedBlobs = make([]*FinishedBlob, 0)
|
||||
return blobs
|
||||
}
|
||||
|
||||
// startNewBlob initializes a new blob (must be called with lock held)
|
||||
func (p *Packer) startNewBlob() error {
|
||||
// Generate UUID for the blob
|
||||
blobID := uuid.New().String()
|
||||
|
||||
// Create blob record in database
|
||||
if p.repos != nil {
|
||||
blob := &database.Blob{
|
||||
ID: blobID,
|
||||
Hash: "", // Will be set when finalized
|
||||
CreatedTS: time.Now(),
|
||||
FinishedTS: nil,
|
||||
UncompressedSize: 0,
|
||||
CompressedSize: 0,
|
||||
UploadedTS: nil,
|
||||
}
|
||||
err := p.repos.WithTx(context.Background(), func(ctx context.Context, tx *sql.Tx) error {
|
||||
return p.repos.Blobs.Create(ctx, tx, blob)
|
||||
})
|
||||
if err != nil {
|
||||
return fmt.Errorf("creating blob record: %w", err)
|
||||
}
|
||||
}
|
||||
|
||||
// Create temporary file
|
||||
tempFile, err := os.CreateTemp("", "vaultik-blob-*.tmp")
|
||||
if err != nil {
|
||||
return fmt.Errorf("creating temp file: %w", err)
|
||||
}
|
||||
|
||||
p.currentBlob = &blobInProgress{
|
||||
id: blobID,
|
||||
chunks: make([]*chunkInfo, 0),
|
||||
chunkSet: make(map[string]bool),
|
||||
startTime: time.Now(),
|
||||
tempFile: tempFile,
|
||||
hasher: sha256.New(),
|
||||
size: 0,
|
||||
compressedSize: 0,
|
||||
}
|
||||
|
||||
// Build writer chain: compressor -> [encryptor ->] hasher+file
|
||||
// This ensures only encrypted data touches disk
|
||||
|
||||
// Final destination: write to both file and hasher
|
||||
finalWriter := io.MultiWriter(tempFile, p.currentBlob.hasher)
|
||||
|
||||
// Set up encryption (required - closest to disk)
|
||||
encWriter, err := p.encryptor.EncryptWriter(finalWriter)
|
||||
if err != nil {
|
||||
_ = tempFile.Close()
|
||||
_ = os.Remove(tempFile.Name())
|
||||
return fmt.Errorf("creating encryption writer: %w", err)
|
||||
}
|
||||
p.currentBlob.encryptor = encWriter
|
||||
currentWriter := encWriter
|
||||
|
||||
// Set up compression (processes data before encryption)
|
||||
encoderLevel := zstd.EncoderLevel(p.compressionLevel)
|
||||
if p.compressionLevel < 1 {
|
||||
encoderLevel = zstd.SpeedDefault
|
||||
} else if p.compressionLevel > 9 {
|
||||
encoderLevel = zstd.SpeedBestCompression
|
||||
}
|
||||
|
||||
// Calculate window size based on blob size
|
||||
windowSize := p.maxBlobSize / 100
|
||||
if windowSize < (1 << 20) { // Min 1MB
|
||||
windowSize = 1 << 20
|
||||
} else if windowSize > (128 << 20) { // Max 128MB
|
||||
windowSize = 128 << 20
|
||||
}
|
||||
windowSize = 1 << uint(63-bits.LeadingZeros64(uint64(windowSize)))
|
||||
|
||||
compWriter, err := zstd.NewWriter(currentWriter,
|
||||
zstd.WithEncoderLevel(encoderLevel),
|
||||
zstd.WithEncoderConcurrency(runtime.NumCPU()),
|
||||
zstd.WithWindowSize(int(windowSize)),
|
||||
)
|
||||
if err != nil {
|
||||
if p.currentBlob.encryptor != nil {
|
||||
_ = p.currentBlob.encryptor.Close()
|
||||
}
|
||||
_ = tempFile.Close()
|
||||
_ = os.Remove(tempFile.Name())
|
||||
return fmt.Errorf("creating compression writer: %w", err)
|
||||
}
|
||||
p.currentBlob.compressor = compWriter
|
||||
p.currentBlob.finalWriter = compWriter
|
||||
|
||||
log.Debug("Started new blob", "blob_id", blobID, "temp_file", tempFile.Name())
|
||||
return nil
|
||||
}
|
||||
|
||||
// addChunkToCurrentBlob adds a chunk to the current blob (must be called with lock held)
|
||||
func (p *Packer) addChunkToCurrentBlob(chunk *ChunkRef) error {
|
||||
// Skip if chunk already in current blob
|
||||
if p.currentBlob.chunkSet[chunk.Hash] {
|
||||
log.Debug("Skipping duplicate chunk in blob", "chunk_hash", chunk.Hash)
|
||||
return nil
|
||||
}
|
||||
|
||||
// Track offset before writing
|
||||
offset := p.currentBlob.size
|
||||
|
||||
// Write to the final writer (compression -> encryption -> disk)
|
||||
if _, err := p.currentBlob.finalWriter.Write(chunk.Data); err != nil {
|
||||
return fmt.Errorf("writing to blob stream: %w", err)
|
||||
}
|
||||
|
||||
// Track chunk info
|
||||
chunkSize := int64(len(chunk.Data))
|
||||
chunkInfo := &chunkInfo{
|
||||
Hash: chunk.Hash,
|
||||
Offset: offset,
|
||||
Size: chunkSize,
|
||||
}
|
||||
p.currentBlob.chunks = append(p.currentBlob.chunks, chunkInfo)
|
||||
p.currentBlob.chunkSet[chunk.Hash] = true
|
||||
|
||||
// Store blob-chunk association in database immediately
|
||||
if p.repos != nil {
|
||||
blobChunk := &database.BlobChunk{
|
||||
BlobID: p.currentBlob.id,
|
||||
ChunkHash: chunk.Hash,
|
||||
Offset: offset,
|
||||
Length: chunkSize,
|
||||
}
|
||||
err := p.repos.WithTx(context.Background(), func(ctx context.Context, tx *sql.Tx) error {
|
||||
return p.repos.BlobChunks.Create(ctx, tx, blobChunk)
|
||||
})
|
||||
if err != nil {
|
||||
log.Error("Failed to store blob-chunk association", "error", err,
|
||||
"blob_id", p.currentBlob.id, "chunk_hash", chunk.Hash)
|
||||
// Continue anyway - we can reconstruct this later if needed
|
||||
}
|
||||
}
|
||||
|
||||
// Update total size
|
||||
p.currentBlob.size += chunkSize
|
||||
|
||||
log.Debug("Added chunk to blob",
|
||||
"blob_id", p.currentBlob.id,
|
||||
"chunk_hash", chunk.Hash,
|
||||
"chunk_size", len(chunk.Data),
|
||||
"offset", offset,
|
||||
"blob_chunks", len(p.currentBlob.chunks),
|
||||
"uncompressed_size", p.currentBlob.size)
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// finalizeCurrentBlob completes the current blob (must be called with lock held)
|
||||
func (p *Packer) finalizeCurrentBlob() error {
|
||||
if p.currentBlob == nil {
|
||||
return nil
|
||||
}
|
||||
|
||||
// Close compression writer to flush all data
|
||||
if err := p.currentBlob.compressor.Close(); err != nil {
|
||||
p.cleanupTempFile()
|
||||
return fmt.Errorf("closing compression writer: %w", err)
|
||||
}
|
||||
|
||||
// Close encryption writer
|
||||
if err := p.currentBlob.encryptor.Close(); err != nil {
|
||||
p.cleanupTempFile()
|
||||
return fmt.Errorf("closing encryption writer: %w", err)
|
||||
}
|
||||
|
||||
// Sync file to ensure all data is written
|
||||
if err := p.currentBlob.tempFile.Sync(); err != nil {
|
||||
p.cleanupTempFile()
|
||||
return fmt.Errorf("syncing temp file: %w", err)
|
||||
}
|
||||
|
||||
// Get the final size (encrypted if applicable)
|
||||
finalSize, err := p.currentBlob.tempFile.Seek(0, io.SeekCurrent)
|
||||
if err != nil {
|
||||
p.cleanupTempFile()
|
||||
return fmt.Errorf("getting file size: %w", err)
|
||||
}
|
||||
|
||||
// Reset to beginning for reading
|
||||
if _, err := p.currentBlob.tempFile.Seek(0, io.SeekStart); err != nil {
|
||||
p.cleanupTempFile()
|
||||
return fmt.Errorf("seeking to start: %w", err)
|
||||
}
|
||||
|
||||
// Get hash from hasher (of final encrypted data)
|
||||
finalHash := p.currentBlob.hasher.Sum(nil)
|
||||
blobHash := hex.EncodeToString(finalHash)
|
||||
|
||||
// Create chunk references with offsets
|
||||
chunkRefs := make([]*BlobChunkRef, 0, len(p.currentBlob.chunks))
|
||||
|
||||
for _, chunk := range p.currentBlob.chunks {
|
||||
chunkRefs = append(chunkRefs, &BlobChunkRef{
|
||||
ChunkHash: chunk.Hash,
|
||||
Offset: chunk.Offset,
|
||||
Length: chunk.Size,
|
||||
})
|
||||
}
|
||||
|
||||
// Update blob record in database with hash and sizes
|
||||
if p.repos != nil {
|
||||
err := p.repos.WithTx(context.Background(), func(ctx context.Context, tx *sql.Tx) error {
|
||||
return p.repos.Blobs.UpdateFinished(ctx, tx, p.currentBlob.id, blobHash,
|
||||
p.currentBlob.size, finalSize)
|
||||
})
|
||||
if err != nil {
|
||||
p.cleanupTempFile()
|
||||
return fmt.Errorf("updating blob record: %w", err)
|
||||
}
|
||||
}
|
||||
|
||||
// Create finished blob
|
||||
finished := &FinishedBlob{
|
||||
ID: p.currentBlob.id,
|
||||
Hash: blobHash,
|
||||
Data: nil, // We don't load data into memory anymore
|
||||
Chunks: chunkRefs,
|
||||
CreatedTS: p.currentBlob.startTime,
|
||||
Uncompressed: p.currentBlob.size,
|
||||
Compressed: finalSize,
|
||||
}
|
||||
|
||||
compressionRatio := float64(finished.Compressed) / float64(finished.Uncompressed)
|
||||
log.Info("Finalized blob",
|
||||
"hash", blobHash,
|
||||
"chunks", len(chunkRefs),
|
||||
"uncompressed", finished.Uncompressed,
|
||||
"compressed", finished.Compressed,
|
||||
"ratio", fmt.Sprintf("%.2f", compressionRatio),
|
||||
"duration", time.Since(p.currentBlob.startTime))
|
||||
|
||||
// Call blob handler if set
|
||||
if p.blobHandler != nil {
|
||||
log.Debug("Calling blob handler", "blob_hash", blobHash[:8]+"...")
|
||||
// Reset file position for handler
|
||||
if _, err := p.currentBlob.tempFile.Seek(0, io.SeekStart); err != nil {
|
||||
p.cleanupTempFile()
|
||||
return fmt.Errorf("seeking for handler: %w", err)
|
||||
}
|
||||
|
||||
// Create a blob reader that includes the data stream
|
||||
blobWithReader := &BlobWithReader{
|
||||
FinishedBlob: finished,
|
||||
Reader: p.currentBlob.tempFile,
|
||||
TempFile: p.currentBlob.tempFile,
|
||||
}
|
||||
|
||||
if err := p.blobHandler(blobWithReader); err != nil {
|
||||
p.cleanupTempFile()
|
||||
return fmt.Errorf("blob handler failed: %w", err)
|
||||
}
|
||||
// Note: blob handler is responsible for closing/cleaning up temp file
|
||||
p.currentBlob = nil
|
||||
} else {
|
||||
log.Debug("No blob handler set", "blob_hash", blobHash[:8]+"...")
|
||||
// No handler, need to read data for legacy behavior
|
||||
if _, err := p.currentBlob.tempFile.Seek(0, io.SeekStart); err != nil {
|
||||
p.cleanupTempFile()
|
||||
return fmt.Errorf("seeking to read data: %w", err)
|
||||
}
|
||||
|
||||
data, err := io.ReadAll(p.currentBlob.tempFile)
|
||||
if err != nil {
|
||||
p.cleanupTempFile()
|
||||
return fmt.Errorf("reading blob data: %w", err)
|
||||
}
|
||||
finished.Data = data
|
||||
|
||||
p.finishedBlobs = append(p.finishedBlobs, finished)
|
||||
|
||||
// Cleanup
|
||||
p.cleanupTempFile()
|
||||
p.currentBlob = nil
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// cleanupTempFile removes the temporary file
|
||||
func (p *Packer) cleanupTempFile() {
|
||||
if p.currentBlob != nil && p.currentBlob.tempFile != nil {
|
||||
name := p.currentBlob.tempFile.Name()
|
||||
_ = p.currentBlob.tempFile.Close()
|
||||
_ = os.Remove(name)
|
||||
}
|
||||
}
|
||||
|
||||
// PackChunks is a convenience method to pack multiple chunks at once
|
||||
func (p *Packer) PackChunks(chunks []*ChunkRef) error {
|
||||
for _, chunk := range chunks {
|
||||
err := p.AddChunk(chunk)
|
||||
if err == ErrBlobSizeLimitExceeded {
|
||||
// Finalize current blob and retry
|
||||
if err := p.FinalizeBlob(); err != nil {
|
||||
return fmt.Errorf("finalizing blob before retry: %w", err)
|
||||
}
|
||||
// Retry the chunk
|
||||
if err := p.AddChunk(chunk); err != nil {
|
||||
return fmt.Errorf("adding chunk %s after finalize: %w", chunk.Hash, err)
|
||||
}
|
||||
} else if err != nil {
|
||||
return fmt.Errorf("adding chunk %s: %w", chunk.Hash, err)
|
||||
}
|
||||
}
|
||||
|
||||
return p.Flush()
|
||||
}
|
||||
328
internal/blob/packer_test.go
Normal file
328
internal/blob/packer_test.go
Normal file
@@ -0,0 +1,328 @@
|
||||
package blob
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"crypto/sha256"
|
||||
"encoding/hex"
|
||||
"io"
|
||||
"testing"
|
||||
|
||||
"filippo.io/age"
|
||||
"git.eeqj.de/sneak/vaultik/internal/crypto"
|
||||
"git.eeqj.de/sneak/vaultik/internal/database"
|
||||
"git.eeqj.de/sneak/vaultik/internal/log"
|
||||
"github.com/klauspost/compress/zstd"
|
||||
)
|
||||
|
||||
// Key pair used by the packer tests.
// Test key from test/insecure-integration-test.key
const (
	testPrivateKey = "AGE-SECRET-KEY-19CR5YSFW59HM4TLD6GXVEDMZFTVVF7PPHKUT68TXSFPK7APHXA2QS2NJA5"
	testPublicKey  = "age1ezrjmfpwsc95svdg0y54mums3zevgzu0x0ecq2f7tp8a05gl0sjq9q9wjg"
)
|
||||
|
||||
func TestPacker(t *testing.T) {
|
||||
// Initialize logger for tests
|
||||
log.Initialize(log.Config{})
|
||||
|
||||
// Parse test identity
|
||||
identity, err := age.ParseX25519Identity(testPrivateKey)
|
||||
if err != nil {
|
||||
t.Fatalf("failed to parse test identity: %v", err)
|
||||
}
|
||||
|
||||
// Create test encryptor using the public key
|
||||
enc, err := crypto.NewEncryptor([]string{testPublicKey})
|
||||
if err != nil {
|
||||
t.Fatalf("failed to create encryptor: %v", err)
|
||||
}
|
||||
|
||||
t.Run("single chunk creates single blob", func(t *testing.T) {
|
||||
// Create test database
|
||||
db, err := database.NewTestDB()
|
||||
if err != nil {
|
||||
t.Fatalf("failed to create test db: %v", err)
|
||||
}
|
||||
defer func() { _ = db.Close() }()
|
||||
repos := database.NewRepositories(db)
|
||||
|
||||
cfg := PackerConfig{
|
||||
MaxBlobSize: 10 * 1024 * 1024, // 10MB
|
||||
CompressionLevel: 3,
|
||||
Encryptor: enc,
|
||||
Repositories: repos,
|
||||
}
|
||||
packer, err := NewPacker(cfg)
|
||||
if err != nil {
|
||||
t.Fatalf("failed to create packer: %v", err)
|
||||
}
|
||||
|
||||
// Create a chunk
|
||||
data := []byte("Hello, World!")
|
||||
hash := sha256.Sum256(data)
|
||||
chunk := &ChunkRef{
|
||||
Hash: hex.EncodeToString(hash[:]),
|
||||
Data: data,
|
||||
}
|
||||
|
||||
// Add chunk
|
||||
if err := packer.AddChunk(chunk); err != nil {
|
||||
t.Fatalf("failed to add chunk: %v", err)
|
||||
}
|
||||
|
||||
// Flush
|
||||
if err := packer.Flush(); err != nil {
|
||||
t.Fatalf("failed to flush: %v", err)
|
||||
}
|
||||
|
||||
// Get finished blobs
|
||||
blobs := packer.GetFinishedBlobs()
|
||||
if len(blobs) != 1 {
|
||||
t.Fatalf("expected 1 blob, got %d", len(blobs))
|
||||
}
|
||||
|
||||
blob := blobs[0]
|
||||
if len(blob.Chunks) != 1 {
|
||||
t.Errorf("expected 1 chunk in blob, got %d", len(blob.Chunks))
|
||||
}
|
||||
|
||||
// Note: Very small data may not compress well
|
||||
t.Logf("Compression: %d -> %d bytes", blob.Uncompressed, blob.Compressed)
|
||||
|
||||
// Decrypt the blob data
|
||||
decrypted, err := age.Decrypt(bytes.NewReader(blob.Data), identity)
|
||||
if err != nil {
|
||||
t.Fatalf("failed to decrypt blob: %v", err)
|
||||
}
|
||||
|
||||
// Decompress the decrypted data
|
||||
reader, err := zstd.NewReader(decrypted)
|
||||
if err != nil {
|
||||
t.Fatalf("failed to create decompressor: %v", err)
|
||||
}
|
||||
defer reader.Close()
|
||||
|
||||
var decompressed bytes.Buffer
|
||||
if _, err := io.Copy(&decompressed, reader); err != nil {
|
||||
t.Fatalf("failed to decompress: %v", err)
|
||||
}
|
||||
|
||||
if !bytes.Equal(decompressed.Bytes(), data) {
|
||||
t.Error("decompressed data doesn't match original")
|
||||
}
|
||||
})
|
||||
|
||||
t.Run("multiple chunks packed together", func(t *testing.T) {
|
||||
// Create test database
|
||||
db, err := database.NewTestDB()
|
||||
if err != nil {
|
||||
t.Fatalf("failed to create test db: %v", err)
|
||||
}
|
||||
defer func() { _ = db.Close() }()
|
||||
repos := database.NewRepositories(db)
|
||||
|
||||
cfg := PackerConfig{
|
||||
MaxBlobSize: 10 * 1024 * 1024, // 10MB
|
||||
CompressionLevel: 3,
|
||||
Encryptor: enc,
|
||||
Repositories: repos,
|
||||
}
|
||||
packer, err := NewPacker(cfg)
|
||||
if err != nil {
|
||||
t.Fatalf("failed to create packer: %v", err)
|
||||
}
|
||||
|
||||
// Create multiple small chunks
|
||||
chunks := make([]*ChunkRef, 10)
|
||||
for i := 0; i < 10; i++ {
|
||||
data := bytes.Repeat([]byte{byte(i)}, 1000)
|
||||
hash := sha256.Sum256(data)
|
||||
chunks[i] = &ChunkRef{
|
||||
Hash: hex.EncodeToString(hash[:]),
|
||||
Data: data,
|
||||
}
|
||||
}
|
||||
|
||||
// Add all chunks
|
||||
for _, chunk := range chunks {
|
||||
err := packer.AddChunk(chunk)
|
||||
if err != nil {
|
||||
t.Fatalf("failed to add chunk: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
// Flush
|
||||
if err := packer.Flush(); err != nil {
|
||||
t.Fatalf("failed to flush: %v", err)
|
||||
}
|
||||
|
||||
// Should have one blob with all chunks
|
||||
blobs := packer.GetFinishedBlobs()
|
||||
if len(blobs) != 1 {
|
||||
t.Fatalf("expected 1 blob, got %d", len(blobs))
|
||||
}
|
||||
|
||||
if len(blobs[0].Chunks) != 10 {
|
||||
t.Errorf("expected 10 chunks in blob, got %d", len(blobs[0].Chunks))
|
||||
}
|
||||
|
||||
// Verify offsets are correct
|
||||
expectedOffset := int64(0)
|
||||
for i, chunkRef := range blobs[0].Chunks {
|
||||
if chunkRef.Offset != expectedOffset {
|
||||
t.Errorf("chunk %d: expected offset %d, got %d", i, expectedOffset, chunkRef.Offset)
|
||||
}
|
||||
if chunkRef.Length != 1000 {
|
||||
t.Errorf("chunk %d: expected length 1000, got %d", i, chunkRef.Length)
|
||||
}
|
||||
expectedOffset += chunkRef.Length
|
||||
}
|
||||
})
|
||||
|
||||
t.Run("blob size limit enforced", func(t *testing.T) {
|
||||
// Create test database
|
||||
db, err := database.NewTestDB()
|
||||
if err != nil {
|
||||
t.Fatalf("failed to create test db: %v", err)
|
||||
}
|
||||
defer func() { _ = db.Close() }()
|
||||
repos := database.NewRepositories(db)
|
||||
|
||||
// Small blob size limit to force multiple blobs
|
||||
cfg := PackerConfig{
|
||||
MaxBlobSize: 5000, // 5KB max
|
||||
CompressionLevel: 3,
|
||||
Encryptor: enc,
|
||||
Repositories: repos,
|
||||
}
|
||||
packer, err := NewPacker(cfg)
|
||||
if err != nil {
|
||||
t.Fatalf("failed to create packer: %v", err)
|
||||
}
|
||||
|
||||
// Create chunks that will exceed the limit
|
||||
chunks := make([]*ChunkRef, 10)
|
||||
for i := 0; i < 10; i++ {
|
||||
data := bytes.Repeat([]byte{byte(i)}, 1000) // 1KB each
|
||||
hash := sha256.Sum256(data)
|
||||
chunks[i] = &ChunkRef{
|
||||
Hash: hex.EncodeToString(hash[:]),
|
||||
Data: data,
|
||||
}
|
||||
}
|
||||
|
||||
blobCount := 0
|
||||
|
||||
// Add chunks and handle size limit errors
|
||||
for _, chunk := range chunks {
|
||||
err := packer.AddChunk(chunk)
|
||||
if err == ErrBlobSizeLimitExceeded {
|
||||
// Finalize current blob
|
||||
if err := packer.FinalizeBlob(); err != nil {
|
||||
t.Fatalf("failed to finalize blob: %v", err)
|
||||
}
|
||||
blobCount++
|
||||
// Retry adding the chunk
|
||||
if err := packer.AddChunk(chunk); err != nil {
|
||||
t.Fatalf("failed to add chunk after finalize: %v", err)
|
||||
}
|
||||
} else if err != nil {
|
||||
t.Fatalf("failed to add chunk: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
// Flush remaining
|
||||
if err := packer.Flush(); err != nil {
|
||||
t.Fatalf("failed to flush: %v", err)
|
||||
}
|
||||
|
||||
// Get all blobs
|
||||
blobs := packer.GetFinishedBlobs()
|
||||
totalBlobs := blobCount + len(blobs)
|
||||
|
||||
// Should have multiple blobs due to size limit
|
||||
if totalBlobs < 2 {
|
||||
t.Errorf("expected multiple blobs due to size limit, got %d", totalBlobs)
|
||||
}
|
||||
|
||||
// Verify each blob respects size limit (approximately)
|
||||
for _, blob := range blobs {
|
||||
if blob.Compressed > 6000 { // Allow some overhead
|
||||
t.Errorf("blob size %d exceeds limit", blob.Compressed)
|
||||
}
|
||||
}
|
||||
})
|
||||
|
||||
t.Run("with encryption", func(t *testing.T) {
|
||||
// Create test database
|
||||
db, err := database.NewTestDB()
|
||||
if err != nil {
|
||||
t.Fatalf("failed to create test db: %v", err)
|
||||
}
|
||||
defer func() { _ = db.Close() }()
|
||||
repos := database.NewRepositories(db)
|
||||
|
||||
// Generate test identity (using the one from parent test)
|
||||
cfg := PackerConfig{
|
||||
MaxBlobSize: 10 * 1024 * 1024, // 10MB
|
||||
CompressionLevel: 3,
|
||||
Encryptor: enc,
|
||||
Repositories: repos,
|
||||
}
|
||||
packer, err := NewPacker(cfg)
|
||||
if err != nil {
|
||||
t.Fatalf("failed to create packer: %v", err)
|
||||
}
|
||||
|
||||
// Create test data
|
||||
data := bytes.Repeat([]byte("Test data for encryption!"), 100)
|
||||
hash := sha256.Sum256(data)
|
||||
chunk := &ChunkRef{
|
||||
Hash: hex.EncodeToString(hash[:]),
|
||||
Data: data,
|
||||
}
|
||||
|
||||
// Add chunk and flush
|
||||
if err := packer.AddChunk(chunk); err != nil {
|
||||
t.Fatalf("failed to add chunk: %v", err)
|
||||
}
|
||||
if err := packer.Flush(); err != nil {
|
||||
t.Fatalf("failed to flush: %v", err)
|
||||
}
|
||||
|
||||
// Get blob
|
||||
blobs := packer.GetFinishedBlobs()
|
||||
if len(blobs) != 1 {
|
||||
t.Fatalf("expected 1 blob, got %d", len(blobs))
|
||||
}
|
||||
|
||||
blob := blobs[0]
|
||||
|
||||
// Decrypt the blob
|
||||
decrypted, err := age.Decrypt(bytes.NewReader(blob.Data), identity)
|
||||
if err != nil {
|
||||
t.Fatalf("failed to decrypt blob: %v", err)
|
||||
}
|
||||
|
||||
var decryptedData bytes.Buffer
|
||||
if _, err := decryptedData.ReadFrom(decrypted); err != nil {
|
||||
t.Fatalf("failed to read decrypted data: %v", err)
|
||||
}
|
||||
|
||||
// Decompress
|
||||
reader, err := zstd.NewReader(&decryptedData)
|
||||
if err != nil {
|
||||
t.Fatalf("failed to create decompressor: %v", err)
|
||||
}
|
||||
defer reader.Close()
|
||||
|
||||
var decompressed bytes.Buffer
|
||||
if _, err := decompressed.ReadFrom(reader); err != nil {
|
||||
t.Fatalf("failed to decompress: %v", err)
|
||||
}
|
||||
|
||||
// Verify data
|
||||
if !bytes.Equal(decompressed.Bytes(), data) {
|
||||
t.Error("decrypted and decompressed data doesn't match original")
|
||||
}
|
||||
})
|
||||
}
|
||||
Reference in New Issue
Block a user