Refactor blob storage to use UUID primary keys and implement streaming chunking

- Changed blob table to use ID (UUID) as primary key instead of hash
- Blob records are now created at packing start, enabling immediate chunk associations
- Implemented streaming chunking to process large files without memory exhaustion
- Fixed blob manifest generation to include all referenced blobs
- Updated all foreign key references from blob_hash to blob_id
- Added progress reporting and improved error handling
- Enforced encryption requirement for all blob packing
- Updated tests to use test encryption keys
- Added Cyrillic transliteration to README
2025-07-22 07:43:39 +02:00
parent 26db096913
commit 86b533d6ee
49 changed files with 5709 additions and 324 deletions
--- a/internal/blob/errors.go
+++ b/internal/blob/errors.go
@@ -0,0 +1,6 @@
+package blob
+
+import "errors"
+
+// ErrBlobSizeLimitExceeded is returned when adding a chunk would exceed the
+// blob size limit.
+var ErrBlobSizeLimitExceeded = errors.New("adding chunk would exceed blob size limit")
--- a/internal/blob/packer.go
+++ b/internal/blob/packer.go
@@ -0,0 +1,517 @@
+package blob
+
+import (
+	"context"
+	"crypto/sha256"
+	"database/sql"
+	"encoding/hex"
+	"fmt"
+	"hash"
+	"io"
+	"math/bits"
+	"os"
+	"runtime"
+	"sync"
+	"time"
+
+	"git.eeqj.de/sneak/vaultik/internal/database"
+	"git.eeqj.de/sneak/vaultik/internal/log"
+	"github.com/google/uuid"
+	"github.com/klauspost/compress/zstd"
+)
+
+// BlobHandler is the callback invoked with each blob once it has been
+// finalized. A non-nil error returned by the handler is reported by the
+// Packer as a finalization failure.
+type BlobHandler func(blob *BlobWithReader) error
+
+// PackerConfig holds configuration for creating a Packer.
+//
+// NewPacker rejects a nil Encryptor and a MaxBlobSize that is not positive.
+// MaxBlobSize is compared against the uncompressed size of the chunks in a
+// blob. A CompressionLevel below 1 selects the zstd default level.
+// Repositories may be nil, in which case no database records are written.
+type PackerConfig struct {
+	MaxBlobSize      int64
+	CompressionLevel int
+	Encryptor        Encryptor              // Required - blobs are always encrypted
+	Repositories     *database.Repositories // For creating blob records; may be nil
+	BlobHandler      BlobHandler            // Optional - called when blob is ready
+}
+
+// Packer combines chunks into blobs with compression and encryption.
+//
+// Chunk data is streamed through zstd compression and then encryption into a
+// temporary file, one blob at a time. The exported methods acquire mu; the
+// unexported helpers expect the caller to hold it already.
+type Packer struct {
+	maxBlobSize      int64
+	compressionLevel int
+	encryptor        Encryptor              // Required - blobs are always encrypted
+	blobHandler      BlobHandler            // Called when blob is ready
+	repos            *database.Repositories // For creating blob records
+
+	// Mutex guarding currentBlob, finishedBlobs and blobHandler
+	mu sync.Mutex
+
+	// Current blob being packed (nil when no blob is in progress)
+	currentBlob   *blobInProgress
+	finishedBlobs []*FinishedBlob // Only used if no handler provided
+}
+
+// Encryptor is the encryption dependency of the Packer.
+//
+// The Packer wraps each blob's destination writer with EncryptWriter and
+// closes the returned writer when the blob is finalized. Encrypt is not called
+// by the packer code in this file.
+type Encryptor interface {
+	Encrypt(data []byte) ([]byte, error)
+	EncryptWriter(dst io.Writer) (io.WriteCloser, error)
+}
+
+// blobInProgress represents a blob being assembled.
+// Chunk data flows compressor -> encryptor -> (tempFile + hasher).
+type blobInProgress struct {
+	id             string          // UUID of the blob
+	chunks         []*chunkInfo    // Chunk metadata, in the order chunks were written
+	chunkSet       map[string]bool // Hashes of chunks already in this blob
+	tempFile       *os.File        // Temporary file for encrypted compressed data
+	hasher         hash.Hash       // For computing hash of final encrypted data
+	compressor     io.WriteCloser  // Compression writer
+	encryptor      io.WriteCloser  // Encryption writer (encryption is always enabled)
+	finalWriter    io.Writer       // Head of the writer chain; chunk data is written here
+	startTime      time.Time
+	size           int64 // Current uncompressed size; also the offset of the next chunk
+	compressedSize int64 // Compressed size; set to 0 at start and not updated in the code shown here
+}
+
+// ChunkRef represents a chunk to be added to a blob.
+// Hash identifies the chunk and is used to skip duplicates within a blob;
+// Data is the chunk content written into the blob stream.
+type ChunkRef struct {
+	Hash string
+	Data []byte
+}
+
+// chunkInfo tracks chunk metadata in a blob.
+// Offset and Size are measured in uncompressed bytes from the start of the
+// blob's data stream.
+type chunkInfo struct {
+	Hash   string
+	Offset int64
+	Size   int64
+}
+
+// FinishedBlob represents a completed blob ready for storage.
+//
+// Hash is the hex-encoded SHA-256 of the final (compressed and encrypted)
+// bytes. Uncompressed is the total size of the chunk data that went into the
+// blob; Compressed is the size of the final bytes written to the temp file.
+type FinishedBlob struct {
+	ID           string
+	Hash         string
+	Data         []byte // Encrypted compressed data; nil when a BlobHandler receives the blob
+	Chunks       []*BlobChunkRef
+	CreatedTS    time.Time
+	Uncompressed int64
+	Compressed   int64
+}
+
+// BlobChunkRef represents a chunk's position within a blob.
+// Offset and Length refer to the uncompressed data stream of the blob.
+type BlobChunkRef struct {
+	ChunkHash string
+	Offset    int64
+	Length    int64
+}
+
+// BlobWithReader wraps a FinishedBlob with its data reader.
+// When produced by the Packer, Reader and TempFile both refer to the blob's
+// temporary file, positioned at its start.
+type BlobWithReader struct {
+	*FinishedBlob
+	Reader   io.ReadSeeker
+	TempFile *os.File // Optional, only set for disk-based blobs
+}
+
+// NewPacker validates cfg and builds a Packer from it.
+//
+// It returns an error when no Encryptor is supplied or when MaxBlobSize is not
+// strictly positive. Repositories and BlobHandler may be left unset.
+func NewPacker(cfg PackerConfig) (*Packer, error) {
+	switch {
+	case cfg.Encryptor == nil:
+		return nil, fmt.Errorf("encryptor is required - blobs must be encrypted")
+	case cfg.MaxBlobSize <= 0:
+		return nil, fmt.Errorf("max blob size must be positive")
+	}
+
+	packer := &Packer{
+		maxBlobSize:      cfg.MaxBlobSize,
+		compressionLevel: cfg.CompressionLevel,
+		encryptor:        cfg.Encryptor,
+		blobHandler:      cfg.BlobHandler,
+		repos:            cfg.Repositories,
+		finishedBlobs:    []*FinishedBlob{},
+	}
+	return packer, nil
+}
+
+// SetBlobHandler installs the callback that receives each finalized blob.
+// The swap happens under the packer's mutex.
+func (p *Packer) SetBlobHandler(handler BlobHandler) {
+	p.mu.Lock()
+	p.blobHandler = handler
+	p.mu.Unlock()
+}
+
+// AddChunk adds a chunk to the current blob, starting a new blob if none is
+// in progress.
+//
+// It returns ErrBlobSizeLimitExceeded (unwrapped) when the blob already holds
+// at least one chunk and adding this one would push its uncompressed size past
+// the configured limit; the caller is expected to finalize the blob and retry.
+// A chunk larger than the limit is still accepted into an empty blob, and a
+// chunk whose hash is already present in the blob is skipped without a size
+// check. A nil chunk is rejected with an error.
+func (p *Packer) AddChunk(chunk *ChunkRef) error {
+	if chunk == nil {
+		return fmt.Errorf("chunk is nil")
+	}
+
+	p.mu.Lock()
+	defer p.mu.Unlock()
+
+	// Initialize new blob if needed
+	if p.currentBlob == nil {
+		if err := p.startNewBlob(); err != nil {
+			return fmt.Errorf("starting new blob: %w", err)
+		}
+	}
+
+	// Conservative size check: assume the chunk does not compress at all.
+	// Duplicates add no data, so they are exempt.
+	if !p.currentBlob.chunkSet[chunk.Hash] {
+		newSize := p.currentBlob.size + int64(len(chunk.Data))
+		if newSize > p.maxBlobSize && len(p.currentBlob.chunks) > 0 {
+			return ErrBlobSizeLimitExceeded
+		}
+	}
+
+	return p.addChunkToCurrentBlob(chunk)
+}
+
+// Flush finalizes the blob in progress, provided it holds at least one chunk.
+// It is a no-op when there is no blob or the blob is still empty.
+func (p *Packer) Flush() error {
+	p.mu.Lock()
+	defer p.mu.Unlock()
+
+	pending := p.currentBlob
+	if pending == nil || len(pending.chunks) == 0 {
+		return nil
+	}
+
+	if err := p.finalizeCurrentBlob(); err != nil {
+		return fmt.Errorf("finalizing blob: %w", err)
+	}
+	return nil
+}
+
+// FinalizeBlob completes the blob currently being assembled, if there is one.
+// After an ErrBlobSizeLimitExceeded the caller calls this and then re-adds the
+// chunk that did not fit; the retry is the caller's responsibility.
+func (p *Packer) FinalizeBlob() error {
+	p.mu.Lock()
+	defer p.mu.Unlock()
+
+	if p.currentBlob != nil {
+		return p.finalizeCurrentBlob()
+	}
+	return nil
+}
+
+// GetFinishedBlobs hands back every blob completed so far and resets the
+// internal list to empty, so each blob is returned only once.
+func (p *Packer) GetFinishedBlobs() []*FinishedBlob {
+	p.mu.Lock()
+	defer p.mu.Unlock()
+
+	drained := p.finishedBlobs
+	p.finishedBlobs = []*FinishedBlob{}
+	return drained
+}
+
+// startNewBlob initializes a new blob (must be called with lock held).
+//
+// It creates the blob's database record (when repositories are configured),
+// opens a temporary file and builds the writer chain
+// compressor -> encryptor -> (temp file + hasher), so that only encrypted data
+// touches disk. p.currentBlob is assigned only after every step has succeeded;
+// on failure the temporary file is removed and the packer is left without a
+// blob in progress, so a later AddChunk starts from scratch.
+func (p *Packer) startNewBlob() error {
+	// Generate UUID for the blob
+	blobID := uuid.New().String()
+
+	// Create blob record in database
+	if p.repos != nil {
+		record := &database.Blob{
+			ID:               blobID,
+			Hash:             "", // Will be set when finalized
+			CreatedTS:        time.Now(),
+			FinishedTS:       nil,
+			UncompressedSize: 0,
+			CompressedSize:   0,
+			UploadedTS:       nil,
+		}
+		err := p.repos.WithTx(context.Background(), func(ctx context.Context, tx *sql.Tx) error {
+			return p.repos.Blobs.Create(ctx, tx, record)
+		})
+		if err != nil {
+			return fmt.Errorf("creating blob record: %w", err)
+		}
+	}
+
+	// Create temporary file
+	tempFile, err := os.CreateTemp("", "vaultik-blob-*.tmp")
+	if err != nil {
+		return fmt.Errorf("creating temp file: %w", err)
+	}
+	// discardTempFile is best-effort cleanup for the failure paths below;
+	// close/remove errors are ignored because the setup error is what matters.
+	discardTempFile := func() {
+		_ = tempFile.Close()
+		_ = os.Remove(tempFile.Name())
+	}
+
+	// Build the blob locally so a failure never leaves a half-initialized
+	// blob (nil writers, closed temp file) installed on the packer.
+	blob := &blobInProgress{
+		id:             blobID,
+		chunks:         make([]*chunkInfo, 0),
+		chunkSet:       make(map[string]bool),
+		startTime:      time.Now(),
+		tempFile:       tempFile,
+		hasher:         sha256.New(),
+		size:           0,
+		compressedSize: 0,
+	}
+
+	// Final destination: write to both file and hasher
+	finalWriter := io.MultiWriter(tempFile, blob.hasher)
+
+	// Set up encryption (required - closest to disk)
+	encWriter, err := p.encryptor.EncryptWriter(finalWriter)
+	if err != nil {
+		discardTempFile()
+		return fmt.Errorf("creating encryption writer: %w", err)
+	}
+	blob.encryptor = encWriter
+
+	// Set up compression (processes data before encryption).
+	// The configured level is used directly as a zstd.EncoderLevel; values
+	// below 1 fall back to the default and values above the highest level the
+	// library defines are clamped to it instead of being rejected.
+	encoderLevel := zstd.EncoderLevel(p.compressionLevel)
+	switch {
+	case p.compressionLevel < 1:
+		encoderLevel = zstd.SpeedDefault
+	case encoderLevel > zstd.SpeedBestCompression:
+		encoderLevel = zstd.SpeedBestCompression
+	}
+
+	// Calculate window size based on blob size: 1% of the limit, clamped to
+	// [1MB, 128MB] and rounded down to a power of two.
+	windowSize := p.maxBlobSize / 100
+	if windowSize < (1 << 20) { // Min 1MB
+		windowSize = 1 << 20
+	} else if windowSize > (128 << 20) { // Max 128MB
+		windowSize = 128 << 20
+	}
+	windowSize = 1 << uint(63-bits.LeadingZeros64(uint64(windowSize)))
+
+	compWriter, err := zstd.NewWriter(encWriter,
+		zstd.WithEncoderLevel(encoderLevel),
+		zstd.WithEncoderConcurrency(runtime.NumCPU()),
+		zstd.WithWindowSize(int(windowSize)),
+	)
+	if err != nil {
+		if encWriter != nil {
+			_ = encWriter.Close()
+		}
+		discardTempFile()
+		return fmt.Errorf("creating compression writer: %w", err)
+	}
+	blob.compressor = compWriter
+	blob.finalWriter = compWriter
+
+	p.currentBlob = blob
+
+	log.Debug("Started new blob", "blob_id", blobID, "temp_file", tempFile.Name())
+	return nil
+}
+
+// addChunkToCurrentBlob writes one chunk into the blob in progress and records
+// where it landed (must be called with lock held and a blob in progress).
+//
+// A chunk whose hash is already in the blob is skipped. A failure to persist
+// the blob-chunk association is logged and otherwise ignored.
+func (p *Packer) addChunkToCurrentBlob(chunk *ChunkRef) error {
+	blob := p.currentBlob
+
+	if blob.chunkSet[chunk.Hash] {
+		log.Debug("Skipping duplicate chunk in blob", "chunk_hash", chunk.Hash)
+		return nil
+	}
+
+	// The chunk starts where the uncompressed stream currently ends.
+	offset := blob.size
+	length := int64(len(chunk.Data))
+
+	// Push the data through compression -> encryption -> disk.
+	if _, err := blob.finalWriter.Write(chunk.Data); err != nil {
+		return fmt.Errorf("writing to blob stream: %w", err)
+	}
+
+	blob.chunks = append(blob.chunks, &chunkInfo{
+		Hash:   chunk.Hash,
+		Offset: offset,
+		Size:   length,
+	})
+	blob.chunkSet[chunk.Hash] = true
+
+	// Persist the association right away when a database is configured.
+	if p.repos != nil {
+		assoc := &database.BlobChunk{
+			BlobID:    blob.id,
+			ChunkHash: chunk.Hash,
+			Offset:    offset,
+			Length:    length,
+		}
+		storeErr := p.repos.WithTx(context.Background(), func(ctx context.Context, tx *sql.Tx) error {
+			return p.repos.BlobChunks.Create(ctx, tx, assoc)
+		})
+		if storeErr != nil {
+			// Deliberately not fatal: the association can be reconstructed later.
+			log.Error("Failed to store blob-chunk association", "error", storeErr,
+				"blob_id", blob.id, "chunk_hash", chunk.Hash)
+		}
+	}
+
+	blob.size += length
+
+	log.Debug("Added chunk to blob",
+		"blob_id", blob.id,
+		"chunk_hash", chunk.Hash,
+		"chunk_size", len(chunk.Data),
+		"offset", offset,
+		"blob_chunks", len(blob.chunks),
+		"uncompressed_size", blob.size)
+
+	return nil
+}
+
+// finalizeCurrentBlob completes the current blob (must be called with lock held)
+func (p *Packer) finalizeCurrentBlob() error {
+	if p.currentBlob == nil {
+		return nil
+	}
+
+	// Close compression writer to flush all data
+	if err := p.currentBlob.compressor.Close(); err != nil {
+		p.cleanupTempFile()
+		return fmt.Errorf("closing compression writer: %w", err)
+	}
+
+	// Close encryption writer
+	if err := p.currentBlob.encryptor.Close(); err != nil {
+		p.cleanupTempFile()
+		return fmt.Errorf("closing encryption writer: %w", err)
+	}
+
+	// Sync file to ensure all data is written
+	if err := p.currentBlob.tempFile.Sync(); err != nil {
+		p.cleanupTempFile()
+		return fmt.Errorf("syncing temp file: %w", err)
+	}
+
+	// Get the final size (encrypted if applicable)
+	finalSize, err := p.currentBlob.tempFile.Seek(0, io.SeekCurrent)
+	if err != nil {
+		p.cleanupTempFile()
+		return fmt.Errorf("getting file size: %w", err)
+	}
+
+	// Reset to beginning for reading
+	if _, err := p.currentBlob.tempFile.Seek(0, io.SeekStart); err != nil {
+		p.cleanupTempFile()
+		return fmt.Errorf("seeking to start: %w", err)
+	}
+
+	// Get hash from hasher (of final encrypted data)
+	finalHash := p.currentBlob.hasher.Sum(nil)
+	blobHash := hex.EncodeToString(finalHash)
+
+	// Create chunk references with offsets
+	chunkRefs := make([]*BlobChunkRef, 0, len(p.currentBlob.chunks))
+
+	for _, chunk := range p.currentBlob.chunks {
+		chunkRefs = append(chunkRefs, &BlobChunkRef{
+			ChunkHash: chunk.Hash,
+			Offset:    chunk.Offset,
+			Length:    chunk.Size,
+		})
+	}
+
+	// Update blob record in database with hash and sizes
+	if p.repos != nil {
+		err := p.repos.WithTx(context.Background(), func(ctx context.Context, tx *sql.Tx) error {
+			return p.repos.Blobs.UpdateFinished(ctx, tx, p.currentBlob.id, blobHash,
+				p.currentBlob.size, finalSize)
+		})
+		if err != nil {
+			p.cleanupTempFile()
+			return fmt.Errorf("updating blob record: %w", err)
+		}
+	}
+
+	// Create finished blob
+	finished := &FinishedBlob{
+		ID:           p.currentBlob.id,
+		Hash:         blobHash,
+		Data:         nil, // We don't load data into memory anymore
+		Chunks:       chunkRefs,
+		CreatedTS:    p.currentBlob.startTime,
+		Uncompressed: p.currentBlob.size,
+		Compressed:   finalSize,
+	}
+
+	compressionRatio := float64(finished.Compressed) / float64(finished.Uncompressed)
+	log.Info("Finalized blob",
+		"hash", blobHash,
+		"chunks", len(chunkRefs),
+		"uncompressed", finished.Uncompressed,
+		"compressed", finished.Compressed,
+		"ratio", fmt.Sprintf("%.2f", compressionRatio),
+		"duration", time.Since(p.currentBlob.startTime))
+
+	// Call blob handler if set
+	if p.blobHandler != nil {
+		log.Debug("Calling blob handler", "blob_hash", blobHash[:8]+"...")
+		// Reset file position for handler
+		if _, err := p.currentBlob.tempFile.Seek(0, io.SeekStart); err != nil {
+			p.cleanupTempFile()
+			return fmt.Errorf("seeking for handler: %w", err)
+		}
+
+		// Create a blob reader that includes the data stream
+		blobWithReader := &BlobWithReader{
+			FinishedBlob: finished,
+			Reader:       p.currentBlob.tempFile,
+			TempFile:     p.currentBlob.tempFile,
+		}
+
+		if err := p.blobHandler(blobWithReader); err != nil {
+			p.cleanupTempFile()
+			return fmt.Errorf("blob handler failed: %w", err)
+		}
+		// Note: blob handler is responsible for closing/cleaning up temp file
+		p.currentBlob = nil
+	} else {
+		log.Debug("No blob handler set", "blob_hash", blobHash[:8]+"...")
+		// No handler, need to read data for legacy behavior
+		if _, err := p.currentBlob.tempFile.Seek(0, io.SeekStart); err != nil {
+			p.cleanupTempFile()
+			return fmt.Errorf("seeking to read data: %w", err)
+		}
+
+		data, err := io.ReadAll(p.currentBlob.tempFile)
+		if err != nil {
+			p.cleanupTempFile()
+			return fmt.Errorf("reading blob data: %w", err)
+		}
+		finished.Data = data
+
+		p.finishedBlobs = append(p.finishedBlobs, finished)
+
+		// Cleanup
+		p.cleanupTempFile()
+		p.currentBlob = nil
+	}
+
+	return nil
+}
+
+// cleanupTempFile closes and deletes the temporary file of the blob in
+// progress. It does nothing when there is no blob or no file; close and remove
+// errors are ignored.
+func (p *Packer) cleanupTempFile() {
+	if p.currentBlob == nil || p.currentBlob.tempFile == nil {
+		return
+	}
+
+	tmp := p.currentBlob.tempFile
+	path := tmp.Name()
+	_ = tmp.Close()
+	_ = os.Remove(path)
+}
+
+// PackChunks adds every chunk in order, rolling over to a new blob whenever
+// the size limit is hit, and flushes the last blob at the end.
+func (p *Packer) PackChunks(chunks []*ChunkRef) error {
+	for i := range chunks {
+		c := chunks[i]
+
+		addErr := p.AddChunk(c)
+		switch {
+		case addErr == nil:
+			continue
+		case addErr != ErrBlobSizeLimitExceeded:
+			return fmt.Errorf("adding chunk %s: %w", c.Hash, addErr)
+		}
+
+		// The blob is full: close it out, then give the same chunk another go.
+		if err := p.FinalizeBlob(); err != nil {
+			return fmt.Errorf("finalizing blob before retry: %w", err)
+		}
+		if err := p.AddChunk(c); err != nil {
+			return fmt.Errorf("adding chunk %s after finalize: %w", c.Hash, err)
+		}
+	}
+
+	return p.Flush()
+}
--- a/internal/blob/packer_test.go
+++ b/internal/blob/packer_test.go
@@ -0,0 +1,328 @@
+package blob
+
+import (
+	"bytes"
+	"crypto/sha256"
+	"encoding/hex"
+	"io"
+	"testing"
+
+	"filippo.io/age"
+	"git.eeqj.de/sneak/vaultik/internal/crypto"
+	"git.eeqj.de/sneak/vaultik/internal/database"
+	"git.eeqj.de/sneak/vaultik/internal/log"
+	"github.com/klauspost/compress/zstd"
+)
+
+const (
+	// Test key pair from test/insecure-integration-test.key.
+	// These keys are committed to the repository and are for tests only.
+	testPrivateKey = "AGE-SECRET-KEY-19CR5YSFW59HM4TLD6GXVEDMZFTVVF7PPHKUT68TXSFPK7APHXA2QS2NJA5"
+	testPublicKey  = "age1ezrjmfpwsc95svdg0y54mums3zevgzu0x0ecq2f7tp8a05gl0sjq9q9wjg"
+)
+
+// TestPacker exercises the Packer without a blob handler, so finished blobs
+// are collected in memory and fetched with GetFinishedBlobs. Each subtest uses
+// its own test database and shares the encryptor/identity built from the test
+// key pair.
+func TestPacker(t *testing.T) {
+	// Initialize logger for tests
+	log.Initialize(log.Config{})
+
+	// Parse test identity
+	identity, err := age.ParseX25519Identity(testPrivateKey)
+	if err != nil {
+		t.Fatalf("failed to parse test identity: %v", err)
+	}
+
+	// Create test encryptor using the public key
+	enc, err := crypto.NewEncryptor([]string{testPublicKey})
+	if err != nil {
+		t.Fatalf("failed to create encryptor: %v", err)
+	}
+
+	t.Run("single chunk creates single blob", func(t *testing.T) {
+		// Create test database
+		db, err := database.NewTestDB()
+		if err != nil {
+			t.Fatalf("failed to create test db: %v", err)
+		}
+		defer func() { _ = db.Close() }()
+		repos := database.NewRepositories(db)
+
+		cfg := PackerConfig{
+			MaxBlobSize:      10 * 1024 * 1024, // 10MB
+			CompressionLevel: 3,
+			Encryptor:        enc,
+			Repositories:     repos,
+		}
+		packer, err := NewPacker(cfg)
+		if err != nil {
+			t.Fatalf("failed to create packer: %v", err)
+		}
+
+		// Create a chunk
+		data := []byte("Hello, World!")
+		hash := sha256.Sum256(data)
+		chunk := &ChunkRef{
+			Hash: hex.EncodeToString(hash[:]),
+			Data: data,
+		}
+
+		// Add chunk
+		if err := packer.AddChunk(chunk); err != nil {
+			t.Fatalf("failed to add chunk: %v", err)
+		}
+
+		// Flush
+		if err := packer.Flush(); err != nil {
+			t.Fatalf("failed to flush: %v", err)
+		}
+
+		// Get finished blobs
+		blobs := packer.GetFinishedBlobs()
+		if len(blobs) != 1 {
+			t.Fatalf("expected 1 blob, got %d", len(blobs))
+		}
+
+		blob := blobs[0]
+		if len(blob.Chunks) != 1 {
+			t.Errorf("expected 1 chunk in blob, got %d", len(blob.Chunks))
+		}
+
+		// Note: Very small data may not compress well
+		t.Logf("Compression: %d -> %d bytes", blob.Uncompressed, blob.Compressed)
+
+		// Decrypt the blob data
+		decrypted, err := age.Decrypt(bytes.NewReader(blob.Data), identity)
+		if err != nil {
+			t.Fatalf("failed to decrypt blob: %v", err)
+		}
+
+		// Decompress the decrypted data
+		reader, err := zstd.NewReader(decrypted)
+		if err != nil {
+			t.Fatalf("failed to create decompressor: %v", err)
+		}
+		defer reader.Close()
+
+		var decompressed bytes.Buffer
+		if _, err := io.Copy(&decompressed, reader); err != nil {
+			t.Fatalf("failed to decompress: %v", err)
+		}
+
+		if !bytes.Equal(decompressed.Bytes(), data) {
+			t.Error("decompressed data doesn't match original")
+		}
+	})
+
+	t.Run("multiple chunks packed together", func(t *testing.T) {
+		// Create test database
+		db, err := database.NewTestDB()
+		if err != nil {
+			t.Fatalf("failed to create test db: %v", err)
+		}
+		defer func() { _ = db.Close() }()
+		repos := database.NewRepositories(db)
+
+		cfg := PackerConfig{
+			MaxBlobSize:      10 * 1024 * 1024, // 10MB
+			CompressionLevel: 3,
+			Encryptor:        enc,
+			Repositories:     repos,
+		}
+		packer, err := NewPacker(cfg)
+		if err != nil {
+			t.Fatalf("failed to create packer: %v", err)
+		}
+
+		// Create multiple small chunks
+		chunks := make([]*ChunkRef, 10)
+		for i := 0; i < 10; i++ {
+			data := bytes.Repeat([]byte{byte(i)}, 1000)
+			hash := sha256.Sum256(data)
+			chunks[i] = &ChunkRef{
+				Hash: hex.EncodeToString(hash[:]),
+				Data: data,
+			}
+		}
+
+		// Add all chunks
+		for _, chunk := range chunks {
+			err := packer.AddChunk(chunk)
+			if err != nil {
+				t.Fatalf("failed to add chunk: %v", err)
+			}
+		}
+
+		// Flush
+		if err := packer.Flush(); err != nil {
+			t.Fatalf("failed to flush: %v", err)
+		}
+
+		// Should have one blob with all chunks
+		blobs := packer.GetFinishedBlobs()
+		if len(blobs) != 1 {
+			t.Fatalf("expected 1 blob, got %d", len(blobs))
+		}
+
+		if len(blobs[0].Chunks) != 10 {
+			t.Errorf("expected 10 chunks in blob, got %d", len(blobs[0].Chunks))
+		}
+
+		// Verify offsets are correct
+		expectedOffset := int64(0)
+		for i, chunkRef := range blobs[0].Chunks {
+			if chunkRef.Offset != expectedOffset {
+				t.Errorf("chunk %d: expected offset %d, got %d", i, expectedOffset, chunkRef.Offset)
+			}
+			if chunkRef.Length != 1000 {
+				t.Errorf("chunk %d: expected length 1000, got %d", i, chunkRef.Length)
+			}
+			expectedOffset += chunkRef.Length
+		}
+	})
+
+	t.Run("blob size limit enforced", func(t *testing.T) {
+		// Create test database
+		db, err := database.NewTestDB()
+		if err != nil {
+			t.Fatalf("failed to create test db: %v", err)
+		}
+		defer func() { _ = db.Close() }()
+		repos := database.NewRepositories(db)
+
+		// Small blob size limit to force multiple blobs
+		cfg := PackerConfig{
+			MaxBlobSize:      5000, // 5KB max
+			CompressionLevel: 3,
+			Encryptor:        enc,
+			Repositories:     repos,
+		}
+		packer, err := NewPacker(cfg)
+		if err != nil {
+			t.Fatalf("failed to create packer: %v", err)
+		}
+
+		// Create chunks that will exceed the limit
+		chunks := make([]*ChunkRef, 10)
+		for i := 0; i < 10; i++ {
+			data := bytes.Repeat([]byte{byte(i)}, 1000) // 1KB each
+			hash := sha256.Sum256(data)
+			chunks[i] = &ChunkRef{
+				Hash: hex.EncodeToString(hash[:]),
+				Data: data,
+			}
+		}
+
+		// Number of times the size limit forced an early finalize
+		blobCount := 0
+
+		// Add chunks and handle size limit errors
+		for _, chunk := range chunks {
+			err := packer.AddChunk(chunk)
+			if err == ErrBlobSizeLimitExceeded {
+				// Finalize current blob
+				if err := packer.FinalizeBlob(); err != nil {
+					t.Fatalf("failed to finalize blob: %v", err)
+				}
+				blobCount++
+				// Retry adding the chunk
+				if err := packer.AddChunk(chunk); err != nil {
+					t.Fatalf("failed to add chunk after finalize: %v", err)
+				}
+			} else if err != nil {
+				t.Fatalf("failed to add chunk: %v", err)
+			}
+		}
+
+		// Flush remaining
+		if err := packer.Flush(); err != nil {
+			t.Fatalf("failed to flush: %v", err)
+		}
+
+		// Get all blobs. No handler is configured, so blobs finalized early
+		// via FinalizeBlob are returned here as well; adding blobCount on top
+		// would count them twice.
+		blobs := packer.GetFinishedBlobs()
+		totalBlobs := len(blobs)
+
+		if blobCount == 0 {
+			t.Error("expected the size limit to be hit at least once")
+		}
+
+		// Should have multiple blobs due to size limit
+		if totalBlobs < 2 {
+			t.Errorf("expected multiple blobs due to size limit, got %d", totalBlobs)
+		}
+
+		for _, blob := range blobs {
+			// The limit is enforced on the uncompressed size
+			if blob.Uncompressed > cfg.MaxBlobSize {
+				t.Errorf("blob uncompressed size %d exceeds limit %d", blob.Uncompressed, cfg.MaxBlobSize)
+			}
+			// The stored size should stay in the same range (approximately)
+			if blob.Compressed > 6000 { // Allow some overhead
+				t.Errorf("blob size %d exceeds limit", blob.Compressed)
+			}
+		}
+	})
+
+	t.Run("with encryption", func(t *testing.T) {
+		// Create test database
+		db, err := database.NewTestDB()
+		if err != nil {
+			t.Fatalf("failed to create test db: %v", err)
+		}
+		defer func() { _ = db.Close() }()
+		repos := database.NewRepositories(db)
+
+		// Reuse the encryptor and identity created by the parent test
+		cfg := PackerConfig{
+			MaxBlobSize:      10 * 1024 * 1024, // 10MB
+			CompressionLevel: 3,
+			Encryptor:        enc,
+			Repositories:     repos,
+		}
+		packer, err := NewPacker(cfg)
+		if err != nil {
+			t.Fatalf("failed to create packer: %v", err)
+		}
+
+		// Create test data
+		data := bytes.Repeat([]byte("Test data for encryption!"), 100)
+		hash := sha256.Sum256(data)
+		chunk := &ChunkRef{
+			Hash: hex.EncodeToString(hash[:]),
+			Data: data,
+		}
+
+		// Add chunk and flush
+		if err := packer.AddChunk(chunk); err != nil {
+			t.Fatalf("failed to add chunk: %v", err)
+		}
+		if err := packer.Flush(); err != nil {
+			t.Fatalf("failed to flush: %v", err)
+		}
+
+		// Get blob
+		blobs := packer.GetFinishedBlobs()
+		if len(blobs) != 1 {
+			t.Fatalf("expected 1 blob, got %d", len(blobs))
+		}
+
+		blob := blobs[0]
+
+		// Decrypt the blob
+		decrypted, err := age.Decrypt(bytes.NewReader(blob.Data), identity)
+		if err != nil {
+			t.Fatalf("failed to decrypt blob: %v", err)
+		}
+
+		var decryptedData bytes.Buffer
+		if _, err := decryptedData.ReadFrom(decrypted); err != nil {
+			t.Fatalf("failed to read decrypted data: %v", err)
+		}
+
+		// Decompress
+		reader, err := zstd.NewReader(&decryptedData)
+		if err != nil {
+			t.Fatalf("failed to create decompressor: %v", err)
+		}
+		defer reader.Close()
+
+		var decompressed bytes.Buffer
+		if _, err := decompressed.ReadFrom(reader); err != nil {
+			t.Fatalf("failed to decompress: %v", err)
+		}
+
+		// Verify data
+		if !bytes.Equal(decompressed.Bytes(), data) {
+			t.Error("decrypted and decompressed data doesn't match original")
+		}
+	})
+}