Batch transactions per blob for improved performance
Previously, each chunk and blob_chunk was inserted in a separate transaction, leading to ~560k+ transactions for large backups. This change batches all database operations per blob:

- Chunks are queued in packer.pendingChunks during file processing
- When a blob finalizes, one transaction inserts all chunks and blob_chunks and updates the blob record
- The scanner tracks pending chunk hashes so it knows which files can be flushed
- Files are flushed once all of their chunks are committed to the DB
- The database is consistent after each blob finalize

This reduces the transaction count from O(chunks) to O(blobs), which for a 614k-file / 44GB backup means ~50-100 transactions instead of ~560k.
This commit is contained in:
@@ -47,6 +47,12 @@ type PackerConfig struct {
|
||||
Fs afero.Fs // Filesystem for temporary files
|
||||
}
|
||||
|
||||
// PendingChunk represents a chunk waiting to be inserted into the database.
|
||||
type PendingChunk struct {
|
||||
Hash string
|
||||
Size int64
|
||||
}
|
||||
|
||||
// Packer accumulates chunks and packs them into blobs.
|
||||
// It handles compression, encryption, and coordination with the database
|
||||
// to track blob metadata. Packer is thread-safe.
|
||||
@@ -64,6 +70,9 @@ type Packer struct {
|
||||
// Current blob being packed
|
||||
currentBlob *blobInProgress
|
||||
finishedBlobs []*FinishedBlob // Only used if no handler provided
|
||||
|
||||
// Pending chunks to be inserted when blob finalizes
|
||||
pendingChunks []PendingChunk
|
||||
}
|
||||
|
||||
// blobInProgress represents a blob being assembled
|
||||
@@ -114,8 +123,9 @@ type BlobChunkRef struct {
|
||||
// BlobWithReader wraps a FinishedBlob with its data reader
|
||||
type BlobWithReader struct {
|
||||
*FinishedBlob
|
||||
Reader io.ReadSeeker
|
||||
TempFile afero.File // Optional, only set for disk-based blobs
|
||||
Reader io.ReadSeeker
|
||||
TempFile afero.File // Optional, only set for disk-based blobs
|
||||
InsertedChunkHashes []string // Chunk hashes that were inserted to DB with this blob
|
||||
}
|
||||
|
||||
// NewPacker creates a new blob packer that accumulates chunks into blobs.
|
||||
@@ -152,6 +162,15 @@ func (p *Packer) SetBlobHandler(handler BlobHandler) {
|
||||
p.blobHandler = handler
|
||||
}
|
||||
|
||||
// AddPendingChunk queues a chunk to be inserted into the database when the
|
||||
// current blob is finalized. This batches chunk inserts to reduce transaction
|
||||
// overhead. Thread-safe.
|
||||
func (p *Packer) AddPendingChunk(hash string, size int64) {
|
||||
p.mu.Lock()
|
||||
defer p.mu.Unlock()
|
||||
p.pendingChunks = append(p.pendingChunks, PendingChunk{Hash: hash, Size: size})
|
||||
}
|
||||
|
||||
// AddChunk adds a chunk to the current blob being packed.
|
||||
// If adding the chunk would exceed MaxBlobSize, returns ErrBlobSizeLimitExceeded.
|
||||
// In this case, the caller should finalize the current blob and retry.
|
||||
@@ -314,23 +333,9 @@ func (p *Packer) addChunkToCurrentBlob(chunk *ChunkRef) error {
|
||||
p.currentBlob.chunks = append(p.currentBlob.chunks, chunkInfo)
|
||||
p.currentBlob.chunkSet[chunk.Hash] = true
|
||||
|
||||
// Store blob-chunk association in database immediately
|
||||
if p.repos != nil {
|
||||
blobChunk := &database.BlobChunk{
|
||||
BlobID: p.currentBlob.id,
|
||||
ChunkHash: chunk.Hash,
|
||||
Offset: offset,
|
||||
Length: chunkSize,
|
||||
}
|
||||
err := p.repos.WithTx(context.Background(), func(ctx context.Context, tx *sql.Tx) error {
|
||||
return p.repos.BlobChunks.Create(ctx, tx, blobChunk)
|
||||
})
|
||||
if err != nil {
|
||||
log.Error("Failed to store blob-chunk association in database", "error", err,
|
||||
"blob_id", p.currentBlob.id, "chunk_hash", chunk.Hash)
|
||||
// Continue anyway - we can reconstruct this later if needed
|
||||
}
|
||||
}
|
||||
// Note: blob_chunk records are inserted in batch when blob is finalized
|
||||
// to reduce transaction overhead. The chunk info is already stored in
|
||||
// p.currentBlob.chunks for later insertion.
|
||||
|
||||
// Update total size
|
||||
p.currentBlob.size += chunkSize
|
||||
@@ -392,16 +397,49 @@ func (p *Packer) finalizeCurrentBlob() error {
|
||||
})
|
||||
}
|
||||
|
||||
// Update blob record in database with hash and sizes
|
||||
// Get pending chunks (will be inserted to DB and reported to handler)
|
||||
chunksToInsert := p.pendingChunks
|
||||
p.pendingChunks = nil // Clear pending list
|
||||
|
||||
// Insert pending chunks, blob_chunks, and update blob in a single transaction
|
||||
if p.repos != nil {
|
||||
err := p.repos.WithTx(context.Background(), func(ctx context.Context, tx *sql.Tx) error {
|
||||
// First insert all pending chunks (required for blob_chunks FK)
|
||||
for _, chunk := range chunksToInsert {
|
||||
dbChunk := &database.Chunk{
|
||||
ChunkHash: chunk.Hash,
|
||||
Size: chunk.Size,
|
||||
}
|
||||
if err := p.repos.Chunks.Create(ctx, tx, dbChunk); err != nil {
|
||||
return fmt.Errorf("creating chunk: %w", err)
|
||||
}
|
||||
}
|
||||
|
||||
// Insert all blob_chunk records in batch
|
||||
for _, chunk := range p.currentBlob.chunks {
|
||||
blobChunk := &database.BlobChunk{
|
||||
BlobID: p.currentBlob.id,
|
||||
ChunkHash: chunk.Hash,
|
||||
Offset: chunk.Offset,
|
||||
Length: chunk.Size,
|
||||
}
|
||||
if err := p.repos.BlobChunks.Create(ctx, tx, blobChunk); err != nil {
|
||||
return fmt.Errorf("creating blob_chunk: %w", err)
|
||||
}
|
||||
}
|
||||
|
||||
// Update blob record with final hash and sizes
|
||||
return p.repos.Blobs.UpdateFinished(ctx, tx, p.currentBlob.id, blobHash,
|
||||
p.currentBlob.size, finalSize)
|
||||
})
|
||||
if err != nil {
|
||||
p.cleanupTempFile()
|
||||
return fmt.Errorf("updating blob record: %w", err)
|
||||
return fmt.Errorf("finalizing blob transaction: %w", err)
|
||||
}
|
||||
|
||||
log.Debug("Committed blob transaction",
|
||||
"chunks_inserted", len(chunksToInsert),
|
||||
"blob_chunks_inserted", len(p.currentBlob.chunks))
|
||||
}
|
||||
|
||||
// Create finished blob
|
||||
@@ -424,6 +462,12 @@ func (p *Packer) finalizeCurrentBlob() error {
|
||||
"ratio", fmt.Sprintf("%.2f", compressionRatio),
|
||||
"duration", time.Since(p.currentBlob.startTime))
|
||||
|
||||
// Collect inserted chunk hashes for the scanner to track
|
||||
var insertedChunkHashes []string
|
||||
for _, chunk := range chunksToInsert {
|
||||
insertedChunkHashes = append(insertedChunkHashes, chunk.Hash)
|
||||
}
|
||||
|
||||
// Call blob handler if set
|
||||
if p.blobHandler != nil {
|
||||
// Reset file position for handler
|
||||
@@ -434,9 +478,10 @@ func (p *Packer) finalizeCurrentBlob() error {
|
||||
|
||||
// Create a blob reader that includes the data stream
|
||||
blobWithReader := &BlobWithReader{
|
||||
FinishedBlob: finished,
|
||||
Reader: p.currentBlob.tempFile,
|
||||
TempFile: p.currentBlob.tempFile,
|
||||
FinishedBlob: finished,
|
||||
Reader: p.currentBlob.tempFile,
|
||||
TempFile: p.currentBlob.tempFile,
|
||||
InsertedChunkHashes: insertedChunkHashes,
|
||||
}
|
||||
|
||||
if err := p.blobHandler(blobWithReader); err != nil {
|
||||
|
||||
Reference in New Issue
Block a user