Fix foreign key constraints and improve snapshot tracking
- Add unified compression/encryption package in internal/blobgen
- Update DATAMODEL.md to reflect current schema implementation
- Refactor snapshot cleanup into well-named methods for clarity
- Add snapshot_id to uploads table to track new blobs per snapshot
- Fix blob count reporting for incremental backups
- Add DeleteOrphaned method to BlobChunkRepository
- Fix cleanup order to respect foreign key constraints
- Update tests to reflect schema changes
This commit is contained in:
@@ -16,22 +16,18 @@ package blob
|
||||
|
||||
import (
|
||||
"context"
|
||||
"crypto/sha256"
|
||||
"database/sql"
|
||||
"encoding/hex"
|
||||
"fmt"
|
||||
"hash"
|
||||
"io"
|
||||
"math/bits"
|
||||
"os"
|
||||
"runtime"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"git.eeqj.de/sneak/vaultik/internal/blobgen"
|
||||
"git.eeqj.de/sneak/vaultik/internal/database"
|
||||
"git.eeqj.de/sneak/vaultik/internal/log"
|
||||
"github.com/google/uuid"
|
||||
"github.com/klauspost/compress/zstd"
|
||||
)
|
||||
|
||||
// BlobHandler is a callback function invoked when a blob is finalized and ready for upload.
|
||||
@@ -45,7 +41,7 @@ type BlobHandler func(blob *BlobWithReader) error
|
||||
type PackerConfig struct {
|
||||
MaxBlobSize int64 // Maximum size of a blob before forcing finalization
|
||||
CompressionLevel int // Zstd compression level (1-19, higher = better compression)
|
||||
Encryptor Encryptor // Age encryptor for blob encryption (required)
|
||||
Recipients []string // Age recipients for encryption
|
||||
Repositories *database.Repositories // Database repositories for tracking blob metadata
|
||||
BlobHandler BlobHandler // Optional callback when blob is ready for upload
|
||||
}
|
||||
@@ -56,7 +52,7 @@ type PackerConfig struct {
|
||||
type Packer struct {
|
||||
maxBlobSize int64
|
||||
compressionLevel int
|
||||
encryptor Encryptor // Required - blobs are always encrypted
|
||||
recipients []string // Age recipients for encryption
|
||||
blobHandler BlobHandler // Called when blob is ready
|
||||
repos *database.Repositories // For creating blob records
|
||||
|
||||
@@ -68,25 +64,15 @@ type Packer struct {
|
||||
finishedBlobs []*FinishedBlob // Only used if no handler provided
|
||||
}
|
||||
|
||||
// Encryptor interface for encryption support
|
||||
type Encryptor interface {
|
||||
Encrypt(data []byte) ([]byte, error)
|
||||
EncryptWriter(dst io.Writer) (io.WriteCloser, error)
|
||||
}
|
||||
|
||||
// blobInProgress represents a blob being assembled
|
||||
type blobInProgress struct {
|
||||
id string // UUID of the blob
|
||||
chunks []*chunkInfo // Track chunk metadata
|
||||
chunkSet map[string]bool // Track unique chunks in this blob
|
||||
tempFile *os.File // Temporary file for encrypted compressed data
|
||||
hasher hash.Hash // For computing hash of final encrypted data
|
||||
compressor io.WriteCloser // Compression writer
|
||||
encryptor io.WriteCloser // Encryption writer (if encryption enabled)
|
||||
finalWriter io.Writer // The final writer in the chain
|
||||
startTime time.Time
|
||||
size int64 // Current uncompressed size
|
||||
compressedSize int64 // Current compressed size (estimated)
|
||||
id string // UUID of the blob
|
||||
chunks []*chunkInfo // Track chunk metadata
|
||||
chunkSet map[string]bool // Track unique chunks in this blob
|
||||
tempFile *os.File // Temporary file for encrypted compressed data
|
||||
writer *blobgen.Writer // Unified compression/encryption/hashing writer
|
||||
startTime time.Time
|
||||
size int64 // Current uncompressed size
|
||||
}
|
||||
|
||||
// ChunkRef represents a chunk to be added to a blob.
|
||||
@@ -134,8 +120,8 @@ type BlobWithReader struct {
|
||||
// The packer will automatically finalize blobs when they reach MaxBlobSize.
|
||||
// Returns an error if required configuration fields are missing or invalid.
|
||||
func NewPacker(cfg PackerConfig) (*Packer, error) {
|
||||
if cfg.Encryptor == nil {
|
||||
return nil, fmt.Errorf("encryptor is required - blobs must be encrypted")
|
||||
if len(cfg.Recipients) == 0 {
|
||||
return nil, fmt.Errorf("recipients are required - blobs must be encrypted")
|
||||
}
|
||||
if cfg.MaxBlobSize <= 0 {
|
||||
return nil, fmt.Errorf("max blob size must be positive")
|
||||
@@ -143,7 +129,7 @@ func NewPacker(cfg PackerConfig) (*Packer, error) {
|
||||
return &Packer{
|
||||
maxBlobSize: cfg.MaxBlobSize,
|
||||
compressionLevel: cfg.CompressionLevel,
|
||||
encryptor: cfg.Encryptor,
|
||||
recipients: cfg.Recipients,
|
||||
blobHandler: cfg.BlobHandler,
|
||||
repos: cfg.Repositories,
|
||||
finishedBlobs: make([]*FinishedBlob, 0),
|
||||
@@ -274,66 +260,24 @@ func (p *Packer) startNewBlob() error {
|
||||
return fmt.Errorf("creating temp file: %w", err)
|
||||
}
|
||||
|
||||
// Create blobgen writer for unified compression/encryption/hashing
|
||||
writer, err := blobgen.NewWriter(tempFile, p.compressionLevel, p.recipients)
|
||||
if err != nil {
|
||||
_ = tempFile.Close()
|
||||
_ = os.Remove(tempFile.Name())
|
||||
return fmt.Errorf("creating blobgen writer: %w", err)
|
||||
}
|
||||
|
||||
p.currentBlob = &blobInProgress{
|
||||
id: blobID,
|
||||
chunks: make([]*chunkInfo, 0),
|
||||
chunkSet: make(map[string]bool),
|
||||
startTime: time.Now().UTC(),
|
||||
tempFile: tempFile,
|
||||
hasher: sha256.New(),
|
||||
size: 0,
|
||||
compressedSize: 0,
|
||||
id: blobID,
|
||||
chunks: make([]*chunkInfo, 0),
|
||||
chunkSet: make(map[string]bool),
|
||||
startTime: time.Now().UTC(),
|
||||
tempFile: tempFile,
|
||||
writer: writer,
|
||||
size: 0,
|
||||
}
|
||||
|
||||
// Build writer chain: compressor -> [encryptor ->] hasher+file
|
||||
// This ensures only encrypted data touches disk
|
||||
|
||||
// Final destination: write to both file and hasher
|
||||
finalWriter := io.MultiWriter(tempFile, p.currentBlob.hasher)
|
||||
|
||||
// Set up encryption (required - closest to disk)
|
||||
encWriter, err := p.encryptor.EncryptWriter(finalWriter)
|
||||
if err != nil {
|
||||
_ = tempFile.Close()
|
||||
_ = os.Remove(tempFile.Name())
|
||||
return fmt.Errorf("creating encryption writer: %w", err)
|
||||
}
|
||||
p.currentBlob.encryptor = encWriter
|
||||
currentWriter := encWriter
|
||||
|
||||
// Set up compression (processes data before encryption)
|
||||
encoderLevel := zstd.EncoderLevel(p.compressionLevel)
|
||||
if p.compressionLevel < 1 {
|
||||
encoderLevel = zstd.SpeedDefault
|
||||
} else if p.compressionLevel > 9 {
|
||||
encoderLevel = zstd.SpeedBestCompression
|
||||
}
|
||||
|
||||
// Calculate window size based on blob size
|
||||
windowSize := p.maxBlobSize / 100
|
||||
if windowSize < (1 << 20) { // Min 1MB
|
||||
windowSize = 1 << 20
|
||||
} else if windowSize > (128 << 20) { // Max 128MB
|
||||
windowSize = 128 << 20
|
||||
}
|
||||
windowSize = 1 << uint(63-bits.LeadingZeros64(uint64(windowSize)))
|
||||
|
||||
compWriter, err := zstd.NewWriter(currentWriter,
|
||||
zstd.WithEncoderLevel(encoderLevel),
|
||||
zstd.WithEncoderConcurrency(runtime.NumCPU()),
|
||||
zstd.WithWindowSize(int(windowSize)),
|
||||
)
|
||||
if err != nil {
|
||||
if p.currentBlob.encryptor != nil {
|
||||
_ = p.currentBlob.encryptor.Close()
|
||||
}
|
||||
_ = tempFile.Close()
|
||||
_ = os.Remove(tempFile.Name())
|
||||
return fmt.Errorf("creating compression writer: %w", err)
|
||||
}
|
||||
p.currentBlob.compressor = compWriter
|
||||
p.currentBlob.finalWriter = compWriter
|
||||
|
||||
log.Debug("Started new blob", "blob_id", blobID, "temp_file", tempFile.Name())
|
||||
return nil
|
||||
}
|
||||
@@ -349,8 +293,8 @@ func (p *Packer) addChunkToCurrentBlob(chunk *ChunkRef) error {
|
||||
// Track offset before writing
|
||||
offset := p.currentBlob.size
|
||||
|
||||
// Write to the final writer (compression -> encryption -> disk)
|
||||
if _, err := p.currentBlob.finalWriter.Write(chunk.Data); err != nil {
|
||||
// Write to the blobgen writer (compression -> encryption -> disk)
|
||||
if _, err := p.currentBlob.writer.Write(chunk.Data); err != nil {
|
||||
return fmt.Errorf("writing to blob stream: %w", err)
|
||||
}
|
||||
|
||||
@@ -402,16 +346,10 @@ func (p *Packer) finalizeCurrentBlob() error {
|
||||
return nil
|
||||
}
|
||||
|
||||
// Close compression writer to flush all data
|
||||
if err := p.currentBlob.compressor.Close(); err != nil {
|
||||
// Close blobgen writer to flush all data
|
||||
if err := p.currentBlob.writer.Close(); err != nil {
|
||||
p.cleanupTempFile()
|
||||
return fmt.Errorf("closing compression writer: %w", err)
|
||||
}
|
||||
|
||||
// Close encryption writer
|
||||
if err := p.currentBlob.encryptor.Close(); err != nil {
|
||||
p.cleanupTempFile()
|
||||
return fmt.Errorf("closing encryption writer: %w", err)
|
||||
return fmt.Errorf("closing blobgen writer: %w", err)
|
||||
}
|
||||
|
||||
// Sync file to ensure all data is written
|
||||
@@ -433,8 +371,8 @@ func (p *Packer) finalizeCurrentBlob() error {
|
||||
return fmt.Errorf("seeking to start: %w", err)
|
||||
}
|
||||
|
||||
// Get hash from hasher (of final encrypted data)
|
||||
finalHash := p.currentBlob.hasher.Sum(nil)
|
||||
// Get hash from blobgen writer (of final encrypted data)
|
||||
finalHash := p.currentBlob.writer.Sum256()
|
||||
blobHash := hex.EncodeToString(finalHash)
|
||||
|
||||
// Create chunk references with offsets
|
||||
|
||||
Reference in New Issue
Block a user