Fix foreign key constraints and improve snapshot tracking
- Add unified compression/encryption package in internal/blobgen
- Update DATAMODEL.md to reflect current schema implementation
- Refactor snapshot cleanup into well-named methods for clarity
- Add snapshot_id to uploads table to track new blobs per snapshot
- Fix blob count reporting for incremental backups
- Add DeleteOrphaned method to BlobChunkRepository
- Fix cleanup order to respect foreign key constraints
- Update tests to reflect schema changes
This commit is contained in:
@@ -16,22 +16,18 @@ package blob
|
||||
|
||||
import (
|
||||
"context"
|
||||
"crypto/sha256"
|
||||
"database/sql"
|
||||
"encoding/hex"
|
||||
"fmt"
|
||||
"hash"
|
||||
"io"
|
||||
"math/bits"
|
||||
"os"
|
||||
"runtime"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"git.eeqj.de/sneak/vaultik/internal/blobgen"
|
||||
"git.eeqj.de/sneak/vaultik/internal/database"
|
||||
"git.eeqj.de/sneak/vaultik/internal/log"
|
||||
"github.com/google/uuid"
|
||||
"github.com/klauspost/compress/zstd"
|
||||
)
|
||||
|
||||
// BlobHandler is a callback function invoked when a blob is finalized and ready for upload.
|
||||
@@ -45,7 +41,7 @@ type BlobHandler func(blob *BlobWithReader) error
|
||||
type PackerConfig struct {
|
||||
MaxBlobSize int64 // Maximum size of a blob before forcing finalization
|
||||
CompressionLevel int // Zstd compression level (1-19, higher = better compression)
|
||||
Encryptor Encryptor // Age encryptor for blob encryption (required)
|
||||
Recipients []string // Age recipients for encryption
|
||||
Repositories *database.Repositories // Database repositories for tracking blob metadata
|
||||
BlobHandler BlobHandler // Optional callback when blob is ready for upload
|
||||
}
|
||||
@@ -56,7 +52,7 @@ type PackerConfig struct {
|
||||
type Packer struct {
|
||||
maxBlobSize int64
|
||||
compressionLevel int
|
||||
encryptor Encryptor // Required - blobs are always encrypted
|
||||
recipients []string // Age recipients for encryption
|
||||
blobHandler BlobHandler // Called when blob is ready
|
||||
repos *database.Repositories // For creating blob records
|
||||
|
||||
@@ -68,25 +64,15 @@ type Packer struct {
|
||||
finishedBlobs []*FinishedBlob // Only used if no handler provided
|
||||
}
|
||||
|
||||
// Encryptor interface for encryption support
|
||||
type Encryptor interface {
|
||||
Encrypt(data []byte) ([]byte, error)
|
||||
EncryptWriter(dst io.Writer) (io.WriteCloser, error)
|
||||
}
|
||||
|
||||
// blobInProgress represents a blob being assembled
|
||||
type blobInProgress struct {
|
||||
id string // UUID of the blob
|
||||
chunks []*chunkInfo // Track chunk metadata
|
||||
chunkSet map[string]bool // Track unique chunks in this blob
|
||||
tempFile *os.File // Temporary file for encrypted compressed data
|
||||
hasher hash.Hash // For computing hash of final encrypted data
|
||||
compressor io.WriteCloser // Compression writer
|
||||
encryptor io.WriteCloser // Encryption writer (if encryption enabled)
|
||||
finalWriter io.Writer // The final writer in the chain
|
||||
startTime time.Time
|
||||
size int64 // Current uncompressed size
|
||||
compressedSize int64 // Current compressed size (estimated)
|
||||
id string // UUID of the blob
|
||||
chunks []*chunkInfo // Track chunk metadata
|
||||
chunkSet map[string]bool // Track unique chunks in this blob
|
||||
tempFile *os.File // Temporary file for encrypted compressed data
|
||||
writer *blobgen.Writer // Unified compression/encryption/hashing writer
|
||||
startTime time.Time
|
||||
size int64 // Current uncompressed size
|
||||
}
|
||||
|
||||
// ChunkRef represents a chunk to be added to a blob.
|
||||
@@ -134,8 +120,8 @@ type BlobWithReader struct {
|
||||
// The packer will automatically finalize blobs when they reach MaxBlobSize.
|
||||
// Returns an error if required configuration fields are missing or invalid.
|
||||
func NewPacker(cfg PackerConfig) (*Packer, error) {
|
||||
if cfg.Encryptor == nil {
|
||||
return nil, fmt.Errorf("encryptor is required - blobs must be encrypted")
|
||||
if len(cfg.Recipients) == 0 {
|
||||
return nil, fmt.Errorf("recipients are required - blobs must be encrypted")
|
||||
}
|
||||
if cfg.MaxBlobSize <= 0 {
|
||||
return nil, fmt.Errorf("max blob size must be positive")
|
||||
@@ -143,7 +129,7 @@ func NewPacker(cfg PackerConfig) (*Packer, error) {
|
||||
return &Packer{
|
||||
maxBlobSize: cfg.MaxBlobSize,
|
||||
compressionLevel: cfg.CompressionLevel,
|
||||
encryptor: cfg.Encryptor,
|
||||
recipients: cfg.Recipients,
|
||||
blobHandler: cfg.BlobHandler,
|
||||
repos: cfg.Repositories,
|
||||
finishedBlobs: make([]*FinishedBlob, 0),
|
||||
@@ -274,66 +260,24 @@ func (p *Packer) startNewBlob() error {
|
||||
return fmt.Errorf("creating temp file: %w", err)
|
||||
}
|
||||
|
||||
// Create blobgen writer for unified compression/encryption/hashing
|
||||
writer, err := blobgen.NewWriter(tempFile, p.compressionLevel, p.recipients)
|
||||
if err != nil {
|
||||
_ = tempFile.Close()
|
||||
_ = os.Remove(tempFile.Name())
|
||||
return fmt.Errorf("creating blobgen writer: %w", err)
|
||||
}
|
||||
|
||||
p.currentBlob = &blobInProgress{
|
||||
id: blobID,
|
||||
chunks: make([]*chunkInfo, 0),
|
||||
chunkSet: make(map[string]bool),
|
||||
startTime: time.Now().UTC(),
|
||||
tempFile: tempFile,
|
||||
hasher: sha256.New(),
|
||||
size: 0,
|
||||
compressedSize: 0,
|
||||
id: blobID,
|
||||
chunks: make([]*chunkInfo, 0),
|
||||
chunkSet: make(map[string]bool),
|
||||
startTime: time.Now().UTC(),
|
||||
tempFile: tempFile,
|
||||
writer: writer,
|
||||
size: 0,
|
||||
}
|
||||
|
||||
// Build writer chain: compressor -> [encryptor ->] hasher+file
|
||||
// This ensures only encrypted data touches disk
|
||||
|
||||
// Final destination: write to both file and hasher
|
||||
finalWriter := io.MultiWriter(tempFile, p.currentBlob.hasher)
|
||||
|
||||
// Set up encryption (required - closest to disk)
|
||||
encWriter, err := p.encryptor.EncryptWriter(finalWriter)
|
||||
if err != nil {
|
||||
_ = tempFile.Close()
|
||||
_ = os.Remove(tempFile.Name())
|
||||
return fmt.Errorf("creating encryption writer: %w", err)
|
||||
}
|
||||
p.currentBlob.encryptor = encWriter
|
||||
currentWriter := encWriter
|
||||
|
||||
// Set up compression (processes data before encryption)
|
||||
encoderLevel := zstd.EncoderLevel(p.compressionLevel)
|
||||
if p.compressionLevel < 1 {
|
||||
encoderLevel = zstd.SpeedDefault
|
||||
} else if p.compressionLevel > 9 {
|
||||
encoderLevel = zstd.SpeedBestCompression
|
||||
}
|
||||
|
||||
// Calculate window size based on blob size
|
||||
windowSize := p.maxBlobSize / 100
|
||||
if windowSize < (1 << 20) { // Min 1MB
|
||||
windowSize = 1 << 20
|
||||
} else if windowSize > (128 << 20) { // Max 128MB
|
||||
windowSize = 128 << 20
|
||||
}
|
||||
windowSize = 1 << uint(63-bits.LeadingZeros64(uint64(windowSize)))
|
||||
|
||||
compWriter, err := zstd.NewWriter(currentWriter,
|
||||
zstd.WithEncoderLevel(encoderLevel),
|
||||
zstd.WithEncoderConcurrency(runtime.NumCPU()),
|
||||
zstd.WithWindowSize(int(windowSize)),
|
||||
)
|
||||
if err != nil {
|
||||
if p.currentBlob.encryptor != nil {
|
||||
_ = p.currentBlob.encryptor.Close()
|
||||
}
|
||||
_ = tempFile.Close()
|
||||
_ = os.Remove(tempFile.Name())
|
||||
return fmt.Errorf("creating compression writer: %w", err)
|
||||
}
|
||||
p.currentBlob.compressor = compWriter
|
||||
p.currentBlob.finalWriter = compWriter
|
||||
|
||||
log.Debug("Started new blob", "blob_id", blobID, "temp_file", tempFile.Name())
|
||||
return nil
|
||||
}
|
||||
@@ -349,8 +293,8 @@ func (p *Packer) addChunkToCurrentBlob(chunk *ChunkRef) error {
|
||||
// Track offset before writing
|
||||
offset := p.currentBlob.size
|
||||
|
||||
// Write to the final writer (compression -> encryption -> disk)
|
||||
if _, err := p.currentBlob.finalWriter.Write(chunk.Data); err != nil {
|
||||
// Write to the blobgen writer (compression -> encryption -> disk)
|
||||
if _, err := p.currentBlob.writer.Write(chunk.Data); err != nil {
|
||||
return fmt.Errorf("writing to blob stream: %w", err)
|
||||
}
|
||||
|
||||
@@ -402,16 +346,10 @@ func (p *Packer) finalizeCurrentBlob() error {
|
||||
return nil
|
||||
}
|
||||
|
||||
// Close compression writer to flush all data
|
||||
if err := p.currentBlob.compressor.Close(); err != nil {
|
||||
// Close blobgen writer to flush all data
|
||||
if err := p.currentBlob.writer.Close(); err != nil {
|
||||
p.cleanupTempFile()
|
||||
return fmt.Errorf("closing compression writer: %w", err)
|
||||
}
|
||||
|
||||
// Close encryption writer
|
||||
if err := p.currentBlob.encryptor.Close(); err != nil {
|
||||
p.cleanupTempFile()
|
||||
return fmt.Errorf("closing encryption writer: %w", err)
|
||||
return fmt.Errorf("closing blobgen writer: %w", err)
|
||||
}
|
||||
|
||||
// Sync file to ensure all data is written
|
||||
@@ -433,8 +371,8 @@ func (p *Packer) finalizeCurrentBlob() error {
|
||||
return fmt.Errorf("seeking to start: %w", err)
|
||||
}
|
||||
|
||||
// Get hash from hasher (of final encrypted data)
|
||||
finalHash := p.currentBlob.hasher.Sum(nil)
|
||||
// Get hash from blobgen writer (of final encrypted data)
|
||||
finalHash := p.currentBlob.writer.Sum256()
|
||||
blobHash := hex.EncodeToString(finalHash)
|
||||
|
||||
// Create chunk references with offsets
|
||||
|
||||
@@ -2,13 +2,14 @@ package blob
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"context"
|
||||
"crypto/sha256"
|
||||
"database/sql"
|
||||
"encoding/hex"
|
||||
"io"
|
||||
"testing"
|
||||
|
||||
"filippo.io/age"
|
||||
"git.eeqj.de/sneak/vaultik/internal/crypto"
|
||||
"git.eeqj.de/sneak/vaultik/internal/database"
|
||||
"git.eeqj.de/sneak/vaultik/internal/log"
|
||||
"github.com/klauspost/compress/zstd"
|
||||
@@ -30,12 +31,6 @@ func TestPacker(t *testing.T) {
|
||||
t.Fatalf("failed to parse test identity: %v", err)
|
||||
}
|
||||
|
||||
// Create test encryptor using the public key
|
||||
enc, err := crypto.NewEncryptor([]string{testPublicKey})
|
||||
if err != nil {
|
||||
t.Fatalf("failed to create encryptor: %v", err)
|
||||
}
|
||||
|
||||
t.Run("single chunk creates single blob", func(t *testing.T) {
|
||||
// Create test database
|
||||
db, err := database.NewTestDB()
|
||||
@@ -48,7 +43,7 @@ func TestPacker(t *testing.T) {
|
||||
cfg := PackerConfig{
|
||||
MaxBlobSize: 10 * 1024 * 1024, // 10MB
|
||||
CompressionLevel: 3,
|
||||
Encryptor: enc,
|
||||
Recipients: []string{testPublicKey},
|
||||
Repositories: repos,
|
||||
}
|
||||
packer, err := NewPacker(cfg)
|
||||
@@ -59,8 +54,22 @@ func TestPacker(t *testing.T) {
|
||||
// Create a chunk
|
||||
data := []byte("Hello, World!")
|
||||
hash := sha256.Sum256(data)
|
||||
hashStr := hex.EncodeToString(hash[:])
|
||||
|
||||
// Create chunk in database first
|
||||
dbChunk := &database.Chunk{
|
||||
ChunkHash: hashStr,
|
||||
Size: int64(len(data)),
|
||||
}
|
||||
err = repos.WithTx(context.Background(), func(ctx context.Context, tx *sql.Tx) error {
|
||||
return repos.Chunks.Create(ctx, tx, dbChunk)
|
||||
})
|
||||
if err != nil {
|
||||
t.Fatalf("failed to create chunk in db: %v", err)
|
||||
}
|
||||
|
||||
chunk := &ChunkRef{
|
||||
Hash: hex.EncodeToString(hash[:]),
|
||||
Hash: hashStr,
|
||||
Data: data,
|
||||
}
|
||||
|
||||
@@ -123,7 +132,7 @@ func TestPacker(t *testing.T) {
|
||||
cfg := PackerConfig{
|
||||
MaxBlobSize: 10 * 1024 * 1024, // 10MB
|
||||
CompressionLevel: 3,
|
||||
Encryptor: enc,
|
||||
Recipients: []string{testPublicKey},
|
||||
Repositories: repos,
|
||||
}
|
||||
packer, err := NewPacker(cfg)
|
||||
@@ -136,8 +145,22 @@ func TestPacker(t *testing.T) {
|
||||
for i := 0; i < 10; i++ {
|
||||
data := bytes.Repeat([]byte{byte(i)}, 1000)
|
||||
hash := sha256.Sum256(data)
|
||||
hashStr := hex.EncodeToString(hash[:])
|
||||
|
||||
// Create chunk in database first
|
||||
dbChunk := &database.Chunk{
|
||||
ChunkHash: hashStr,
|
||||
Size: int64(len(data)),
|
||||
}
|
||||
err = repos.WithTx(context.Background(), func(ctx context.Context, tx *sql.Tx) error {
|
||||
return repos.Chunks.Create(ctx, tx, dbChunk)
|
||||
})
|
||||
if err != nil {
|
||||
t.Fatalf("failed to create chunk in db: %v", err)
|
||||
}
|
||||
|
||||
chunks[i] = &ChunkRef{
|
||||
Hash: hex.EncodeToString(hash[:]),
|
||||
Hash: hashStr,
|
||||
Data: data,
|
||||
}
|
||||
}
|
||||
@@ -191,7 +214,7 @@ func TestPacker(t *testing.T) {
|
||||
cfg := PackerConfig{
|
||||
MaxBlobSize: 5000, // 5KB max
|
||||
CompressionLevel: 3,
|
||||
Encryptor: enc,
|
||||
Recipients: []string{testPublicKey},
|
||||
Repositories: repos,
|
||||
}
|
||||
packer, err := NewPacker(cfg)
|
||||
@@ -204,8 +227,22 @@ func TestPacker(t *testing.T) {
|
||||
for i := 0; i < 10; i++ {
|
||||
data := bytes.Repeat([]byte{byte(i)}, 1000) // 1KB each
|
||||
hash := sha256.Sum256(data)
|
||||
hashStr := hex.EncodeToString(hash[:])
|
||||
|
||||
// Create chunk in database first
|
||||
dbChunk := &database.Chunk{
|
||||
ChunkHash: hashStr,
|
||||
Size: int64(len(data)),
|
||||
}
|
||||
err = repos.WithTx(context.Background(), func(ctx context.Context, tx *sql.Tx) error {
|
||||
return repos.Chunks.Create(ctx, tx, dbChunk)
|
||||
})
|
||||
if err != nil {
|
||||
t.Fatalf("failed to create chunk in db: %v", err)
|
||||
}
|
||||
|
||||
chunks[i] = &ChunkRef{
|
||||
Hash: hex.EncodeToString(hash[:]),
|
||||
Hash: hashStr,
|
||||
Data: data,
|
||||
}
|
||||
}
|
||||
@@ -265,7 +302,7 @@ func TestPacker(t *testing.T) {
|
||||
cfg := PackerConfig{
|
||||
MaxBlobSize: 10 * 1024 * 1024, // 10MB
|
||||
CompressionLevel: 3,
|
||||
Encryptor: enc,
|
||||
Recipients: []string{testPublicKey},
|
||||
Repositories: repos,
|
||||
}
|
||||
packer, err := NewPacker(cfg)
|
||||
@@ -276,8 +313,22 @@ func TestPacker(t *testing.T) {
|
||||
// Create test data
|
||||
data := bytes.Repeat([]byte("Test data for encryption!"), 100)
|
||||
hash := sha256.Sum256(data)
|
||||
hashStr := hex.EncodeToString(hash[:])
|
||||
|
||||
// Create chunk in database first
|
||||
dbChunk := &database.Chunk{
|
||||
ChunkHash: hashStr,
|
||||
Size: int64(len(data)),
|
||||
}
|
||||
err = repos.WithTx(context.Background(), func(ctx context.Context, tx *sql.Tx) error {
|
||||
return repos.Chunks.Create(ctx, tx, dbChunk)
|
||||
})
|
||||
if err != nil {
|
||||
t.Fatalf("failed to create chunk in db: %v", err)
|
||||
}
|
||||
|
||||
chunk := &ChunkRef{
|
||||
Hash: hex.EncodeToString(hash[:]),
|
||||
Hash: hashStr,
|
||||
Data: data,
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user