Fix foreign key constraints and improve snapshot tracking

- Add unified compression/encryption package in internal/blobgen
- Update DATAMODEL.md to reflect current schema implementation
- Refactor snapshot cleanup into well-named methods for clarity
- Add snapshot_id to uploads table to track new blobs per snapshot
- Fix blob count reporting for incremental backups
- Add DeleteOrphaned method to BlobChunkRepository
- Fix cleanup order to respect foreign key constraints
- Update tests to reflect schema changes
This commit is contained in:
2025-07-26 02:22:25 +02:00
parent 78af626759
commit d3afa65420
28 changed files with 994 additions and 534 deletions

View File

@@ -16,22 +16,18 @@ package blob
import (
"context"
"crypto/sha256"
"database/sql"
"encoding/hex"
"fmt"
"hash"
"io"
"math/bits"
"os"
"runtime"
"sync"
"time"
"git.eeqj.de/sneak/vaultik/internal/blobgen"
"git.eeqj.de/sneak/vaultik/internal/database"
"git.eeqj.de/sneak/vaultik/internal/log"
"github.com/google/uuid"
"github.com/klauspost/compress/zstd"
)
// BlobHandler is a callback function invoked when a blob is finalized and ready for upload.
@@ -45,7 +41,7 @@ type BlobHandler func(blob *BlobWithReader) error
type PackerConfig struct {
MaxBlobSize int64 // Maximum size of a blob before forcing finalization; NewPacker returns an error when this is <= 0
CompressionLevel int // Zstd compression level (1-19, higher = better compression)
Encryptor Encryptor // Age encryptor for blob encryption (required; NewPacker returns an error when nil)
Recipients []string // Age recipients for encryption (required; NewPacker returns an error when the list is empty)
Repositories *database.Repositories // Database repositories for tracking blob metadata
BlobHandler BlobHandler // Optional callback when blob is ready for upload
}
@@ -56,7 +52,7 @@ type PackerConfig struct {
type Packer struct {
maxBlobSize int64
compressionLevel int
encryptor Encryptor // Required - blobs are always encrypted
recipients []string // Age recipients for encryption
blobHandler BlobHandler // Called when blob is ready
repos *database.Repositories // For creating blob records
@@ -68,25 +64,15 @@ type Packer struct {
finishedBlobs []*FinishedBlob // Only used if no handler provided
}
// Encryptor is the encryption dependency a Packer is configured with.
// NewPacker rejects a nil Encryptor, so every blob goes through it.
type Encryptor interface {
// Encrypt maps a byte slice to a byte slice or an error. It is not called in
// the packer code shown here; presumably it returns the ciphertext of data
// (TODO confirm against the implementation).
Encrypt(data []byte) ([]byte, error)
// EncryptWriter returns a WriteCloser layered over dst. startNewBlob passes a
// writer that fans out to the temp file and the hasher as dst, and stacks the
// zstd compressor on top of the returned writer. The returned writer must be
// closed after the compressor is closed.
EncryptWriter(dst io.Writer) (io.WriteCloser, error)
}
// blobInProgress represents a blob being assembled.
// startNewBlob creates it, addChunkToCurrentBlob writes chunk data into it,
// and finalizeCurrentBlob closes its writer and reads back the final hash.
type blobInProgress struct {
id string // UUID of the blob
chunks []*chunkInfo // Track chunk metadata
chunkSet map[string]bool // Track unique chunks in this blob
tempFile *os.File // Temporary file for encrypted compressed data
hasher hash.Hash // For computing hash of final encrypted data
compressor io.WriteCloser // Compression writer
encryptor io.WriteCloser // Encryption writer (always set by startNewBlob; encryption is required)
finalWriter io.Writer // The final writer in the chain
startTime time.Time
size int64 // Current uncompressed size
compressedSize int64 // Current compressed size (estimated)
id string // UUID of the blob
chunks []*chunkInfo // Track chunk metadata
chunkSet map[string]bool // Track unique chunks in this blob
tempFile *os.File // Temporary file for encrypted compressed data
writer *blobgen.Writer // Unified compression/encryption/hashing writer
startTime time.Time
size int64 // Current uncompressed size
}
// ChunkRef represents a chunk to be added to a blob.
@@ -134,8 +120,8 @@ type BlobWithReader struct {
// The packer will automatically finalize blobs when they reach MaxBlobSize.
// Returns an error if required configuration fields are missing or invalid.
func NewPacker(cfg PackerConfig) (*Packer, error) {
if cfg.Encryptor == nil {
return nil, fmt.Errorf("encryptor is required - blobs must be encrypted")
if len(cfg.Recipients) == 0 {
return nil, fmt.Errorf("recipients are required - blobs must be encrypted")
}
if cfg.MaxBlobSize <= 0 {
return nil, fmt.Errorf("max blob size must be positive")
@@ -143,7 +129,7 @@ func NewPacker(cfg PackerConfig) (*Packer, error) {
return &Packer{
maxBlobSize: cfg.MaxBlobSize,
compressionLevel: cfg.CompressionLevel,
encryptor: cfg.Encryptor,
recipients: cfg.Recipients,
blobHandler: cfg.BlobHandler,
repos: cfg.Repositories,
finishedBlobs: make([]*FinishedBlob, 0),
@@ -274,66 +260,24 @@ func (p *Packer) startNewBlob() error {
return fmt.Errorf("creating temp file: %w", err)
}
// Create blobgen writer for unified compression/encryption/hashing
writer, err := blobgen.NewWriter(tempFile, p.compressionLevel, p.recipients)
if err != nil {
_ = tempFile.Close()
_ = os.Remove(tempFile.Name())
return fmt.Errorf("creating blobgen writer: %w", err)
}
p.currentBlob = &blobInProgress{
id: blobID,
chunks: make([]*chunkInfo, 0),
chunkSet: make(map[string]bool),
startTime: time.Now().UTC(),
tempFile: tempFile,
hasher: sha256.New(),
size: 0,
compressedSize: 0,
id: blobID,
chunks: make([]*chunkInfo, 0),
chunkSet: make(map[string]bool),
startTime: time.Now().UTC(),
tempFile: tempFile,
writer: writer,
size: 0,
}
// Build writer chain: compressor -> encryptor -> hasher+file
// This ensures only encrypted data touches disk
// Final destination: write to both file and hasher
finalWriter := io.MultiWriter(tempFile, p.currentBlob.hasher)
// Set up encryption (required - closest to disk)
encWriter, err := p.encryptor.EncryptWriter(finalWriter)
if err != nil {
_ = tempFile.Close()
_ = os.Remove(tempFile.Name())
return fmt.Errorf("creating encryption writer: %w", err)
}
p.currentBlob.encryptor = encWriter
currentWriter := encWriter
// Set up compression (processes data before encryption)
encoderLevel := zstd.EncoderLevel(p.compressionLevel)
if p.compressionLevel < 1 {
encoderLevel = zstd.SpeedDefault
} else if p.compressionLevel > 9 {
encoderLevel = zstd.SpeedBestCompression
}
// Calculate window size based on blob size
windowSize := p.maxBlobSize / 100
if windowSize < (1 << 20) { // Min 1MB
windowSize = 1 << 20
} else if windowSize > (128 << 20) { // Max 128MB
windowSize = 128 << 20
}
windowSize = 1 << uint(63-bits.LeadingZeros64(uint64(windowSize)))
compWriter, err := zstd.NewWriter(currentWriter,
zstd.WithEncoderLevel(encoderLevel),
zstd.WithEncoderConcurrency(runtime.NumCPU()),
zstd.WithWindowSize(int(windowSize)),
)
if err != nil {
if p.currentBlob.encryptor != nil {
_ = p.currentBlob.encryptor.Close()
}
_ = tempFile.Close()
_ = os.Remove(tempFile.Name())
return fmt.Errorf("creating compression writer: %w", err)
}
p.currentBlob.compressor = compWriter
p.currentBlob.finalWriter = compWriter
log.Debug("Started new blob", "blob_id", blobID, "temp_file", tempFile.Name())
return nil
}
@@ -349,8 +293,8 @@ func (p *Packer) addChunkToCurrentBlob(chunk *ChunkRef) error {
// Track offset before writing
offset := p.currentBlob.size
// Write to the final writer (compression -> encryption -> disk)
if _, err := p.currentBlob.finalWriter.Write(chunk.Data); err != nil {
// Write to the blobgen writer (compression -> encryption -> disk)
if _, err := p.currentBlob.writer.Write(chunk.Data); err != nil {
return fmt.Errorf("writing to blob stream: %w", err)
}
@@ -402,16 +346,10 @@ func (p *Packer) finalizeCurrentBlob() error {
return nil
}
// Close compression writer to flush all data
if err := p.currentBlob.compressor.Close(); err != nil {
// Close blobgen writer to flush all data
if err := p.currentBlob.writer.Close(); err != nil {
p.cleanupTempFile()
return fmt.Errorf("closing compression writer: %w", err)
}
// Close encryption writer
if err := p.currentBlob.encryptor.Close(); err != nil {
p.cleanupTempFile()
return fmt.Errorf("closing encryption writer: %w", err)
return fmt.Errorf("closing blobgen writer: %w", err)
}
// Sync file to ensure all data is written
@@ -433,8 +371,8 @@ func (p *Packer) finalizeCurrentBlob() error {
return fmt.Errorf("seeking to start: %w", err)
}
// Get hash from hasher (of final encrypted data)
finalHash := p.currentBlob.hasher.Sum(nil)
// Get hash from blobgen writer (of final encrypted data)
finalHash := p.currentBlob.writer.Sum256()
blobHash := hex.EncodeToString(finalHash)
// Create chunk references with offsets

View File

@@ -2,13 +2,14 @@ package blob
import (
"bytes"
"context"
"crypto/sha256"
"database/sql"
"encoding/hex"
"io"
"testing"
"filippo.io/age"
"git.eeqj.de/sneak/vaultik/internal/crypto"
"git.eeqj.de/sneak/vaultik/internal/database"
"git.eeqj.de/sneak/vaultik/internal/log"
"github.com/klauspost/compress/zstd"
@@ -30,12 +31,6 @@ func TestPacker(t *testing.T) {
t.Fatalf("failed to parse test identity: %v", err)
}
// Create test encryptor using the public key
enc, err := crypto.NewEncryptor([]string{testPublicKey})
if err != nil {
t.Fatalf("failed to create encryptor: %v", err)
}
t.Run("single chunk creates single blob", func(t *testing.T) {
// Create test database
db, err := database.NewTestDB()
@@ -48,7 +43,7 @@ func TestPacker(t *testing.T) {
cfg := PackerConfig{
MaxBlobSize: 10 * 1024 * 1024, // 10MB
CompressionLevel: 3,
Encryptor: enc,
Recipients: []string{testPublicKey},
Repositories: repos,
}
packer, err := NewPacker(cfg)
@@ -59,8 +54,22 @@ func TestPacker(t *testing.T) {
// Create a chunk
data := []byte("Hello, World!")
hash := sha256.Sum256(data)
hashStr := hex.EncodeToString(hash[:])
// Create chunk in database first
dbChunk := &database.Chunk{
ChunkHash: hashStr,
Size: int64(len(data)),
}
err = repos.WithTx(context.Background(), func(ctx context.Context, tx *sql.Tx) error {
return repos.Chunks.Create(ctx, tx, dbChunk)
})
if err != nil {
t.Fatalf("failed to create chunk in db: %v", err)
}
chunk := &ChunkRef{
Hash: hex.EncodeToString(hash[:]),
Hash: hashStr,
Data: data,
}
@@ -123,7 +132,7 @@ func TestPacker(t *testing.T) {
cfg := PackerConfig{
MaxBlobSize: 10 * 1024 * 1024, // 10MB
CompressionLevel: 3,
Encryptor: enc,
Recipients: []string{testPublicKey},
Repositories: repos,
}
packer, err := NewPacker(cfg)
@@ -136,8 +145,22 @@ func TestPacker(t *testing.T) {
for i := 0; i < 10; i++ {
data := bytes.Repeat([]byte{byte(i)}, 1000)
hash := sha256.Sum256(data)
hashStr := hex.EncodeToString(hash[:])
// Create chunk in database first
dbChunk := &database.Chunk{
ChunkHash: hashStr,
Size: int64(len(data)),
}
err = repos.WithTx(context.Background(), func(ctx context.Context, tx *sql.Tx) error {
return repos.Chunks.Create(ctx, tx, dbChunk)
})
if err != nil {
t.Fatalf("failed to create chunk in db: %v", err)
}
chunks[i] = &ChunkRef{
Hash: hex.EncodeToString(hash[:]),
Hash: hashStr,
Data: data,
}
}
@@ -191,7 +214,7 @@ func TestPacker(t *testing.T) {
cfg := PackerConfig{
MaxBlobSize: 5000, // 5KB max
CompressionLevel: 3,
Encryptor: enc,
Recipients: []string{testPublicKey},
Repositories: repos,
}
packer, err := NewPacker(cfg)
@@ -204,8 +227,22 @@ func TestPacker(t *testing.T) {
for i := 0; i < 10; i++ {
data := bytes.Repeat([]byte{byte(i)}, 1000) // 1KB each
hash := sha256.Sum256(data)
hashStr := hex.EncodeToString(hash[:])
// Create chunk in database first
dbChunk := &database.Chunk{
ChunkHash: hashStr,
Size: int64(len(data)),
}
err = repos.WithTx(context.Background(), func(ctx context.Context, tx *sql.Tx) error {
return repos.Chunks.Create(ctx, tx, dbChunk)
})
if err != nil {
t.Fatalf("failed to create chunk in db: %v", err)
}
chunks[i] = &ChunkRef{
Hash: hex.EncodeToString(hash[:]),
Hash: hashStr,
Data: data,
}
}
@@ -265,7 +302,7 @@ func TestPacker(t *testing.T) {
cfg := PackerConfig{
MaxBlobSize: 10 * 1024 * 1024, // 10MB
CompressionLevel: 3,
Encryptor: enc,
Recipients: []string{testPublicKey},
Repositories: repos,
}
packer, err := NewPacker(cfg)
@@ -276,8 +313,22 @@ func TestPacker(t *testing.T) {
// Create test data
data := bytes.Repeat([]byte("Test data for encryption!"), 100)
hash := sha256.Sum256(data)
hashStr := hex.EncodeToString(hash[:])
// Create chunk in database first
dbChunk := &database.Chunk{
ChunkHash: hashStr,
Size: int64(len(data)),
}
err = repos.WithTx(context.Background(), func(ctx context.Context, tx *sql.Tx) error {
return repos.Chunks.Create(ctx, tx, dbChunk)
})
if err != nil {
t.Fatalf("failed to create chunk in db: %v", err)
}
chunk := &ChunkRef{
Hash: hex.EncodeToString(hash[:]),
Hash: hashStr,
Data: data,
}