vaultik/internal/blobgen/writer.go
sneak 470bf648c4 Add deterministic deduplication, rclone backend, and database purge command
- Implement deterministic blob hashing using double SHA256 of uncompressed
  plaintext data, enabling deduplication even after local DB is cleared
- Add Stat() check before blob upload to skip existing blobs in storage
- Add rclone storage backend for additional remote storage options
- Add 'vaultik database purge' command to erase local state DB
- Add 'vaultik remote check' command to verify remote connectivity
- Show configured snapshots in 'vaultik snapshot list' output
- Skip macOS resource fork files (._*) when listing remote snapshots
- Use multi-threaded zstd compression (CPUs - 2 threads)
- Add writer tests for double hashing behavior
2026-01-28 15:50:17 -08:00

128 lines
3.8 KiB
Go

package blobgen
import (
"crypto/sha256"
"fmt"
"hash"
"io"
"runtime"
"filippo.io/age"
"github.com/klauspost/compress/zstd"
)
// Writer is an io.Writer that hashes, compresses, and encrypts a stream.
//
// Every chunk passed to Write is handed to a SHA256 hasher and to a zstd
// encoder; the encoder's output feeds an age encryption writer, which in turn
// writes to the destination supplied to NewWriter. Because the hasher sees
// the bytes before compression, the digest depends only on the input.
type Writer struct {
	// teeWriter duplicates each Write to hasher and compressor.
	teeWriter io.Writer

	// compressor is the zstd layer; its output goes to encryptor.
	compressor *zstd.Encoder

	// encryptor is the encryption layer writing to the final destination.
	encryptor io.WriteCloser

	// hasher accumulates SHA256 state over the uncompressed input.
	hasher hash.Hash

	// compressionLevel is the level this Writer was constructed with.
	compressionLevel int

	// bytesWritten is the running total of input bytes accepted by Write.
	bytesWritten int64
}
// NewWriter builds a Writer that hashes, compresses, and encrypts everything
// written to it, sending the encrypted stream to w.
//
// compressionLevel must pass validateCompressionLevel. Each entry of
// recipients is parsed with age.ParseX25519Recipient, and the first entry that
// fails to parse aborts construction. An error is also returned when the age
// encryption writer or the zstd encoder cannot be created.
//
// The returned Writer computes its hash over the uncompressed input, so the
// digest does not depend on compression or encryption output.
func NewWriter(w io.Writer, compressionLevel int, recipients []string) (*Writer, error) {
	if err := validateCompressionLevel(compressionLevel); err != nil {
		return nil, err
	}

	// Convert every recipient string before any encryption state exists.
	parsed := make([]age.Recipient, 0, len(recipients))
	for _, raw := range recipients {
		rcpt, err := age.ParseX25519Recipient(raw)
		if err != nil {
			return nil, fmt.Errorf("parsing recipient %s: %w", raw, err)
		}
		parsed = append(parsed, rcpt)
	}

	// Innermost layer: encryption writing straight to the destination.
	encryptor, err := age.Encrypt(w, parsed...)
	if err != nil {
		return nil, fmt.Errorf("creating encryption writer: %w", err)
	}

	// Leave two CPUs free for the rest of the process, but never drop below
	// a single compression worker.
	workers := 1
	if spare := runtime.NumCPU() - 2; spare > 1 {
		workers = spare
	}

	// Middle layer: compression feeding the encryption writer.
	encoder, err := zstd.NewWriter(
		encryptor,
		zstd.WithEncoderLevel(zstd.EncoderLevelFromZstd(compressionLevel)),
		zstd.WithEncoderConcurrency(workers),
	)
	if err != nil {
		// Best-effort cleanup: the encoder failure is the error worth
		// reporting, so a secondary Close error is deliberately dropped.
		_ = encryptor.Close()
		return nil, fmt.Errorf("creating compression writer: %w", err)
	}

	// Outermost layer: every input byte goes to the hasher and the encoder.
	digest := sha256.New()
	out := &Writer{
		teeWriter:        io.MultiWriter(digest, encoder),
		compressor:       encoder,
		encryptor:        encryptor,
		hasher:           digest,
		compressionLevel: compressionLevel,
	}
	return out, nil
}
// Write implements io.Writer. It forwards p to the tee writer (hasher plus
// compressor) and adds the number of bytes that writer reports as accepted to
// the running total, including when an error is returned alongside them.
func (w *Writer) Write(p []byte) (n int, err error) {
	accepted, writeErr := w.teeWriter.Write(p)
	w.bytesWritten += int64(accepted)
	return accepted, writeErr
}
// Close shuts down the compression layer and then the encryption layer, in
// that order. It stops at the first failure: if closing the compressor fails,
// the encryptor is not closed and the wrapped compressor error is returned.
func (w *Writer) Close() error {
	// The compressor writes into the encryptor, so it has to finish first.
	compressErr := w.compressor.Close()
	if compressErr != nil {
		return fmt.Errorf("closing compressor: %w", compressErr)
	}

	encryptErr := w.encryptor.Close()
	if encryptErr != nil {
		return fmt.Errorf("closing encryptor: %w", encryptErr)
	}

	return nil
}
// Sum256 returns SHA256(SHA256(input)) over the uncompressed bytes written so
// far. The inner digest is read from the running hasher without resetting it;
// the outer digest is used as the blob ID.
//
// The intent of the second pass is that the plain SHA256 of the content is
// never exposed as the blob name.
// NOTE(review): both passes are unkeyed, so anyone holding the plaintext can
// recompute this value — confirm that matches the intended threat model.
func (w *Writer) Sum256() []byte {
	inner := w.hasher.Sum(nil)
	blobID := sha256.Sum256(inner)
	return blobID[:]
}
// BytesWritten reports the running total of input (pre-compression) bytes
// that Write has counted so far.
func (w *Writer) BytesWritten() int64 {
	total := w.bytesWritten
	return total
}
// validateCompressionLevel checks that level lies in the accepted range of
// 1 through 19 inclusive. It returns nil for an accepted level and an error
// naming the rejected value otherwise.
func validateCompressionLevel(level int) error {
	switch {
	case level >= 1 && level <= 19:
		return nil
	default:
		return fmt.Errorf("invalid compression level %d: must be between 1 and 19", level)
	}
}