- Implement deterministic blob hashing using double SHA256 of uncompressed plaintext data, enabling deduplication even after local DB is cleared - Add Stat() check before blob upload to skip existing blobs in storage - Add rclone storage backend for additional remote storage options - Add 'vaultik database purge' command to erase local state DB - Add 'vaultik remote check' command to verify remote connectivity - Show configured snapshots in 'vaultik snapshot list' output - Skip macOS resource fork files (._*) when listing remote snapshots - Use multi-threaded zstd compression (CPUs - 2 threads) - Add writer tests for double hashing behavior
128 lines
3.8 KiB
Go
128 lines
3.8 KiB
Go
package blobgen
|
|
|
|
import (
|
|
"crypto/sha256"
|
|
"fmt"
|
|
"hash"
|
|
"io"
|
|
"runtime"
|
|
|
|
"filippo.io/age"
|
|
"github.com/klauspost/compress/zstd"
|
|
)
|
|
|
|
// Writer wraps compression and encryption with SHA256 hashing.
|
|
// Data flows: input -> tee(hasher, compressor -> encryptor -> destination)
|
|
// The hash is computed on the uncompressed input for deterministic content-addressing.
|
|
type Writer struct {
|
|
teeWriter io.Writer // Tee to hasher and compressor
|
|
compressor *zstd.Encoder // Compression layer
|
|
encryptor io.WriteCloser // Encryption layer
|
|
hasher hash.Hash // SHA256 hasher (on uncompressed input)
|
|
compressionLevel int
|
|
bytesWritten int64
|
|
}
|
|
|
|
// NewWriter creates a new Writer that compresses, encrypts, and hashes data.
|
|
// The hash is computed on the uncompressed input for deterministic content-addressing.
|
|
func NewWriter(w io.Writer, compressionLevel int, recipients []string) (*Writer, error) {
|
|
// Validate compression level
|
|
if err := validateCompressionLevel(compressionLevel); err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
// Create SHA256 hasher for the uncompressed input
|
|
hasher := sha256.New()
|
|
|
|
// Parse recipients
|
|
var ageRecipients []age.Recipient
|
|
for _, recipient := range recipients {
|
|
r, err := age.ParseX25519Recipient(recipient)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("parsing recipient %s: %w", recipient, err)
|
|
}
|
|
ageRecipients = append(ageRecipients, r)
|
|
}
|
|
|
|
// Create encryption writer that outputs to destination
|
|
encWriter, err := age.Encrypt(w, ageRecipients...)
|
|
if err != nil {
|
|
return nil, fmt.Errorf("creating encryption writer: %w", err)
|
|
}
|
|
|
|
// Calculate compression concurrency: CPUs - 2, minimum 1
|
|
concurrency := runtime.NumCPU() - 2
|
|
if concurrency < 1 {
|
|
concurrency = 1
|
|
}
|
|
|
|
// Create compression writer with encryption as destination
|
|
compressor, err := zstd.NewWriter(encWriter,
|
|
zstd.WithEncoderLevel(zstd.EncoderLevelFromZstd(compressionLevel)),
|
|
zstd.WithEncoderConcurrency(concurrency),
|
|
)
|
|
if err != nil {
|
|
_ = encWriter.Close()
|
|
return nil, fmt.Errorf("creating compression writer: %w", err)
|
|
}
|
|
|
|
// Create tee writer: input goes to both hasher and compressor
|
|
teeWriter := io.MultiWriter(hasher, compressor)
|
|
|
|
return &Writer{
|
|
teeWriter: teeWriter,
|
|
compressor: compressor,
|
|
encryptor: encWriter,
|
|
hasher: hasher,
|
|
compressionLevel: compressionLevel,
|
|
}, nil
|
|
}
|
|
|
|
// Write implements io.Writer
|
|
func (w *Writer) Write(p []byte) (n int, err error) {
|
|
n, err = w.teeWriter.Write(p)
|
|
w.bytesWritten += int64(n)
|
|
return n, err
|
|
}
|
|
|
|
// Close closes all layers and returns any errors
|
|
func (w *Writer) Close() error {
|
|
// Close compressor first
|
|
if err := w.compressor.Close(); err != nil {
|
|
return fmt.Errorf("closing compressor: %w", err)
|
|
}
|
|
|
|
// Then close encryptor
|
|
if err := w.encryptor.Close(); err != nil {
|
|
return fmt.Errorf("closing encryptor: %w", err)
|
|
}
|
|
|
|
return nil
|
|
}
|
|
|
|
// Sum256 returns the double SHA256 hash of the uncompressed input data.
|
|
// Double hashing (SHA256(SHA256(data))) prevents information leakage about
|
|
// the plaintext - an attacker cannot confirm existence of known content
|
|
// by computing its hash and checking for a matching blob filename.
|
|
func (w *Writer) Sum256() []byte {
|
|
// First hash: SHA256(plaintext)
|
|
firstHash := w.hasher.Sum(nil)
|
|
// Second hash: SHA256(firstHash) - this is the blob ID
|
|
secondHash := sha256.Sum256(firstHash)
|
|
return secondHash[:]
|
|
}
|
|
|
|
// BytesWritten returns the number of uncompressed bytes written
|
|
func (w *Writer) BytesWritten() int64 {
|
|
return w.bytesWritten
|
|
}
|
|
|
|
// validateCompressionLevel checks that level is an accepted zstd compression
// level. Levels 1 through 19 inclusive are accepted (zstd's default is 3);
// any other value produces an error that names the offending level.
//
// For orientation, the encoder's speed presets correspond roughly to:
// SpeedFastest = 1, SpeedDefault = 3, SpeedBetterCompression = 7,
// SpeedBestCompression = 11.
func validateCompressionLevel(level int) error {
	const (
		lowest  = 1
		highest = 19
	)
	if level >= lowest && level <= highest {
		return nil
	}
	return fmt.Errorf("invalid compression level %d: must be between 1 and 19", level)
}
|