Add deterministic deduplication, rclone backend, and database purge command

- Implement deterministic blob hashing using double SHA256 of uncompressed
  plaintext data, enabling deduplication even after local DB is cleared
- Add Stat() check before blob upload to skip existing blobs in storage
- Add rclone storage backend for additional remote storage options
- Add 'vaultik database purge' command to erase local state DB
- Add 'vaultik remote check' command to verify remote connectivity
- Show configured snapshots in 'vaultik snapshot list' output
- Skip macOS resource fork files (._*) when listing remote snapshots
- Use multi-threaded zstd compression (CPU count minus 2 threads, minimum 1)
- Add writer tests for double hashing behavior
This commit is contained in:
2026-01-28 15:50:17 -08:00
parent bdaaadf990
commit 470bf648c4
26 changed files with 2966 additions and 777 deletions

View File

@@ -5,30 +5,33 @@ import (
"fmt"
"hash"
"io"
"runtime"
"filippo.io/age"
"github.com/klauspost/compress/zstd"
)
// Writer wraps compression and encryption with SHA256 hashing
// Writer wraps compression and encryption with SHA256 hashing.
// Data flows: input -> tee(hasher, compressor -> encryptor -> destination)
// The hash is computed on the uncompressed input for deterministic content-addressing.
type Writer struct {
writer io.Writer // Final destination
teeWriter io.Writer // Tee to hasher and compressor
compressor *zstd.Encoder // Compression layer
encryptor io.WriteCloser // Encryption layer
hasher hash.Hash // SHA256 hasher
teeWriter io.Writer // Tees data to hasher
hasher hash.Hash // SHA256 hasher (on uncompressed input)
compressionLevel int
bytesWritten int64
}
// NewWriter creates a new Writer that compresses, encrypts, and hashes data
// NewWriter creates a new Writer that compresses, encrypts, and hashes data.
// The hash is computed on the uncompressed input for deterministic content-addressing.
func NewWriter(w io.Writer, compressionLevel int, recipients []string) (*Writer, error) {
// Validate compression level
if err := validateCompressionLevel(compressionLevel); err != nil {
return nil, err
}
// Create SHA256 hasher
// Create SHA256 hasher for the uncompressed input
hasher := sha256.New()
// Parse recipients
@@ -41,31 +44,36 @@ func NewWriter(w io.Writer, compressionLevel int, recipients []string) (*Writer,
ageRecipients = append(ageRecipients, r)
}
// Create encryption writer
// Create encryption writer that outputs to destination
encWriter, err := age.Encrypt(w, ageRecipients...)
if err != nil {
return nil, fmt.Errorf("creating encryption writer: %w", err)
}
// Calculate compression concurrency: CPUs - 2, minimum 1
concurrency := runtime.NumCPU() - 2
if concurrency < 1 {
concurrency = 1
}
// Create compression writer with encryption as destination
compressor, err := zstd.NewWriter(encWriter,
zstd.WithEncoderLevel(zstd.EncoderLevelFromZstd(compressionLevel)),
zstd.WithEncoderConcurrency(1), // Use single thread for streaming
zstd.WithEncoderConcurrency(concurrency),
)
if err != nil {
_ = encWriter.Close()
return nil, fmt.Errorf("creating compression writer: %w", err)
}
// Create tee writer that writes to both compressor and hasher
teeWriter := io.MultiWriter(compressor, hasher)
// Create tee writer: input goes to both hasher and compressor
teeWriter := io.MultiWriter(hasher, compressor)
return &Writer{
writer: w,
teeWriter: teeWriter,
compressor: compressor,
encryptor: encWriter,
hasher: hasher,
teeWriter: teeWriter,
compressionLevel: compressionLevel,
}, nil
}
@@ -92,9 +100,16 @@ func (w *Writer) Close() error {
return nil
}
// Sum256 returns the SHA256 hash of all data written
// Sum256 returns the double SHA256 hash of the uncompressed input data.
// Double hashing (SHA256(SHA256(data))) prevents information leakage about
// the plaintext - an attacker cannot confirm existence of known content
// by computing its hash and checking for a matching blob filename.
func (w *Writer) Sum256() []byte {
return w.hasher.Sum(nil)
// First hash: SHA256(plaintext)
firstHash := w.hasher.Sum(nil)
// Second hash: SHA256(firstHash) - this is the blob ID
secondHash := sha256.Sum256(firstHash)
return secondHash[:]
}
// BytesWritten returns the number of uncompressed bytes written