Add deterministic deduplication, rclone backend, and database purge command
- Implement deterministic blob hashing using double SHA256 of uncompressed plaintext data, enabling deduplication even after local DB is cleared
- Add Stat() check before blob upload to skip existing blobs in storage
- Add rclone storage backend for additional remote storage options
- Add 'vaultik database purge' command to erase local state DB
- Add 'vaultik remote check' command to verify remote connectivity
- Show configured snapshots in 'vaultik snapshot list' output
- Skip macOS resource fork files (._*) when listing remote snapshots
- Use multi-threaded zstd compression (CPUs - 2 threads)
- Add writer tests for double hashing behavior
This commit is contained in:
@@ -5,30 +5,33 @@ import (
|
||||
"fmt"
|
||||
"hash"
|
||||
"io"
|
||||
"runtime"
|
||||
|
||||
"filippo.io/age"
|
||||
"github.com/klauspost/compress/zstd"
|
||||
)
|
||||
|
||||
// Writer wraps compression and encryption with SHA256 hashing
|
||||
// Writer wraps compression and encryption with SHA256 hashing.
|
||||
// Data flows: input -> tee(hasher, compressor -> encryptor -> destination)
|
||||
// The hash is computed on the uncompressed input for deterministic content-addressing.
|
||||
type Writer struct {
|
||||
writer io.Writer // Final destination
|
||||
teeWriter io.Writer // Tee to hasher and compressor
|
||||
compressor *zstd.Encoder // Compression layer
|
||||
encryptor io.WriteCloser // Encryption layer
|
||||
hasher hash.Hash // SHA256 hasher
|
||||
teeWriter io.Writer // Tees data to hasher
|
||||
hasher hash.Hash // SHA256 hasher (on uncompressed input)
|
||||
compressionLevel int
|
||||
bytesWritten int64
|
||||
}
|
||||
|
||||
// NewWriter creates a new Writer that compresses, encrypts, and hashes data
|
||||
// NewWriter creates a new Writer that compresses, encrypts, and hashes data.
|
||||
// The hash is computed on the uncompressed input for deterministic content-addressing.
|
||||
func NewWriter(w io.Writer, compressionLevel int, recipients []string) (*Writer, error) {
|
||||
// Validate compression level
|
||||
if err := validateCompressionLevel(compressionLevel); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
// Create SHA256 hasher
|
||||
// Create SHA256 hasher for the uncompressed input
|
||||
hasher := sha256.New()
|
||||
|
||||
// Parse recipients
|
||||
@@ -41,31 +44,36 @@ func NewWriter(w io.Writer, compressionLevel int, recipients []string) (*Writer,
|
||||
ageRecipients = append(ageRecipients, r)
|
||||
}
|
||||
|
||||
// Create encryption writer
|
||||
// Create encryption writer that outputs to destination
|
||||
encWriter, err := age.Encrypt(w, ageRecipients...)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("creating encryption writer: %w", err)
|
||||
}
|
||||
|
||||
// Calculate compression concurrency: CPUs - 2, minimum 1
|
||||
concurrency := runtime.NumCPU() - 2
|
||||
if concurrency < 1 {
|
||||
concurrency = 1
|
||||
}
|
||||
|
||||
// Create compression writer with encryption as destination
|
||||
compressor, err := zstd.NewWriter(encWriter,
|
||||
zstd.WithEncoderLevel(zstd.EncoderLevelFromZstd(compressionLevel)),
|
||||
zstd.WithEncoderConcurrency(1), // Use single thread for streaming
|
||||
zstd.WithEncoderConcurrency(concurrency),
|
||||
)
|
||||
if err != nil {
|
||||
_ = encWriter.Close()
|
||||
return nil, fmt.Errorf("creating compression writer: %w", err)
|
||||
}
|
||||
|
||||
// Create tee writer that writes to both compressor and hasher
|
||||
teeWriter := io.MultiWriter(compressor, hasher)
|
||||
// Create tee writer: input goes to both hasher and compressor
|
||||
teeWriter := io.MultiWriter(hasher, compressor)
|
||||
|
||||
return &Writer{
|
||||
writer: w,
|
||||
teeWriter: teeWriter,
|
||||
compressor: compressor,
|
||||
encryptor: encWriter,
|
||||
hasher: hasher,
|
||||
teeWriter: teeWriter,
|
||||
compressionLevel: compressionLevel,
|
||||
}, nil
|
||||
}
|
||||
@@ -92,9 +100,16 @@ func (w *Writer) Close() error {
|
||||
return nil
|
||||
}
|
||||
|
||||
// Sum256 returns the SHA256 hash of all data written
|
||||
// Sum256 returns the double SHA256 hash of the uncompressed input data.
|
||||
// Double hashing (SHA256(SHA256(data))) prevents information leakage about
|
||||
// the plaintext - an attacker cannot confirm existence of known content
|
||||
// by computing its hash and checking for a matching blob filename.
|
||||
func (w *Writer) Sum256() []byte {
|
||||
return w.hasher.Sum(nil)
|
||||
// First hash: SHA256(plaintext)
|
||||
firstHash := w.hasher.Sum(nil)
|
||||
// Second hash: SHA256(firstHash) - this is the blob ID
|
||||
secondHash := sha256.Sum256(firstHash)
|
||||
return secondHash[:]
|
||||
}
|
||||
|
||||
// BytesWritten returns the number of uncompressed bytes written
|
||||
|
||||
105
internal/blobgen/writer_test.go
Normal file
105
internal/blobgen/writer_test.go
Normal file
@@ -0,0 +1,105 @@
|
||||
package blobgen
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"crypto/rand"
|
||||
"crypto/sha256"
|
||||
"encoding/hex"
|
||||
"testing"
|
||||
|
||||
"github.com/stretchr/testify/assert"
|
||||
"github.com/stretchr/testify/require"
|
||||
)
|
||||
|
||||
// TestWriterHashIsDoubleHash verifies that Writer.Sum256() returns
|
||||
// the double hash SHA256(SHA256(plaintext)) for security.
|
||||
// Double hashing prevents attackers from confirming existence of known content.
|
||||
func TestWriterHashIsDoubleHash(t *testing.T) {
|
||||
// Test data - random data that doesn't compress well
|
||||
testData := make([]byte, 1024*1024) // 1MB
|
||||
_, err := rand.Read(testData)
|
||||
require.NoError(t, err)
|
||||
|
||||
// Test recipient (generated with age-keygen)
|
||||
testRecipient := "age1cplgrwj77ta54dnmydvvmzn64ltk83ankxl5sww04mrtmu62kv3s89gmvv"
|
||||
|
||||
// Create a buffer to capture the encrypted output
|
||||
var encryptedBuf bytes.Buffer
|
||||
|
||||
// Create blobgen writer
|
||||
writer, err := NewWriter(&encryptedBuf, 3, []string{testRecipient})
|
||||
require.NoError(t, err)
|
||||
|
||||
// Write test data
|
||||
n, err := writer.Write(testData)
|
||||
require.NoError(t, err)
|
||||
assert.Equal(t, len(testData), n)
|
||||
|
||||
// Close to flush all data
|
||||
err = writer.Close()
|
||||
require.NoError(t, err)
|
||||
|
||||
// Get the hash from the writer
|
||||
writerHash := hex.EncodeToString(writer.Sum256())
|
||||
|
||||
// Calculate the expected double hash: SHA256(SHA256(plaintext))
|
||||
firstHash := sha256.Sum256(testData)
|
||||
secondHash := sha256.Sum256(firstHash[:])
|
||||
expectedDoubleHash := hex.EncodeToString(secondHash[:])
|
||||
|
||||
// Also compute single hash to verify it's different
|
||||
singleHashStr := hex.EncodeToString(firstHash[:])
|
||||
|
||||
t.Logf("Input size: %d bytes", len(testData))
|
||||
t.Logf("Single hash (SHA256(data)): %s", singleHashStr)
|
||||
t.Logf("Double hash (SHA256(SHA256(data))): %s", expectedDoubleHash)
|
||||
t.Logf("Writer hash: %s", writerHash)
|
||||
|
||||
// The writer hash should match the double hash
|
||||
assert.Equal(t, expectedDoubleHash, writerHash,
|
||||
"Writer.Sum256() should return SHA256(SHA256(plaintext)) for security")
|
||||
|
||||
// Verify it's NOT the single hash (would leak information)
|
||||
assert.NotEqual(t, singleHashStr, writerHash,
|
||||
"Writer hash should not be single hash (would allow content confirmation attacks)")
|
||||
}
|
||||
|
||||
// TestWriterDeterministicHash verifies that the same input always produces
|
||||
// the same hash, even with non-deterministic encryption.
|
||||
func TestWriterDeterministicHash(t *testing.T) {
|
||||
// Test data
|
||||
testData := []byte("Hello, World! This is test data for deterministic hashing.")
|
||||
|
||||
// Test recipient
|
||||
testRecipient := "age1cplgrwj77ta54dnmydvvmzn64ltk83ankxl5sww04mrtmu62kv3s89gmvv"
|
||||
|
||||
// Create two writers and verify they produce the same hash
|
||||
var buf1, buf2 bytes.Buffer
|
||||
|
||||
writer1, err := NewWriter(&buf1, 3, []string{testRecipient})
|
||||
require.NoError(t, err)
|
||||
_, err = writer1.Write(testData)
|
||||
require.NoError(t, err)
|
||||
require.NoError(t, writer1.Close())
|
||||
|
||||
writer2, err := NewWriter(&buf2, 3, []string{testRecipient})
|
||||
require.NoError(t, err)
|
||||
_, err = writer2.Write(testData)
|
||||
require.NoError(t, err)
|
||||
require.NoError(t, writer2.Close())
|
||||
|
||||
hash1 := hex.EncodeToString(writer1.Sum256())
|
||||
hash2 := hex.EncodeToString(writer2.Sum256())
|
||||
|
||||
// Hashes should be identical (deterministic)
|
||||
assert.Equal(t, hash1, hash2, "Same input should produce same hash")
|
||||
|
||||
// Encrypted outputs should be different (non-deterministic encryption)
|
||||
assert.NotEqual(t, buf1.Bytes(), buf2.Bytes(),
|
||||
"Encrypted outputs should differ due to non-deterministic encryption")
|
||||
|
||||
t.Logf("Hash 1: %s", hash1)
|
||||
t.Logf("Hash 2: %s", hash2)
|
||||
t.Logf("Encrypted size 1: %d bytes", buf1.Len())
|
||||
t.Logf("Encrypted size 2: %d bytes", buf2.Len())
|
||||
}
|
||||
Reference in New Issue
Block a user