vaultik/internal/blobgen/writer_test.go
sneak 470bf648c4 Add deterministic deduplication, rclone backend, and database purge command
- Implement deterministic blob hashing using double SHA256 of uncompressed
  plaintext data, enabling deduplication even after local DB is cleared
- Add Stat() check before blob upload to skip existing blobs in storage
- Add rclone storage backend for additional remote storage options
- Add 'vaultik database purge' command to erase local state DB
- Add 'vaultik remote check' command to verify remote connectivity
- Show configured snapshots in 'vaultik snapshot list' output
- Skip macOS resource fork files (._*) when listing remote snapshots
- Use multi-threaded zstd compression (CPUs - 2 threads)
- Add writer tests for double hashing behavior
2026-01-28 15:50:17 -08:00

106 lines
3.4 KiB
Go

package blobgen
import (
"bytes"
"crypto/rand"
"crypto/sha256"
"encoding/hex"
"testing"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)
// TestWriterHashIsDoubleHash checks the digest reported by Writer.Sum256()
// once the writer has been closed. The test expects it to equal
// SHA256(SHA256(plaintext)) computed over the bytes that were written, and
// to differ from the plain SHA256(plaintext); the assertion messages frame
// this as a guard against content confirmation attacks.
func TestWriterHashIsDoubleHash(t *testing.T) {
	// Age recipient used only by this test (generated with age-keygen).
	const recipient = "age1cplgrwj77ta54dnmydvvmzn64ltk83ankxl5sww04mrtmu62kv3s89gmvv"

	// 1MB of random bytes, which do not compress well.
	plaintext := make([]byte, 1024*1024)
	_, err := rand.Read(plaintext)
	require.NoError(t, err)

	// Reference digests, computed independently of the writer under test.
	inner := sha256.Sum256(plaintext)
	outer := sha256.Sum256(inner[:])
	wantSingle := hex.EncodeToString(inner[:])
	wantDouble := hex.EncodeToString(outer[:])

	// Push the plaintext through a blobgen writer, collecting its output
	// in an in-memory buffer.
	var sink bytes.Buffer
	w, err := NewWriter(&sink, 3, []string{recipient})
	require.NoError(t, err)

	written, err := w.Write(plaintext)
	require.NoError(t, err)
	assert.Equal(t, len(plaintext), written)

	// The writer must be closed before its hash is read.
	require.NoError(t, w.Close())

	got := hex.EncodeToString(w.Sum256())

	t.Logf("Input size: %d bytes", len(plaintext))
	t.Logf("Single hash (SHA256(data)): %s", wantSingle)
	t.Logf("Double hash (SHA256(SHA256(data))): %s", wantDouble)
	t.Logf("Writer hash: %s", got)

	// The reported hash has to be the double hash ...
	assert.Equal(t, wantDouble, got,
		"Writer.Sum256() should return SHA256(SHA256(plaintext)) for security")

	// ... and must not be the single hash of the plaintext.
	assert.NotEqual(t, wantSingle, got,
		"Writer hash should not be single hash (would allow content confirmation attacks)")
}
// TestWriterDeterministicHash feeds identical plaintext through two
// independent writers and expects Sum256() to report the same value for
// both, while the bytes each writer emitted are expected to differ
// (the test treats the encryption as non-deterministic).
func TestWriterDeterministicHash(t *testing.T) {
	// Age recipient shared by both writers.
	const recipient = "age1cplgrwj77ta54dnmydvvmzn64ltk83ankxl5sww04mrtmu62kv3s89gmvv"

	plaintext := []byte("Hello, World! This is test data for deterministic hashing.")

	var firstOut, secondOut bytes.Buffer

	// First writer: create, write, close.
	first, err := NewWriter(&firstOut, 3, []string{recipient})
	require.NoError(t, err)
	_, err = first.Write(plaintext)
	require.NoError(t, err)
	err = first.Close()
	require.NoError(t, err)

	// Second writer: same input, separate output buffer.
	second, err := NewWriter(&secondOut, 3, []string{recipient})
	require.NoError(t, err)
	_, err = second.Write(plaintext)
	require.NoError(t, err)
	err = second.Close()
	require.NoError(t, err)

	firstSum := hex.EncodeToString(first.Sum256())
	secondSum := hex.EncodeToString(second.Sum256())

	// Identical input must yield identical hashes.
	assert.Equal(t, firstSum, secondSum, "Same input should produce same hash")

	// The emitted ciphertexts, however, are expected to differ.
	assert.NotEqual(t, firstOut.Bytes(), secondOut.Bytes(),
		"Encrypted outputs should differ due to non-deterministic encryption")

	t.Logf("Hash 1: %s", firstSum)
	t.Logf("Hash 2: %s", secondSum)
	t.Logf("Encrypted size 1: %d bytes", firstOut.Len())
	t.Logf("Encrypted size 2: %d bytes", secondOut.Len())
}