refactor: stream blob hash verification instead of buffering in memory

FetchAndDecryptBlob now returns an io.ReadCloser backed by a hashVerifyReader
that computes the double-SHA-256 of the plaintext on the fly as it is read.
The hash is verified on Close(), once the stream has been fully consumed.
This avoids loading entire blobs into memory, where a large blob could
exceed available RAM.

Addresses review feedback on PR #39.
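
A usage sketch of the new calling pattern (readVerifiedBlob is a
hypothetical helper name; its body mirrors the updated downloadBlob
below):

    func readVerifiedBlob(ctx context.Context, v *Vaultik, blobHash string, size int64, id age.Identity) ([]byte, error) {
        rc, err := v.FetchAndDecryptBlob(ctx, blobHash, size, id)
        if err != nil {
            return nil, err
        }
        data, err := io.ReadAll(rc) // hashing happens incrementally during Read
        if err != nil {
            _ = rc.Close()
            return nil, err
        }
        // Close compares the running double-SHA-256 against blobHash. Note that
        // verification only runs if the stream was read to EOF.
        if err := rc.Close(); err != nil {
            return nil, err
        }
        return data, nil
    }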
clawbot 2026-02-20 02:29:19 -08:00
parent 2bdbf38be6
commit 22efd90f8c
3 changed files with 87 additions and 31 deletions

View File

@@ -5,6 +5,7 @@ import (
 	"context"
 	"crypto/sha256"
 	"encoding/hex"
+	"io"
 	"strings"
 	"testing"
@@ -58,12 +59,19 @@ func TestFetchAndDecryptBlobVerifiesHash(t *testing.T) {
 	ctx := context.Background()
 	t.Run("correct hash succeeds", func(t *testing.T) {
-		result, err := tv.FetchAndDecryptBlob(ctx, correctHash, int64(len(encryptedData)), identity)
+		rc, err := tv.FetchAndDecryptBlob(ctx, correctHash, int64(len(encryptedData)), identity)
 		if err != nil {
 			t.Fatalf("expected success, got error: %v", err)
 		}
-		if !bytes.Equal(result.Data, plaintext) {
-			t.Fatalf("decrypted data mismatch: got %q, want %q", result.Data, plaintext)
+		data, err := io.ReadAll(rc)
+		if err != nil {
+			t.Fatalf("reading stream: %v", err)
+		}
+		if err := rc.Close(); err != nil {
+			t.Fatalf("close (hash verification) failed: %v", err)
+		}
+		if !bytes.Equal(data, plaintext) {
+			t.Fatalf("decrypted data mismatch: got %q, want %q", data, plaintext)
 		}
 	})
@@ -75,7 +83,13 @@ func TestFetchAndDecryptBlobVerifiesHash(t *testing.T) {
 		mockStorage.data[fakePath] = encryptedData
 		mockStorage.mu.Unlock()
-		_, err := tv.FetchAndDecryptBlob(ctx, fakeHash, int64(len(encryptedData)), identity)
+		rc, err := tv.FetchAndDecryptBlob(ctx, fakeHash, int64(len(encryptedData)), identity)
+		if err != nil {
+			t.Fatalf("unexpected error opening stream: %v", err)
+		}
+		// Read all data; the hash is verified on Close
+		_, _ = io.ReadAll(rc)
+		err = rc.Close()
 		if err == nil {
 			t.Fatal("expected error for mismatched hash, got nil")
 		}

View File

@@ -5,49 +5,79 @@ import (
 	"crypto/sha256"
 	"encoding/hex"
 	"fmt"
+	"hash"
 	"io"
 	"filippo.io/age"
 	"git.eeqj.de/sneak/vaultik/internal/blobgen"
 )
-// FetchAndDecryptBlobResult holds the result of fetching and decrypting a blob.
-type FetchAndDecryptBlobResult struct {
-	Data []byte
+// hashVerifyReader wraps a reader and computes a double-SHA-256 hash of all
+// data read through it. The hash is verified against the expected blob hash
+// when Close is called. This allows streaming blob verification without
+// buffering the entire blob in memory.
+type hashVerifyReader struct {
+	reader   io.ReadCloser // underlying decrypted blob reader
+	fetcher  io.ReadCloser // raw fetched stream (closed on Close)
+	hasher   hash.Hash     // running SHA-256 of plaintext
+	blobHash string        // expected double-SHA-256 hex
+	done     bool          // EOF reached
 }
-// FetchAndDecryptBlob downloads a blob, decrypts and decompresses it, then
-// verifies that the double-SHA-256 hash of the plaintext matches blobHash.
-// This ensures blob integrity end-to-end during restore operations.
-func (v *Vaultik) FetchAndDecryptBlob(ctx context.Context, blobHash string, expectedSize int64, identity age.Identity) (*FetchAndDecryptBlobResult, error) {
+func (h *hashVerifyReader) Read(p []byte) (int, error) {
+	n, err := h.reader.Read(p)
+	if n > 0 {
+		h.hasher.Write(p[:n])
+	}
+	if err == io.EOF {
+		h.done = true
+	}
+	return n, err
+}
+// Close verifies the hash (if the stream was fully read) and closes the underlying readers.
+func (h *hashVerifyReader) Close() error {
+	readerErr := h.reader.Close()
+	fetcherErr := h.fetcher.Close()
+	if h.done {
+		firstHash := h.hasher.Sum(nil)
+		secondHasher := sha256.New()
+		secondHasher.Write(firstHash)
+		actualHashHex := hex.EncodeToString(secondHasher.Sum(nil))
+		if actualHashHex != h.blobHash {
+			return fmt.Errorf("blob hash mismatch: expected %s, got %s", h.blobHash[:16], actualHashHex[:16])
+		}
+	}
+	if readerErr != nil {
+		return readerErr
+	}
+	return fetcherErr
+}
+// FetchAndDecryptBlob downloads a blob, decrypts and decompresses it, and
+// returns a streaming reader that computes the double-SHA-256 hash on the fly.
+// The hash is verified when the returned reader is closed (after fully reading).
+// This avoids buffering the entire blob in memory.
+func (v *Vaultik) FetchAndDecryptBlob(ctx context.Context, blobHash string, expectedSize int64, identity age.Identity) (io.ReadCloser, error) {
 	rc, _, err := v.FetchBlob(ctx, blobHash, expectedSize)
 	if err != nil {
 		return nil, err
 	}
-	defer func() { _ = rc.Close() }()
 	reader, err := blobgen.NewReader(rc, identity)
 	if err != nil {
+		_ = rc.Close()
 		return nil, fmt.Errorf("creating blob reader: %w", err)
 	}
-	defer func() { _ = reader.Close() }()
-	data, err := io.ReadAll(reader)
-	if err != nil {
-		return nil, fmt.Errorf("reading blob data: %w", err)
-	}
-	// Verify blob integrity: compute double-SHA-256 of the decrypted plaintext
-	// and compare to the expected blob hash. The blob hash is SHA256(SHA256(plaintext))
-	// as produced by blobgen.Writer.Sum256().
-	firstHash := sha256.Sum256(data)
-	secondHash := sha256.Sum256(firstHash[:])
-	actualHashHex := hex.EncodeToString(secondHash[:])
-	if actualHashHex != blobHash {
-		return nil, fmt.Errorf("blob hash mismatch: expected %s, got %s", blobHash[:16], actualHashHex[:16])
-	}
-	return &FetchAndDecryptBlobResult{Data: data}, nil
+	return &hashVerifyReader{
+		reader:   reader,
+		fetcher:  rc,
+		hasher:   sha256.New(),
+		blobHash: blobHash,
+	}, nil
 }
 // FetchBlob downloads a blob and returns a reader for the encrypted data.

View File

@@ -494,11 +494,23 @@ func (v *Vaultik) restoreRegularFile(
 // downloadBlob downloads and decrypts a blob
 func (v *Vaultik) downloadBlob(ctx context.Context, blobHash string, expectedSize int64, identity age.Identity) ([]byte, error) {
-	result, err := v.FetchAndDecryptBlob(ctx, blobHash, expectedSize, identity)
+	rc, err := v.FetchAndDecryptBlob(ctx, blobHash, expectedSize, identity)
 	if err != nil {
 		return nil, err
 	}
-	return result.Data, nil
+	data, err := io.ReadAll(rc)
+	if err != nil {
+		_ = rc.Close()
+		return nil, fmt.Errorf("reading blob data: %w", err)
+	}
+	// Close triggers hash verification
+	if err := rc.Close(); err != nil {
+		return nil, err
+	}
+	return data, nil
 }
 // verifyRestoredFiles verifies that all restored files match their expected chunk hashes