refactor: stream blob hash verification instead of buffering in memory

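FetchAndDecryptBlob now returns an io.ReadCloser backed by a hashVerifyReader, which computes the double-SHA-256 of the plaintext on the fly as it is read. The hash is verified in Close(), but only if the stream was read to EOF; a caller that closes early gets no verification, so callers must consume the stream fully. This lets FetchAndDecryptBlob avoid loading entire blobs into memory, which could exceed available RAM (downloadBlob in restore.go still reads the whole stream with io.ReadAll). Addresses review feedback on PR #39.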
fix: verify blob hash after download and decryption (closes #5)
2026-02-20 02:29:19 -08:00 · 2026-02-20 02:26:15 -08:00
9 changed files with 214 additions and 184 deletions
--- a/.dockerignore
+++ b/.dockerignore
@@ -1,8 +0,0 @@
-.git
-.gitea
-*.md
-LICENSE
-vaultik
-coverage.out
-coverage.html
-.DS_Store
--- a/.gitea/workflows/check.yml
+++ b/.gitea/workflows/check.yml
@@ -1,14 +0,0 @@
-name: check
-on:
-  push:
-    branches: [main]
-  pull_request:
-    branches: [main]
-jobs:
-  check:
-    runs-on: ubuntu-latest
-    steps:
-      # actions/checkout v4, 2024-09-16
-      - uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5
-      - name: Build and check
-        run: docker build .
--- a/61
+++ b/61
@@ -1,61 +0,0 @@
-# Lint stage
-# golangci/golangci-lint:v2.11.3-alpine, 2026-03-17
-FROM golangci/golangci-lint:v2.11.3-alpine@sha256:b1c3de5862ad0a95b4e45a993b0f00415835d687e4f12c845c7493b86c13414e AS lint
-
-RUN apk add --no-cache make build-base
-
-WORKDIR /src
-
-# Copy go mod files first for better layer caching
-COPY go.mod go.sum ./
-RUN go mod download
-
-# Copy source code
-COPY . .
-
-# Run formatting check and linter
-RUN make fmt-check
-RUN make lint
-
-# Build stage
-# golang:1.26.1-alpine, 2026-03-17
-FROM golang:1.26.1-alpine@sha256:2389ebfa5b7f43eeafbd6be0c3700cc46690ef842ad962f6c5bd6be49ed82039 AS builder
-
-# Depend on lint stage passing
-COPY --from=lint /src/go.sum /dev/null
-
-ARG VERSION=dev
-
-# Install build dependencies for CGO (mattn/go-sqlite3) and sqlite3 CLI (tests)
-RUN apk add --no-cache make build-base sqlite
-
-WORKDIR /src
-
-# Copy go mod files first for better layer caching
-COPY go.mod go.sum ./
-RUN go mod download
-
-# Copy source code
-COPY . .
-
-# Run tests
-RUN make test
-
-# Build with CGO enabled (required for mattn/go-sqlite3)
-RUN CGO_ENABLED=1 go build -ldflags "-X 'git.eeqj.de/sneak/vaultik/internal/globals.Version=${VERSION}' -X 'git.eeqj.de/sneak/vaultik/internal/globals.Commit=$(git rev-parse HEAD 2>/dev/null || echo unknown)'" -o /vaultik ./cmd/vaultik
-
-# Runtime stage
-# alpine:3.21, 2026-02-25
-FROM alpine:3.21@sha256:c3f8e73fdb79deaebaa2037150150191b9dcbfba68b4a46d70103204c53f4709
-
-RUN apk add --no-cache ca-certificates sqlite
-
-# Copy binary from builder
-COPY --from=builder /vaultik /usr/local/bin/vaultik
-
-# Create non-root user
-RUN adduser -D -H -s /sbin/nologin vaultik
-
-USER vaultik
-
-ENTRYPOINT ["/usr/local/bin/vaultik"]
--- a/40
+++ b/40
@@ -1,4 +1,4 @@
-.PHONY: test fmt lint fmt-check check build clean all docker hooks
+.PHONY: test fmt lint build clean all

 # Version number
 VERSION := 0.0.1
@@ -14,12 +14,21 @@ LDFLAGS := -X 'git.eeqj.de/sneak/vaultik/internal/globals.Version=$(VERSION)' \
 all: vaultik

 # Run tests
-test:
-	go test -race -timeout 30s ./...
+test: lint fmt-check
+	@echo "Running tests..."
+	@if ! go test -v -timeout 10s ./... 2>&1; then \
+		echo ""; \
+		echo "TEST FAILURES DETECTED"; \
+		echo "Run 'go test -v ./internal/database' to see database test details"; \
+		exit 1; \
+	fi

-# Check if code is formatted (read-only)
+# Check if code is formatted
 fmt-check:
-	@test -z "$$(gofmt -l .)" || (echo "Files not formatted:" && gofmt -l . && exit 1)
+	@if [ -n "$$(go fmt ./...)" ]; then \
+		echo "Error: Code is not formatted. Run 'make fmt' to fix."; \
+		exit 1; \
+	fi

 # Format code
 fmt:
@@ -27,7 +36,7 @@ fmt:

 # Run linter
 lint:
-	golangci-lint run ./...
+	golangci-lint run

 # Build binary
 vaultik: internal/*/*.go cmd/vaultik/*.go
@@ -38,6 +47,11 @@ clean:
 	rm -f vaultik
 	go clean

+# Install dependencies
+deps:
+	go mod download
+	go install github.com/golangci/golangci-lint/cmd/golangci-lint@latest
+
 # Run tests with coverage
 test-coverage:
 	go test -v -coverprofile=coverage.out ./...
@@ -53,17 +67,3 @@ local:

 install: vaultik
 	cp ./vaultik $(HOME)/bin/
-
-# Run all checks (formatting, linting, tests) without modifying files
-check: fmt-check lint test
-
-# Build Docker image
-docker:
-	docker build -t vaultik .
-
-# Install pre-commit hook
-hooks:
-	@printf '#!/bin/sh\nset -e\n' > .git/hooks/pre-commit
-	@printf 'go mod tidy\ngo fmt ./...\ngit diff --exit-code -- go.mod go.sum || { echo "go mod tidy changed files; please stage and retry"; exit 1; }\n' >> .git/hooks/pre-commit
-	@printf 'make check\n' >> .git/hooks/pre-commit
-	@chmod +x .git/hooks/pre-commit
--- a/go.mod
+++ b/go.mod
@@ -1,6 +1,6 @@
 module git.eeqj.de/sneak/vaultik

-go 1.26.1
+go 1.24.4

 require (
 	filippo.io/age v1.2.1
--- a/internal/vaultik/blob_fetch_hash_test.go
+++ b/internal/vaultik/blob_fetch_hash_test.go
@@ -0,0 +1,100 @@
+package vaultik_test
+
+import (
+	"bytes"
+	"context"
+	"crypto/sha256"
+	"encoding/hex"
+	"io"
+	"strings"
+	"testing"
+
+	"filippo.io/age"
+	"git.eeqj.de/sneak/vaultik/internal/blobgen"
+	"git.eeqj.de/sneak/vaultik/internal/vaultik"
+)
+
+// TestFetchAndDecryptBlobVerifiesHash verifies that FetchAndDecryptBlob checks
+// the double-SHA-256 hash of the decrypted plaintext against the expected blob hash.
+func TestFetchAndDecryptBlobVerifiesHash(t *testing.T) {
+	identity, err := age.GenerateX25519Identity()
+	if err != nil {
+		t.Fatalf("generating identity: %v", err)
+	}
+
+	// Create test data and encrypt it using blobgen.Writer
+	plaintext := []byte("hello world test data for blob hash verification")
+	var encBuf bytes.Buffer
+	writer, err := blobgen.NewWriter(&encBuf, 1, []string{identity.Recipient().String()})
+	if err != nil {
+		t.Fatalf("creating blobgen writer: %v", err)
+	}
+	if _, err := writer.Write(plaintext); err != nil {
+		t.Fatalf("writing plaintext: %v", err)
+	}
+	if err := writer.Close(); err != nil {
+		t.Fatalf("closing writer: %v", err)
+	}
+	encryptedData := encBuf.Bytes()
+
+	// Compute correct double-SHA-256 hash of the plaintext (matches blobgen.Writer.Sum256)
+	firstHash := sha256.Sum256(plaintext)
+	secondHash := sha256.Sum256(firstHash[:])
+	correctHash := hex.EncodeToString(secondHash[:])
+
+	// Verify our hash matches what blobgen.Writer produces
+	writerHash := hex.EncodeToString(writer.Sum256())
+	if correctHash != writerHash {
+		t.Fatalf("hash computation mismatch: manual=%s, writer=%s", correctHash, writerHash)
+	}
+
+	// Set up mock storage with the blob at the correct path
+	mockStorage := NewMockStorer()
+	blobPath := "blobs/" + correctHash[:2] + "/" + correctHash[2:4] + "/" + correctHash
+	mockStorage.mu.Lock()
+	mockStorage.data[blobPath] = encryptedData
+	mockStorage.mu.Unlock()
+
+	tv := vaultik.NewForTesting(mockStorage)
+	ctx := context.Background()
+
+	t.Run("correct hash succeeds", func(t *testing.T) {
+		rc, err := tv.FetchAndDecryptBlob(ctx, correctHash, int64(len(encryptedData)), identity)
+		if err != nil {
+			t.Fatalf("expected success, got error: %v", err)
+		}
+		data, err := io.ReadAll(rc)
+		if err != nil {
+			t.Fatalf("reading stream: %v", err)
+		}
+		if err := rc.Close(); err != nil {
+			t.Fatalf("close (hash verification) failed: %v", err)
+		}
+		if !bytes.Equal(data, plaintext) {
+			t.Fatalf("decrypted data mismatch: got %q, want %q", data, plaintext)
+		}
+	})
+
+	t.Run("wrong hash fails", func(t *testing.T) {
+		// Use a fake hash that doesn't match the actual plaintext
+		fakeHash := strings.Repeat("ab", 32) // 64 hex chars
+		fakePath := "blobs/" + fakeHash[:2] + "/" + fakeHash[2:4] + "/" + fakeHash
+		mockStorage.mu.Lock()
+		mockStorage.data[fakePath] = encryptedData
+		mockStorage.mu.Unlock()
+
+		rc, err := tv.FetchAndDecryptBlob(ctx, fakeHash, int64(len(encryptedData)), identity)
+		if err != nil {
+			t.Fatalf("unexpected error opening stream: %v", err)
+		}
+		// Read all data — hash is verified on Close
+		_, _ = io.ReadAll(rc)
+		err = rc.Close()
+		if err == nil {
+			t.Fatal("expected error for mismatched hash, got nil")
+		}
+		if !strings.Contains(err.Error(), "hash mismatch") {
+			t.Fatalf("expected hash mismatch error, got: %v", err)
+		}
+	})
+}
--- a/internal/vaultik/blob_fetch_stub.go
+++ b/internal/vaultik/blob_fetch_stub.go
@@ -2,38 +2,82 @@ package vaultik

 import (
 	"context"
+	"crypto/sha256"
+	"encoding/hex"
 	"fmt"
+	"hash"
 	"io"

 	"filippo.io/age"
 	"git.eeqj.de/sneak/vaultik/internal/blobgen"
 )

-// FetchAndDecryptBlobResult holds the result of fetching and decrypting a blob.
-type FetchAndDecryptBlobResult struct {
-	Data []byte
+// hashVerifyReader wraps a reader and computes a double-SHA-256 hash of all
+// data read through it. The hash is verified against the expected blob hash
+// when Close is called. This allows streaming blob verification without
+// buffering the entire blob in memory.
+type hashVerifyReader struct {
+	reader   io.ReadCloser // underlying decrypted blob reader
+	fetcher  io.ReadCloser // raw fetched stream (closed on Close)
+	hasher   hash.Hash     // running SHA-256 of plaintext
+	blobHash string        // expected double-SHA-256 hex
+	done     bool          // EOF reached
 }

-// FetchAndDecryptBlob downloads a blob, decrypts it, and returns the plaintext data.
-func (v *Vaultik) FetchAndDecryptBlob(ctx context.Context, blobHash string, expectedSize int64, identity age.Identity) (*FetchAndDecryptBlobResult, error) {
+func (h *hashVerifyReader) Read(p []byte) (int, error) {
+	n, err := h.reader.Read(p)
+	if n > 0 {
+		h.hasher.Write(p[:n])
+	}
+	if err == io.EOF {
+		h.done = true
+	}
+	return n, err
+}
+
+// Close verifies the hash (if the stream was fully read) and closes underlying readers.
+func (h *hashVerifyReader) Close() error {
+	readerErr := h.reader.Close()
+	fetcherErr := h.fetcher.Close()
+
+	if h.done {
+		firstHash := h.hasher.Sum(nil)
+		secondHasher := sha256.New()
+		secondHasher.Write(firstHash)
+		actualHashHex := hex.EncodeToString(secondHasher.Sum(nil))
+		if actualHashHex != h.blobHash {
+			return fmt.Errorf("blob hash mismatch: expected %s, got %s", h.blobHash[:16], actualHashHex[:16])
+		}
+	}
+
+	if readerErr != nil {
+		return readerErr
+	}
+	return fetcherErr
+}
+
+// FetchAndDecryptBlob downloads a blob, decrypts and decompresses it, and
+// returns a streaming reader that computes the double-SHA-256 hash on the fly.
+// The hash is verified when the returned reader is closed (after fully reading).
+// This avoids buffering the entire blob in memory.
+func (v *Vaultik) FetchAndDecryptBlob(ctx context.Context, blobHash string, expectedSize int64, identity age.Identity) (io.ReadCloser, error) {
 	rc, _, err := v.FetchBlob(ctx, blobHash, expectedSize)
 	if err != nil {
 		return nil, err
 	}
-	defer func() { _ = rc.Close() }()

 	reader, err := blobgen.NewReader(rc, identity)
 	if err != nil {
+		_ = rc.Close()
 		return nil, fmt.Errorf("creating blob reader: %w", err)
 	}
-	defer func() { _ = reader.Close() }()

-	data, err := io.ReadAll(reader)
-	if err != nil {
-		return nil, fmt.Errorf("reading blob data: %w", err)
-	}
-
-	return &FetchAndDecryptBlobResult{Data: data}, nil
+	return &hashVerifyReader{
+		reader:   reader,
+		fetcher:  rc,
+		hasher:   sha256.New(),
+		blobHash: blobHash,
+	}, nil
 }

 // FetchBlob downloads a blob and returns a reader for the encrypted data.
--- a/internal/vaultik/restore.go
+++ b/internal/vaultik/restore.go
@@ -22,13 +22,6 @@ import (
 	"golang.org/x/term"
 )

-const (
-	// progressBarWidth is the character width of the progress bar display.
-	progressBarWidth = 40
-	// progressBarThrottle is the minimum interval between progress bar redraws.
-	progressBarThrottle = 100 * time.Millisecond
-)
-
 // RestoreOptions contains options for the restore operation
 type RestoreOptions struct {
 	SnapshotID string
@@ -122,15 +115,6 @@ func (v *Vaultik) Restore(opts *RestoreOptions) error {
 	}
 	defer func() { _ = blobCache.Close() }()

-	// Calculate total bytes for progress bar
-	var totalBytesExpected int64
-	for _, file := range files {
-		totalBytesExpected += file.Size
-	}
-
-	// Create progress bar if output is a terminal
-	bar := v.newProgressBar("Restoring", totalBytesExpected)
-
 	for i, file := range files {
 		if v.ctx.Err() != nil {
 			return v.ctx.Err()
@@ -140,19 +124,11 @@ func (v *Vaultik) Restore(opts *RestoreOptions) error {
 			log.Error("Failed to restore file", "path", file.Path, "error", err)
 			result.FilesFailed++
 			result.FailedFiles = append(result.FailedFiles, file.Path.String())
-			// Update progress bar even on failure
-			if bar != nil {
-				_ = bar.Add64(file.Size)
-			}
+			// Continue with other files
 			continue
 		}

-		// Update progress bar
-		if bar != nil {
-			_ = bar.Add64(file.Size)
-		}
-
-		// Progress logging (for non-terminal or structured logs)
+		// Progress logging
 		if (i+1)%100 == 0 || i+1 == len(files) {
 			log.Info("Restore progress",
 				"files", fmt.Sprintf("%d/%d", i+1, len(files)),
@@ -161,10 +137,6 @@ func (v *Vaultik) Restore(opts *RestoreOptions) error {
 		}
 	}

-	if bar != nil {
-		_ = bar.Finish()
-	}
-
 	result.Duration = time.Since(startTime)

 	log.Info("Restore complete",
@@ -522,11 +494,23 @@ func (v *Vaultik) restoreRegularFile(

 // downloadBlob downloads and decrypts a blob
 func (v *Vaultik) downloadBlob(ctx context.Context, blobHash string, expectedSize int64, identity age.Identity) ([]byte, error) {
-	result, err := v.FetchAndDecryptBlob(ctx, blobHash, expectedSize, identity)
+	rc, err := v.FetchAndDecryptBlob(ctx, blobHash, expectedSize, identity)
 	if err != nil {
 		return nil, err
 	}
-	return result.Data, nil
+
+	data, err := io.ReadAll(rc)
+	if err != nil {
+		_ = rc.Close()
+		return nil, fmt.Errorf("reading blob data: %w", err)
+	}
+
+	// Close triggers hash verification
+	if err := rc.Close(); err != nil {
+		return nil, err
+	}
+
+	return data, nil
 }

 // verifyRestoredFiles verifies that all restored files match their expected chunk hashes
@@ -564,7 +548,22 @@ func (v *Vaultik) verifyRestoredFiles(
 	)

 	// Create progress bar if output is a terminal
-	bar := v.newProgressBar("Verifying", totalBytes)
+	var bar *progressbar.ProgressBar
+	if isTerminal() {
+		bar = progressbar.NewOptions64(
+			totalBytes,
+			progressbar.OptionSetDescription("Verifying"),
+			progressbar.OptionSetWriter(v.Stderr),
+			progressbar.OptionShowBytes(true),
+			progressbar.OptionShowCount(),
+			progressbar.OptionSetWidth(40),
+			progressbar.OptionThrottle(100*time.Millisecond),
+			progressbar.OptionOnCompletion(func() {
+				v.printfStderr("\n")
+			}),
+			progressbar.OptionSetRenderBlankState(true),
+		)
+	}

 	// Verify each file
 	for _, file := range regularFiles {
@@ -658,37 +657,7 @@ func (v *Vaultik) verifyFile(
 	return bytesVerified, nil
 }

-// newProgressBar creates a terminal-aware progress bar with standard options.
-// It returns nil if stdout is not a terminal.
-func (v *Vaultik) newProgressBar(description string, total int64) *progressbar.ProgressBar {
-	if !v.isTerminal() {
-		return nil
-	}
-	return progressbar.NewOptions64(
-		total,
-		progressbar.OptionSetDescription(description),
-		progressbar.OptionSetWriter(v.Stderr),
-		progressbar.OptionShowBytes(true),
-		progressbar.OptionShowCount(),
-		progressbar.OptionSetWidth(progressBarWidth),
-		progressbar.OptionThrottle(progressBarThrottle),
-		progressbar.OptionOnCompletion(func() {
-			v.printfStderr("\n")
-		}),
-		progressbar.OptionSetRenderBlankState(true),
-	)
-}
-
-// isTerminal returns true if stdout is a terminal.
-// It checks whether v.Stdout implements Fd() (i.e. is an *os.File),
-// and falls back to false for non-file writers (e.g. in tests).
-func (v *Vaultik) isTerminal() bool {
-	type fder interface {
-		Fd() uintptr
-	}
-	f, ok := v.Stdout.(fder)
-	if !ok {
-		return false
-	}
-	return term.IsTerminal(int(f.Fd()))
+// isTerminal returns true if stdout is a terminal
+func isTerminal() bool {
+	return term.IsTerminal(int(os.Stdout.Fd()))
 }
--- a/internal/vaultik/snapshot.go
+++ b/internal/vaultik/snapshot.go
@@ -802,7 +802,7 @@ func (v *Vaultik) syncWithRemote() error {
 		snapshotIDStr := snapshot.ID.String()
 		if !remoteSnapshots[snapshotIDStr] {
 			log.Info("Removing local snapshot not found in remote", "snapshot_id", snapshot.ID)
-			if err := v.deleteSnapshotFromLocalDB(snapshotIDStr); err != nil {
+			if err := v.Repositories.Snapshots.Delete(v.ctx, snapshotIDStr); err != nil {
 				log.Error("Failed to delete local snapshot", "snapshot_id", snapshot.ID, "error", err)
 			} else {
 				removedCount++