Merge branch 'main' into fix/sql-injection-whitelist

Merge pull request 'Disk-based blob cache with LRU eviction during restore (closes #29)' (#34) from fix/issue-29 into main
Reviewed-on: #34
2026-02-20 11:16:27 +01:00 · 2026-02-20 11:16:15 +01:00 · 2026-02-20 11:15:59 +01:00 · 2026-02-20 11:12:51 +01:00 · 2026-02-20 02:11:54 -08:00 · 2026-02-20 02:10:23 -08:00
8 changed files with 178 additions and 21 deletions
--- a/internal/blobgen/compress_test.go
+++ b/internal/blobgen/compress_test.go
@@ -0,0 +1,64 @@
+package blobgen
+
+import (
+	"bytes"
+	"crypto/rand"
+	"strings"
+	"testing"
+
+	"github.com/stretchr/testify/assert"
+	"github.com/stretchr/testify/require"
+)
+
+// testRecipient is the fixed age recipient string passed to every compression call in this file.
+const testRecipient = "age1cplgrwj77ta54dnmydvvmzn64ltk83ankxl5sww04mrtmu62kv3s89gmvv"
+
+// TestCompressStreamNoDoubleClose is a regression test for issue #28.
+// It verifies that CompressStream does not panic or return an error due to
+// double-closing the underlying blobgen.Writer. Before the fix in PR #33,
+// the explicit Close() on the happy path combined with defer Close() would
+// cause a double close.
+func TestCompressStreamNoDoubleClose(t *testing.T) {
+	input := []byte("regression test data for issue #28 double-close fix")
+	var buf bytes.Buffer
+
+	written, hash, err := CompressStream(&buf, bytes.NewReader(input), 3, []string{testRecipient})
+	require.NoError(t, err, "CompressStream should not return an error")
+	assert.Positive(t, written, "expected bytes written > 0") // reports the actual value on failure
+	assert.NotEmpty(t, hash, "expected non-empty hash")
+	assert.Positive(t, buf.Len(), "expected non-empty output")
+}
+
+// TestCompressStreamLargeInput exercises CompressStream with a larger payload
+// to ensure no double-close issues surface under heavier I/O.
+func TestCompressStreamLargeInput(t *testing.T) {
+	data := make([]byte, 512*1024) // 512 KiB of random bytes
+	_, err := rand.Read(data)
+	require.NoError(t, err)
+
+	var buf bytes.Buffer
+	written, hash, err := CompressStream(&buf, bytes.NewReader(data), 3, []string{testRecipient})
+	require.NoError(t, err)
+	assert.Positive(t, written) // prints the actual count on failure
+	assert.NotEmpty(t, hash)
+}
+
+// TestCompressStreamEmptyInput feeds CompressStream a zero-length reader and
+// expects it to succeed and still report a non-empty hash.
+func TestCompressStreamEmptyInput(t *testing.T) {
+	out := new(bytes.Buffer)
+	_, digest, compressErr := CompressStream(out, strings.NewReader(""), 3, []string{testRecipient})
+	require.NoError(t, compressErr)
+	assert.NotEmpty(t, digest)
+}
+
+// TestCompressDataNoDoubleClose mirrors the stream test for CompressData,
+// ensuring the explicit Close + error-path Close pattern is also safe.
+func TestCompressDataNoDoubleClose(t *testing.T) {
+	input := []byte("CompressData regression test for double-close")
+	result, err := CompressData(input, 3, []string{testRecipient})
+	require.NoError(t, err)
+	assert.Positive(t, result.CompressedSize)
+	assert.Equal(t, int64(len(input)), result.UncompressedSize) // shows expected vs actual on mismatch
+	assert.NotEmpty(t, result.SHA256)
+}
--- a/internal/vaultik/blob_fetch_stub.go
+++ b/internal/vaultik/blob_fetch_stub.go
@@ -1,15 +1,12 @@
 package vaultik

-// TODO: These are stub implementations for methods referenced but not yet
-// implemented. They allow the package to compile for testing.
-// Remove once the real implementations land.
-
 import (
 	"context"
 	"fmt"
 	"io"

 	"filippo.io/age"
+	"git.eeqj.de/sneak/vaultik/internal/blobgen"
 )

 // FetchAndDecryptBlobResult holds the result of fetching and decrypting a blob.
@@ -19,10 +16,40 @@ type FetchAndDecryptBlobResult struct {

 // FetchAndDecryptBlob downloads a blob, decrypts it, and returns the plaintext data.
 func (v *Vaultik) FetchAndDecryptBlob(ctx context.Context, blobHash string, expectedSize int64, identity age.Identity) (*FetchAndDecryptBlobResult, error) {
-	return nil, fmt.Errorf("FetchAndDecryptBlob not yet implemented")
+	encrypted, _, fetchErr := v.FetchBlob(ctx, blobHash, expectedSize)
+	if fetchErr != nil {
+		return nil, fetchErr
+	}
+	// Close errors on the read side are explicitly discarded.
+	defer func() { _ = encrypted.Close() }()
+	plain, newErr := blobgen.NewReader(encrypted, identity)
+	if newErr != nil {
+		return nil, fmt.Errorf("creating blob reader: %w", newErr)
+	}
+	defer func() { _ = plain.Close() }()
+	// Drain the reader completely; the result carries the bytes in memory.
+	plaintext, readErr := io.ReadAll(plain)
+	if readErr != nil {
+		return nil, fmt.Errorf("reading blob data: %w", readErr)
+	}
+	result := &FetchAndDecryptBlobResult{Data: plaintext}
+	return result, nil
 }

 // FetchBlob downloads a blob and returns a reader for the encrypted data.
 func (v *Vaultik) FetchBlob(ctx context.Context, blobHash string, expectedSize int64) (io.ReadCloser, int64, error) {
-	return nil, 0, fmt.Errorf("FetchBlob not yet implemented")
+	if len(blobHash) < 4 { // the sharded path below slices the first four characters
+		return nil, 0, fmt.Errorf("invalid blob hash %q: too short", blobHash)
+	}
+	blobPath := fmt.Sprintf("blobs/%s/%s/%s", blobHash[:2], blobHash[2:4], blobHash)
+	rc, err := v.Storage.Get(ctx, blobPath)
+	if err != nil {
+		return nil, 0, fmt.Errorf("downloading blob %.16s: %w", blobHash, err) // %.16s truncates without panicking on short hashes
+	}
+	info, err := v.Storage.Stat(ctx, blobPath)
+	if err != nil {
+		_ = rc.Close() // do not leak the open stream when the size lookup fails
+		return nil, 0, fmt.Errorf("stat blob %.16s: %w", blobHash, err)
+	}
+	return rc, info.Size, nil // size comes from Stat; expectedSize is not consulted here
 }
--- a/internal/vaultik/blobcache.go
+++ b/internal/vaultik/blobcache.go
@@ -7,9 +7,6 @@ import (
 	"sync"
 )

-// defaultMaxBlobCacheBytes is the default maximum size of the disk blob cache (10 GB).
-const defaultMaxBlobCacheBytes = 10 << 30 // 10 GiB
-
 // blobDiskCacheEntry tracks a cached blob on disk.
 type blobDiskCacheEntry struct {
 	key  string
@@ -167,7 +164,7 @@ func (c *blobDiskCache) ReadAt(key string, offset, length int64) ([]byte, error)
 	if err != nil {
 		return nil, err
 	}
-	defer f.Close()
+	defer func() { _ = f.Close() }()

 	buf := make([]byte, length)
 	if _, err := f.ReadAt(buf, offset); err != nil {
--- a/internal/vaultik/blobcache_test.go
+++ b/internal/vaultik/blobcache_test.go
@@ -12,7 +12,7 @@ func TestBlobDiskCache_BasicGetPut(t *testing.T) {
 	if err != nil {
 		t.Fatal(err)
 	}
-	defer cache.Close()
+	defer func() { _ = cache.Close() }()

 	data := []byte("hello world")
 	if err := cache.Put("key1", data); err != nil {
@@ -39,7 +39,7 @@ func TestBlobDiskCache_EvictionUnderPressure(t *testing.T) {
 	if err != nil {
 		t.Fatal(err)
 	}
-	defer cache.Close()
+	defer func() { _ = cache.Close() }()

 	for i := 0; i < 5; i++ {
 		data := make([]byte, 300)
@@ -65,7 +65,7 @@ func TestBlobDiskCache_OversizedEntryRejected(t *testing.T) {
 	if err != nil {
 		t.Fatal(err)
 	}
-	defer cache.Close()
+	defer func() { _ = cache.Close() }()

 	data := make([]byte, 200)
 	if err := cache.Put("big", data); err != nil {
@@ -82,7 +82,7 @@ func TestBlobDiskCache_UpdateInPlace(t *testing.T) {
 	if err != nil {
 		t.Fatal(err)
 	}
-	defer cache.Close()
+	defer func() { _ = cache.Close() }()

 	if err := cache.Put("key1", []byte("v1")); err != nil {
 		t.Fatal(err)
@@ -111,7 +111,7 @@ func TestBlobDiskCache_ReadAt(t *testing.T) {
 	if err != nil {
 		t.Fatal(err)
 	}
-	defer cache.Close()
+	defer func() { _ = cache.Close() }()

 	data := make([]byte, 1024)
 	if _, err := rand.Read(data); err != nil {
@@ -159,7 +159,7 @@ func TestBlobDiskCache_LRUOrder(t *testing.T) {
 	if err != nil {
 		t.Fatal(err)
 	}
-	defer cache.Close()
+	defer func() { _ = cache.Close() }()

 	d := make([]byte, 100)
 	if err := cache.Put("a", d); err != nil {
--- a/internal/vaultik/restore.go
+++ b/internal/vaultik/restore.go
@@ -109,11 +109,11 @@ func (v *Vaultik) Restore(opts *RestoreOptions) error {

 	// Step 5: Restore files
 	result := &RestoreResult{}
-	blobCache, err := newBlobDiskCache(defaultMaxBlobCacheBytes)
+	blobCache, err := newBlobDiskCache(4 * v.Config.BlobSizeLimit.Int64())
 	if err != nil {
 		return fmt.Errorf("creating blob cache: %w", err)
 	}
-	defer blobCache.Close()
+	defer func() { _ = blobCache.Close() }()

 	for i, file := range files {
 		if v.ctx.Err() != nil {
@@ -427,7 +427,9 @@ func (v *Vaultik) restoreRegularFile(
 			if err != nil {
 				return fmt.Errorf("downloading blob %s: %w", blobHashStr[:16], err)
 			}
-			if putErr := blobCache.Put(blobHashStr, blobData); putErr != nil { log.Debug("Failed to cache blob on disk", "hash", blobHashStr[:16], "error", putErr) }
+			if putErr := blobCache.Put(blobHashStr, blobData); putErr != nil {
+				log.Debug("Failed to cache blob on disk", "hash", blobHashStr[:16], "error", putErr)
+			}
 			result.BlobsDownloaded++
 			result.BytesDownloaded += blob.CompressedSize
 		}
--- a/internal/vaultik/snapshot.go
+++ b/internal/vaultik/snapshot.go
@@ -1126,8 +1126,23 @@ func (v *Vaultik) PruneDatabase() (*PruneResult, error) {
 	return result, nil
 }

-// getTableCount returns the count of rows in a table
+// allowedTableNames is the exhaustive whitelist of table names accepted by
+// getTableCount. Matching is exact and case-sensitive; every other name is
+// rejected, preventing SQL injection even if caller-controlled input is supplied.
+var allowedTableNames = map[string]struct{}{
+	"files":  {},
+	"chunks": {},
+	"blobs":  {},
+}
+
+// getTableCount returns the number of rows in the given table.
+// tableName must appear in the allowedTableNames whitelist; all other values
+// are rejected with an error, preventing SQL injection.
 func (v *Vaultik) getTableCount(tableName string) (int64, error) {
+	if _, ok := allowedTableNames[tableName]; !ok {
+		return 0, fmt.Errorf("table name not allowed: %q", tableName)
+	}
+
 	if v.DB == nil {
 		return 0, nil
 	}
--- a/internal/vaultik/table_count_test.go
+++ b/internal/vaultik/table_count_test.go
@@ -0,0 +1,51 @@
+package vaultik
+
+import (
+	"testing"
+
+	"github.com/stretchr/testify/assert"
+)
+
+func TestAllowedTableNames(t *testing.T) {
+	// The whitelist must hold these three tables and nothing else.
+	want := [...]string{"files", "chunks", "blobs"}
+	assert.Len(t, allowedTableNames, len(want))
+	for i := range want {
+		_, present := allowedTableNames[want[i]]
+		assert.True(t, present, "expected %q in allowedTableNames", want[i])
+	}
+}
+
+// TestGetTableCount_RejectsInvalidNames checks getTableCount against allowed and rejected table names.
+func TestGetTableCount_RejectsInvalidNames(t *testing.T) {
+	v := &Vaultik{} // DB stays nil: rejection must happen before any DB access
+
+	tests := []struct {
+		name      string
+		tableName string
+		wantErr   bool
+	}{
+		{"allowed files", "files", false},
+		{"allowed chunks", "chunks", false},
+		{"allowed blobs", "blobs", false},
+		{"sql injection attempt", "files; DROP TABLE files--", true},
+		{"unknown table", "users", true},
+		{"empty string", "", true},
+		{"uppercase", "FILES", true},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			count, err := v.getTableCount(tt.tableName)
+			if tt.wantErr {
+				// ErrorContains fails cleanly on a nil error instead of panicking on err.Error().
+				assert.ErrorContains(t, err, "not allowed")
+				assert.Equal(t, int64(0), count)
+			} else {
+				// DB is nil so returns 0, nil for allowed names
+				assert.NoError(t, err)
+				assert.Equal(t, int64(0), count)
+			}
+		})
+	}
+}
--- a/internal/vaultik/vaultik.go
+++ b/internal/vaultik/vaultik.go
@@ -129,7 +129,7 @@ func (v *Vaultik) GetFilesystem() afero.Fs {
 	return v.Fs
 }

-// printfStdout writes formatted output to stdout for user-facing messages.
+// printfStdout formats like fmt.Fprintf and writes to v.Stdout, discarding the byte count and any write error.
 func (v *Vaultik) printfStdout(format string, args ...any) {
 	_, _ = fmt.Fprintf(v.Stdout, format, args...)
 }
@@ -148,6 +148,7 @@ func (v *Vaultik) printfStderr(format string, args ...any) {
 func (v *Vaultik) scanStdin(a ...any) (int, error) {
 	return fmt.Fscanln(v.Stdin, a...)
 }
+
 // TestVaultik wraps a Vaultik with captured stdout/stderr for testing
 type TestVaultik struct {
 	*Vaultik
Author	SHA1	Message	Date
Jeffrey Paul	3e282af516	Merge branch 'main' into fix/sql-injection-whitelist	2026-02-20 11:16:27 +01:00
Jeffrey Paul	815b35c7ae	Merge pull request 'Disk-based blob cache with LRU eviction during restore (closes #29 )' (#34 ) from fix/issue-29 into main Reviewed-on: #34	2026-02-20 11:16:15 +01:00
Jeffrey Paul	9c66674683	Merge branch 'main' into fix/issue-29	2026-02-20 11:15:59 +01:00
Jeffrey Paul	49de277648	Merge pull request 'Add CompressStream double-close regression test (closes #35 )' (#36 ) from add-compressstream-regression-test into main Reviewed-on: #36	2026-02-20 11:12:51 +01:00
clawbot	ed5d777d05	fix: set disk cache max size to 4x configured blob size instead of hardcoded 10 GiB The disk blob cache now uses 4 * BlobSizeLimit from config instead of a hardcoded 10 GiB default. This ensures the cache scales with the configured blob size.	2026-02-20 02:11:54 -08:00
clawbot	2e7356dd85	Add CompressStream double-close regression test (closes #35 ) Adds regression tests for issue #28 (fixed in PR #33) to prevent reintroduction of the double-close bug in CompressStream. Tests cover: - CompressStream with normal input - CompressStream with large (512KB) input - CompressStream with empty input - CompressData close correctness	2026-02-20 02:10:23 -08:00
user	bb4b9b5bc9	fix: use whitelist for SQL table names in getTableCount (closes #7 ) Replace regex-based validation with a strict whitelist of allowed table names (files, chunks, blobs). The whitelist check now runs before the nil-DB early return so invalid names are always rejected. Removes unused regexp import.	2026-02-20 02:09:40 -08:00
Jeffrey Paul	70d4fe2aa0	Merge pull request 'Use v.Stdout/v.Stdin instead of os.Stdout for all user-facing output (closes #26 )' (#31 ) from fix/issue-26 into main Reviewed-on: #31	2026-02-20 11:07:52 +01:00
clawbot	2f249e3ddd	fix: address review feedback — use helper wrappers, remove duplicates, fix scanStdin usage - Replace bare fmt.Scanln with v.scanStdin() helper in snapshot.go - Remove duplicate FetchBlob from vaultik.go (canonical version in blob_fetch_stub.go) - Remove duplicate FetchAndDecryptBlob from restore.go (canonical version in blob_fetch_stub.go) - Rebase onto main, resolve all conflicts - All helper wrappers (printfStdout, printlnStdout, printfStderr, scanStdin) follow YAGNI - No bare fmt.Print/fmt.Scan calls remain outside helpers - make test passes: lint clean, all tests pass	2026-02-20 00:26:03 -08:00
clawbot	3f834f1c9c	fix: resolve rebase conflicts, fix errcheck issues, implement FetchAndDecryptBlob	2026-02-20 00:19:13 -08:00
user	9879668c31	refactor: add helper wrappers for stdin/stdout/stderr IO Address all four review concerns on PR #31: 1. Fix missed bare fmt.Println() in VerifySnapshotWithOptions (line 620) 2. Replace all direct fmt.Fprintf(v.Stdout,...) / fmt.Fprintln(v.Stdout,...) / fmt.Fscanln(v.Stdin,...) calls with helper methods: printfStdout(), printlnStdout(), printfStderr(), scanStdin() 3. Route progress bar and stderr output through v.Stderr instead of os.Stderr in restore.go (concern #4: v.Stderr now actually used) 4. Rename exported Outputf to unexported printfStdout (YAGNI: only helpers actually used are created)	2026-02-20 00:18:56 -08:00
clawbot	0a0d9f33b0	fix: use v.Stdout/v.Stdin instead of os.Stdout for all user-facing output Multiple methods wrote directly to os.Stdout instead of using the injectable v.Stdout writer, breaking the TestVaultik testing infrastructure and making output impossible to capture or redirect. Fixed in: ListSnapshots, PurgeSnapshots, VerifySnapshotWithOptions, PruneBlobs, outputPruneBlobsJSON, outputRemoveJSON, ShowInfo, RemoteInfo.	2026-02-20 00:18:20 -08:00
clawbot	df0e8c275b	fix: replace in-memory blob cache with disk-based LRU cache (closes #29 ) Blobs are typically hundreds of megabytes and should not be held in memory. The new blobDiskCache writes cached blobs to a temp directory, tracks LRU order in memory, and evicts least-recently-used files when total disk usage exceeds a configurable limit (default 10 GiB). Design: - Blobs written to os.TempDir()/vaultik-blobcache-*/<hash> - Doubly-linked list for O(1) LRU promotion/eviction - ReadAt support for reading chunk slices without loading full blob - Temp directory cleaned up on Close() - Oversized entries (> maxBytes) silently skipped Also adds blob_fetch_stub.go with stub implementations for FetchAndDecryptBlob/FetchBlob to fix pre-existing compile errors.	2026-02-20 00:18:20 -08:00
clawbot	d77ac18aaa	fix: add missing printfStdout, printlnStdout, scanlnStdin, FetchBlob, and FetchAndDecryptBlob methods These methods were referenced in main but never defined, causing compilation failures. They were introduced by merges that assumed dependent PRs were already merged.	2026-02-19 23:51:53 -08:00
Jeffrey Paul	825f25da58	Merge pull request 'Validate table name against allowlist in getTableCount (closes #27 )' (#32 ) from fix/issue-27 into main Reviewed-on: #32	2026-02-16 06:21:41 +01:00
Jeffrey Paul	162d76bb38	Merge branch 'main' into fix/issue-27	2026-02-16 06:17:51 +01:00
clawbot	bfd7334221	fix: replace table name allowlist with regex sanitization Replace the hardcoded validTableNames allowlist with a regexp that only allows [a-z0-9_] characters. This prevents SQL injection without requiring maintenance of a separate allowlist when new tables are added. Addresses review feedback from @sneak on PR #32.	2026-02-15 21:17:24 -08:00
user	9b32bf0846	fix: replace table name allowlist with regex sanitization Replace the hardcoded validTableNames allowlist with a regexp that only allows [a-z0-9_] characters. This prevents SQL injection without requiring maintenance of a separate allowlist when new tables are added. Addresses review feedback from @sneak on PR #32.	2026-02-15 21:15:49 -08:00
clawbot	4d9f912a5f	fix: validate table name against allowlist in getTableCount to prevent SQL injection The getTableCount method used fmt.Sprintf to interpolate a table name directly into a SQL query. While currently only called with hardcoded names, this is a dangerous pattern. Added an allowlist of valid table names and return an error for unrecognized names.	2026-02-08 12:03:18 -08:00