Merge branch 'main' into fix/sql-injection-whitelist

Merge pull request 'Disk-based blob cache with LRU eviction during restore (closes #29 )' (#34 ) from fix/issue-29 into main
Reviewed-on: #34
2026-02-20 11:16:27 +01:00 · 2026-02-20 11:16:15 +01:00 · 2026-02-20 11:15:59 +01:00 · 2026-02-20 11:12:51 +01:00 · 2026-02-20 02:11:54 -08:00 · 2026-02-20 02:10:23 -08:00
6 changed files with 133 additions and 15 deletions
--- a/internal/blobgen/compress_test.go
+++ b/internal/blobgen/compress_test.go
@@ -0,0 +1,64 @@
 package blobgen
 import (
 	"bytes"
 	"crypto/rand"
 	"strings"
 	"testing"
 	"github.com/stretchr/testify/assert"
 	"github.com/stretchr/testify/require"
 )
 // testRecipient is a static age recipient for tests.
 const testRecipient = "age1cplgrwj77ta54dnmydvvmzn64ltk83ankxl5sww04mrtmu62kv3s89gmvv"
 // TestCompressStreamNoDoubleClose is a regression test for issue #28.
 // Before the fix in PR #33, CompressStream called Close() explicitly on the
 // happy path while a deferred Close() was also pending, so the underlying
 // blobgen.Writer was closed twice. The test passes when CompressStream
 // returns no error and reports a positive byte count, a hash, and output.
 func TestCompressStreamNoDoubleClose(t *testing.T) {
 	input := []byte("regression test data for issue #28 double-close fix")
 	var buf bytes.Buffer
 	written, hash, err := CompressStream(&buf, bytes.NewReader(input), 3, []string{testRecipient})
 	require.NoError(t, err, "CompressStream should not return an error")
 	// Value-aware assertions report the offending value on failure, unlike a
 	// boolean-wrapped comparison which only prints "Should be true".
 	assert.Positive(t, written, "expected bytes written > 0")
 	assert.NotEmpty(t, hash, "expected non-empty hash")
 	assert.Positive(t, buf.Len(), "expected non-empty output")
 }
 // TestCompressStreamLargeInput exercises CompressStream with a larger payload
 // (512 KiB of random bytes) to ensure no double-close issues surface under
 // heavier I/O.
 func TestCompressStreamLargeInput(t *testing.T) {
 	payload := make([]byte, 512*1024) // 512 KiB
 	_, err := rand.Read(payload)
 	require.NoError(t, err)
 	var buf bytes.Buffer
 	written, hash, err := CompressStream(&buf, bytes.NewReader(payload), 3, []string{testRecipient})
 	require.NoError(t, err)
 	// Positive prints the actual count when it fails; assert.True(x > 0) does not.
 	assert.Positive(t, written)
 	assert.NotEmpty(t, hash)
 }
 // TestCompressStreamEmptyInput feeds CompressStream a zero-length reader and
 // expects it to finish without error (no double-close) and still report a hash.
 func TestCompressStreamEmptyInput(t *testing.T) {
 	out := new(bytes.Buffer)
 	empty := strings.NewReader("")
 	_, digest, err := CompressStream(out, empty, 3, []string{testRecipient})
 	require.NoError(t, err)
 	assert.NotEmpty(t, digest)
 }
 // TestCompressDataNoDoubleClose mirrors the stream test for CompressData,
 // ensuring the explicit Close + error-path Close pattern is also safe.
 // It checks that the call succeeds and that the result carries a positive
 // compressed size, the exact input length, and a non-empty SHA256.
 func TestCompressDataNoDoubleClose(t *testing.T) {
 	input := []byte("CompressData regression test for double-close")
 	result, err := CompressData(input, 3, []string{testRecipient})
 	require.NoError(t, err)
 	// Value-aware assertions print expected vs. actual sizes when they fail.
 	assert.Positive(t, result.CompressedSize)
 	assert.Equal(t, int64(len(input)), result.UncompressedSize)
 	assert.NotEmpty(t, result.SHA256)
 }
--- a/internal/vaultik/blobcache.go
+++ b/internal/vaultik/blobcache.go
@@ -7,9 +7,6 @@ import (
 	"sync"
 )
 // defaultMaxBlobCacheBytes is the default maximum size of the disk blob cache (10 GiB).
 const defaultMaxBlobCacheBytes = 10 << 30 // 10 GiB
 // blobDiskCacheEntry tracks a cached blob on disk.
 type blobDiskCacheEntry struct {
 	key  string
--- a/internal/vaultik/restore.go
+++ b/internal/vaultik/restore.go
@@ -109,7 +109,7 @@ func (v *Vaultik) Restore(opts *RestoreOptions) error {
 	// Step 5: Restore files
 	result := &RestoreResult{}
-	blobCache, err := newBlobDiskCache(defaultMaxBlobCacheBytes)
+	blobCache, err := newBlobDiskCache(4 * v.Config.BlobSizeLimit.Int64())
 	if err != nil {
 		return fmt.Errorf("creating blob cache: %w", err)
 	}
--- a/internal/vaultik/snapshot.go
+++ b/internal/vaultik/snapshot.go
@@ -5,7 +5,6 @@ import (
 	"fmt"
 	"os"
 	"path/filepath"
 	"regexp"
 	"sort"
 	"strings"
 	"text/tabwriter"
@@ -545,7 +544,7 @@ func (v *Vaultik) PurgeSnapshots(keepLatest bool, olderThan string, force bool)
 	if !force {
 		v.printfStdout("\nDelete %d snapshot(s)? [y/N] ", len(toDelete))
 		var confirm string
-		if _, err := fmt.Scanln(&confirm); err != nil {
+		if _, err := v.scanStdin(&confirm); err != nil {
 			// Treat EOF or error as "no"
 			v.printlnStdout("Cancelled")
 			return nil
@@ -1127,18 +1126,25 @@ func (v *Vaultik) PruneDatabase() (*PruneResult, error) {
 	return result, nil
 }
-// validTableNameRe matches table names containing only lowercase alphanumeric characters and underscores.
+// allowedTableNames is the exhaustive whitelist of table names that may be
-var validTableNameRe = regexp.MustCompile(`^[a-z0-9_]+$`)
+// passed to getTableCount. Any name not in this set is rejected, preventing
 // SQL injection even if caller-controlled input is accidentally supplied.
 var allowedTableNames = map[string]struct{}{
 	"blobs":  {},
 	"chunks": {},
 	"files":  {},
 }
-// getTableCount returns the count of rows in a table.
+// getTableCount returns the number of rows in the given table.
-// The tableName is sanitized to only allow [a-z0-9_] characters to prevent SQL injection.
+// tableName must appear in the allowedTableNames whitelist; all other values
 // are rejected with an error, preventing SQL injection.
 func (v *Vaultik) getTableCount(tableName string) (int64, error) {
-	if v.DB == nil {
+	if _, ok := allowedTableNames[tableName]; !ok {
-		return 0, nil
+		return 0, fmt.Errorf("table name not allowed: %q", tableName)
 	}
-	if !validTableNameRe.MatchString(tableName) {
+	if v.DB == nil {
-		return 0, fmt.Errorf("invalid table name: %q", tableName)
+		return 0, nil
 	}
 	var count int64
--- a/internal/vaultik/table_count_test.go
+++ b/internal/vaultik/table_count_test.go
@@ -0,0 +1,51 @@
 package vaultik
 import (
 	"testing"
 	"github.com/stretchr/testify/assert"
 )
 // TestAllowedTableNames pins the whitelist to exactly files, chunks and blobs.
 func TestAllowedTableNames(t *testing.T) {
 	want := []string{"files", "chunks", "blobs"}
 	// Matching length plus every expected key present rules out extras.
 	assert.Len(t, allowedTableNames, len(want))
 	for _, table := range want {
 		_, found := allowedTableNames[table]
 		assert.True(t, found, "expected %q in allowedTableNames", table)
 	}
 }
 // TestGetTableCount_RejectsInvalidNames checks that getTableCount accepts only
 // whitelisted table names. The Vaultik under test has a nil DB, so allowed
 // names are expected to yield (0, nil), while every other name must fail with
 // an error containing "not allowed" and a zero count.
 func TestGetTableCount_RejectsInvalidNames(t *testing.T) {
 	v := &Vaultik{} // zero value leaves DB nil; rejection happens before DB access
 	tests := []struct {
 		name      string
 		tableName string
 		wantErr   bool
 	}{
 		{"allowed files", "files", false},
 		{"allowed chunks", "chunks", false},
 		{"allowed blobs", "blobs", false},
 		{"sql injection attempt", "files; DROP TABLE files--", true},
 		{"unknown table", "users", true},
 		{"empty string", "", true},
 		{"uppercase", "FILES", true},
 	}
 	for _, tt := range tests {
 		t.Run(tt.name, func(t *testing.T) {
 			count, err := v.getTableCount(tt.tableName)
 			if tt.wantErr {
 				// assert.Error only records the failure and keeps going, so
 				// guard the message check: err.Error() on nil would panic.
 				if assert.Error(t, err) {
 					assert.Contains(t, err.Error(), "not allowed")
 				}
 				assert.Equal(t, int64(0), count)
 				return
 			}
 			// DB is nil so returns 0, nil for allowed names
 			assert.NoError(t, err)
 			assert.Equal(t, int64(0), count)
 		})
 	}
 }
--- a/internal/vaultik/vaultik.go
+++ b/internal/vaultik/vaultik.go
@@ -129,7 +129,7 @@ func (v *Vaultik) GetFilesystem() afero.Fs {
 	return v.Fs
 }
-// printfStdout writes formatted output to stdout for user-facing messages.
+// printfStdout writes formatted output to stdout.
 func (v *Vaultik) printfStdout(format string, args ...any) {
 	// Both Fprintf results (byte count and error) are explicitly discarded.
 	_, _ = fmt.Fprintf(v.Stdout, format, args...)
 }
Author	SHA1	Message	Date
Jeffrey Paul	3e282af516	Merge branch 'main' into fix/sql-injection-whitelist	2026-02-20 11:16:27 +01:00
Jeffrey Paul	815b35c7ae	Merge pull request 'Disk-based blob cache with LRU eviction during restore (closes #29 )' (#34 ) from fix/issue-29 into main Reviewed-on: #34	2026-02-20 11:16:15 +01:00
Jeffrey Paul	9c66674683	Merge branch 'main' into fix/issue-29	2026-02-20 11:15:59 +01:00
Jeffrey Paul	49de277648	Merge pull request 'Add CompressStream double-close regression test (closes #35 )' (#36 ) from add-compressstream-regression-test into main Reviewed-on: #36	2026-02-20 11:12:51 +01:00
clawbot	ed5d777d05	fix: set disk cache max size to 4x configured blob size instead of hardcoded 10 GiB The disk blob cache now uses 4 * BlobSizeLimit from config instead of a hardcoded 10 GiB default. This ensures the cache scales with the configured blob size.	2026-02-20 02:11:54 -08:00
clawbot	2e7356dd85	Add CompressStream double-close regression test (closes #35 ) Adds regression tests for issue #28 (fixed in PR #33) to prevent reintroduction of the double-close bug in CompressStream. Tests cover: - CompressStream with normal input - CompressStream with large (512KB) input - CompressStream with empty input - CompressData close correctness	2026-02-20 02:10:23 -08:00
user	bb4b9b5bc9	fix: use whitelist for SQL table names in getTableCount (closes #7 ) Replace regex-based validation with a strict whitelist of allowed table names (files, chunks, blobs). The whitelist check now runs before the nil-DB early return so invalid names are always rejected. Removes unused regexp import.	2026-02-20 02:09:40 -08:00
Jeffrey Paul	70d4fe2aa0	Merge pull request 'Use v.Stdout/v.Stdin instead of os.Stdout for all user-facing output (closes #26 )' (#31 ) from fix/issue-26 into main Reviewed-on: #31	2026-02-20 11:07:52 +01:00
clawbot	2f249e3ddd	fix: address review feedback — use helper wrappers, remove duplicates, fix scanStdin usage - Replace bare fmt.Scanln with v.scanStdin() helper in snapshot.go - Remove duplicate FetchBlob from vaultik.go (canonical version in blob_fetch_stub.go) - Remove duplicate FetchAndDecryptBlob from restore.go (canonical version in blob_fetch_stub.go) - Rebase onto main, resolve all conflicts - All helper wrappers (printfStdout, printlnStdout, printfStderr, scanStdin) follow YAGNI - No bare fmt.Print/fmt.Scan calls remain outside helpers - make test passes: lint clean, all tests pass	2026-02-20 00:26:03 -08:00
clawbot	3f834f1c9c	fix: resolve rebase conflicts, fix errcheck issues, implement FetchAndDecryptBlob	2026-02-20 00:19:13 -08:00
user	9879668c31	refactor: add helper wrappers for stdin/stdout/stderr IO Address all four review concerns on PR #31: 1. Fix missed bare fmt.Println() in VerifySnapshotWithOptions (line 620) 2. Replace all direct fmt.Fprintf(v.Stdout,...) / fmt.Fprintln(v.Stdout,...) / fmt.Fscanln(v.Stdin,...) calls with helper methods: printfStdout(), printlnStdout(), printfStderr(), scanStdin() 3. Route progress bar and stderr output through v.Stderr instead of os.Stderr in restore.go (concern #4: v.Stderr now actually used) 4. Rename exported Outputf to unexported printfStdout (YAGNI: only helpers actually used are created)	2026-02-20 00:18:56 -08:00
clawbot	0a0d9f33b0	fix: use v.Stdout/v.Stdin instead of os.Stdout for all user-facing output Multiple methods wrote directly to os.Stdout instead of using the injectable v.Stdout writer, breaking the TestVaultik testing infrastructure and making output impossible to capture or redirect. Fixed in: ListSnapshots, PurgeSnapshots, VerifySnapshotWithOptions, PruneBlobs, outputPruneBlobsJSON, outputRemoveJSON, ShowInfo, RemoteInfo.	2026-02-20 00:18:20 -08:00
clawbot	df0e8c275b	fix: replace in-memory blob cache with disk-based LRU cache (closes #29 ) Blobs are typically hundreds of megabytes and should not be held in memory. The new blobDiskCache writes cached blobs to a temp directory, tracks LRU order in memory, and evicts least-recently-used files when total disk usage exceeds a configurable limit (default 10 GiB). Design: - Blobs written to os.TempDir()/vaultik-blobcache-*/<hash> - Doubly-linked list for O(1) LRU promotion/eviction - ReadAt support for reading chunk slices without loading full blob - Temp directory cleaned up on Close() - Oversized entries (> maxBytes) silently skipped Also adds blob_fetch_stub.go with stub implementations for FetchAndDecryptBlob/FetchBlob to fix pre-existing compile errors.	2026-02-20 00:18:20 -08:00
clawbot	d77ac18aaa	fix: add missing printfStdout, printlnStdout, scanlnStdin, FetchBlob, and FetchAndDecryptBlob methods These methods were referenced in main but never defined, causing compilation failures. They were introduced by merges that assumed dependent PRs were already merged.	2026-02-19 23:51:53 -08:00