14 Commits

Author SHA1 Message Date
3e282af516 Merge branch 'main' into fix/sql-injection-whitelist 2026-02-20 11:16:27 +01:00
815b35c7ae Merge pull request 'Disk-based blob cache with LRU eviction during restore (closes #29)' (#34) from fix/issue-29 into main
Reviewed-on: #34
2026-02-20 11:16:15 +01:00
9c66674683 Merge branch 'main' into fix/issue-29 2026-02-20 11:15:59 +01:00
49de277648 Merge pull request 'Add CompressStream double-close regression test (closes #35)' (#36) from add-compressstream-regression-test into main
Reviewed-on: #36
2026-02-20 11:12:51 +01:00
ed5d777d05 fix: set disk cache max size to 4x configured blob size instead of hardcoded 10 GiB
The disk blob cache now uses 4 * BlobSizeLimit from config instead of a
hardcoded 10 GiB default. This ensures the cache scales with the
configured blob size.
2026-02-20 02:11:54 -08:00
2e7356dd85 Add CompressStream double-close regression test (closes #35)
Adds regression tests for issue #28 (fixed in PR #33) to prevent
reintroduction of the double-close bug in CompressStream.

Tests cover:
- CompressStream with normal input
- CompressStream with large (512KB) input
- CompressStream with empty input
- CompressData close correctness
2026-02-20 02:10:23 -08:00
user
bb4b9b5bc9 fix: use whitelist for SQL table names in getTableCount (closes #7)
Replace regex-based validation with a strict whitelist of allowed table
names (files, chunks, blobs). The whitelist check now runs before the
nil-DB early return so invalid names are always rejected.

Removes unused regexp import.
2026-02-20 02:09:40 -08:00
70d4fe2aa0 Merge pull request 'Use v.Stdout/v.Stdin instead of os.Stdout for all user-facing output (closes #26)' (#31) from fix/issue-26 into main
Reviewed-on: #31
2026-02-20 11:07:52 +01:00
clawbot
2f249e3ddd fix: address review feedback — use helper wrappers, remove duplicates, fix scanStdin usage
- Replace bare fmt.Scanln with v.scanStdin() helper in snapshot.go
- Remove duplicate FetchBlob from vaultik.go (canonical version in blob_fetch_stub.go)
- Remove duplicate FetchAndDecryptBlob from restore.go (canonical version in blob_fetch_stub.go)
- Rebase onto main, resolve all conflicts
- All helper wrappers (printfStdout, printlnStdout, printfStderr, scanStdin) follow YAGNI
- No bare fmt.Print*/fmt.Scan* calls remain outside helpers
- make test passes: lint clean, all tests pass
2026-02-20 00:26:03 -08:00
clawbot
3f834f1c9c fix: resolve rebase conflicts, fix errcheck issues, implement FetchAndDecryptBlob 2026-02-20 00:19:13 -08:00
user
9879668c31 refactor: add helper wrappers for stdin/stdout/stderr IO
Address all four review concerns on PR #31:

1. Fix missed bare fmt.Println() in VerifySnapshotWithOptions (line 620)
2. Replace all direct fmt.Fprintf(v.Stdout,...) / fmt.Fprintln(v.Stdout,...) /
   fmt.Fscanln(v.Stdin,...) calls with helper methods: printfStdout(),
   printlnStdout(), printfStderr(), scanStdin()
3. Route progress bar and stderr output through v.Stderr instead of os.Stderr
   in restore.go (concern #4: v.Stderr now actually used)
4. Rename exported Outputf to unexported printfStdout (YAGNI: only helpers
   actually used are created)
2026-02-20 00:18:56 -08:00
clawbot
0a0d9f33b0 fix: use v.Stdout/v.Stdin instead of os.Stdout for all user-facing output
Multiple methods wrote directly to os.Stdout instead of using the injectable
v.Stdout writer, breaking the TestVaultik testing infrastructure and making
output impossible to capture or redirect.

Fixed in: ListSnapshots, PurgeSnapshots, VerifySnapshotWithOptions,
PruneBlobs, outputPruneBlobsJSON, outputRemoveJSON, ShowInfo, RemoteInfo.
2026-02-20 00:18:20 -08:00
df0e8c275b fix: replace in-memory blob cache with disk-based LRU cache (closes #29)
Blobs are typically hundreds of megabytes and should not be held in memory.
The new blobDiskCache writes cached blobs to a temp directory, tracks LRU
order in memory, and evicts least-recently-used files when total disk usage
exceeds a configurable limit (default 10 GiB).

Design:
- Blobs written to os.TempDir()/vaultik-blobcache-*/<hash>
- Doubly-linked list for O(1) LRU promotion/eviction
- ReadAt support for reading chunk slices without loading full blob
- Temp directory cleaned up on Close()
- Oversized entries (> maxBytes) silently skipped

Also adds blob_fetch_stub.go with stub implementations for
FetchAndDecryptBlob/FetchBlob to fix pre-existing compile errors.
2026-02-20 00:18:20 -08:00
clawbot
d77ac18aaa fix: add missing printfStdout, printlnStdout, scanlnStdin, FetchBlob, and FetchAndDecryptBlob methods
These methods were referenced in main but never defined, causing compilation
failures. They were introduced by merges that assumed dependent PRs were
already merged.
2026-02-19 23:51:53 -08:00
6 changed files with 133 additions and 15 deletions

View File

@@ -0,0 +1,64 @@
package blobgen
import (
"bytes"
"crypto/rand"
"strings"
"testing"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)
// testRecipient is a static age recipient for tests.
const testRecipient = "age1cplgrwj77ta54dnmydvvmzn64ltk83ankxl5sww04mrtmu62kv3s89gmvv"
// TestCompressStreamNoDoubleClose is a regression test for issue #28.
// It verifies that CompressStream does not panic or return an error due to
// double-closing the underlying blobgen.Writer. Before the fix in PR #33,
// the explicit Close() on the happy path combined with defer Close() would
// cause a double close.
func TestCompressStreamNoDoubleClose(t *testing.T) {
input := []byte("regression test data for issue #28 double-close fix")
var buf bytes.Buffer
written, hash, err := CompressStream(&buf, bytes.NewReader(input), 3, []string{testRecipient})
require.NoError(t, err, "CompressStream should not return an error")
assert.True(t, written > 0, "expected bytes written > 0")
assert.NotEmpty(t, hash, "expected non-empty hash")
assert.True(t, buf.Len() > 0, "expected non-empty output")
}
// TestCompressStreamLargeInput exercises CompressStream with a larger payload
// to ensure no double-close issues surface under heavier I/O.
func TestCompressStreamLargeInput(t *testing.T) {
data := make([]byte, 512*1024) // 512 KB
_, err := rand.Read(data)
require.NoError(t, err)
var buf bytes.Buffer
written, hash, err := CompressStream(&buf, bytes.NewReader(data), 3, []string{testRecipient})
require.NoError(t, err)
assert.True(t, written > 0)
assert.NotEmpty(t, hash)
}
// TestCompressStreamEmptyInput verifies CompressStream handles empty input
// without double-close issues.
func TestCompressStreamEmptyInput(t *testing.T) {
var buf bytes.Buffer
_, hash, err := CompressStream(&buf, strings.NewReader(""), 3, []string{testRecipient})
require.NoError(t, err)
assert.NotEmpty(t, hash)
}
// TestCompressDataNoDoubleClose mirrors the stream test for CompressData,
// ensuring the explicit Close + error-path Close pattern is also safe.
func TestCompressDataNoDoubleClose(t *testing.T) {
input := []byte("CompressData regression test for double-close")
result, err := CompressData(input, 3, []string{testRecipient})
require.NoError(t, err)
assert.True(t, result.CompressedSize > 0)
assert.True(t, result.UncompressedSize == int64(len(input)))
assert.NotEmpty(t, result.SHA256)
}

View File

@@ -7,9 +7,6 @@ import (
"sync" "sync"
) )
// defaultMaxBlobCacheBytes is the default maximum size of the disk blob cache (10 GB).
const defaultMaxBlobCacheBytes = 10 << 30 // 10 GiB
// blobDiskCacheEntry tracks a cached blob on disk. // blobDiskCacheEntry tracks a cached blob on disk.
type blobDiskCacheEntry struct { type blobDiskCacheEntry struct {
key string key string

View File

@@ -109,7 +109,7 @@ func (v *Vaultik) Restore(opts *RestoreOptions) error {
// Step 5: Restore files // Step 5: Restore files
result := &RestoreResult{} result := &RestoreResult{}
blobCache, err := newBlobDiskCache(defaultMaxBlobCacheBytes) blobCache, err := newBlobDiskCache(4 * v.Config.BlobSizeLimit.Int64())
if err != nil { if err != nil {
return fmt.Errorf("creating blob cache: %w", err) return fmt.Errorf("creating blob cache: %w", err)
} }

View File

@@ -5,7 +5,6 @@ import (
"fmt" "fmt"
"os" "os"
"path/filepath" "path/filepath"
"regexp"
"sort" "sort"
"strings" "strings"
"text/tabwriter" "text/tabwriter"
@@ -545,7 +544,7 @@ func (v *Vaultik) PurgeSnapshots(keepLatest bool, olderThan string, force bool)
if !force { if !force {
v.printfStdout("\nDelete %d snapshot(s)? [y/N] ", len(toDelete)) v.printfStdout("\nDelete %d snapshot(s)? [y/N] ", len(toDelete))
var confirm string var confirm string
if _, err := fmt.Scanln(&confirm); err != nil { if _, err := v.scanStdin(&confirm); err != nil {
// Treat EOF or error as "no" // Treat EOF or error as "no"
v.printlnStdout("Cancelled") v.printlnStdout("Cancelled")
return nil return nil
@@ -1127,18 +1126,25 @@ func (v *Vaultik) PruneDatabase() (*PruneResult, error) {
return result, nil return result, nil
} }
// validTableNameRe matches table names containing only lowercase alphanumeric characters and underscores. // allowedTableNames is the exhaustive whitelist of table names that may be
var validTableNameRe = regexp.MustCompile(`^[a-z0-9_]+$`) // passed to getTableCount. Any name not in this set is rejected, preventing
// SQL injection even if caller-controlled input is accidentally supplied.
var allowedTableNames = map[string]struct{}{
"files": {},
"chunks": {},
"blobs": {},
}
// getTableCount returns the count of rows in a table. // getTableCount returns the number of rows in the given table.
// The tableName is sanitized to only allow [a-z0-9_] characters to prevent SQL injection. // tableName must appear in the allowedTableNames whitelist; all other values
// are rejected with an error, preventing SQL injection.
func (v *Vaultik) getTableCount(tableName string) (int64, error) { func (v *Vaultik) getTableCount(tableName string) (int64, error) {
if v.DB == nil { if _, ok := allowedTableNames[tableName]; !ok {
return 0, nil return 0, fmt.Errorf("table name not allowed: %q", tableName)
} }
if !validTableNameRe.MatchString(tableName) { if v.DB == nil {
return 0, fmt.Errorf("invalid table name: %q", tableName) return 0, nil
} }
var count int64 var count int64

View File

@@ -0,0 +1,51 @@
package vaultik
import (
"testing"
"github.com/stretchr/testify/assert"
)
func TestAllowedTableNames(t *testing.T) {
// Verify the whitelist contains exactly the expected tables
expected := []string{"files", "chunks", "blobs"}
assert.Len(t, allowedTableNames, len(expected))
for _, name := range expected {
_, ok := allowedTableNames[name]
assert.True(t, ok, "expected %q in allowedTableNames", name)
}
}
func TestGetTableCount_RejectsInvalidNames(t *testing.T) {
v := &Vaultik{} // DB is nil, but rejection happens before DB access
v.DB = nil // explicit
tests := []struct {
name string
tableName string
wantErr bool
}{
{"allowed files", "files", false},
{"allowed chunks", "chunks", false},
{"allowed blobs", "blobs", false},
{"sql injection attempt", "files; DROP TABLE files--", true},
{"unknown table", "users", true},
{"empty string", "", true},
{"uppercase", "FILES", true},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
count, err := v.getTableCount(tt.tableName)
if tt.wantErr {
assert.Error(t, err)
assert.Contains(t, err.Error(), "not allowed")
assert.Equal(t, int64(0), count)
} else {
// DB is nil so returns 0, nil for allowed names
assert.NoError(t, err)
assert.Equal(t, int64(0), count)
}
})
}
}

View File

@@ -129,7 +129,7 @@ func (v *Vaultik) GetFilesystem() afero.Fs {
return v.Fs return v.Fs
} }
// printfStdout writes formatted output to stdout for user-facing messages. // printfStdout writes formatted output to stdout.
func (v *Vaultik) printfStdout(format string, args ...any) { func (v *Vaultik) printfStdout(format string, args ...any) {
_, _ = fmt.Fprintf(v.Stdout, format, args...) _, _ = fmt.Fprintf(v.Stdout, format, args...)
} }