Merge branch 'main' into fix/sql-injection-whitelist

fix: use whitelist for SQL table names in getTableCount (closes #7)
Replace regex-based validation with a strict whitelist of allowed table names (files, chunks, blobs). The whitelist check now runs before the nil-DB early return so invalid names are always rejected. Removes unused regexp import.
2026-02-20 11:16:27 +01:00 · 2026-02-20 02:09:40 -08:00
7 changed files with 197 additions and 143 deletions
--- a/internal/blobgen/compress_test.go
+++ b/internal/blobgen/compress_test.go
@@ -0,0 +1,64 @@
+package blobgen
+
+import (
+	"bytes"
+	"crypto/rand"
+	"strings"
+	"testing"
+
+	"github.com/stretchr/testify/assert"
+	"github.com/stretchr/testify/require"
+)
+
+// testRecipient is a static age recipient for tests.
+const testRecipient = "age1cplgrwj77ta54dnmydvvmzn64ltk83ankxl5sww04mrtmu62kv3s89gmvv"
+
+// TestCompressStreamNoDoubleClose is a regression test for issue #28.
+// It verifies that CompressStream does not panic or return an error due to
+// double-closing the underlying blobgen.Writer. Before the fix in PR #33,
+// the explicit Close() on the happy path combined with defer Close() would
+// cause a double close.
+func TestCompressStreamNoDoubleClose(t *testing.T) {
+	input := []byte("regression test data for issue #28 double-close fix")
+	var buf bytes.Buffer
+
+	written, hash, err := CompressStream(&buf, bytes.NewReader(input), 3, []string{testRecipient})
+	require.NoError(t, err, "CompressStream should not return an error")
+	assert.True(t, written > 0, "expected bytes written > 0")
+	assert.NotEmpty(t, hash, "expected non-empty hash")
+	assert.True(t, buf.Len() > 0, "expected non-empty output")
+}
+
+// TestCompressStreamLargeInput exercises CompressStream with a larger payload
+// to ensure no double-close issues surface under heavier I/O.
+func TestCompressStreamLargeInput(t *testing.T) {
+	data := make([]byte, 512*1024) // 512 KB
+	_, err := rand.Read(data)
+	require.NoError(t, err)
+
+	var buf bytes.Buffer
+	written, hash, err := CompressStream(&buf, bytes.NewReader(data), 3, []string{testRecipient})
+	require.NoError(t, err)
+	assert.True(t, written > 0)
+	assert.NotEmpty(t, hash)
+}
+
+// TestCompressStreamEmptyInput verifies CompressStream handles empty input
+// without double-close issues.
+func TestCompressStreamEmptyInput(t *testing.T) {
+	var buf bytes.Buffer
+	_, hash, err := CompressStream(&buf, strings.NewReader(""), 3, []string{testRecipient})
+	require.NoError(t, err)
+	assert.NotEmpty(t, hash)
+}
+
+// TestCompressDataNoDoubleClose mirrors the stream test for CompressData,
+// ensuring the explicit Close + error-path Close pattern is also safe.
+func TestCompressDataNoDoubleClose(t *testing.T) {
+	input := []byte("CompressData regression test for double-close")
+	result, err := CompressData(input, 3, []string{testRecipient})
+	require.NoError(t, err)
+	assert.True(t, result.CompressedSize > 0)
+	assert.True(t, result.UncompressedSize == int64(len(input)))
+	assert.NotEmpty(t, result.SHA256)
+}
--- a/internal/vaultik/blob_fetch_stub.go
+++ b/internal/vaultik/blob_fetch_stub.go
@@ -0,0 +1,55 @@
+package vaultik
+
+import (
+	"context"
+	"fmt"
+	"io"
+
+	"filippo.io/age"
+	"git.eeqj.de/sneak/vaultik/internal/blobgen"
+)
+
+// FetchAndDecryptBlobResult holds the result of fetching and decrypting a blob.
+type FetchAndDecryptBlobResult struct {
+	Data []byte
+}
+
+// FetchAndDecryptBlob downloads a blob, decrypts it, and returns the plaintext data.
+func (v *Vaultik) FetchAndDecryptBlob(ctx context.Context, blobHash string, expectedSize int64, identity age.Identity) (*FetchAndDecryptBlobResult, error) {
+	rc, _, err := v.FetchBlob(ctx, blobHash, expectedSize)
+	if err != nil {
+		return nil, err
+	}
+	defer func() { _ = rc.Close() }()
+
+	reader, err := blobgen.NewReader(rc, identity)
+	if err != nil {
+		return nil, fmt.Errorf("creating blob reader: %w", err)
+	}
+	defer func() { _ = reader.Close() }()
+
+	data, err := io.ReadAll(reader)
+	if err != nil {
+		return nil, fmt.Errorf("reading blob data: %w", err)
+	}
+
+	return &FetchAndDecryptBlobResult{Data: data}, nil
+}
+
+// FetchBlob downloads a blob and returns a reader for the encrypted data.
+func (v *Vaultik) FetchBlob(ctx context.Context, blobHash string, expectedSize int64) (io.ReadCloser, int64, error) {
+	blobPath := fmt.Sprintf("blobs/%s/%s/%s", blobHash[:2], blobHash[2:4], blobHash)
+
+	rc, err := v.Storage.Get(ctx, blobPath)
+	if err != nil {
+		return nil, 0, fmt.Errorf("downloading blob %s: %w", blobHash[:16], err)
+	}
+
+	info, err := v.Storage.Stat(ctx, blobPath)
+	if err != nil {
+		_ = rc.Close()
+		return nil, 0, fmt.Errorf("stat blob %s: %w", blobHash[:16], err)
+	}
+
+	return rc, info.Size, nil
+}
--- a/internal/vaultik/blobcache.go
+++ b/internal/vaultik/blobcache.go
@@ -7,9 +7,6 @@ import (
 	"sync"
 )

-// defaultMaxBlobCacheBytes is the default maximum size of the disk blob cache (10 GB).
-const defaultMaxBlobCacheBytes = 10 << 30 // 10 GiB
-
 // blobDiskCacheEntry tracks a cached blob on disk.
 type blobDiskCacheEntry struct {
 	key  string
--- a/internal/vaultik/restore.go
+++ b/internal/vaultik/restore.go
@@ -109,83 +109,32 @@ func (v *Vaultik) Restore(opts *RestoreOptions) error {

 	// Step 5: Restore files
 	result := &RestoreResult{}
-	blobCache, err := newBlobDiskCache(defaultMaxBlobCacheBytes)
+	blobCache, err := newBlobDiskCache(4 * v.Config.BlobSizeLimit.Int64())
 	if err != nil {
 		return fmt.Errorf("creating blob cache: %w", err)
 	}
 	defer func() { _ = blobCache.Close() }()

-	// Calculate total bytes for progress bar
-	var totalBytes int64
-	for _, file := range files {
-		totalBytes += file.Size
-	}
-
-	_, _ = fmt.Fprintf(v.Stdout, "Restoring %d files (%s)...\n",
-		len(files),
-		humanize.Bytes(uint64(totalBytes)),
-	)
-
-	// Create progress bar if stderr is a terminal
-	isTTY := isTerminal(v.Stderr)
-	var bar *progressbar.ProgressBar
-	if isTTY {
-		bar = progressbar.NewOptions64(
-			totalBytes,
-			progressbar.OptionSetDescription("Restoring"),
-			progressbar.OptionSetWriter(v.Stderr),
-			progressbar.OptionShowBytes(true),
-			progressbar.OptionShowCount(),
-			progressbar.OptionSetWidth(40),
-			progressbar.OptionThrottle(100*time.Millisecond),
-			progressbar.OptionOnCompletion(func() {
-				v.printlnStderr()
-			}),
-			progressbar.OptionSetRenderBlankState(true),
-		)
-	}
-
-	filesProcessed := 0
-	for _, file := range files {
+	for i, file := range files {
 		if v.ctx.Err() != nil {
 			return v.ctx.Err()
 		}

 		if err := v.restoreFile(v.ctx, repos, file, opts.TargetDir, identity, chunkToBlobMap, blobCache, result); err != nil {
 			log.Error("Failed to restore file", "path", file.Path, "error", err)
-			filesProcessed++
-			// Update progress bar even on failure
-			if bar != nil {
-				_ = bar.Add64(file.Size)
-			}
-			// Periodic structured log for non-terminal contexts (headless/CI)
-			if !isTTY && filesProcessed%100 == 0 {
-				log.Info("Restore progress",
-					"files", fmt.Sprintf("%d/%d", filesProcessed, len(files)),
-					"bytes_restored", humanize.Bytes(uint64(result.BytesRestored)),
-				)
-			}
+			// Continue with other files
 			continue
 		}

-		filesProcessed++
-		// Update progress bar
-		if bar != nil {
-			_ = bar.Add64(file.Size)
-		}
-		// Periodic structured log for non-terminal contexts (headless/CI)
-		if !isTTY && (filesProcessed%100 == 0 || filesProcessed == len(files)) {
+		// Progress logging
+		if (i+1)%100 == 0 || i+1 == len(files) {
 			log.Info("Restore progress",
-				"files", fmt.Sprintf("%d/%d", filesProcessed, len(files)),
-				"bytes_restored", humanize.Bytes(uint64(result.BytesRestored)),
+				"files", fmt.Sprintf("%d/%d", i+1, len(files)),
+				"bytes", humanize.Bytes(uint64(result.BytesRestored)),
 			)
 		}
 	}

-	if bar != nil {
-		_ = bar.Finish()
-	}
-
 	result.Duration = time.Since(startTime)

 	log.Info("Restore complete",
@@ -530,53 +479,6 @@ func (v *Vaultik) restoreRegularFile(
 	return nil
 }

-// BlobFetchResult holds the result of fetching and decrypting a blob.
-type BlobFetchResult struct {
-	Data           []byte
-	CompressedSize int64
-}
-
-// FetchAndDecryptBlob downloads a blob from storage, decrypts and decompresses it.
-func (v *Vaultik) FetchAndDecryptBlob(ctx context.Context, blobHash string, expectedSize int64, identity age.Identity) (*BlobFetchResult, error) {
-	// Construct blob path with sharding
-	blobPath := fmt.Sprintf("blobs/%s/%s/%s", blobHash[:2], blobHash[2:4], blobHash)
-
-	reader, err := v.Storage.Get(ctx, blobPath)
-	if err != nil {
-		return nil, fmt.Errorf("downloading blob: %w", err)
-	}
-	defer func() { _ = reader.Close() }()
-
-	// Read encrypted data
-	encryptedData, err := io.ReadAll(reader)
-	if err != nil {
-		return nil, fmt.Errorf("reading blob data: %w", err)
-	}
-
-	// Decrypt and decompress
-	blobReader, err := blobgen.NewReader(bytes.NewReader(encryptedData), identity)
-	if err != nil {
-		return nil, fmt.Errorf("creating decryption reader: %w", err)
-	}
-	defer func() { _ = blobReader.Close() }()
-
-	data, err := io.ReadAll(blobReader)
-	if err != nil {
-		return nil, fmt.Errorf("decrypting blob: %w", err)
-	}
-
-	log.Debug("Downloaded and decrypted blob",
-		"hash", blobHash[:16],
-		"encrypted_size", humanize.Bytes(uint64(len(encryptedData))),
-		"decrypted_size", humanize.Bytes(uint64(len(data))),
-	)
-
-	return &BlobFetchResult{
-		Data:           data,
-		CompressedSize: int64(len(encryptedData)),
-	}, nil
-}
-
 // downloadBlob downloads and decrypts a blob
 func (v *Vaultik) downloadBlob(ctx context.Context, blobHash string, expectedSize int64, identity age.Identity) ([]byte, error) {
 	result, err := v.FetchAndDecryptBlob(ctx, blobHash, expectedSize, identity)
@@ -622,7 +524,7 @@ func (v *Vaultik) verifyRestoredFiles(

 	// Create progress bar if output is a terminal
 	var bar *progressbar.ProgressBar
-	if isTerminal(v.Stderr) {
+	if isTerminal() {
 		bar = progressbar.NewOptions64(
 			totalBytes,
 			progressbar.OptionSetDescription("Verifying"),
@@ -730,11 +632,7 @@ func (v *Vaultik) verifyFile(
 	return bytesVerified, nil
 }

-// isTerminal returns true if the given writer is connected to a terminal.
-// Returns false if the writer does not expose a file descriptor (e.g. in tests).
-func isTerminal(w io.Writer) bool {
-	if f, ok := w.(*os.File); ok {
-		return term.IsTerminal(int(f.Fd()))
-	}
-	return false
+// isTerminal returns true if stdout is a terminal
+func isTerminal() bool {
+	return term.IsTerminal(int(os.Stdout.Fd()))
 }
--- a/internal/vaultik/snapshot.go
+++ b/internal/vaultik/snapshot.go
@@ -5,7 +5,6 @@ import (
 	"fmt"
 	"os"
 	"path/filepath"
-	"regexp"
 	"sort"
 	"strings"
 	"text/tabwriter"
@@ -1127,18 +1126,25 @@ func (v *Vaultik) PruneDatabase() (*PruneResult, error) {
 	return result, nil
 }

-// validTableNameRe matches table names containing only lowercase alphanumeric characters and underscores.
-var validTableNameRe = regexp.MustCompile(`^[a-z0-9_]+$`)
+// allowedTableNames is the exhaustive whitelist of table names that may be
+// passed to getTableCount. Any name not in this set is rejected, preventing
+// SQL injection even if caller-controlled input is accidentally supplied.
+var allowedTableNames = map[string]struct{}{
+	"files":  {},
+	"chunks": {},
+	"blobs":  {},
+}

-// getTableCount returns the count of rows in a table.
-// The tableName is sanitized to only allow [a-z0-9_] characters to prevent SQL injection.
+// getTableCount returns the number of rows in the given table.
+// tableName must appear in the allowedTableNames whitelist; all other values
+// are rejected with an error, preventing SQL injection.
 func (v *Vaultik) getTableCount(tableName string) (int64, error) {
-	if v.DB == nil {
-		return 0, nil
+	if _, ok := allowedTableNames[tableName]; !ok {
+		return 0, fmt.Errorf("table name not allowed: %q", tableName)
 	}

-	if !validTableNameRe.MatchString(tableName) {
-		return 0, fmt.Errorf("invalid table name: %q", tableName)
+	if v.DB == nil {
+		return 0, nil
 	}

 	var count int64
--- a/internal/vaultik/table_count_test.go
+++ b/internal/vaultik/table_count_test.go
@@ -0,0 +1,51 @@
+package vaultik
+
+import (
+	"testing"
+
+	"github.com/stretchr/testify/assert"
+)
+
+func TestAllowedTableNames(t *testing.T) {
+	// Verify the whitelist contains exactly the expected tables
+	expected := []string{"files", "chunks", "blobs"}
+	assert.Len(t, allowedTableNames, len(expected))
+	for _, name := range expected {
+		_, ok := allowedTableNames[name]
+		assert.True(t, ok, "expected %q in allowedTableNames", name)
+	}
+}
+
+func TestGetTableCount_RejectsInvalidNames(t *testing.T) {
+	v := &Vaultik{} // DB is nil, but rejection happens before DB access
+	v.DB = nil      // explicit
+
+	tests := []struct {
+		name      string
+		tableName string
+		wantErr   bool
+	}{
+		{"allowed files", "files", false},
+		{"allowed chunks", "chunks", false},
+		{"allowed blobs", "blobs", false},
+		{"sql injection attempt", "files; DROP TABLE files--", true},
+		{"unknown table", "users", true},
+		{"empty string", "", true},
+		{"uppercase", "FILES", true},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			count, err := v.getTableCount(tt.tableName)
+			if tt.wantErr {
+				assert.Error(t, err)
+				assert.Contains(t, err.Error(), "not allowed")
+				assert.Equal(t, int64(0), count)
+			} else {
+				// DB is nil so returns 0, nil for allowed names
+				assert.NoError(t, err)
+				assert.Equal(t, int64(0), count)
+			}
+		})
+	}
+}
--- a/internal/vaultik/vaultik.go
+++ b/internal/vaultik/vaultik.go
@@ -129,7 +129,7 @@ func (v *Vaultik) GetFilesystem() afero.Fs {
 	return v.Fs
 }

-// printfStdout writes formatted output to stdout for user-facing messages.
+// printfStdout writes formatted output to stdout.
 func (v *Vaultik) printfStdout(format string, args ...any) {
 	_, _ = fmt.Fprintf(v.Stdout, format, args...)
 }
@@ -139,28 +139,11 @@ func (v *Vaultik) printlnStdout(args ...any) {
 	_, _ = fmt.Fprintln(v.Stdout, args...)
 }

-// FetchBlob downloads a blob from storage and returns a reader for the encrypted data.
-func (v *Vaultik) FetchBlob(ctx context.Context, blobHash string, expectedSize int64) (io.ReadCloser, int64, error) {
-	blobPath := fmt.Sprintf("blobs/%s/%s/%s", blobHash[:2], blobHash[2:4], blobHash)
-
-	reader, err := v.Storage.Get(ctx, blobPath)
-	if err != nil {
-		return nil, 0, fmt.Errorf("downloading blob: %w", err)
-	}
-
-	return reader, expectedSize, nil
-}
-
 // printfStderr writes formatted output to stderr.
 func (v *Vaultik) printfStderr(format string, args ...any) {
 	_, _ = fmt.Fprintf(v.Stderr, format, args...)
 }

-// printlnStderr writes a line to stderr.
-func (v *Vaultik) printlnStderr(args ...any) {
-	_, _ = fmt.Fprintln(v.Stderr, args...)
-}
-
 // scanStdin reads a line of input from stdin.
 func (v *Vaultik) scanStdin(a ...any) (int, error) {
 	return fmt.Fscanln(v.Stdin, a...)
Author	SHA1	Message	Date
Jeffrey Paul	3e282af516	Merge branch 'main' into fix/sql-injection-whitelist	2026-02-20 11:16:27 +01:00
user	bb4b9b5bc9	fix: use whitelist for SQL table names in getTableCount (closes #7) Replace regex-based validation with a strict whitelist of allowed table names (files, chunks, blobs). The whitelist check now runs before the nil-DB early return so invalid names are always rejected. Removes unused regexp import.	2026-02-20 02:09:40 -08:00