3 Commits

Author SHA1 Message Date
c62a4dd5e9 Merge branch 'next' into fix/issue-13 2026-02-09 02:13:09 +01:00
70af055d4e Fix newTimestampFromTime panic on extreme dates (closes #15) (#20)
Co-authored-by: clawbot <clawbot@openclaw>
Co-authored-by: Jeffrey Paul <sneak@noreply.example.org>
Reviewed-on: #20
Co-authored-by: clawbot <clawbot@noreply.example.org>
Co-committed-by: clawbot <clawbot@noreply.example.org>
2026-02-09 02:10:21 +01:00
clawbot
34438cb5b9 fix: URL-encode file paths in fetch command to handle special characters
File paths with spaces, #, ?, %, etc. were concatenated directly into
URLs without encoding, producing malformed download URLs.

Add encodeFilePath() that encodes each path segment individually
(preserving directory separators) and use it in fetch.
2026-02-08 12:03:11 -08:00
9 changed files with 56 additions and 98 deletions

View File

@@ -113,7 +113,7 @@ func (mfa *CLIApp) fetchManifestOperation(ctx *cli.Context) error {
return fmt.Errorf("invalid path in manifest: %w", err) return fmt.Errorf("invalid path in manifest: %w", err)
} }
fileURL := baseURL.String() + f.Path fileURL := baseURL.String() + encodeFilePath(f.Path)
log.Infof("fetching %s", f.Path) log.Infof("fetching %s", f.Path)
if err := downloadFile(fileURL, localPath, f, progress); err != nil { if err := downloadFile(fileURL, localPath, f, progress); err != nil {
@@ -139,6 +139,15 @@ func (mfa *CLIApp) fetchManifestOperation(ctx *cli.Context) error {
return nil return nil
} }
// encodeFilePath returns p with every "/"-separated segment percent-encoded
// via url.PathEscape. The separators themselves are written back verbatim, so
// the directory structure of the path survives while characters such as
// spaces, '#', '?' and '%' inside a segment are escaped.
func encodeFilePath(p string) string {
	var out strings.Builder
	for idx, part := range strings.Split(p, "/") {
		// Re-insert the separator that Split consumed between segments.
		if idx > 0 {
			out.WriteByte('/')
		}
		out.WriteString(url.PathEscape(part))
	}
	return out.String()
}
// sanitizePath validates and sanitizes a file path from the manifest. // sanitizePath validates and sanitizes a file path from the manifest.
// It prevents path traversal attacks and rejects unsafe paths. // It prevents path traversal attacks and rejects unsafe paths.
func sanitizePath(p string) (string, error) { func sanitizePath(p string) (string, error) {

View File

@@ -16,6 +16,29 @@ import (
"sneak.berlin/go/mfer/mfer" "sneak.berlin/go/mfer/mfer"
) )
// TestEncodeFilePath is a table-driven check of encodeFilePath: plain paths
// must pass through unchanged, "/" separators must be kept, and spaces, '#',
// '?' and '%' inside a segment must come back percent-encoded.
func TestEncodeFilePath(t *testing.T) {
	cases := []struct {
		path string
		want string
	}{
		{path: "file.txt", want: "file.txt"},
		{path: "dir/file.txt", want: "dir/file.txt"},
		{path: "my file.txt", want: "my%20file.txt"},
		{path: "dir/my file.txt", want: "dir/my%20file.txt"},
		{path: "file#1.txt", want: "file%231.txt"},
		{path: "file?v=1.txt", want: "file%3Fv=1.txt"},
		{path: "path/to/file with spaces.txt", want: "path/to/file%20with%20spaces.txt"},
		{path: "100%done.txt", want: "100%25done.txt"},
	}

	for _, tc := range cases {
		// The raw input path doubles as the subtest name.
		t.Run(tc.path, func(t *testing.T) {
			got := encodeFilePath(tc.path)
			assert.Equal(t, tc.want, got)
		})
	}
}
func TestSanitizePath(t *testing.T) { func TestSanitizePath(t *testing.T) {
// Valid paths that should be accepted // Valid paths that should be accepted
validTests := []struct { validTests := []struct {

View File

@@ -25,12 +25,6 @@ func (mfa *CLIApp) generateManifestOperation(ctx *cli.Context) error {
Fs: mfa.Fs, Fs: mfa.Fs,
} }
// Set seed for deterministic UUID if provided
if seed := ctx.String("seed"); seed != "" {
opts.Seed = seed
log.Infof("using deterministic seed for manifest UUID")
}
// Set up signing options if sign-key is provided // Set up signing options if sign-key is provided
if signKey := ctx.String("sign-key"); signKey != "" { if signKey := ctx.String("sign-key"); signKey != "" {
opts.SigningOptions = &mfer.SigningOptions{ opts.SigningOptions = &mfer.SigningOptions{

View File

@@ -154,11 +154,6 @@ func (mfa *CLIApp) run(args []string) {
Usage: "GPG key ID to sign the manifest with", Usage: "GPG key ID to sign the manifest with",
EnvVars: []string{"MFER_SIGN_KEY"}, EnvVars: []string{"MFER_SIGN_KEY"},
}, },
&cli.StringFlag{
Name: "seed",
Usage: "Seed value for deterministic manifest UUID (hashed 150M times with SHA-256, ~5-10s)",
EnvVars: []string{"MFER_SEED"},
},
), ),
}, },
{ {

View File

@@ -5,7 +5,6 @@ import (
"errors" "errors"
"fmt" "fmt"
"io" "io"
"sort"
"strings" "strings"
"sync" "sync"
"time" "time"
@@ -89,28 +88,6 @@ type Builder struct {
files []*MFFilePath files []*MFFilePath
createdAt time.Time createdAt time.Time
signingOptions *SigningOptions signingOptions *SigningOptions
fixedUUID []byte // if set, use this UUID instead of generating one
}
// seedIterations is the number of SHA-256 rounds used to derive a UUID from a seed.
// SetSeed passes this value to deriveSeedUUID as the iteration count.
// Tuned to take approximately 5-10 seconds on modern hardware.
const seedIterations = 150_000_000
// SetSeed fixes the manifest UUID to a value derived from seed.
// It runs deriveSeedUUID over the seed for seedIterations rounds
// (150,000,000 SHA-256 hashes, noted as ~5-10s on modern hardware)
// and stores the resulting 16 bytes in b.fixedUUID.
func (b *Builder) SetSeed(seed string) {
	derived := deriveSeedUUID(seed, seedIterations)
	b.fixedUUID = derived
}
// deriveSeedUUID hashes the seed string n times with SHA-256
// and returns the first 16 bytes as a UUID.
func deriveSeedUUID(seed string, iterations int) []byte {
hash := sha256.Sum256([]byte(seed))
for i := 1; i < iterations; i++ {
hash = sha256.Sum256(hash[:])
}
return hash[:16]
} }
// NewBuilder creates a new Builder. // NewBuilder creates a new Builder.
@@ -245,11 +222,6 @@ func (b *Builder) Build(w io.Writer) error {
b.mu.Lock() b.mu.Lock()
defer b.mu.Unlock() defer b.mu.Unlock()
// Sort files by path for deterministic output
sort.Slice(b.files, func(i, j int) bool {
return b.files[i].Path < b.files[j].Path
})
// Create inner manifest // Create inner manifest
inner := &MFFile{ inner := &MFFile{
Version: MFFile_VERSION_ONE, Version: MFFile_VERSION_ONE,
@@ -261,7 +233,6 @@ func (b *Builder) Build(w io.Writer) error {
m := &manifest{ m := &manifest{
pbInner: inner, pbInner: inner,
signingOptions: b.signingOptions, signingOptions: b.signingOptions,
fixedUUID: b.fixedUUID,
} }
// Generate outer wrapper // Generate outer wrapper

View File

@@ -92,50 +92,27 @@ func TestBuilderBuild(t *testing.T) {
assert.True(t, strings.HasPrefix(buf.String(), MAGIC)) assert.True(t, strings.HasPrefix(buf.String(), MAGIC))
} }
func TestBuilderDeterministicOutput(t *testing.T) { func TestNewTimestampFromTimeExtremeDate(t *testing.T) {
buildManifest := func() []byte { // Regression test: newTimestampFromTime used UnixNano() which panics
b := NewBuilder() // for dates outside ~1678-2262. Now uses Nanosecond() which is safe.
// Use a fixed createdAt and UUID so output is reproducible tests := []struct {
b.createdAt = time.Date(2025, 1, 1, 0, 0, 0, 0, time.UTC) name string
b.fixedUUID = make([]byte, 16) // all zeros time time.Time
}{
mtime := ModTime(time.Date(2025, 6, 1, 0, 0, 0, 0, time.UTC)) {"zero time", time.Time{}},
{"year 1000", time.Date(1000, 1, 1, 0, 0, 0, 0, time.UTC)},
// Add files in reverse order to test sorting {"year 3000", time.Date(3000, 1, 1, 0, 0, 0, 123456789, time.UTC)},
files := []struct { {"unix epoch", time.Unix(0, 0)},
path string
content string
}{
{"c/file.txt", "content c"},
{"a/file.txt", "content a"},
{"b/file.txt", "content b"},
}
for _, f := range files {
r := bytes.NewReader([]byte(f.content))
_, err := b.AddFile(RelFilePath(f.path), FileSize(len(f.content)), mtime, r, nil)
require.NoError(t, err)
}
var buf bytes.Buffer
err := b.Build(&buf)
require.NoError(t, err)
return buf.Bytes()
} }
out1 := buildManifest() for _, tt := range tests {
out2 := buildManifest() t.Run(tt.name, func(t *testing.T) {
assert.Equal(t, out1, out2, "two builds with same input should produce byte-identical output") // Should not panic
} ts := newTimestampFromTime(tt.time)
assert.Equal(t, tt.time.Unix(), ts.Seconds)
func TestDeriveSeedUUID(t *testing.T) { assert.Equal(t, int32(tt.time.Nanosecond()), ts.Nanos)
// Use a small iteration count for testing (production uses 1B) })
uuid1 := deriveSeedUUID("test-seed-value", 1000) }
uuid2 := deriveSeedUUID("test-seed-value", 1000)
assert.Equal(t, uuid1, uuid2, "same seed should produce same UUID")
assert.Len(t, uuid1, 16, "UUID should be 16 bytes")
uuid3 := deriveSeedUUID("different-seed", 1000)
assert.NotEqual(t, uuid1, uuid3, "different seeds should produce different UUIDs")
} }
func TestBuilderBuildEmpty(t *testing.T) { func TestBuilderBuildEmpty(t *testing.T) {

View File

@@ -17,7 +17,6 @@ type manifest struct {
pbOuter *MFFileOuter pbOuter *MFFileOuter
output *bytes.Buffer output *bytes.Buffer
signingOptions *SigningOptions signingOptions *SigningOptions
fixedUUID []byte // if set, use this UUID instead of generating one
} }
func (m *manifest) String() string { func (m *manifest) String() string {

View File

@@ -47,7 +47,6 @@ type ScannerOptions struct {
FollowSymLinks bool // Resolve symlinks instead of skipping them FollowSymLinks bool // Resolve symlinks instead of skipping them
Fs afero.Fs // Filesystem to use, defaults to OsFs if nil Fs afero.Fs // Filesystem to use, defaults to OsFs if nil
SigningOptions *SigningOptions // GPG signing options (nil = no signing) SigningOptions *SigningOptions // GPG signing options (nil = no signing)
Seed string // If set, derive a deterministic UUID from this seed
} }
// FileEntry represents a file that has been enumerated. // FileEntry represents a file that has been enumerated.
@@ -277,9 +276,6 @@ func (s *Scanner) ToManifest(ctx context.Context, w io.Writer, progress chan<- S
if s.options.SigningOptions != nil { if s.options.SigningOptions != nil {
builder.SetSigningOptions(s.options.SigningOptions) builder.SetSigningOptions(s.options.SigningOptions)
} }
if s.options.Seed != "" {
builder.SetSeed(s.options.Seed)
}
var scannedFiles FileCount var scannedFiles FileCount
var scannedBytes FileSize var scannedBytes FileSize

View File

@@ -16,11 +16,10 @@ import (
const MAGIC string = "ZNAVSRFG" const MAGIC string = "ZNAVSRFG"
func newTimestampFromTime(t time.Time) *Timestamp { func newTimestampFromTime(t time.Time) *Timestamp {
out := &Timestamp{ return &Timestamp{
Seconds: t.Unix(), Seconds: t.Unix(),
Nanos: int32(t.UnixNano() - (t.Unix() * 1000000000)), Nanos: int32(t.Nanosecond()),
} }
return out
} }
func (m *manifest) generate() error { func (m *manifest) generate() error {
@@ -50,13 +49,8 @@ func (m *manifest) generateOuter() error {
return errors.New("internal error") return errors.New("internal error")
} }
// Use fixed UUID if provided, otherwise generate a new one // Generate UUID and set on inner message
var manifestUUID uuid.UUID manifestUUID := uuid.New()
if len(m.fixedUUID) == 16 {
copy(manifestUUID[:], m.fixedUUID)
} else {
manifestUUID = uuid.New()
}
m.pbInner.Uuid = manifestUUID[:] m.pbInner.Uuid = manifestUUID[:]
innerData, err := proto.MarshalOptions{Deterministic: true}.Marshal(m.pbInner) innerData, err := proto.MarshalOptions{Deterministic: true}.Marshal(m.pbInner)