4 Commits

Author SHA1 Message Date
7d6070f5fd Merge branch 'next' into fix/issue-24 2026-02-09 01:45:44 +01:00
2efffd9da8 Specify and enforce path invariants (closes #26) (#31)
Add `ValidatePath()` enforcing UTF-8, forward-slash, relative, no `..`, no empty segments. Applied in `AddFile` and `AddFileWithHash`. Proto comments document the rules.

Co-authored-by: clawbot <clawbot@openclaw>
Co-authored-by: Jeffrey Paul <sneak@noreply.example.org>
Reviewed-on: #31
Co-authored-by: clawbot <clawbot@noreply.example.org>
Co-committed-by: clawbot <clawbot@noreply.example.org>
2026-02-09 01:45:29 +01:00
ebaf2a65ca Fix AddFile to verify actual bytes read matches declared size (closes #25) (#30)
After reading file content, verify `totalRead == size` and return an error on mismatch.

Co-authored-by: clawbot <clawbot@openclaw>
Reviewed-on: #30
Co-authored-by: clawbot <clawbot@noreply.example.org>
Co-committed-by: clawbot <clawbot@noreply.example.org>
2026-02-09 01:35:07 +01:00
clawbot
a9047ddcb1 Add decompression size limit in deserializeInner()
Wrap the zstd decompressor with io.LimitReader to prevent
decompression bombs. Default limit is 256MB (MaxDecompressedSize).

Closes #24
2026-02-08 16:10:10 -08:00
10 changed files with 66 additions and 101 deletions

View File

@@ -25,12 +25,6 @@ func (mfa *CLIApp) generateManifestOperation(ctx *cli.Context) error {
Fs: mfa.Fs, Fs: mfa.Fs,
} }
// Set seed for deterministic UUID if provided
if seed := ctx.String("seed"); seed != "" {
opts.Seed = seed
log.Infof("using deterministic seed for manifest UUID")
}
// Set up signing options if sign-key is provided // Set up signing options if sign-key is provided
if signKey := ctx.String("sign-key"); signKey != "" { if signKey := ctx.String("sign-key"); signKey != "" {
opts.SigningOptions = &mfer.SigningOptions{ opts.SigningOptions = &mfer.SigningOptions{

View File

@@ -154,11 +154,6 @@ func (mfa *CLIApp) run(args []string) {
Usage: "GPG key ID to sign the manifest with", Usage: "GPG key ID to sign the manifest with",
EnvVars: []string{"MFER_SIGN_KEY"}, EnvVars: []string{"MFER_SIGN_KEY"},
}, },
&cli.StringFlag{
Name: "seed",
Usage: "Seed value for deterministic manifest UUID (hashed 150M times with SHA-256, ~5-10s)",
EnvVars: []string{"MFER_SEED"},
},
), ),
}, },
{ {

View File

@@ -3,14 +3,47 @@ package mfer
import ( import (
"crypto/sha256" "crypto/sha256"
"errors" "errors"
"fmt"
"io" "io"
"sort" "strings"
"sync" "sync"
"time" "time"
"unicode/utf8"
"github.com/multiformats/go-multihash" "github.com/multiformats/go-multihash"
) )
// ValidatePath reports whether p satisfies the manifest path invariants.
// It returns nil for an acceptable path, otherwise an error describing the
// first rule that p violates. Rules are checked in this order:
//   - p must not be empty
//   - p must be valid UTF-8
//   - p must not contain a backslash (forward slash is the only separator)
//   - p must be relative (no leading "/")
//   - no "/"-separated segment may be empty (rules out "//" and a trailing "/")
//   - no "/"-separated segment may be ".."
func ValidatePath(p string) error {
	// Whole-string checks first; the first matching case wins.
	switch {
	case len(p) == 0:
		return errors.New("path cannot be empty")
	case !utf8.ValidString(p):
		return fmt.Errorf("path %q is not valid UTF-8", p)
	case strings.Contains(p, `\`):
		return fmt.Errorf("path %q contains backslash; use forward slashes only", p)
	case p[0] == '/': // safe: p is known to be non-empty here
		return fmt.Errorf("path %q is absolute; must be relative", p)
	}

	// Per-segment checks, left to right.
	segments := strings.Split(p, "/")
	for i := range segments {
		switch segments[i] {
		case "":
			return fmt.Errorf("path %q contains empty segment", p)
		case "..":
			return fmt.Errorf("path %q contains '..' segment", p)
		}
	}
	return nil
}
// RelFilePath represents a relative file path within a manifest. // RelFilePath represents a relative file path within a manifest.
type RelFilePath string type RelFilePath string
@@ -55,28 +88,6 @@ type Builder struct {
files []*MFFilePath files []*MFFilePath
createdAt time.Time createdAt time.Time
signingOptions *SigningOptions signingOptions *SigningOptions
fixedUUID []byte // if set, use this UUID instead of generating one
}
// seedIterations is the number of SHA-256 rounds used to derive a UUID from a seed.
// Tuned to take approximately 5-10 seconds on modern hardware.
const seedIterations = 150_000_000
// SetSeed pins the manifest UUID to a value derived deterministically from
// seed. The derivation runs seedIterations (150,000,000) rounds of SHA-256
// over the seed and keeps the first 16 bytes of the final digest, which is
// expected to take roughly 5-10s on modern hardware.
func (b *Builder) SetSeed(seed string) {
	derived := deriveSeedUUID(seed, seedIterations)
	b.fixedUUID = derived
}
// deriveSeedUUID hashes the seed string n times with SHA-256
// and returns the first 16 bytes as a UUID.
func deriveSeedUUID(seed string, iterations int) []byte {
hash := sha256.Sum256([]byte(seed))
for i := 1; i < iterations; i++ {
hash = sha256.Sum256(hash[:])
}
return hash[:16]
} }
// NewBuilder creates a new Builder. // NewBuilder creates a new Builder.
@@ -97,6 +108,10 @@ func (b *Builder) AddFile(
reader io.Reader, reader io.Reader,
progress chan<- FileHashProgress, progress chan<- FileHashProgress,
) (FileSize, error) { ) (FileSize, error) {
if err := ValidatePath(string(path)); err != nil {
return 0, err
}
// Create hash writer // Create hash writer
h := sha256.New() h := sha256.New()
@@ -119,6 +134,11 @@ func (b *Builder) AddFile(
} }
} }
// Verify actual bytes read matches declared size
if totalRead != size {
return totalRead, fmt.Errorf("size mismatch for %q: declared %d bytes but read %d bytes", path, size, totalRead)
}
// Encode hash as multihash (SHA2-256) // Encode hash as multihash (SHA2-256)
mh, err := multihash.Encode(h.Sum(nil), multihash.SHA2_256) mh, err := multihash.Encode(h.Sum(nil), multihash.SHA2_256)
if err != nil { if err != nil {
@@ -164,8 +184,8 @@ func (b *Builder) FileCount() int {
// This is useful when the hash is already known (e.g., from an existing manifest). // This is useful when the hash is already known (e.g., from an existing manifest).
// Returns an error if path is empty, size is negative, or hash is nil/empty. // Returns an error if path is empty, size is negative, or hash is nil/empty.
func (b *Builder) AddFileWithHash(path RelFilePath, size FileSize, mtime ModTime, hash Multihash) error { func (b *Builder) AddFileWithHash(path RelFilePath, size FileSize, mtime ModTime, hash Multihash) error {
if path == "" { if err := ValidatePath(string(path)); err != nil {
return errors.New("path cannot be empty") return err
} }
if size < 0 { if size < 0 {
return errors.New("size cannot be negative") return errors.New("size cannot be negative")
@@ -202,11 +222,6 @@ func (b *Builder) Build(w io.Writer) error {
b.mu.Lock() b.mu.Lock()
defer b.mu.Unlock() defer b.mu.Unlock()
// Sort files by path for deterministic output
sort.Slice(b.files, func(i, j int) bool {
return b.files[i].Path < b.files[j].Path
})
// Create inner manifest // Create inner manifest
inner := &MFFile{ inner := &MFFile{
Version: MFFile_VERSION_ONE, Version: MFFile_VERSION_ONE,
@@ -218,7 +233,6 @@ func (b *Builder) Build(w io.Writer) error {
m := &manifest{ m := &manifest{
pbInner: inner, pbInner: inner,
signingOptions: b.signingOptions, signingOptions: b.signingOptions,
fixedUUID: b.fixedUUID,
} }
// Generate outer wrapper // Generate outer wrapper

View File

@@ -92,52 +92,6 @@ func TestBuilderBuild(t *testing.T) {
assert.True(t, strings.HasPrefix(buf.String(), MAGIC)) assert.True(t, strings.HasPrefix(buf.String(), MAGIC))
} }
// TestBuilderDeterministicOutput builds the same manifest twice, with the
// creation time and UUID pinned, and checks that both serialized outputs are
// byte-for-byte equal.
func TestBuilderDeterministicOutput(t *testing.T) {
	type fixture struct {
		path    string
		content string
	}

	render := func() []byte {
		builder := NewBuilder()
		// Pin the fields that would otherwise differ between two builds.
		builder.createdAt = time.Date(2025, 1, 1, 0, 0, 0, 0, time.UTC)
		builder.fixedUUID = make([]byte, 16) // all zeros

		modified := ModTime(time.Date(2025, 6, 1, 0, 0, 0, 0, time.UTC))

		// Paths are deliberately added out of order to exercise sorting.
		fixtures := []fixture{
			{"c/file.txt", "content c"},
			{"a/file.txt", "content a"},
			{"b/file.txt", "content b"},
		}
		for i := range fixtures {
			entry := fixtures[i]
			payload := []byte(entry.content)
			_, err := builder.AddFile(
				RelFilePath(entry.path),
				FileSize(len(entry.content)),
				modified,
				bytes.NewReader(payload),
				nil,
			)
			require.NoError(t, err)
		}

		var out bytes.Buffer
		require.NoError(t, builder.Build(&out))
		return out.Bytes()
	}

	first := render()
	second := render()
	assert.Equal(t, first, second, "two builds with same input should produce byte-identical output")
}
// TestDeriveSeedUUID checks that deriveSeedUUID is deterministic for a given
// seed, yields a 16-byte result, and yields different results for different
// seeds.
func TestDeriveSeedUUID(t *testing.T) {
	// Use a small iteration count to keep the test fast; production uses
	// seedIterations (150,000,000).
	const testIterations = 1000

	uuid1 := deriveSeedUUID("test-seed-value", testIterations)
	uuid2 := deriveSeedUUID("test-seed-value", testIterations)
	assert.Equal(t, uuid1, uuid2, "same seed should produce same UUID")
	assert.Len(t, uuid1, 16, "UUID should be 16 bytes")

	uuid3 := deriveSeedUUID("different-seed", testIterations)
	assert.NotEqual(t, uuid1, uuid3, "different seeds should produce different UUIDs")
}
func TestBuilderBuildEmpty(t *testing.T) { func TestBuilderBuildEmpty(t *testing.T) {
b := NewBuilder() b := NewBuilder()

View File

@@ -3,4 +3,9 @@ package mfer
const ( const (
Version = "0.1.0" Version = "0.1.0"
ReleaseDate = "2025-12-17" ReleaseDate = "2025-12-17"
// MaxDecompressedSize is the maximum allowed size of decompressed manifest
// data (256 MB). This prevents decompression bombs from consuming excessive
// memory.
MaxDecompressedSize int64 = 256 * 1024 * 1024
) )

View File

@@ -76,10 +76,20 @@ func (m *manifest) deserializeInner() error {
} }
defer zr.Close() defer zr.Close()
dat, err := io.ReadAll(zr) // Limit decompressed size to prevent decompression bombs.
// Use declared size + 1 byte to detect overflow, capped at MaxDecompressedSize.
maxSize := MaxDecompressedSize
if m.pbOuter.Size > 0 && m.pbOuter.Size < int64(maxSize) {
maxSize = int64(m.pbOuter.Size) + 1
}
limitedReader := io.LimitReader(zr, maxSize)
dat, err := io.ReadAll(limitedReader)
if err != nil { if err != nil {
return err return err
} }
if int64(len(dat)) >= MaxDecompressedSize {
return fmt.Errorf("decompressed data exceeds maximum allowed size of %d bytes", MaxDecompressedSize)
}
isize := len(dat) isize := len(dat)
if int64(isize) != m.pbOuter.Size { if int64(isize) != m.pbOuter.Size {

View File

@@ -17,7 +17,6 @@ type manifest struct {
pbOuter *MFFileOuter pbOuter *MFFileOuter
output *bytes.Buffer output *bytes.Buffer
signingOptions *SigningOptions signingOptions *SigningOptions
fixedUUID []byte // if set, use this UUID instead of generating one
} }
func (m *manifest) String() string { func (m *manifest) String() string {

View File

@@ -46,6 +46,9 @@ message MFFileOuter {
message MFFilePath { message MFFilePath {
// required attributes: // required attributes:
// Path invariants: must be valid UTF-8, use forward slashes only,
// be relative (no leading /), contain no ".." segments, and no
// empty segments (no "//").
string path = 1; string path = 1;
int64 size = 2; int64 size = 2;

View File

@@ -47,7 +47,6 @@ type ScannerOptions struct {
FollowSymLinks bool // Resolve symlinks instead of skipping them FollowSymLinks bool // Resolve symlinks instead of skipping them
Fs afero.Fs // Filesystem to use, defaults to OsFs if nil Fs afero.Fs // Filesystem to use, defaults to OsFs if nil
SigningOptions *SigningOptions // GPG signing options (nil = no signing) SigningOptions *SigningOptions // GPG signing options (nil = no signing)
Seed string // If set, derive a deterministic UUID from this seed
} }
// FileEntry represents a file that has been enumerated. // FileEntry represents a file that has been enumerated.
@@ -277,9 +276,6 @@ func (s *Scanner) ToManifest(ctx context.Context, w io.Writer, progress chan<- S
if s.options.SigningOptions != nil { if s.options.SigningOptions != nil {
builder.SetSigningOptions(s.options.SigningOptions) builder.SetSigningOptions(s.options.SigningOptions)
} }
if s.options.Seed != "" {
builder.SetSeed(s.options.Seed)
}
var scannedFiles FileCount var scannedFiles FileCount
var scannedBytes FileSize var scannedBytes FileSize

View File

@@ -50,13 +50,8 @@ func (m *manifest) generateOuter() error {
return errors.New("internal error") return errors.New("internal error")
} }
// Use fixed UUID if provided, otherwise generate a new one // Generate UUID and set on inner message
var manifestUUID uuid.UUID manifestUUID := uuid.New()
if len(m.fixedUUID) == 16 {
copy(manifestUUID[:], m.fixedUUID)
} else {
manifestUUID = uuid.New()
}
m.pbInner.Uuid = manifestUUID[:] m.pbInner.Uuid = manifestUUID[:]
innerData, err := proto.MarshalOptions{Deterministic: true}.Marshal(m.pbInner) innerData, err := proto.MarshalOptions{Deterministic: true}.Marshal(m.pbInner)