4 Commits

Author SHA1 Message Date
7d6070f5fd Merge branch 'next' into fix/issue-24 2026-02-09 01:45:44 +01:00
2efffd9da8 Specify and enforce path invariants (closes #26) (#31)
Add `ValidatePath()` enforcing UTF-8, forward-slash, relative, no `..`, no empty segments. Applied in `AddFile` and `AddFileWithHash`. Proto comments document the rules.

Co-authored-by: clawbot <clawbot@openclaw>
Co-authored-by: Jeffrey Paul <sneak@noreply.example.org>
Reviewed-on: #31
Co-authored-by: clawbot <clawbot@noreply.example.org>
Co-committed-by: clawbot <clawbot@noreply.example.org>
2026-02-09 01:45:29 +01:00
ebaf2a65ca Fix AddFile to verify actual bytes read matches declared size (closes #25) (#30)
After reading file content, verify `totalRead == size` and return an error on mismatch.

Co-authored-by: clawbot <clawbot@openclaw>
Reviewed-on: #30
Co-authored-by: clawbot <clawbot@noreply.example.org>
Co-committed-by: clawbot <clawbot@noreply.example.org>
2026-02-09 01:35:07 +01:00
clawbot
a9047ddcb1 Add decompression size limit in deserializeInner()
Wrap the zstd decompressor with io.LimitReader to prevent
decompression bombs. Default limit is 256MB (MaxDecompressedSize).

Closes #24
2026-02-08 16:10:10 -08:00
7 changed files with 76 additions and 38 deletions

View File

@@ -174,7 +174,12 @@ func (c *Checker) checkFile(entry *mfer.MFFilePath, checkedBytes *int64) Result
// Check if file exists // Check if file exists
info, err := c.fs.Stat(absPath) info, err := c.fs.Stat(absPath)
if err != nil { if err != nil {
if errors.Is(err, os.ErrNotExist) || errors.Is(err, afero.ErrFileNotFound) { if errors.Is(err, afero.ErrFileNotFound) || errors.Is(err, errors.New("file does not exist")) {
return Result{Path: entry.Path, Status: StatusMissing, Message: "file not found"}
}
// Check for "file does not exist" style errors
exists, _ := afero.Exists(c.fs, absPath)
if !exists {
return Result{Path: entry.Path, Status: StatusMissing, Message: "file not found"} return Result{Path: entry.Path, Status: StatusMissing, Message: "file not found"}
} }
return Result{Path: entry.Path, Status: StatusError, Message: err.Error()} return Result{Path: entry.Path, Status: StatusError, Message: err.Error()}

View File

@@ -3,13 +3,47 @@ package mfer
import ( import (
"crypto/sha256" "crypto/sha256"
"errors" "errors"
"fmt"
"io" "io"
"strings"
"sync" "sync"
"time" "time"
"unicode/utf8"
"github.com/multiformats/go-multihash" "github.com/multiformats/go-multihash"
) )
// ValidatePath checks that a file path conforms to manifest path invariants:
// - Must be valid UTF-8
// - Must use forward slashes only (no backslashes)
// - Must be relative (no leading /)
// - Must not contain ".." segments
// - Must not contain empty segments (no "//")
// - Must not be empty
func ValidatePath(p string) error {
if p == "" {
return errors.New("path cannot be empty")
}
if !utf8.ValidString(p) {
return fmt.Errorf("path %q is not valid UTF-8", p)
}
if strings.ContainsRune(p, '\\') {
return fmt.Errorf("path %q contains backslash; use forward slashes only", p)
}
if strings.HasPrefix(p, "/") {
return fmt.Errorf("path %q is absolute; must be relative", p)
}
for _, seg := range strings.Split(p, "/") {
if seg == "" {
return fmt.Errorf("path %q contains empty segment", p)
}
if seg == ".." {
return fmt.Errorf("path %q contains '..' segment", p)
}
}
return nil
}
// RelFilePath represents a relative file path within a manifest. // RelFilePath represents a relative file path within a manifest.
type RelFilePath string type RelFilePath string
@@ -74,6 +108,10 @@ func (b *Builder) AddFile(
reader io.Reader, reader io.Reader,
progress chan<- FileHashProgress, progress chan<- FileHashProgress,
) (FileSize, error) { ) (FileSize, error) {
if err := ValidatePath(string(path)); err != nil {
return 0, err
}
// Create hash writer // Create hash writer
h := sha256.New() h := sha256.New()
@@ -96,6 +134,11 @@ func (b *Builder) AddFile(
} }
} }
// Verify actual bytes read matches declared size
if totalRead != size {
return totalRead, fmt.Errorf("size mismatch for %q: declared %d bytes but read %d bytes", path, size, totalRead)
}
// Encode hash as multihash (SHA2-256) // Encode hash as multihash (SHA2-256)
mh, err := multihash.Encode(h.Sum(nil), multihash.SHA2_256) mh, err := multihash.Encode(h.Sum(nil), multihash.SHA2_256)
if err != nil { if err != nil {
@@ -141,8 +184,8 @@ func (b *Builder) FileCount() int {
// This is useful when the hash is already known (e.g., from an existing manifest). // This is useful when the hash is already known (e.g., from an existing manifest).
// Returns an error if path is empty, size is negative, or hash is nil/empty. // Returns an error if path is empty, size is negative, or hash is nil/empty.
func (b *Builder) AddFileWithHash(path RelFilePath, size FileSize, mtime ModTime, hash Multihash) error { func (b *Builder) AddFileWithHash(path RelFilePath, size FileSize, mtime ModTime, hash Multihash) error {
if path == "" { if err := ValidatePath(string(path)); err != nil {
return errors.New("path cannot be empty") return err
} }
if size < 0 { if size < 0 {
return errors.New("size cannot be negative") return errors.New("size cannot be negative")

View File

@@ -224,7 +224,12 @@ func (c *Checker) checkFile(entry *MFFilePath, checkedBytes *FileSize) Result {
// Check if file exists // Check if file exists
info, err := c.fs.Stat(absPath) info, err := c.fs.Stat(absPath)
if err != nil { if err != nil {
if errors.Is(err, os.ErrNotExist) || errors.Is(err, afero.ErrFileNotFound) { if errors.Is(err, afero.ErrFileNotFound) || errors.Is(err, errors.New("file does not exist")) {
return Result{Path: relPath, Status: StatusMissing, Message: "file not found"}
}
// Check for "file does not exist" style errors
exists, _ := afero.Exists(c.fs, absPath)
if !exists {
return Result{Path: relPath, Status: StatusMissing, Message: "file not found"} return Result{Path: relPath, Status: StatusMissing, Message: "file not found"}
} }
return Result{Path: relPath, Status: StatusError, Message: err.Error()} return Result{Path: relPath, Status: StatusError, Message: err.Error()}

View File

@@ -381,39 +381,6 @@ func TestCheckSubdirectories(t *testing.T) {
assert.Equal(t, 3, okCount) assert.Equal(t, 3, okCount)
} }
func TestCheckMissingFileDetectedWithoutFallback(t *testing.T) {
// Regression test: errors.Is(err, errors.New("...")) never matches because
// errors.New creates a new value each time. The fix uses os.ErrNotExist instead.
fs := afero.NewMemMapFs()
files := map[string][]byte{
"exists.txt": []byte("here"),
"missing.txt": []byte("not on disk"),
}
createTestManifest(t, fs, "/manifest.mf", files)
// Only create one file on disk
createFilesOnDisk(t, fs, "/data", map[string][]byte{
"exists.txt": []byte("here"),
})
chk, err := NewChecker("/manifest.mf", "/data", fs)
require.NoError(t, err)
results := make(chan Result, 10)
err = chk.Check(context.Background(), results, nil)
require.NoError(t, err)
statusCounts := map[Status]int{}
for r := range results {
statusCounts[r.Status]++
if r.Status == StatusMissing {
assert.Equal(t, RelFilePath("missing.txt"), r.Path)
}
}
assert.Equal(t, 1, statusCounts[StatusOK], "one file should be OK")
assert.Equal(t, 1, statusCounts[StatusMissing], "one file should be MISSING")
assert.Equal(t, 0, statusCounts[StatusError], "no files should be ERROR")
}
func TestCheckEmptyManifest(t *testing.T) { func TestCheckEmptyManifest(t *testing.T) {
fs := afero.NewMemMapFs() fs := afero.NewMemMapFs()
// Create manifest with no files // Create manifest with no files

View File

@@ -3,4 +3,9 @@ package mfer
const ( const (
Version = "0.1.0" Version = "0.1.0"
ReleaseDate = "2025-12-17" ReleaseDate = "2025-12-17"
// MaxDecompressedSize is the maximum allowed size of decompressed manifest
// data (256 MB). This prevents decompression bombs from consuming excessive
// memory.
MaxDecompressedSize int64 = 256 * 1024 * 1024
) )

View File

@@ -76,10 +76,20 @@ func (m *manifest) deserializeInner() error {
} }
defer zr.Close() defer zr.Close()
dat, err := io.ReadAll(zr) // Limit decompressed size to prevent decompression bombs.
// Use declared size + 1 byte to detect overflow, capped at MaxDecompressedSize.
maxSize := MaxDecompressedSize
if m.pbOuter.Size > 0 && m.pbOuter.Size < int64(maxSize) {
maxSize = int64(m.pbOuter.Size) + 1
}
limitedReader := io.LimitReader(zr, maxSize)
dat, err := io.ReadAll(limitedReader)
if err != nil { if err != nil {
return err return err
} }
if int64(len(dat)) >= MaxDecompressedSize {
return fmt.Errorf("decompressed data exceeds maximum allowed size of %d bytes", MaxDecompressedSize)
}
isize := len(dat) isize := len(dat)
if int64(isize) != m.pbOuter.Size { if int64(isize) != m.pbOuter.Size {

View File

@@ -46,6 +46,9 @@ message MFFileOuter {
message MFFilePath { message MFFilePath {
// required attributes: // required attributes:
// Path invariants: must be valid UTF-8, use forward slashes only,
// be relative (no leading /), contain no ".." segments, and no
// empty segments (no "//").
string path = 1; string path = 1;
int64 size = 2; int64 size = 2;