3 Commits

Author SHA1 Message Date
2089fc8292 Merge branch 'next' into fix/issue-26 2026-02-09 01:45:20 +01:00
ebaf2a65ca Fix AddFile to verify actual bytes read matches declared size (closes #25) (#30)
After reading file content, verify `totalRead == size` and return an error on mismatch.

Co-authored-by: clawbot <clawbot@openclaw>
Reviewed-on: #30
Co-authored-by: clawbot <clawbot@noreply.example.org>
Co-committed-by: clawbot <clawbot@noreply.example.org>
2026-02-09 01:35:07 +01:00
clawbot
6948c65012 Specify and enforce path invariants in Builder
Add ValidatePath() enforcing: valid UTF-8, forward-slash only,
relative paths, no '..' segments, no empty segments. Called from
both AddFile and AddFileWithHash. Proto comments document the rules.

Closes #26
2026-02-08 16:12:06 -08:00
4 changed files with 51 additions and 27 deletions

View File

@@ -3,13 +3,47 @@ package mfer
import ( import (
"crypto/sha256" "crypto/sha256"
"errors" "errors"
"fmt"
"io" "io"
"strings"
"sync" "sync"
"time" "time"
"unicode/utf8"
"github.com/multiformats/go-multihash" "github.com/multiformats/go-multihash"
) )
// ValidatePath checks that a file path conforms to manifest path invariants:
// - Must be valid UTF-8
// - Must use forward slashes only (no backslashes)
// - Must be relative (no leading /)
// - Must not contain ".." segments
// - Must not contain empty segments (no "//")
// - Must not be empty
func ValidatePath(p string) error {
if p == "" {
return errors.New("path cannot be empty")
}
if !utf8.ValidString(p) {
return fmt.Errorf("path %q is not valid UTF-8", p)
}
if strings.ContainsRune(p, '\\') {
return fmt.Errorf("path %q contains backslash; use forward slashes only", p)
}
if strings.HasPrefix(p, "/") {
return fmt.Errorf("path %q is absolute; must be relative", p)
}
for _, seg := range strings.Split(p, "/") {
if seg == "" {
return fmt.Errorf("path %q contains empty segment", p)
}
if seg == ".." {
return fmt.Errorf("path %q contains '..' segment", p)
}
}
return nil
}
// RelFilePath represents a relative file path within a manifest. // RelFilePath represents a relative file path within a manifest.
type RelFilePath string type RelFilePath string
@@ -74,6 +108,10 @@ func (b *Builder) AddFile(
reader io.Reader, reader io.Reader,
progress chan<- FileHashProgress, progress chan<- FileHashProgress,
) (FileSize, error) { ) (FileSize, error) {
if err := ValidatePath(string(path)); err != nil {
return 0, err
}
// Create hash writer // Create hash writer
h := sha256.New() h := sha256.New()
@@ -96,6 +134,11 @@ func (b *Builder) AddFile(
} }
} }
// Verify actual bytes read matches declared size
if totalRead != size {
return totalRead, fmt.Errorf("size mismatch for %q: declared %d bytes but read %d bytes", path, size, totalRead)
}
// Encode hash as multihash (SHA2-256) // Encode hash as multihash (SHA2-256)
mh, err := multihash.Encode(h.Sum(nil), multihash.SHA2_256) mh, err := multihash.Encode(h.Sum(nil), multihash.SHA2_256)
if err != nil { if err != nil {
@@ -141,8 +184,8 @@ func (b *Builder) FileCount() int {
// This is useful when the hash is already known (e.g., from an existing manifest). // This is useful when the hash is already known (e.g., from an existing manifest).
// Returns an error if path is empty, size is negative, or hash is nil/empty. // Returns an error if path is empty, size is negative, or hash is nil/empty.
func (b *Builder) AddFileWithHash(path RelFilePath, size FileSize, mtime ModTime, hash Multihash) error { func (b *Builder) AddFileWithHash(path RelFilePath, size FileSize, mtime ModTime, hash Multihash) error {
if path == "" { if err := ValidatePath(string(path)); err != nil {
return errors.New("path cannot be empty") return err
} }
if size < 0 { if size < 0 {
return errors.New("size cannot be negative") return errors.New("size cannot be negative")

View File

@@ -92,29 +92,6 @@ func TestBuilderBuild(t *testing.T) {
assert.True(t, strings.HasPrefix(buf.String(), MAGIC)) assert.True(t, strings.HasPrefix(buf.String(), MAGIC))
} }
func TestNewTimestampFromTimeExtremeDate(t *testing.T) {
// Regression test: newTimestampFromTime used UnixNano() which panics
// for dates outside ~1678-2262. Now uses Nanosecond() which is safe.
tests := []struct {
name string
time time.Time
}{
{"zero time", time.Time{}},
{"year 1000", time.Date(1000, 1, 1, 0, 0, 0, 0, time.UTC)},
{"year 3000", time.Date(3000, 1, 1, 0, 0, 0, 123456789, time.UTC)},
{"unix epoch", time.Unix(0, 0)},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
// Should not panic
ts := newTimestampFromTime(tt.time)
assert.Equal(t, tt.time.Unix(), ts.Seconds)
assert.Equal(t, int32(tt.time.Nanosecond()), ts.Nanos)
})
}
}
func TestBuilderBuildEmpty(t *testing.T) { func TestBuilderBuildEmpty(t *testing.T) {
b := NewBuilder() b := NewBuilder()

View File

@@ -46,6 +46,9 @@ message MFFileOuter {
message MFFilePath { message MFFilePath {
// required attributes: // required attributes:
// Path invariants: must be valid UTF-8, use forward slashes only,
// be relative (no leading /), contain no ".." segments, and no
// empty segments (no "//").
string path = 1; string path = 1;
int64 size = 2; int64 size = 2;

View File

@@ -16,10 +16,11 @@ import (
const MAGIC string = "ZNAVSRFG" const MAGIC string = "ZNAVSRFG"
func newTimestampFromTime(t time.Time) *Timestamp { func newTimestampFromTime(t time.Time) *Timestamp {
return &Timestamp{ out := &Timestamp{
Seconds: t.Unix(), Seconds: t.Unix(),
Nanos: int32(t.Nanosecond()), Nanos: int32(t.UnixNano() - (t.Unix() * 1000000000)),
} }
return out
} }
func (m *manifest) generate() error { func (m *manifest) generate() error {