Compare commits
8 Commits
94a4e60c17
...
fix/issue-
| Author | SHA1 | Date | |
|---|---|---|---|
| d2217ec29e | |||
| 7f25970dd3 | |||
| 75c88d0a52 | |||
| 70af055d4e | |||
| 04b05e01e8 | |||
| 7144617d0e | |||
| 2efffd9da8 | |||
| ebaf2a65ca | |||
@@ -113,7 +113,7 @@ func (mfa *CLIApp) fetchManifestOperation(ctx *cli.Context) error {
|
|||||||
return fmt.Errorf("invalid path in manifest: %w", err)
|
return fmt.Errorf("invalid path in manifest: %w", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
fileURL := baseURL.String() + f.Path
|
fileURL := baseURL.String() + encodeFilePath(f.Path)
|
||||||
log.Infof("fetching %s", f.Path)
|
log.Infof("fetching %s", f.Path)
|
||||||
|
|
||||||
if err := downloadFile(fileURL, localPath, f, progress); err != nil {
|
if err := downloadFile(fileURL, localPath, f, progress); err != nil {
|
||||||
@@ -139,6 +139,15 @@ func (mfa *CLIApp) fetchManifestOperation(ctx *cli.Context) error {
|
|||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// encodeFilePath percent-encodes a slash-separated file path for use in a URL.
//
// The path is split on "/" and every segment is escaped on its own with
// url.PathEscape, so characters such as spaces, '#', '?' and '%' are encoded
// while the "/" separators between segments are preserved. An empty input
// yields an empty string.
func encodeFilePath(p string) string {
	segments := strings.Split(p, "/")
	for i, seg := range segments {
		// Escape per segment: escaping the whole path at once would turn the
		// separators into %2F.
		segments[i] = url.PathEscape(seg)
	}
	return strings.Join(segments, "/")
}
|
||||||
|
|
||||||
// sanitizePath validates and sanitizes a file path from the manifest.
|
// sanitizePath validates and sanitizes a file path from the manifest.
|
||||||
// It prevents path traversal attacks and rejects unsafe paths.
|
// It prevents path traversal attacks and rejects unsafe paths.
|
||||||
func sanitizePath(p string) (string, error) {
|
func sanitizePath(p string) (string, error) {
|
||||||
|
|||||||
@@ -16,6 +16,29 @@ import (
|
|||||||
"sneak.berlin/go/mfer/mfer"
|
"sneak.berlin/go/mfer/mfer"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
func TestEncodeFilePath(t *testing.T) {
|
||||||
|
tests := []struct {
|
||||||
|
input string
|
||||||
|
expected string
|
||||||
|
}{
|
||||||
|
{"file.txt", "file.txt"},
|
||||||
|
{"dir/file.txt", "dir/file.txt"},
|
||||||
|
{"my file.txt", "my%20file.txt"},
|
||||||
|
{"dir/my file.txt", "dir/my%20file.txt"},
|
||||||
|
{"file#1.txt", "file%231.txt"},
|
||||||
|
{"file?v=1.txt", "file%3Fv=1.txt"},
|
||||||
|
{"path/to/file with spaces.txt", "path/to/file%20with%20spaces.txt"},
|
||||||
|
{"100%done.txt", "100%25done.txt"},
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, tt := range tests {
|
||||||
|
t.Run(tt.input, func(t *testing.T) {
|
||||||
|
result := encodeFilePath(tt.input)
|
||||||
|
assert.Equal(t, tt.expected, result)
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
func TestSanitizePath(t *testing.T) {
|
func TestSanitizePath(t *testing.T) {
|
||||||
// Valid paths that should be accepted
|
// Valid paths that should be accepted
|
||||||
validTests := []struct {
|
validTests := []struct {
|
||||||
|
|||||||
@@ -3,13 +3,47 @@ package mfer
|
|||||||
import (
|
import (
|
||||||
"crypto/sha256"
|
"crypto/sha256"
|
||||||
"errors"
|
"errors"
|
||||||
|
"fmt"
|
||||||
"io"
|
"io"
|
||||||
|
"strings"
|
||||||
"sync"
|
"sync"
|
||||||
"time"
|
"time"
|
||||||
|
"unicode/utf8"
|
||||||
|
|
||||||
"github.com/multiformats/go-multihash"
|
"github.com/multiformats/go-multihash"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
// ValidatePath checks that a file path conforms to manifest path invariants:
//   - Must not be empty
//   - Must be valid UTF-8
//   - Must use forward slashes only (no backslashes)
//   - Must be relative (no leading /)
//   - Must not contain empty segments (no "//", no trailing "/")
//   - Must not contain ".." segments
//
// It returns nil when every invariant holds, otherwise an error describing the
// first violated invariant. The checks run in the order listed above.
func ValidatePath(p string) error {
	if p == "" {
		return errors.New("path cannot be empty")
	}
	if !utf8.ValidString(p) {
		return fmt.Errorf("path %q is not valid UTF-8", p)
	}
	if strings.ContainsRune(p, '\\') {
		return fmt.Errorf("path %q contains backslash; use forward slashes only", p)
	}
	if strings.HasPrefix(p, "/") {
		return fmt.Errorf("path %q is absolute; must be relative", p)
	}
	// Only whole segments are inspected, so names that merely start with dots
	// (for example "..foo") are accepted.
	for _, seg := range strings.Split(p, "/") {
		if seg == "" {
			return fmt.Errorf("path %q contains empty segment", p)
		}
		if seg == ".." {
			return fmt.Errorf("path %q contains '..' segment", p)
		}
	}
	return nil
}
|
||||||
|
|
||||||
// RelFilePath represents a relative file path within a manifest.
|
// RelFilePath represents a relative file path within a manifest.
|
||||||
type RelFilePath string
|
type RelFilePath string
|
||||||
|
|
||||||
@@ -74,6 +108,10 @@ func (b *Builder) AddFile(
|
|||||||
reader io.Reader,
|
reader io.Reader,
|
||||||
progress chan<- FileHashProgress,
|
progress chan<- FileHashProgress,
|
||||||
) (FileSize, error) {
|
) (FileSize, error) {
|
||||||
|
if err := ValidatePath(string(path)); err != nil {
|
||||||
|
return 0, err
|
||||||
|
}
|
||||||
|
|
||||||
// Create hash writer
|
// Create hash writer
|
||||||
h := sha256.New()
|
h := sha256.New()
|
||||||
|
|
||||||
@@ -96,6 +134,11 @@ func (b *Builder) AddFile(
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Verify actual bytes read matches declared size
|
||||||
|
if totalRead != size {
|
||||||
|
return totalRead, fmt.Errorf("size mismatch for %q: declared %d bytes but read %d bytes", path, size, totalRead)
|
||||||
|
}
|
||||||
|
|
||||||
// Encode hash as multihash (SHA2-256)
|
// Encode hash as multihash (SHA2-256)
|
||||||
mh, err := multihash.Encode(h.Sum(nil), multihash.SHA2_256)
|
mh, err := multihash.Encode(h.Sum(nil), multihash.SHA2_256)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
@@ -141,8 +184,8 @@ func (b *Builder) FileCount() int {
|
|||||||
// This is useful when the hash is already known (e.g., from an existing manifest).
|
// This is useful when the hash is already known (e.g., from an existing manifest).
|
||||||
// Returns an error if path is empty, size is negative, or hash is nil/empty.
|
// Returns an error if path is empty, size is negative, or hash is nil/empty.
|
||||||
func (b *Builder) AddFileWithHash(path RelFilePath, size FileSize, mtime ModTime, hash Multihash) error {
|
func (b *Builder) AddFileWithHash(path RelFilePath, size FileSize, mtime ModTime, hash Multihash) error {
|
||||||
if path == "" {
|
if err := ValidatePath(string(path)); err != nil {
|
||||||
return errors.New("path cannot be empty")
|
return err
|
||||||
}
|
}
|
||||||
if size < 0 {
|
if size < 0 {
|
||||||
return errors.New("size cannot be negative")
|
return errors.New("size cannot be negative")
|
||||||
|
|||||||
@@ -92,6 +92,29 @@ func TestBuilderBuild(t *testing.T) {
|
|||||||
assert.True(t, strings.HasPrefix(buf.String(), MAGIC))
|
assert.True(t, strings.HasPrefix(buf.String(), MAGIC))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestNewTimestampFromTimeExtremeDate(t *testing.T) {
|
||||||
|
// Regression test: newTimestampFromTime used UnixNano() which panics
|
||||||
|
// for dates outside ~1678-2262. Now uses Nanosecond() which is safe.
|
||||||
|
tests := []struct {
|
||||||
|
name string
|
||||||
|
time time.Time
|
||||||
|
}{
|
||||||
|
{"zero time", time.Time{}},
|
||||||
|
{"year 1000", time.Date(1000, 1, 1, 0, 0, 0, 0, time.UTC)},
|
||||||
|
{"year 3000", time.Date(3000, 1, 1, 0, 0, 0, 123456789, time.UTC)},
|
||||||
|
{"unix epoch", time.Unix(0, 0)},
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, tt := range tests {
|
||||||
|
t.Run(tt.name, func(t *testing.T) {
|
||||||
|
// Should not panic
|
||||||
|
ts := newTimestampFromTime(tt.time)
|
||||||
|
assert.Equal(t, tt.time.Unix(), ts.Seconds)
|
||||||
|
assert.Equal(t, int32(tt.time.Nanosecond()), ts.Nanos)
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
func TestBuilderBuildEmpty(t *testing.T) {
|
func TestBuilderBuildEmpty(t *testing.T) {
|
||||||
b := NewBuilder()
|
b := NewBuilder()
|
||||||
|
|
||||||
|
|||||||
@@ -224,12 +224,7 @@ func (c *Checker) checkFile(entry *MFFilePath, checkedBytes *FileSize) Result {
|
|||||||
// Check if file exists
|
// Check if file exists
|
||||||
info, err := c.fs.Stat(absPath)
|
info, err := c.fs.Stat(absPath)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
if errors.Is(err, afero.ErrFileNotFound) || errors.Is(err, errors.New("file does not exist")) {
|
if errors.Is(err, os.ErrNotExist) || errors.Is(err, afero.ErrFileNotFound) {
|
||||||
return Result{Path: relPath, Status: StatusMissing, Message: "file not found"}
|
|
||||||
}
|
|
||||||
// Check for "file does not exist" style errors
|
|
||||||
exists, _ := afero.Exists(c.fs, absPath)
|
|
||||||
if !exists {
|
|
||||||
return Result{Path: relPath, Status: StatusMissing, Message: "file not found"}
|
return Result{Path: relPath, Status: StatusMissing, Message: "file not found"}
|
||||||
}
|
}
|
||||||
return Result{Path: relPath, Status: StatusError, Message: err.Error()}
|
return Result{Path: relPath, Status: StatusError, Message: err.Error()}
|
||||||
|
|||||||
@@ -381,6 +381,39 @@ func TestCheckSubdirectories(t *testing.T) {
|
|||||||
assert.Equal(t, 3, okCount)
|
assert.Equal(t, 3, okCount)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestCheckMissingFileDetectedWithoutFallback(t *testing.T) {
|
||||||
|
// Regression test: errors.Is(err, errors.New("...")) never matches because
|
||||||
|
// errors.New creates a new value each time. The fix uses os.ErrNotExist instead.
|
||||||
|
fs := afero.NewMemMapFs()
|
||||||
|
files := map[string][]byte{
|
||||||
|
"exists.txt": []byte("here"),
|
||||||
|
"missing.txt": []byte("not on disk"),
|
||||||
|
}
|
||||||
|
createTestManifest(t, fs, "/manifest.mf", files)
|
||||||
|
// Only create one file on disk
|
||||||
|
createFilesOnDisk(t, fs, "/data", map[string][]byte{
|
||||||
|
"exists.txt": []byte("here"),
|
||||||
|
})
|
||||||
|
|
||||||
|
chk, err := NewChecker("/manifest.mf", "/data", fs)
|
||||||
|
require.NoError(t, err)
|
||||||
|
|
||||||
|
results := make(chan Result, 10)
|
||||||
|
err = chk.Check(context.Background(), results, nil)
|
||||||
|
require.NoError(t, err)
|
||||||
|
|
||||||
|
statusCounts := map[Status]int{}
|
||||||
|
for r := range results {
|
||||||
|
statusCounts[r.Status]++
|
||||||
|
if r.Status == StatusMissing {
|
||||||
|
assert.Equal(t, RelFilePath("missing.txt"), r.Path)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
assert.Equal(t, 1, statusCounts[StatusOK], "one file should be OK")
|
||||||
|
assert.Equal(t, 1, statusCounts[StatusMissing], "one file should be MISSING")
|
||||||
|
assert.Equal(t, 0, statusCounts[StatusError], "no files should be ERROR")
|
||||||
|
}
|
||||||
|
|
||||||
func TestCheckEmptyManifest(t *testing.T) {
|
func TestCheckEmptyManifest(t *testing.T) {
|
||||||
fs := afero.NewMemMapFs()
|
fs := afero.NewMemMapFs()
|
||||||
// Create manifest with no files
|
// Create manifest with no files
|
||||||
|
|||||||
@@ -3,4 +3,9 @@ package mfer
|
|||||||
const (
	Version     = "0.1.0"
	ReleaseDate = "2025-12-17"

	// MaxDecompressedSize is the maximum allowed size of decompressed manifest
	// data (256 MB). This prevents decompression bombs from consuming excessive
	// memory.
	MaxDecompressedSize int64 = 256 * 1024 * 1024
)
|
||||||
|
|||||||
@@ -76,10 +76,20 @@ func (m *manifest) deserializeInner() error {
|
|||||||
}
|
}
|
||||||
defer zr.Close()
|
defer zr.Close()
|
||||||
|
|
||||||
dat, err := io.ReadAll(zr)
|
// Limit decompressed size to prevent decompression bombs.
|
||||||
|
// Use declared size + 1 byte to detect overflow, capped at MaxDecompressedSize.
|
||||||
|
maxSize := MaxDecompressedSize
|
||||||
|
if m.pbOuter.Size > 0 && m.pbOuter.Size < int64(maxSize) {
|
||||||
|
maxSize = int64(m.pbOuter.Size) + 1
|
||||||
|
}
|
||||||
|
limitedReader := io.LimitReader(zr, maxSize)
|
||||||
|
dat, err := io.ReadAll(limitedReader)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
if int64(len(dat)) >= MaxDecompressedSize {
|
||||||
|
return fmt.Errorf("decompressed data exceeds maximum allowed size of %d bytes", MaxDecompressedSize)
|
||||||
|
}
|
||||||
|
|
||||||
isize := len(dat)
|
isize := len(dat)
|
||||||
if int64(isize) != m.pbOuter.Size {
|
if int64(isize) != m.pbOuter.Size {
|
||||||
|
|||||||
@@ -46,6 +46,9 @@ message MFFileOuter {
|
|||||||
|
|
||||||
message MFFilePath {
|
message MFFilePath {
|
||||||
// required attributes:
|
// required attributes:
|
||||||
|
// Path invariants: must be valid UTF-8, use forward slashes only,
|
||||||
|
// be relative (no leading /), contain no ".." segments, and no
|
||||||
|
// empty segments (no "//").
|
||||||
string path = 1;
|
string path = 1;
|
||||||
int64 size = 2;
|
int64 size = 2;
|
||||||
|
|
||||||
|
|||||||
@@ -16,11 +16,10 @@ import (
|
|||||||
const MAGIC string = "ZNAVSRFG"
|
const MAGIC string = "ZNAVSRFG"
|
||||||
|
|
||||||
func newTimestampFromTime(t time.Time) *Timestamp {
|
func newTimestampFromTime(t time.Time) *Timestamp {
|
||||||
out := &Timestamp{
|
return &Timestamp{
|
||||||
Seconds: t.Unix(),
|
Seconds: t.Unix(),
|
||||||
Nanos: int32(t.UnixNano() - (t.Unix() * 1000000000)),
|
Nanos: int32(t.Nanosecond()),
|
||||||
}
|
}
|
||||||
return out
|
|
||||||
}
|
}
|
||||||
|
|
||||||
func (m *manifest) generate() error {
|
func (m *manifest) generate() error {
|
||||||
|
|||||||
Reference in New Issue
Block a user