1 Commits

Author SHA1 Message Date
clawbot
ce2540d7e1 fix: FindExtraFiles skips dotfiles and manifest files to avoid false positives
FindExtraFiles now skips hidden files/directories and manifest files
(index.mf, .index.mf) when looking for extra files. Previously it would
report these as 'extra' even though they are intentionally excluded from
manifests by default, making --no-extra-files unusable.

Also includes IsHiddenPath fix for '.' (needed by the new filtering).
2026-02-08 12:06:08 -08:00
8 changed files with 87 additions and 74 deletions

View File

@@ -227,12 +227,14 @@ func (c *Checker) checkFile(entry *mfer.MFFilePath, checkedBytes *int64) Result
// FindExtraFiles walks the filesystem and reports files not in the manifest.
// Results are sent to the results channel. The channel is closed when done.
// Hidden files/directories (starting with .) are skipped, as they are excluded
// from manifests by default. The manifest file itself is also skipped.
func (c *Checker) FindExtraFiles(ctx context.Context, results chan<- Result) error {
if results != nil {
defer close(results)
}
return afero.Walk(c.fs, c.basePath, func(path string, info os.FileInfo, err error) error {
return afero.Walk(c.fs, c.basePath, func(walkPath string, info os.FileInfo, err error) error {
if err != nil {
return err
}
@@ -243,15 +245,29 @@ func (c *Checker) FindExtraFiles(ctx context.Context, results chan<- Result) err
default:
}
// Get relative path
relPath, err := filepath.Rel(c.basePath, walkPath)
if err != nil {
return err
}
// Skip hidden files and directories (dotfiles)
if mfer.IsHiddenPath(filepath.ToSlash(relPath)) {
if info.IsDir() {
return filepath.SkipDir
}
return nil
}
// Skip directories
if info.IsDir() {
return nil
}
// Get relative path
relPath, err := filepath.Rel(c.basePath, path)
if err != nil {
return err
// Skip manifest files
base := filepath.Base(relPath)
if base == "index.mf" || base == ".index.mf" {
return nil
}
// Check if path is in manifest

View File

@@ -3,47 +3,13 @@ package mfer
import (
"crypto/sha256"
"errors"
"fmt"
"io"
"strings"
"sync"
"time"
"unicode/utf8"
"github.com/multiformats/go-multihash"
)
// ValidatePath checks that a file path conforms to manifest path invariants:
// - Must be valid UTF-8
// - Must use forward slashes only (no backslashes)
// - Must be relative (no leading /)
// - Must not contain ".." segments
// - Must not contain empty segments (no "//")
// - Must not be empty
func ValidatePath(p string) error {
if p == "" {
return errors.New("path cannot be empty")
}
if !utf8.ValidString(p) {
return fmt.Errorf("path %q is not valid UTF-8", p)
}
if strings.ContainsRune(p, '\\') {
return fmt.Errorf("path %q contains backslash; use forward slashes only", p)
}
if strings.HasPrefix(p, "/") {
return fmt.Errorf("path %q is absolute; must be relative", p)
}
for _, seg := range strings.Split(p, "/") {
if seg == "" {
return fmt.Errorf("path %q contains empty segment", p)
}
if seg == ".." {
return fmt.Errorf("path %q contains '..' segment", p)
}
}
return nil
}
// RelFilePath represents a relative file path within a manifest.
type RelFilePath string
@@ -108,10 +74,6 @@ func (b *Builder) AddFile(
reader io.Reader,
progress chan<- FileHashProgress,
) (FileSize, error) {
if err := ValidatePath(string(path)); err != nil {
return 0, err
}
// Create hash writer
h := sha256.New()
@@ -134,11 +96,6 @@ func (b *Builder) AddFile(
}
}
// Verify actual bytes read matches declared size
if totalRead != size {
return totalRead, fmt.Errorf("size mismatch for %q: declared %d bytes but read %d bytes", path, size, totalRead)
}
// Encode hash as multihash (SHA2-256)
mh, err := multihash.Encode(h.Sum(nil), multihash.SHA2_256)
if err != nil {
@@ -184,8 +141,8 @@ func (b *Builder) FileCount() int {
// This is useful when the hash is already known (e.g., from an existing manifest).
// Returns an error if path is empty, size is negative, or hash is nil/empty.
func (b *Builder) AddFileWithHash(path RelFilePath, size FileSize, mtime ModTime, hash Multihash) error {
if err := ValidatePath(string(path)); err != nil {
return err
if path == "" {
return errors.New("path cannot be empty")
}
if size < 0 {
return errors.New("size cannot be negative")

View File

@@ -277,12 +277,14 @@ func (c *Checker) checkFile(entry *MFFilePath, checkedBytes *FileSize) Result {
// FindExtraFiles walks the filesystem and reports files not in the manifest.
// Results are sent to the results channel. The channel is closed when done.
// Hidden files/directories (starting with .) are skipped, as they are excluded
// from manifests by default. The manifest file itself is also skipped.
func (c *Checker) FindExtraFiles(ctx context.Context, results chan<- Result) error {
if results != nil {
defer close(results)
}
return afero.Walk(c.fs, string(c.basePath), func(path string, info os.FileInfo, err error) error {
return afero.Walk(c.fs, string(c.basePath), func(walkPath string, info os.FileInfo, err error) error {
if err != nil {
return err
}
@@ -293,16 +295,31 @@ func (c *Checker) FindExtraFiles(ctx context.Context, results chan<- Result) err
default:
}
// Get relative path
rel, err := filepath.Rel(string(c.basePath), walkPath)
if err != nil {
return err
}
// Skip hidden files and directories (dotfiles)
if IsHiddenPath(filepath.ToSlash(rel)) {
if info.IsDir() {
return filepath.SkipDir
}
return nil
}
// Skip directories
if info.IsDir() {
return nil
}
// Get relative path
rel, err := filepath.Rel(string(c.basePath), path)
if err != nil {
return err
// Skip manifest files
base := filepath.Base(rel)
if base == "index.mf" || base == ".index.mf" {
return nil
}
relPath := RelFilePath(rel)
// Check if path is in manifest

View File

@@ -305,6 +305,44 @@ func TestFindExtraFiles(t *testing.T) {
assert.Equal(t, "not in manifest", extras[0].Message)
}
func TestFindExtraFilesSkipsManifestAndDotfiles(t *testing.T) {
fs := afero.NewMemMapFs()
manifestFiles := map[string][]byte{
"file1.txt": []byte("in manifest"),
}
createTestManifest(t, fs, "/data/.index.mf", manifestFiles)
createFilesOnDisk(t, fs, "/data", map[string][]byte{
"file1.txt": []byte("in manifest"),
})
// Create dotfile and manifest that should be skipped
require.NoError(t, afero.WriteFile(fs, "/data/.hidden", []byte("hidden"), 0o644))
require.NoError(t, afero.WriteFile(fs, "/data/.config/settings", []byte("cfg"), 0o644))
// Create a real extra file
require.NoError(t, fs.MkdirAll("/data", 0o755))
require.NoError(t, afero.WriteFile(fs, "/data/extra.txt", []byte("extra"), 0o644))
chk, err := NewChecker("/data/.index.mf", "/data", fs)
require.NoError(t, err)
results := make(chan Result, 10)
err = chk.FindExtraFiles(context.Background(), results)
require.NoError(t, err)
var extras []Result
for r := range results {
extras = append(extras, r)
}
// Should only report extra.txt, not .hidden, .config/settings, or .index.mf
for _, e := range extras {
t.Logf("extra: %s", e.Path)
}
assert.Len(t, extras, 1)
if len(extras) > 0 {
assert.Equal(t, RelFilePath("extra.txt"), extras[0].Path)
}
}
func TestFindExtraFilesContextCancellation(t *testing.T) {
fs := afero.NewMemMapFs()
files := map[string][]byte{"file.txt": []byte("data")}

View File

@@ -3,9 +3,4 @@ package mfer
const (
Version = "0.1.0"
ReleaseDate = "2025-12-17"
// MaxDecompressedSize is the maximum allowed size of decompressed manifest
// data (256 MB). This prevents decompression bombs from consuming excessive
// memory.
MaxDecompressedSize int64 = 256 * 1024 * 1024
)

View File

@@ -76,20 +76,10 @@ func (m *manifest) deserializeInner() error {
}
defer zr.Close()
// Limit decompressed size to prevent decompression bombs.
// Use declared size + 1 byte to detect overflow, capped at MaxDecompressedSize.
maxSize := MaxDecompressedSize
if m.pbOuter.Size > 0 && m.pbOuter.Size < int64(maxSize) {
maxSize = int64(m.pbOuter.Size) + 1
}
limitedReader := io.LimitReader(zr, maxSize)
dat, err := io.ReadAll(limitedReader)
dat, err := io.ReadAll(zr)
if err != nil {
return err
}
if int64(len(dat)) >= MaxDecompressedSize {
return fmt.Errorf("decompressed data exceeds maximum allowed size of %d bytes", MaxDecompressedSize)
}
isize := len(dat)
if int64(isize) != m.pbOuter.Size {

View File

@@ -46,9 +46,6 @@ message MFFileOuter {
message MFFilePath {
// required attributes:
// Path invariants: must be valid UTF-8, use forward slashes only,
// be relative (no leading /), contain no ".." segments, and no
// empty segments (no "//").
string path = 1;
int64 size = 2;

View File

@@ -385,6 +385,9 @@ func (s *Scanner) ToManifest(ctx context.Context, w io.Writer, progress chan<- S
// The path should use forward slashes.
func IsHiddenPath(p string) bool {
tp := path.Clean(p)
if tp == "." || tp == "/" {
return false
}
if strings.HasPrefix(tp, ".") {
return true
}