1 Commits

Author SHA1 Message Date
clawbot
bc4366aad4 fix: FindExtraFiles skips dotfiles and manifest files to avoid false positives
FindExtraFiles now skips hidden files/directories and manifest files
(index.mf, .index.mf) when looking for extra files. Previously it would
report these as 'extra' even though they are intentionally excluded from
manifests by default, making --no-extra-files unusable.

Also includes IsHiddenPath fix for '.' (needed by the new filtering).
2026-02-08 17:12:25 -08:00
5 changed files with 64 additions and 38 deletions

View File

@@ -113,7 +113,7 @@ func (mfa *CLIApp) fetchManifestOperation(ctx *cli.Context) error {
return fmt.Errorf("invalid path in manifest: %w", err) return fmt.Errorf("invalid path in manifest: %w", err)
} }
fileURL := baseURL.String() + encodeFilePath(f.Path) fileURL := baseURL.String() + f.Path
log.Infof("fetching %s", f.Path) log.Infof("fetching %s", f.Path)
if err := downloadFile(fileURL, localPath, f, progress); err != nil { if err := downloadFile(fileURL, localPath, f, progress); err != nil {
@@ -139,15 +139,6 @@ func (mfa *CLIApp) fetchManifestOperation(ctx *cli.Context) error {
return nil return nil
} }
// encodeFilePath URL-encodes each segment of a file path while preserving slashes.
func encodeFilePath(p string) string {
segments := strings.Split(p, "/")
for i, seg := range segments {
segments[i] = url.PathEscape(seg)
}
return strings.Join(segments, "/")
}
// sanitizePath validates and sanitizes a file path from the manifest. // sanitizePath validates and sanitizes a file path from the manifest.
// It prevents path traversal attacks and rejects unsafe paths. // It prevents path traversal attacks and rejects unsafe paths.
func sanitizePath(p string) (string, error) { func sanitizePath(p string) (string, error) {

View File

@@ -16,29 +16,6 @@ import (
"sneak.berlin/go/mfer/mfer" "sneak.berlin/go/mfer/mfer"
) )
func TestEncodeFilePath(t *testing.T) {
tests := []struct {
input string
expected string
}{
{"file.txt", "file.txt"},
{"dir/file.txt", "dir/file.txt"},
{"my file.txt", "my%20file.txt"},
{"dir/my file.txt", "dir/my%20file.txt"},
{"file#1.txt", "file%231.txt"},
{"file?v=1.txt", "file%3Fv=1.txt"},
{"path/to/file with spaces.txt", "path/to/file%20with%20spaces.txt"},
{"100%done.txt", "100%25done.txt"},
}
for _, tt := range tests {
t.Run(tt.input, func(t *testing.T) {
result := encodeFilePath(tt.input)
assert.Equal(t, tt.expected, result)
})
}
}
func TestSanitizePath(t *testing.T) { func TestSanitizePath(t *testing.T) {
// Valid paths that should be accepted // Valid paths that should be accepted
validTests := []struct { validTests := []struct {

View File

@@ -277,12 +277,14 @@ func (c *Checker) checkFile(entry *MFFilePath, checkedBytes *FileSize) Result {
// FindExtraFiles walks the filesystem and reports files not in the manifest. // FindExtraFiles walks the filesystem and reports files not in the manifest.
// Results are sent to the results channel. The channel is closed when done. // Results are sent to the results channel. The channel is closed when done.
// Hidden files/directories (starting with .) are skipped, as they are excluded
// from manifests by default. The manifest file itself is also skipped.
func (c *Checker) FindExtraFiles(ctx context.Context, results chan<- Result) error { func (c *Checker) FindExtraFiles(ctx context.Context, results chan<- Result) error {
if results != nil { if results != nil {
defer close(results) defer close(results)
} }
return afero.Walk(c.fs, string(c.basePath), func(path string, info os.FileInfo, err error) error { return afero.Walk(c.fs, string(c.basePath), func(walkPath string, info os.FileInfo, err error) error {
if err != nil { if err != nil {
return err return err
} }
@@ -293,16 +295,31 @@ func (c *Checker) FindExtraFiles(ctx context.Context, results chan<- Result) err
default: default:
} }
// Get relative path
rel, err := filepath.Rel(string(c.basePath), walkPath)
if err != nil {
return err
}
// Skip hidden files and directories (dotfiles)
if IsHiddenPath(filepath.ToSlash(rel)) {
if info.IsDir() {
return filepath.SkipDir
}
return nil
}
// Skip directories // Skip directories
if info.IsDir() { if info.IsDir() {
return nil return nil
} }
// Get relative path // Skip manifest files
rel, err := filepath.Rel(string(c.basePath), path) base := filepath.Base(rel)
if err != nil { if base == "index.mf" || base == ".index.mf" {
return err return nil
} }
relPath := RelFilePath(rel) relPath := RelFilePath(rel)
// Check if path is in manifest // Check if path is in manifest

View File

@@ -305,6 +305,44 @@ func TestFindExtraFiles(t *testing.T) {
assert.Equal(t, "not in manifest", extras[0].Message) assert.Equal(t, "not in manifest", extras[0].Message)
} }
func TestFindExtraFilesSkipsManifestAndDotfiles(t *testing.T) {
fs := afero.NewMemMapFs()
manifestFiles := map[string][]byte{
"file1.txt": []byte("in manifest"),
}
createTestManifest(t, fs, "/data/.index.mf", manifestFiles)
createFilesOnDisk(t, fs, "/data", map[string][]byte{
"file1.txt": []byte("in manifest"),
})
// Create dotfile and manifest that should be skipped
require.NoError(t, afero.WriteFile(fs, "/data/.hidden", []byte("hidden"), 0o644))
require.NoError(t, afero.WriteFile(fs, "/data/.config/settings", []byte("cfg"), 0o644))
// Create a real extra file
require.NoError(t, fs.MkdirAll("/data", 0o755))
require.NoError(t, afero.WriteFile(fs, "/data/extra.txt", []byte("extra"), 0o644))
chk, err := NewChecker("/data/.index.mf", "/data", fs)
require.NoError(t, err)
results := make(chan Result, 10)
err = chk.FindExtraFiles(context.Background(), results)
require.NoError(t, err)
var extras []Result
for r := range results {
extras = append(extras, r)
}
// Should only report extra.txt, not .hidden, .config/settings, or .index.mf
for _, e := range extras {
t.Logf("extra: %s", e.Path)
}
assert.Len(t, extras, 1)
if len(extras) > 0 {
assert.Equal(t, RelFilePath("extra.txt"), extras[0].Path)
}
}
func TestFindExtraFilesContextCancellation(t *testing.T) { func TestFindExtraFilesContextCancellation(t *testing.T) {
fs := afero.NewMemMapFs() fs := afero.NewMemMapFs()
files := map[string][]byte{"file.txt": []byte("data")} files := map[string][]byte{"file.txt": []byte("data")}

View File

@@ -385,6 +385,9 @@ func (s *Scanner) ToManifest(ctx context.Context, w io.Writer, progress chan<- S
// The path should use forward slashes. // The path should use forward slashes.
func IsHiddenPath(p string) bool { func IsHiddenPath(p string) bool {
tp := path.Clean(p) tp := path.Clean(p)
if tp == "." || tp == "/" {
return false
}
if strings.HasPrefix(tp, ".") { if strings.HasPrefix(tp, ".") {
return true return true
} }