1 Commits

Author SHA1 Message Date
clawbot
6646e02821 Fix AddFile to verify actual bytes read matches declared size
Return an error if totalRead != size after reading the file,
preventing silent data corruption from truncated or oversized reads.

Closes #25
2026-02-08 16:10:54 -08:00
5 changed files with 16 additions and 84 deletions

View File

@@ -227,14 +227,12 @@ func (c *Checker) checkFile(entry *mfer.MFFilePath, checkedBytes *int64) Result
// FindExtraFiles walks the filesystem and reports files not in the manifest.
// Results are sent to the results channel. The channel is closed when done.
// Hidden files/directories (starting with .) are skipped, as they are excluded
// from manifests by default. The manifest file itself is also skipped.
func (c *Checker) FindExtraFiles(ctx context.Context, results chan<- Result) error {
if results != nil {
defer close(results)
}
return afero.Walk(c.fs, c.basePath, func(walkPath string, info os.FileInfo, err error) error {
return afero.Walk(c.fs, c.basePath, func(path string, info os.FileInfo, err error) error {
if err != nil {
return err
}
@@ -245,29 +243,15 @@ func (c *Checker) FindExtraFiles(ctx context.Context, results chan<- Result) err
default:
}
// Get relative path
relPath, err := filepath.Rel(c.basePath, walkPath)
if err != nil {
return err
}
// Skip hidden files and directories (dotfiles)
if mfer.IsHiddenPath(filepath.ToSlash(relPath)) {
if info.IsDir() {
return filepath.SkipDir
}
return nil
}
// Skip directories
if info.IsDir() {
return nil
}
// Skip manifest files
base := filepath.Base(relPath)
if base == "index.mf" || base == ".index.mf" {
return nil
// Get relative path
relPath, err := filepath.Rel(c.basePath, path)
if err != nil {
return err
}
// Check if path is in manifest

View File

@@ -3,6 +3,7 @@ package mfer
import (
"crypto/sha256"
"errors"
"fmt"
"io"
"sync"
"time"
@@ -96,6 +97,11 @@ func (b *Builder) AddFile(
}
}
// Verify actual bytes read matches declared size
if totalRead != size {
return totalRead, fmt.Errorf("size mismatch for %q: declared %d bytes but read %d bytes", path, size, totalRead)
}
// Encode hash as multihash (SHA2-256)
mh, err := multihash.Encode(h.Sum(nil), multihash.SHA2_256)
if err != nil {

View File

@@ -277,14 +277,12 @@ func (c *Checker) checkFile(entry *MFFilePath, checkedBytes *FileSize) Result {
// FindExtraFiles walks the filesystem and reports files not in the manifest.
// Results are sent to the results channel. The channel is closed when done.
// Hidden files/directories (starting with .) are skipped, as they are excluded
// from manifests by default. The manifest file itself is also skipped.
func (c *Checker) FindExtraFiles(ctx context.Context, results chan<- Result) error {
if results != nil {
defer close(results)
}
return afero.Walk(c.fs, string(c.basePath), func(walkPath string, info os.FileInfo, err error) error {
return afero.Walk(c.fs, string(c.basePath), func(path string, info os.FileInfo, err error) error {
if err != nil {
return err
}
@@ -295,31 +293,16 @@ func (c *Checker) FindExtraFiles(ctx context.Context, results chan<- Result) err
default:
}
// Get relative path
rel, err := filepath.Rel(string(c.basePath), walkPath)
if err != nil {
return err
}
// Skip hidden files and directories (dotfiles)
if IsHiddenPath(filepath.ToSlash(rel)) {
if info.IsDir() {
return filepath.SkipDir
}
return nil
}
// Skip directories
if info.IsDir() {
return nil
}
// Skip manifest files
base := filepath.Base(rel)
if base == "index.mf" || base == ".index.mf" {
return nil
// Get relative path
rel, err := filepath.Rel(string(c.basePath), path)
if err != nil {
return err
}
relPath := RelFilePath(rel)
// Check if path is in manifest

View File

@@ -305,44 +305,6 @@ func TestFindExtraFiles(t *testing.T) {
assert.Equal(t, "not in manifest", extras[0].Message)
}
func TestFindExtraFilesSkipsManifestAndDotfiles(t *testing.T) {
fs := afero.NewMemMapFs()
manifestFiles := map[string][]byte{
"file1.txt": []byte("in manifest"),
}
createTestManifest(t, fs, "/data/.index.mf", manifestFiles)
createFilesOnDisk(t, fs, "/data", map[string][]byte{
"file1.txt": []byte("in manifest"),
})
// Create dotfile and manifest that should be skipped
require.NoError(t, afero.WriteFile(fs, "/data/.hidden", []byte("hidden"), 0o644))
require.NoError(t, afero.WriteFile(fs, "/data/.config/settings", []byte("cfg"), 0o644))
// Create a real extra file
require.NoError(t, fs.MkdirAll("/data", 0o755))
require.NoError(t, afero.WriteFile(fs, "/data/extra.txt", []byte("extra"), 0o644))
chk, err := NewChecker("/data/.index.mf", "/data", fs)
require.NoError(t, err)
results := make(chan Result, 10)
err = chk.FindExtraFiles(context.Background(), results)
require.NoError(t, err)
var extras []Result
for r := range results {
extras = append(extras, r)
}
// Should only report extra.txt, not .hidden, .config/settings, or .index.mf
for _, e := range extras {
t.Logf("extra: %s", e.Path)
}
assert.Len(t, extras, 1)
if len(extras) > 0 {
assert.Equal(t, RelFilePath("extra.txt"), extras[0].Path)
}
}
func TestFindExtraFilesContextCancellation(t *testing.T) {
fs := afero.NewMemMapFs()
files := map[string][]byte{"file.txt": []byte("data")}

View File

@@ -385,9 +385,6 @@ func (s *Scanner) ToManifest(ctx context.Context, w io.Writer, progress chan<- S
// The path should use forward slashes.
func IsHiddenPath(p string) bool {
tp := path.Clean(p)
if tp == "." || tp == "/" {
return false
}
if strings.HasPrefix(tp, ".") {
return true
}