3 Commits

Author SHA1 Message Date
clawbot
d947fc81ae reduce seed iterations to 150M (~5-10s on modern hardware)
1B iterations was too slow (30s+). Benchmarked on Apple Silicon:
- 150M iterations ≈ 6.3s
- Falls within the 5-10s target range
2026-02-08 16:36:26 -08:00
clawbot
a1a8aaf922 feat: add --seed flag for deterministic manifest UUID
Adds a --seed CLI flag to 'generate' that derives a deterministic UUID
from the seed value by hashing it 1,000,000,000 times with SHA-256.
This makes manifest generation fully reproducible when the same seed
and input files are provided.

- Builder.SetSeed(seed) method for programmatic use
- deriveSeedUUID() extracted for testability
- MFER_SEED env var also supported
- Test with reduced iteration count for speed
2026-02-08 16:32:02 -08:00
clawbot
9d301d7b1d Add deterministic file ordering in Builder.Build()
Sort file entries by path (lexicographic, byte-order) before
serialization to ensure deterministic output. Add fixedUUID support
for testing reproducibility, and a test asserting byte-identical
output from two runs with the same input.

Closes #23
2026-02-08 16:09:16 -08:00
21 changed files with 726 additions and 246 deletions

23
.drone.yml Normal file
View File

@@ -0,0 +1,23 @@
kind: pipeline
name: test-docker-build
steps:
- name: test-docker-build
image: plugins/docker
network_mode: bridge
settings:
repo: sneak/mfer
build_args_from_env: [ DRONE_COMMIT_SHA ]
dry_run: true
custom_dns: [ 116.202.204.30 ]
tags:
- ${DRONE_COMMIT_SHA:0:7}
- ${DRONE_BRANCH}
- latest
- name: notify
image: plugins/slack
settings:
webhook:
from_secret: SLACK_WEBHOOK_URL
when:
event: pull_request

5
.gitignore vendored
View File

@@ -3,8 +3,3 @@
*.tmp *.tmp
*.dockerimage *.dockerimage
/vendor /vendor
vendor.tzst
modcache.tzst
# Stale files
.drone.yml

281
internal/checker/checker.go Normal file
View File

@@ -0,0 +1,281 @@
package checker
import (
"bytes"
"context"
"crypto/sha256"
"errors"
"io"
"os"
"path/filepath"
"github.com/multiformats/go-multihash"
"github.com/spf13/afero"
"sneak.berlin/go/mfer/mfer"
)
// Result represents the outcome of checking a single file.
// Check produces one Result per manifest entry; FindExtraFiles produces one
// per on-disk file that is absent from the manifest.
type Result struct {
Path string // Relative path: the manifest entry's path, or the path relative to the base directory for extra files
Status Status // Verification result status
Message string // Human-readable description of the result; checkFile leaves it empty for StatusOK
}
// Status classifies the outcome of verifying one file against the manifest.
type Status int

// Verification outcomes, numbered from zero in declaration order.
const (
	StatusOK           Status = iota // File matches manifest (size and hash verified)
	StatusMissing                    // File not found on disk
	StatusSizeMismatch               // File size differs from manifest
	StatusHashMismatch               // File hash differs from manifest
	StatusExtra                      // File exists on disk but not in manifest
	StatusError                      // Error occurred during verification
)

// String returns the upper-case label for s, or "UNKNOWN" when s is not one
// of the declared Status constants.
func (s Status) String() string {
	// Indexed by Status value so the table cannot drift from the constants.
	labels := [...]string{
		StatusOK:           "OK",
		StatusMissing:      "MISSING",
		StatusSizeMismatch: "SIZE_MISMATCH",
		StatusHashMismatch: "HASH_MISMATCH",
		StatusExtra:        "EXTRA",
		StatusError:        "ERROR",
	}
	if s < 0 || int(s) >= len(labels) {
		return "UNKNOWN"
	}
	return labels[s]
}
// CheckStatus contains progress information for the check operation.
// Check builds one snapshot after each file and hands it to sendCheckStatus,
// which drops it if the progress channel is not ready to receive.
type CheckStatus struct {
TotalFiles int64 // Total number of files in manifest
CheckedFiles int64 // Number of files checked so far
TotalBytes int64 // Total bytes to verify (sum of all file sizes)
CheckedBytes int64 // Bytes verified so far
BytesPerSec float64 // Current throughput rate; NOTE(review): Check in this file never populates it, so it stays 0
Failures int64 // Number of files whose result was anything other than StatusOK
}
// Checker verifies files against a manifest.
// Instances are created by NewChecker, which loads the manifest and fills
// in every field below.
type Checker struct {
basePath string // absolute directory onto which manifest paths are joined
files []*mfer.MFFilePath // entries returned by the manifest's Files()
fs afero.Fs // filesystem used for Stat, Open and Walk
// manifestPaths is a set of paths in the manifest for quick lookup
manifestPaths map[string]struct{}
}
// NewChecker loads the manifest at manifestPath and returns a Checker that
// resolves manifest entries relative to basePath.
//
// basePath is converted to an absolute path. When fs is nil the real
// filesystem (afero OsFs) is used for both reading the manifest and checking
// files. An error is returned if the manifest cannot be loaded or basePath
// cannot be made absolute.
func NewChecker(manifestPath string, basePath string, fs afero.Fs) (*Checker, error) {
	if fs == nil {
		fs = afero.NewOsFs()
	}

	m, err := mfer.NewManifestFromFile(fs, manifestPath)
	if err != nil {
		return nil, err
	}

	abs, err := filepath.Abs(basePath)
	if err != nil {
		return nil, err
	}

	chk := &Checker{
		basePath: abs,
		files:    m.Files(),
		fs:       fs,
	}

	// Index every manifest path once so FindExtraFiles can test membership cheaply.
	chk.manifestPaths = make(map[string]struct{}, len(chk.files))
	for i := range chk.files {
		chk.manifestPaths[chk.files[i].Path] = struct{}{}
	}
	return chk, nil
}
// FileCount reports how many file entries the loaded manifest contains.
func (c *Checker) FileCount() int64 {
	count := len(c.files)
	return int64(count)
}
// TotalBytes sums the Size recorded in the manifest for every file entry.
func (c *Checker) TotalBytes() int64 {
	sum := int64(0)
	for i := range c.files {
		sum += c.files[i].Size
	}
	return sum
}
// Check verifies every manifest entry against the filesystem, in manifest order.
//
// One Result per file is sent on results (if non-nil). After each file a
// CheckStatus snapshot is offered on progress (if non-nil); the offer is
// non-blocking, so snapshots are dropped when the receiver is not ready.
// Both channels are closed when the method returns.
//
// Check returns ctx.Err() as soon as it observes cancellation, either between
// files or while waiting for the results receiver, so a consumer that stops
// reading after cancelling cannot leave Check blocked forever. Per-file
// verification failures are reported through results, not the returned error.
func (c *Checker) Check(ctx context.Context, results chan<- Result, progress chan<- CheckStatus) error {
	if results != nil {
		defer close(results)
	}
	if progress != nil {
		defer close(progress)
	}

	totalFiles := int64(len(c.files))
	totalBytes := c.TotalBytes()

	var checkedFiles, checkedBytes, failures int64

	for _, entry := range c.files {
		select {
		case <-ctx.Done():
			return ctx.Err()
		default:
		}

		result := c.checkFile(entry, &checkedBytes)
		if result.Status != StatusOK {
			failures++
		}
		checkedFiles++

		if results != nil {
			// Wait for the receiver, but give up if the caller cancels.
			select {
			case results <- result:
			case <-ctx.Done():
				return ctx.Err()
			}
		}

		if progress != nil {
			sendCheckStatus(progress, CheckStatus{
				TotalFiles:   totalFiles,
				CheckedFiles: checkedFiles,
				TotalBytes:   totalBytes,
				CheckedBytes: checkedBytes,
				Failures:     failures,
			})
		}
	}

	return nil
}
// checkFile verifies a single manifest entry and returns its Result.
//
// The file is looked up under the checker's base path. A not-exist error from
// Stat yields StatusMissing; any other Stat, Open or read error yields
// StatusError with the error text as the message. A size that differs from
// the manifest yields StatusSizeMismatch without reading the file. Otherwise
// the content is hashed with SHA-256, encoded as a multihash, and compared
// with every hash listed for the entry; one match is enough for StatusOK.
//
// checkedBytes is advanced by the on-disk size for a size mismatch, or by the
// number of bytes actually hashed once the file has been read.
func (c *Checker) checkFile(entry *mfer.MFFilePath, checkedBytes *int64) Result {
	absPath := filepath.Join(c.basePath, entry.Path)

	info, err := c.fs.Stat(absPath)
	if err != nil {
		// Match the not-exist sentinels directly. Comparing against a fresh
		// errors.New value can never succeed, and re-probing with an
		// existence check would misreport other failures (for example
		// permission denied) as a missing file.
		if errors.Is(err, os.ErrNotExist) || errors.Is(err, afero.ErrFileNotFound) {
			return Result{Path: entry.Path, Status: StatusMissing, Message: "file not found"}
		}
		return Result{Path: entry.Path, Status: StatusError, Message: err.Error()}
	}

	// A size difference is conclusive, so skip the expensive hash.
	if info.Size() != entry.Size {
		*checkedBytes += info.Size()
		return Result{
			Path:    entry.Path,
			Status:  StatusSizeMismatch,
			Message: "size mismatch",
		}
	}

	f, err := c.fs.Open(absPath)
	if err != nil {
		return Result{Path: entry.Path, Status: StatusError, Message: err.Error()}
	}
	defer f.Close()

	h := sha256.New()
	n, err := io.Copy(h, f)
	if err != nil {
		return Result{Path: entry.Path, Status: StatusError, Message: err.Error()}
	}
	*checkedBytes += n

	computed, err := multihash.Encode(h.Sum(nil), multihash.SHA2_256)
	if err != nil {
		return Result{Path: entry.Path, Status: StatusError, Message: err.Error()}
	}

	// At least one of the manifest's hashes must match.
	for _, hash := range entry.Hashes {
		if bytes.Equal(computed, hash.MultiHash) {
			return Result{Path: entry.Path, Status: StatusOK}
		}
	}

	return Result{Path: entry.Path, Status: StatusHashMismatch, Message: "hash mismatch"}
}
// FindExtraFiles walks the base directory and reports every regular entry
// that is not listed in the manifest.
//
// Each such file is sent on results as a StatusExtra Result whose Path is the
// slash-separated path relative to the base directory. The results channel is
// closed when the method returns. Directories are never reported.
//
// The walk stops with ctx.Err() once cancellation is observed, including
// while waiting for the results receiver. Any error reported by the walk
// itself is returned unchanged.
func (c *Checker) FindExtraFiles(ctx context.Context, results chan<- Result) error {
	if results != nil {
		defer close(results)
	}

	return afero.Walk(c.fs, c.basePath, func(path string, info os.FileInfo, err error) error {
		if err != nil {
			return err
		}

		select {
		case <-ctx.Done():
			return ctx.Err()
		default:
		}

		if info.IsDir() {
			return nil
		}

		rel, err := filepath.Rel(c.basePath, path)
		if err != nil {
			return err
		}
		// Manifest paths use forward slashes, so normalize the OS-specific
		// separator before the lookup. This is a no-op where the separator
		// is already '/'.
		relPath := filepath.ToSlash(rel)

		if _, listed := c.manifestPaths[relPath]; listed || results == nil {
			return nil
		}

		// Wait for the receiver, but give up if the caller cancels.
		select {
		case results <- Result{
			Path:    relPath,
			Status:  StatusExtra,
			Message: "not in manifest",
		}:
			return nil
		case <-ctx.Done():
			return ctx.Err()
		}
	})
}
// sendCheckStatus offers status on ch without ever blocking.
// A nil channel is ignored, and the update is silently dropped when no
// receiver or buffer slot is available.
func sendCheckStatus(ch chan<- CheckStatus, status CheckStatus) {
	if ch != nil {
		select {
		case ch <- status:
		default:
			// Receiver not ready; drop this update.
		}
	}
}

View File

@@ -113,7 +113,7 @@ func (mfa *CLIApp) fetchManifestOperation(ctx *cli.Context) error {
return fmt.Errorf("invalid path in manifest: %w", err) return fmt.Errorf("invalid path in manifest: %w", err)
} }
fileURL := baseURL.String() + encodeFilePath(f.Path) fileURL := baseURL.String() + f.Path
log.Infof("fetching %s", f.Path) log.Infof("fetching %s", f.Path)
if err := downloadFile(fileURL, localPath, f, progress); err != nil { if err := downloadFile(fileURL, localPath, f, progress); err != nil {
@@ -139,15 +139,6 @@ func (mfa *CLIApp) fetchManifestOperation(ctx *cli.Context) error {
return nil return nil
} }
// encodeFilePath percent-encodes a slash-separated path for use in a URL.
// Every segment is escaped on its own with url.PathEscape, so the "/"
// separators themselves are left untouched.
func encodeFilePath(p string) string {
	parts := strings.Split(p, "/")
	escaped := make([]string, 0, len(parts))
	for _, part := range parts {
		escaped = append(escaped, url.PathEscape(part))
	}
	return strings.Join(escaped, "/")
}
// sanitizePath validates and sanitizes a file path from the manifest. // sanitizePath validates and sanitizes a file path from the manifest.
// It prevents path traversal attacks and rejects unsafe paths. // It prevents path traversal attacks and rejects unsafe paths.
func sanitizePath(p string) (string, error) { func sanitizePath(p string) (string, error) {

View File

@@ -16,29 +16,6 @@ import (
"sneak.berlin/go/mfer/mfer" "sneak.berlin/go/mfer/mfer"
) )
// TestEncodeFilePath checks that encodeFilePath percent-encodes spaces, '#',
// '?' and '%' inside path segments while leaving the '/' separators alone.
func TestEncodeFilePath(t *testing.T) {
// Each case pairs a raw manifest path with its expected URL-encoded form.
tests := []struct {
input string
expected string
}{
{"file.txt", "file.txt"},
{"dir/file.txt", "dir/file.txt"},
{"my file.txt", "my%20file.txt"},
{"dir/my file.txt", "dir/my%20file.txt"},
{"file#1.txt", "file%231.txt"},
{"file?v=1.txt", "file%3Fv=1.txt"},
{"path/to/file with spaces.txt", "path/to/file%20with%20spaces.txt"},
{"100%done.txt", "100%25done.txt"},
}
for _, tt := range tests {
// The raw input doubles as the subtest name.
t.Run(tt.input, func(t *testing.T) {
result := encodeFilePath(tt.input)
assert.Equal(t, tt.expected, result)
})
}
}
func TestSanitizePath(t *testing.T) { func TestSanitizePath(t *testing.T) {
// Valid paths that should be accepted // Valid paths that should be accepted
validTests := []struct { validTests := []struct {

View File

@@ -156,7 +156,7 @@ func (mfa *CLIApp) run(args []string) {
}, },
&cli.StringFlag{ &cli.StringFlag{
Name: "seed", Name: "seed",
Usage: "Seed value for deterministic manifest UUID", Usage: "Seed value for deterministic manifest UUID (hashed 150M times with SHA-256, ~5-10s)",
EnvVars: []string{"MFER_SEED"}, EnvVars: []string{"MFER_SEED"},
}, },
), ),

373
internal/scanner/scanner.go Normal file
View File

@@ -0,0 +1,373 @@
package scanner
import (
"context"
"io"
"io/fs"
"path"
"path/filepath"
"strings"
"sync"
"time"
"github.com/spf13/afero"
"sneak.berlin/go/mfer/mfer"
)
// Phase 1: Enumeration
// ---------------------
// Walking directories and calling stat() on files to collect metadata.
// Builds the list of files to be scanned. Relatively fast (metadata only).
// EnumerateStatus contains progress information for the enumeration phase.
// Both fields are cumulative over every file the Scanner currently holds,
// because enumerateFileWithInfo recomputes them from the full file list.
type EnumerateStatus struct {
FilesFound int64 // Number of files discovered so far
BytesFound int64 // Total size of discovered files (from stat)
}
// Phase 2: Scan (ToManifest)
// --------------------------
// Reading file contents and computing hashes for manifest generation.
// This is the expensive phase that reads all file data.
// ScanStatus contains progress information for the scan phase.
// ToManifest sends these snapshots with a non-blocking send, so some may be
// dropped when the receiver is slow.
type ScanStatus struct {
TotalFiles int64 // Total number of files to scan
ScannedFiles int64 // Number of files scanned so far
TotalBytes int64 // Total bytes to read (sum of all file sizes)
ScannedBytes int64 // Bytes read so far
BytesPerSec float64 // Average throughput since ToManifest started (bytes read / elapsed seconds)
}
// Options configures scanner behavior.
// A nil *Options passed to NewWithOptions is treated as the zero value.
type Options struct {
IgnoreDotfiles bool // Skip files and directories starting with a dot
FollowSymLinks bool // Resolve symlinks instead of skipping them; NOTE(review): nothing in this file reads this field — confirm it is honored elsewhere
Fs afero.Fs // Filesystem to use, defaults to OsFs if nil
}
// FileEntry represents a file that has been enumerated.
// Entries are created by enumerateFileWithInfo from the file's stat info.
type FileEntry struct {
Path string // Relative path (used in manifest)
AbsPath string // Absolute path (used for reading file content)
Size int64 // File size in bytes
Mtime time.Time // Last modification time
Ctime time.Time // Creation time (platform-dependent); enumerateFileWithInfo leaves it at the zero value
}
// Scanner accumulates files and generates manifests from them.
// Files are added by the Enumerate* methods and consumed by ToManifest.
type Scanner struct {
mu sync.RWMutex // guards files
files []*FileEntry // enumerated files, in the order they were added
options *Options // set by NewWithOptions; never nil after construction
fs afero.Fs // filesystem used to stat and open files
}
// New returns a Scanner configured with the zero-value Options, which means
// dotfiles are included and the real filesystem is used.
func New() *Scanner {
	return NewWithOptions(&Options{})
}
// NewWithOptions returns an empty Scanner configured by opts.
// A nil opts is replaced by the zero-value Options, and a nil opts.Fs falls
// back to the real filesystem (afero OsFs).
func NewWithOptions(opts *Options) *Scanner {
	if opts == nil {
		opts = new(Options)
	}

	sc := &Scanner{
		files:   []*FileEntry{},
		options: opts,
		fs:      opts.Fs,
	}
	if sc.fs == nil {
		sc.fs = afero.NewOsFs()
	}
	return sc
}
// EnumerateFile stats one file and records it in the scanner.
// The entry's relative path is just the file's base name, and its containing
// directory serves as the base path. No progress updates are emitted.
func (s *Scanner) EnumerateFile(filePath string) error {
	abs, err := filepath.Abs(filePath)
	if err != nil {
		return err
	}

	info, statErr := s.fs.Stat(abs)
	if statErr != nil {
		return statErr
	}

	dir, name := filepath.Dir(abs), filepath.Base(abs)
	return s.enumerateFileWithInfo(name, dir, info, nil)
}
// EnumeratePath walks the directory tree rooted at inputPath and records
// every file found.
// When progress is non-nil, a status update is offered for each recorded
// file and the channel is closed before the method returns.
func (s *Scanner) EnumeratePath(inputPath string, progress chan<- EnumerateStatus) error {
	if progress != nil {
		defer close(progress)
	}

	root, err := filepath.Abs(inputPath)
	if err != nil {
		return err
	}

	// Present the directory as a read-only filesystem whose root is "/".
	rooted := afero.NewBasePathFs(s.fs, root)
	return s.enumerateFS(afero.NewReadOnlyFs(rooted), root, progress)
}
// EnumeratePaths walks each directory in inputPaths, in order, and records
// every file found. It stops at the first error.
// When progress is non-nil, status updates are offered as files are recorded
// and the channel is closed before the method returns.
func (s *Scanner) EnumeratePaths(progress chan<- EnumerateStatus, inputPaths ...string) error {
	if progress != nil {
		defer close(progress)
	}

	for i := range inputPaths {
		root, err := filepath.Abs(inputPaths[i])
		if err != nil {
			return err
		}

		// Each input directory becomes its own read-only filesystem rooted at "/".
		rooted := afero.NewReadOnlyFs(afero.NewBasePathFs(s.fs, root))
		if walkErr := s.enumerateFS(rooted, root, progress); walkErr != nil {
			return walkErr
		}
	}
	return nil
}
// EnumerateFS walks afs from its root ("/") and records every file found.
// basePath is joined with each walked path to form the absolute path later
// used to read the file.
// When progress is non-nil, status updates are offered as files are recorded
// and the channel is closed before the method returns.
func (s *Scanner) EnumerateFS(afs afero.Fs, basePath string, progress chan<- EnumerateStatus) error {
	if progress == nil {
		return s.enumerateFS(afs, basePath, nil)
	}
	defer close(progress)
	return s.enumerateFS(afs, basePath, progress)
}
// enumerateFS walks afs from "/" and records each visited entry. Unlike the
// exported wrappers, it never closes the progress channel.
// With IgnoreDotfiles set, hidden files are skipped and hidden directories
// are pruned from the walk. Any error handed to the walk callback aborts it.
func (s *Scanner) enumerateFS(afs afero.Fs, basePath string, progress chan<- EnumerateStatus) error {
	visit := func(p string, info fs.FileInfo, walkErr error) error {
		switch {
		case walkErr != nil:
			return walkErr
		case !s.options.IgnoreDotfiles || !pathIsHidden(p):
			return s.enumerateFileWithInfo(p, basePath, info, progress)
		case info.IsDir():
			// Hidden directory: do not descend into it.
			return filepath.SkipDir
		default:
			// Hidden file: ignore it.
			return nil
		}
	}
	return afero.Walk(afs, "/", visit)
}
// enumerateFileWithInfo records one file using stat info the caller already has.
//
// Directories are ignored, since manifests list only files. filePath is
// turned into the manifest path by converting OS-specific separators to
// forward slashes and dropping one leading slash, so the stored path is
// relative and slash-separated on every platform. The absolute path used for
// reading is basePath joined with that relative path.
//
// After the entry is appended, a cumulative EnumerateStatus covering every
// file held by the scanner is offered on progress (nil is allowed). The
// method always returns nil.
func (s *Scanner) enumerateFileWithInfo(filePath string, basePath string, info fs.FileInfo, progress chan<- EnumerateStatus) error {
	if info.IsDir() {
		// Manifests contain only files; directories are implied.
		return nil
	}

	// Normalize to a relative, forward-slash path. ToSlash is a no-op where
	// the separator is already '/'; elsewhere it keeps a leading separator
	// or backslashes out of the manifest.
	relPath := strings.TrimPrefix(filepath.ToSlash(filePath), "/")

	entry := &FileEntry{
		Path:    relPath,
		AbsPath: filepath.Join(basePath, relPath),
		Size:    info.Size(),
		Mtime:   info.ModTime(),
		// Ctime is not available from fs.FileInfo on all platforms;
		// extracting it needs platform-specific code.
	}

	s.mu.Lock()
	s.files = append(s.files, entry)
	filesFound := int64(len(s.files))
	// NOTE(review): the total is recomputed from the whole list on every
	// call; a running total on Scanner would avoid the repeated pass.
	var bytesFound int64
	for _, f := range s.files {
		bytesFound += f.Size
	}
	s.mu.Unlock()

	sendEnumerateStatus(progress, EnumerateStatus{
		FilesFound: filesFound,
		BytesFound: bytesFound,
	})

	return nil
}
// Files returns a snapshot of the enumerated files.
// The returned slice is a fresh copy, but its elements are the same
// *FileEntry pointers the scanner holds.
func (s *Scanner) Files() []*FileEntry {
	s.mu.RLock()
	defer s.mu.RUnlock()

	snapshot := make([]*FileEntry, 0, len(s.files))
	return append(snapshot, s.files...)
}
// FileCount reports how many files the scanner currently holds.
func (s *Scanner) FileCount() int64 {
	s.mu.RLock()
	n := len(s.files)
	s.mu.RUnlock()
	return int64(n)
}
// TotalBytes sums the stat-reported size of every file the scanner holds.
func (s *Scanner) TotalBytes() int64 {
	s.mu.RLock()
	defer s.mu.RUnlock()

	sum := int64(0)
	for i := range s.files {
		sum += s.files[i].Size
	}
	return sum
}
// ToManifest opens every enumerated file, feeds it to a manifest builder,
// and writes the built manifest to w.
//
// Files are processed in enumeration order. Cancellation is checked before
// each file, and ctx.Err() is returned when the context is done. The first
// error from opening or adding a file aborts the scan.
//
// When progress is non-nil, a ScanStatus is offered at most once per second
// from the builder's per-file callback, followed by a final one after the
// last file. Sends never block, and the channel is closed on return.
func (s *Scanner) ToManifest(ctx context.Context, w io.Writer, progress chan<- ScanStatus) error {
	if progress != nil {
		defer close(progress)
	}

	// Snapshot the file list and totals so the lock is not held while hashing.
	s.mu.RLock()
	pending := append(make([]*FileEntry, 0, len(s.files)), s.files...)
	totalFiles := int64(len(pending))
	var totalBytes int64
	for i := range pending {
		totalBytes += pending[i].Size
	}
	s.mu.RUnlock()

	builder := mfer.NewBuilder()

	var scannedFiles int64
	var scannedBytes int64
	lastProgressTime := time.Now()
	startTime := time.Now()

	// emit offers a snapshot whose rate is the average since startTime.
	emit := func(bytesDone int64, elapsed time.Duration) {
		var rate float64
		if secs := elapsed.Seconds(); secs > 0 {
			rate = float64(bytesDone) / secs
		}
		sendScanStatus(progress, ScanStatus{
			TotalFiles:   totalFiles,
			ScannedFiles: scannedFiles,
			TotalBytes:   totalBytes,
			ScannedBytes: bytesDone,
			BytesPerSec:  rate,
		})
	}

	// onFileProgress receives the bytes read so far for the current file
	// and throttles updates to one per second.
	onFileProgress := func(fileBytes int64) {
		now := time.Now()
		if progress == nil || now.Sub(lastProgressTime) < time.Second {
			return
		}
		emit(scannedBytes+fileBytes, now.Sub(startTime))
		lastProgressTime = now
	}

	for _, entry := range pending {
		if err := ctx.Err(); err != nil {
			return err
		}

		f, err := s.fs.Open(entry.AbsPath)
		if err != nil {
			return err
		}

		bytesRead, err := builder.AddFile(entry.Path, entry.Size, entry.Mtime, f, onFileProgress)
		// Close before inspecting the error so the handle is released on both paths.
		f.Close()
		if err != nil {
			return err
		}

		scannedFiles++
		scannedBytes += bytesRead
	}

	if progress != nil {
		emit(scannedBytes, time.Since(startTime))
	}

	return builder.Build(w)
}
// pathIsHidden reports whether any component of the slash-separated path p
// begins with a dot once p has been cleaned with path.Clean.
// An empty path cleans to "." and is therefore reported as hidden, as is a
// path that still starts with ".." after cleaning.
func pathIsHidden(p string) bool {
	cleaned := path.Clean(p)
	for _, segment := range strings.Split(cleaned, "/") {
		if strings.HasPrefix(segment, ".") {
			return true
		}
	}
	return false
}
// sendEnumerateStatus offers status on ch without ever blocking.
// A nil channel is ignored, and the update is discarded when the channel
// cannot accept it immediately.
func sendEnumerateStatus(ch chan<- EnumerateStatus, status EnumerateStatus) {
	if ch != nil {
		select {
		case ch <- status:
		default:
			// Channel full, drop this update
		}
	}
}
// sendScanStatus offers status on ch without ever blocking.
// A nil channel is ignored, and the update is discarded when the channel
// cannot accept it immediately.
func sendScanStatus(ch chan<- ScanStatus, status ScanStatus) {
	if ch != nil {
		select {
		case ch <- status:
		default:
			// Channel full, drop this update
		}
	}
}

View File

@@ -3,48 +3,14 @@ package mfer
import ( import (
"crypto/sha256" "crypto/sha256"
"errors" "errors"
"fmt"
"io" "io"
"sort" "sort"
"strings"
"sync" "sync"
"time" "time"
"unicode/utf8"
"github.com/multiformats/go-multihash" "github.com/multiformats/go-multihash"
) )
// ValidatePath reports whether p is acceptable as a manifest path.
//
// It returns nil when p is non-empty, valid UTF-8, free of backslashes,
// relative (no leading "/"), and made only of non-empty segments none of
// which is "..". Otherwise it returns an error describing the first rule
// violated, checked in that order.
func ValidatePath(p string) error {
	switch {
	case p == "":
		return errors.New("path cannot be empty")
	case !utf8.ValidString(p):
		return fmt.Errorf("path %q is not valid UTF-8", p)
	case strings.Contains(p, `\`):
		return fmt.Errorf("path %q contains backslash; use forward slashes only", p)
	case p[0] == '/':
		return fmt.Errorf("path %q is absolute; must be relative", p)
	}

	for _, segment := range strings.Split(p, "/") {
		switch segment {
		case "":
			// Covers both "a//b" and a trailing slash.
			return fmt.Errorf("path %q contains empty segment", p)
		case "..":
			return fmt.Errorf("path %q contains '..' segment", p)
		}
	}
	return nil
}
// RelFilePath represents a relative file path within a manifest. // RelFilePath represents a relative file path within a manifest.
type RelFilePath string type RelFilePath string
@@ -92,12 +58,25 @@ type Builder struct {
fixedUUID []byte // if set, use this UUID instead of generating one fixedUUID []byte // if set, use this UUID instead of generating one
} }
// seedIterations is the number of SHA-256 rounds used to derive a UUID from a seed.
// Tuned to take approximately 5-10 seconds on modern hardware.
const seedIterations = 150_000_000
// SetSeed derives a deterministic UUID from the given seed string. // SetSeed derives a deterministic UUID from the given seed string.
// The seed is hashed once with SHA-256 and the first 16 bytes are used // The seed is hashed 150,000,000 times with SHA-256 to produce
// as a fixed UUID for the manifest. // 16 bytes used as a fixed UUID for the manifest (~5-10s on modern hardware).
func (b *Builder) SetSeed(seed string) { func (b *Builder) SetSeed(seed string) {
b.fixedUUID = deriveSeedUUID(seed, seedIterations)
}
// deriveSeedUUID hashes the seed string n times with SHA-256
// and returns the first 16 bytes as a UUID.
func deriveSeedUUID(seed string, iterations int) []byte {
hash := sha256.Sum256([]byte(seed)) hash := sha256.Sum256([]byte(seed))
b.fixedUUID = hash[:16] for i := 1; i < iterations; i++ {
hash = sha256.Sum256(hash[:])
}
return hash[:16]
} }
// NewBuilder creates a new Builder. // NewBuilder creates a new Builder.
@@ -118,10 +97,6 @@ func (b *Builder) AddFile(
reader io.Reader, reader io.Reader,
progress chan<- FileHashProgress, progress chan<- FileHashProgress,
) (FileSize, error) { ) (FileSize, error) {
if err := ValidatePath(string(path)); err != nil {
return 0, err
}
// Create hash writer // Create hash writer
h := sha256.New() h := sha256.New()
@@ -144,11 +119,6 @@ func (b *Builder) AddFile(
} }
} }
// Verify actual bytes read matches declared size
if totalRead != size {
return totalRead, fmt.Errorf("size mismatch for %q: declared %d bytes but read %d bytes", path, size, totalRead)
}
// Encode hash as multihash (SHA2-256) // Encode hash as multihash (SHA2-256)
mh, err := multihash.Encode(h.Sum(nil), multihash.SHA2_256) mh, err := multihash.Encode(h.Sum(nil), multihash.SHA2_256)
if err != nil { if err != nil {
@@ -194,8 +164,8 @@ func (b *Builder) FileCount() int {
// This is useful when the hash is already known (e.g., from an existing manifest). // This is useful when the hash is already known (e.g., from an existing manifest).
// Returns an error if path is empty, size is negative, or hash is nil/empty. // Returns an error if path is empty, size is negative, or hash is nil/empty.
func (b *Builder) AddFileWithHash(path RelFilePath, size FileSize, mtime ModTime, hash Multihash) error { func (b *Builder) AddFileWithHash(path RelFilePath, size FileSize, mtime ModTime, hash Multihash) error {
if err := ValidatePath(string(path)); err != nil { if path == "" {
return err return errors.New("path cannot be empty")
} }
if size < 0 { if size < 0 {
return errors.New("size cannot be negative") return errors.New("size cannot be negative")

View File

@@ -92,29 +92,6 @@ func TestBuilderBuild(t *testing.T) {
assert.True(t, strings.HasPrefix(buf.String(), MAGIC)) assert.True(t, strings.HasPrefix(buf.String(), MAGIC))
} }
// TestNewTimestampFromTimeExtremeDate checks that newTimestampFromTime
// reproduces Unix seconds and the nanosecond remainder for dates far outside
// the range an int64 nanosecond count can hold.
func TestNewTimestampFromTimeExtremeDate(t *testing.T) {
// Regression test: newTimestampFromTime used UnixNano(), whose result the
// time package documents as undefined for dates outside ~1678-2262.
// It now uses Nanosecond(), which is safe for any date.
tests := []struct {
name string
time time.Time
}{
{"zero time", time.Time{}},
{"year 1000", time.Date(1000, 1, 1, 0, 0, 0, 0, time.UTC)},
{"year 3000", time.Date(3000, 1, 1, 0, 0, 0, 123456789, time.UTC)},
{"unix epoch", time.Unix(0, 0)},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
// Should not panic
ts := newTimestampFromTime(tt.time)
// Seconds and Nanos must match the source time's own accessors.
assert.Equal(t, tt.time.Unix(), ts.Seconds)
assert.Equal(t, int32(tt.time.Nanosecond()), ts.Nanos)
})
}
}
func TestBuilderDeterministicOutput(t *testing.T) { func TestBuilderDeterministicOutput(t *testing.T) {
buildManifest := func() []byte { buildManifest := func() []byte {
b := NewBuilder() b := NewBuilder()
@@ -150,17 +127,15 @@ func TestBuilderDeterministicOutput(t *testing.T) {
assert.Equal(t, out1, out2, "two builds with same input should produce byte-identical output") assert.Equal(t, out1, out2, "two builds with same input should produce byte-identical output")
} }
func TestSetSeedDeterministic(t *testing.T) { func TestDeriveSeedUUID(t *testing.T) {
b1 := NewBuilder() // Use a small iteration count for testing (production uses seedIterations, i.e. 150M)
b1.SetSeed("test-seed-value") uuid1 := deriveSeedUUID("test-seed-value", 1000)
b2 := NewBuilder() uuid2 := deriveSeedUUID("test-seed-value", 1000)
b2.SetSeed("test-seed-value") assert.Equal(t, uuid1, uuid2, "same seed should produce same UUID")
assert.Equal(t, b1.fixedUUID, b2.fixedUUID, "same seed should produce same UUID") assert.Len(t, uuid1, 16, "UUID should be 16 bytes")
assert.Len(t, b1.fixedUUID, 16, "UUID should be 16 bytes")
b3 := NewBuilder() uuid3 := deriveSeedUUID("different-seed", 1000)
b3.SetSeed("different-seed") assert.NotEqual(t, uuid1, uuid3, "different seeds should produce different UUIDs")
assert.NotEqual(t, b1.fixedUUID, b3.fixedUUID, "different seeds should produce different UUIDs")
} }
func TestBuilderBuildEmpty(t *testing.T) { func TestBuilderBuildEmpty(t *testing.T) {

View File

@@ -224,7 +224,12 @@ func (c *Checker) checkFile(entry *MFFilePath, checkedBytes *FileSize) Result {
// Check if file exists // Check if file exists
info, err := c.fs.Stat(absPath) info, err := c.fs.Stat(absPath)
if err != nil { if err != nil {
if errors.Is(err, os.ErrNotExist) || errors.Is(err, afero.ErrFileNotFound) { if errors.Is(err, afero.ErrFileNotFound) || errors.Is(err, errors.New("file does not exist")) {
return Result{Path: relPath, Status: StatusMissing, Message: "file not found"}
}
// Check for "file does not exist" style errors
exists, _ := afero.Exists(c.fs, absPath)
if !exists {
return Result{Path: relPath, Status: StatusMissing, Message: "file not found"} return Result{Path: relPath, Status: StatusMissing, Message: "file not found"}
} }
return Result{Path: relPath, Status: StatusError, Message: err.Error()} return Result{Path: relPath, Status: StatusError, Message: err.Error()}
@@ -272,14 +277,12 @@ func (c *Checker) checkFile(entry *MFFilePath, checkedBytes *FileSize) Result {
// FindExtraFiles walks the filesystem and reports files not in the manifest. // FindExtraFiles walks the filesystem and reports files not in the manifest.
// Results are sent to the results channel. The channel is closed when done. // Results are sent to the results channel. The channel is closed when done.
// Hidden files/directories (starting with .) are skipped, as they are excluded
// from manifests by default. The manifest file itself is also skipped.
func (c *Checker) FindExtraFiles(ctx context.Context, results chan<- Result) error { func (c *Checker) FindExtraFiles(ctx context.Context, results chan<- Result) error {
if results != nil { if results != nil {
defer close(results) defer close(results)
} }
return afero.Walk(c.fs, string(c.basePath), func(walkPath string, info os.FileInfo, err error) error { return afero.Walk(c.fs, string(c.basePath), func(path string, info os.FileInfo, err error) error {
if err != nil { if err != nil {
return err return err
} }
@@ -290,31 +293,16 @@ func (c *Checker) FindExtraFiles(ctx context.Context, results chan<- Result) err
default: default:
} }
// Get relative path
rel, err := filepath.Rel(string(c.basePath), walkPath)
if err != nil {
return err
}
// Skip hidden files and directories (dotfiles)
if IsHiddenPath(filepath.ToSlash(rel)) {
if info.IsDir() {
return filepath.SkipDir
}
return nil
}
// Skip directories // Skip directories
if info.IsDir() { if info.IsDir() {
return nil return nil
} }
// Skip manifest files // Get relative path
base := filepath.Base(rel) rel, err := filepath.Rel(string(c.basePath), path)
if base == "index.mf" || base == ".index.mf" { if err != nil {
return nil return err
} }
relPath := RelFilePath(rel) relPath := RelFilePath(rel)
// Check if path is in manifest // Check if path is in manifest

View File

@@ -305,44 +305,6 @@ func TestFindExtraFiles(t *testing.T) {
assert.Equal(t, "not in manifest", extras[0].Message) assert.Equal(t, "not in manifest", extras[0].Message)
} }
// TestFindExtraFilesSkipsManifestAndDotfiles checks that FindExtraFiles
// reports only ordinary unlisted files, ignoring dotfiles, files under
// dot-directories, and the manifest file itself.
func TestFindExtraFilesSkipsManifestAndDotfiles(t *testing.T) {
fs := afero.NewMemMapFs()
// The manifest lists exactly one file, which also exists on disk.
manifestFiles := map[string][]byte{
"file1.txt": []byte("in manifest"),
}
createTestManifest(t, fs, "/data/.index.mf", manifestFiles)
createFilesOnDisk(t, fs, "/data", map[string][]byte{
"file1.txt": []byte("in manifest"),
})
// Create dotfile and manifest that should be skipped
require.NoError(t, afero.WriteFile(fs, "/data/.hidden", []byte("hidden"), 0o644))
require.NoError(t, afero.WriteFile(fs, "/data/.config/settings", []byte("cfg"), 0o644))
// Create a real extra file
require.NoError(t, fs.MkdirAll("/data", 0o755))
require.NoError(t, afero.WriteFile(fs, "/data/extra.txt", []byte("extra"), 0o644))
chk, err := NewChecker("/data/.index.mf", "/data", fs)
require.NoError(t, err)
// Buffered so FindExtraFiles can finish before the results are drained.
results := make(chan Result, 10)
err = chk.FindExtraFiles(context.Background(), results)
require.NoError(t, err)
var extras []Result
for r := range results {
extras = append(extras, r)
}
// Should only report extra.txt, not .hidden, .config/settings, or .index.mf
for _, e := range extras {
t.Logf("extra: %s", e.Path)
}
assert.Len(t, extras, 1)
// Guard the index so a wrong count fails above instead of panicking here.
if len(extras) > 0 {
assert.Equal(t, RelFilePath("extra.txt"), extras[0].Path)
}
}
func TestFindExtraFilesContextCancellation(t *testing.T) { func TestFindExtraFilesContextCancellation(t *testing.T) {
fs := afero.NewMemMapFs() fs := afero.NewMemMapFs()
files := map[string][]byte{"file.txt": []byte("data")} files := map[string][]byte{"file.txt": []byte("data")}
@@ -419,39 +381,6 @@ func TestCheckSubdirectories(t *testing.T) {
assert.Equal(t, 3, okCount) assert.Equal(t, 3, okCount)
} }
// TestCheckMissingFileDetectedWithoutFallback checks that a file listed in
// the manifest but absent from disk is reported as StatusMissing rather than
// StatusError.
func TestCheckMissingFileDetectedWithoutFallback(t *testing.T) {
// Regression test: errors.Is(err, errors.New("...")) never matches because
// errors.New creates a new value each time. The fix uses os.ErrNotExist instead.
fs := afero.NewMemMapFs()
files := map[string][]byte{
"exists.txt": []byte("here"),
"missing.txt": []byte("not on disk"),
}
createTestManifest(t, fs, "/manifest.mf", files)
// Only create one file on disk
createFilesOnDisk(t, fs, "/data", map[string][]byte{
"exists.txt": []byte("here"),
})
chk, err := NewChecker("/manifest.mf", "/data", fs)
require.NoError(t, err)
// Buffered so Check can finish before the results are drained.
results := make(chan Result, 10)
err = chk.Check(context.Background(), results, nil)
require.NoError(t, err)
// Tally results by status; the only missing entry must be missing.txt.
statusCounts := map[Status]int{}
for r := range results {
statusCounts[r.Status]++
if r.Status == StatusMissing {
assert.Equal(t, RelFilePath("missing.txt"), r.Path)
}
}
assert.Equal(t, 1, statusCounts[StatusOK], "one file should be OK")
assert.Equal(t, 1, statusCounts[StatusMissing], "one file should be MISSING")
assert.Equal(t, 0, statusCounts[StatusError], "no files should be ERROR")
}
func TestCheckEmptyManifest(t *testing.T) { func TestCheckEmptyManifest(t *testing.T) {
fs := afero.NewMemMapFs() fs := afero.NewMemMapFs()
// Create manifest with no files // Create manifest with no files

View File

@@ -3,9 +3,4 @@ package mfer
const ( const (
Version = "0.1.0" Version = "0.1.0"
ReleaseDate = "2025-12-17" ReleaseDate = "2025-12-17"
// MaxDecompressedSize is the maximum allowed size of decompressed manifest
// data (256 MB). This prevents decompression bombs from consuming excessive
// memory.
MaxDecompressedSize int64 = 256 * 1024 * 1024
) )

View File

@@ -76,20 +76,10 @@ func (m *manifest) deserializeInner() error {
} }
defer zr.Close() defer zr.Close()
// Limit decompressed size to prevent decompression bombs. dat, err := io.ReadAll(zr)
// Use declared size + 1 byte to detect overflow, capped at MaxDecompressedSize.
maxSize := MaxDecompressedSize
if m.pbOuter.Size > 0 && m.pbOuter.Size < int64(maxSize) {
maxSize = int64(m.pbOuter.Size) + 1
}
limitedReader := io.LimitReader(zr, maxSize)
dat, err := io.ReadAll(limitedReader)
if err != nil { if err != nil {
return err return err
} }
if int64(len(dat)) >= MaxDecompressedSize {
return fmt.Errorf("decompressed data exceeds maximum allowed size of %d bytes", MaxDecompressedSize)
}
isize := len(dat) isize := len(dat)
if int64(isize) != m.pbOuter.Size { if int64(isize) != m.pbOuter.Size {

View File

@@ -100,7 +100,7 @@ func gpgExtractPubKeyFingerprint(pubKey []byte) (string, error) {
if err != nil { if err != nil {
return "", fmt.Errorf("failed to create temp dir: %w", err) return "", fmt.Errorf("failed to create temp dir: %w", err)
} }
defer func() { _ = os.RemoveAll(tmpDir) }() defer os.RemoveAll(tmpDir)
// Set restrictive permissions // Set restrictive permissions
if err := os.Chmod(tmpDir, 0o700); err != nil { if err := os.Chmod(tmpDir, 0o700); err != nil {
@@ -158,7 +158,7 @@ func gpgVerify(data, signature, pubKey []byte) error {
if err != nil { if err != nil {
return fmt.Errorf("failed to create temp dir: %w", err) return fmt.Errorf("failed to create temp dir: %w", err)
} }
defer func() { _ = os.RemoveAll(tmpDir) }() defer os.RemoveAll(tmpDir)
// Set restrictive permissions // Set restrictive permissions
if err := os.Chmod(tmpDir, 0o700); err != nil { if err := os.Chmod(tmpDir, 0o700); err != nil {

View File

@@ -34,15 +34,15 @@ func testGPGEnv(t *testing.T) (GPGKeyID, func()) {
// Save original GNUPGHOME and set new one // Save original GNUPGHOME and set new one
origGPGHome := os.Getenv("GNUPGHOME") origGPGHome := os.Getenv("GNUPGHOME")
require.NoError(t, os.Setenv("GNUPGHOME", gpgHome)) os.Setenv("GNUPGHOME", gpgHome)
cleanup := func() { cleanup := func() {
if origGPGHome == "" { if origGPGHome == "" {
_ = os.Unsetenv("GNUPGHOME") os.Unsetenv("GNUPGHOME")
} else { } else {
_ = os.Setenv("GNUPGHOME", origGPGHome) os.Setenv("GNUPGHOME", origGPGHome)
} }
_ = os.RemoveAll(gpgHome) os.RemoveAll(gpgHome)
} }
// Generate a test key with no passphrase // Generate a test key with no passphrase

View File

@@ -46,9 +46,6 @@ message MFFileOuter {
message MFFilePath { message MFFilePath {
// required attributes: // required attributes:
// Path invariants: must be valid UTF-8, use forward slashes only,
// be relative (no leading /), contain no ".." segments, and no
// empty segments (no "//").
string path = 1; string path = 1;
int64 size = 2; int64 size = 2;

View File

@@ -389,9 +389,6 @@ func (s *Scanner) ToManifest(ctx context.Context, w io.Writer, progress chan<- S
// The path should use forward slashes. // The path should use forward slashes.
func IsHiddenPath(p string) bool { func IsHiddenPath(p string) bool {
tp := path.Clean(p) tp := path.Clean(p)
if tp == "." || tp == "/" {
return false
}
if strings.HasPrefix(tp, ".") { if strings.HasPrefix(tp, ".") {
return true return true
} }

View File

@@ -352,8 +352,6 @@ func TestIsHiddenPath(t *testing.T) {
{"/absolute/.hidden", true}, {"/absolute/.hidden", true},
{"./relative", false}, // path.Clean removes leading ./ {"./relative", false}, // path.Clean removes leading ./
{"a/b/c/.d/e", true}, {"a/b/c/.d/e", true},
{".", false}, // current directory is not hidden
{"/", false}, // root is not hidden
} }
for _, tt := range tests { for _, tt := range tests {

View File

@@ -16,10 +16,11 @@ import (
const MAGIC string = "ZNAVSRFG" const MAGIC string = "ZNAVSRFG"
func newTimestampFromTime(t time.Time) *Timestamp { func newTimestampFromTime(t time.Time) *Timestamp {
return &Timestamp{ out := &Timestamp{
Seconds: t.Unix(), Seconds: t.Unix(),
Nanos: int32(t.Nanosecond()), Nanos: int32(t.UnixNano() - (t.Unix() * 1000000000)),
} }
return out
} }
func (m *manifest) generate() error { func (m *manifest) generate() error {

BIN
modcache.tzst Normal file

Binary file not shown.

BIN
vendor.tzst Normal file

Binary file not shown.