Add custom types for type safety throughout codebase

- Add FileCount, FileSize, RelFilePath, AbsFilePath, ModTime, Multihash types
- Add UnixSeconds and UnixNanos types for timestamp handling
- Add URL types (ManifestURL, FileURL, BaseURL) with safe path joining
- Consolidate scanner package into mfer package
- Update checker to use custom types in Result and CheckStatus
- Add ModTime.Timestamp() method for protobuf conversion
- Update all tests to use proper custom types
This commit is contained in:
Jeffrey Paul 2025-12-18 01:01:18 -08:00
parent a9f0d2abe4
commit dc115c5ba2
9 changed files with 428 additions and 247 deletions

View File

@ -17,7 +17,7 @@ import (
// Result represents the outcome of checking a single file.
type Result struct {
Path string // Relative path from manifest
Path mfer.RelFilePath // Relative path from manifest
Status Status // Verification result status
Message string // Human-readable description of the result
}
@ -55,22 +55,22 @@ func (s Status) String() string {
// CheckStatus contains progress information for the check operation.
type CheckStatus struct {
TotalFiles int64 // Total number of files in manifest
CheckedFiles int64 // Number of files checked so far
TotalBytes int64 // Total bytes to verify (sum of all file sizes)
CheckedBytes int64 // Bytes verified so far
TotalFiles mfer.FileCount // Total number of files in manifest
CheckedFiles mfer.FileCount // Number of files checked so far
TotalBytes mfer.FileSize // Total bytes to verify (sum of all file sizes)
CheckedBytes mfer.FileSize // Bytes verified so far
BytesPerSec float64 // Current throughput rate
ETA time.Duration // Estimated time to completion
Failures int64 // Number of verification failures encountered
Failures mfer.FileCount // Number of verification failures encountered
}
// Checker verifies files against a manifest.
type Checker struct {
basePath string
basePath mfer.AbsFilePath
files []*mfer.MFFilePath
fs afero.Fs
// manifestPaths is a set of paths in the manifest for quick lookup
manifestPaths map[string]struct{}
manifestPaths map[mfer.RelFilePath]struct{}
}
// NewChecker creates a new Checker for the given manifest, base path, and filesystem.
@ -92,13 +92,13 @@ func NewChecker(manifestPath string, basePath string, fs afero.Fs) (*Checker, er
}
files := m.Files()
manifestPaths := make(map[string]struct{}, len(files))
manifestPaths := make(map[mfer.RelFilePath]struct{}, len(files))
for _, f := range files {
manifestPaths[f.Path] = struct{}{}
manifestPaths[mfer.RelFilePath(f.Path)] = struct{}{}
}
return &Checker{
basePath: abs,
basePath: mfer.AbsFilePath(abs),
files: files,
fs: fs,
manifestPaths: manifestPaths,
@ -106,15 +106,15 @@ func NewChecker(manifestPath string, basePath string, fs afero.Fs) (*Checker, er
}
// FileCount returns the number of files in the manifest.
func (c *Checker) FileCount() int64 {
return int64(len(c.files))
func (c *Checker) FileCount() mfer.FileCount {
return mfer.FileCount(len(c.files))
}
// TotalBytes returns the total size of all files in the manifest.
func (c *Checker) TotalBytes() int64 {
var total int64
func (c *Checker) TotalBytes() mfer.FileSize {
var total mfer.FileSize
for _, f := range c.files {
total += f.Size
total += mfer.FileSize(f.Size)
}
return total
}
@ -131,12 +131,12 @@ func (c *Checker) Check(ctx context.Context, results chan<- Result, progress cha
defer close(progress)
}
totalFiles := int64(len(c.files))
totalFiles := mfer.FileCount(len(c.files))
totalBytes := c.TotalBytes()
var checkedFiles int64
var checkedBytes int64
var failures int64
var checkedFiles mfer.FileCount
var checkedBytes mfer.FileSize
var failures mfer.FileCount
startTime := time.Now()
@ -186,28 +186,29 @@ func (c *Checker) Check(ctx context.Context, results chan<- Result, progress cha
return nil
}
func (c *Checker) checkFile(entry *mfer.MFFilePath, checkedBytes *int64) Result {
absPath := filepath.Join(c.basePath, entry.Path)
func (c *Checker) checkFile(entry *mfer.MFFilePath, checkedBytes *mfer.FileSize) Result {
absPath := filepath.Join(string(c.basePath), entry.Path)
relPath := mfer.RelFilePath(entry.Path)
// Check if file exists
info, err := c.fs.Stat(absPath)
if err != nil {
if errors.Is(err, afero.ErrFileNotFound) || errors.Is(err, errors.New("file does not exist")) {
return Result{Path: entry.Path, Status: StatusMissing, Message: "file not found"}
return Result{Path: relPath, Status: StatusMissing, Message: "file not found"}
}
// Check for "file does not exist" style errors
exists, _ := afero.Exists(c.fs, absPath)
if !exists {
return Result{Path: entry.Path, Status: StatusMissing, Message: "file not found"}
return Result{Path: relPath, Status: StatusMissing, Message: "file not found"}
}
return Result{Path: entry.Path, Status: StatusError, Message: err.Error()}
return Result{Path: relPath, Status: StatusError, Message: err.Error()}
}
// Check size
if info.Size() != entry.Size {
*checkedBytes += info.Size()
*checkedBytes += mfer.FileSize(info.Size())
return Result{
Path: entry.Path,
Path: relPath,
Status: StatusSizeMismatch,
Message: "size mismatch",
}
@ -216,31 +217,31 @@ func (c *Checker) checkFile(entry *mfer.MFFilePath, checkedBytes *int64) Result
// Open and hash file
f, err := c.fs.Open(absPath)
if err != nil {
return Result{Path: entry.Path, Status: StatusError, Message: err.Error()}
return Result{Path: relPath, Status: StatusError, Message: err.Error()}
}
defer func() { _ = f.Close() }()
h := sha256.New()
n, err := io.Copy(h, f)
if err != nil {
return Result{Path: entry.Path, Status: StatusError, Message: err.Error()}
return Result{Path: relPath, Status: StatusError, Message: err.Error()}
}
*checkedBytes += n
*checkedBytes += mfer.FileSize(n)
// Encode as multihash and compare
computed, err := multihash.Encode(h.Sum(nil), multihash.SHA2_256)
if err != nil {
return Result{Path: entry.Path, Status: StatusError, Message: err.Error()}
return Result{Path: relPath, Status: StatusError, Message: err.Error()}
}
// Check against all hashes in manifest (at least one must match)
for _, hash := range entry.Hashes {
if bytes.Equal(computed, hash.MultiHash) {
return Result{Path: entry.Path, Status: StatusOK}
return Result{Path: relPath, Status: StatusOK}
}
}
return Result{Path: entry.Path, Status: StatusHashMismatch, Message: "hash mismatch"}
return Result{Path: relPath, Status: StatusHashMismatch, Message: "hash mismatch"}
}
// FindExtraFiles walks the filesystem and reports files not in the manifest.
@ -250,7 +251,7 @@ func (c *Checker) FindExtraFiles(ctx context.Context, results chan<- Result) err
defer close(results)
}
return afero.Walk(c.fs, c.basePath, func(path string, info os.FileInfo, err error) error {
return afero.Walk(c.fs, string(c.basePath), func(path string, info os.FileInfo, err error) error {
if err != nil {
return err
}
@ -267,10 +268,11 @@ func (c *Checker) FindExtraFiles(ctx context.Context, results chan<- Result) err
}
// Get relative path
relPath, err := filepath.Rel(c.basePath, path)
rel, err := filepath.Rel(string(c.basePath), path)
if err != nil {
return err
}
relPath := mfer.RelFilePath(rel)
// Check if path is in manifest
if _, exists := c.manifestPaths[relPath]; !exists {

View File

@ -40,7 +40,7 @@ func createTestManifest(t *testing.T, fs afero.Fs, manifestPath string, files ma
builder := mfer.NewBuilder()
for path, content := range files {
reader := bytes.NewReader(content)
_, err := builder.AddFile(path, int64(len(content)), time.Now(), reader, nil)
_, err := builder.AddFile(mfer.RelFilePath(path), mfer.FileSize(len(content)), mfer.ModTime(time.Now()), reader, nil)
require.NoError(t, err)
}
@ -72,7 +72,7 @@ func TestNewChecker(t *testing.T) {
chk, err := NewChecker("/manifest.mf", "/", fs)
require.NoError(t, err)
assert.NotNil(t, chk)
assert.Equal(t, int64(2), chk.FileCount())
assert.Equal(t, mfer.FileCount(2), chk.FileCount())
})
t.Run("missing manifest", func(t *testing.T) {
@ -101,8 +101,8 @@ func TestCheckerFileCountAndTotalBytes(t *testing.T) {
chk, err := NewChecker("/manifest.mf", "/", fs)
require.NoError(t, err)
assert.Equal(t, int64(3), chk.FileCount())
assert.Equal(t, int64(2+11+1000), chk.TotalBytes())
assert.Equal(t, mfer.FileCount(3), chk.FileCount())
assert.Equal(t, mfer.FileSize(2+11+1000), chk.TotalBytes())
}
func TestCheckAllFilesOK(t *testing.T) {
@ -158,7 +158,7 @@ func TestCheckMissingFile(t *testing.T) {
okCount++
case StatusMissing:
missingCount++
assert.Equal(t, "missing.txt", r.Path)
assert.Equal(t, mfer.RelFilePath("missing.txt"), r.Path)
}
}
@ -186,7 +186,7 @@ func TestCheckSizeMismatch(t *testing.T) {
r := <-results
assert.Equal(t, StatusSizeMismatch, r.Status)
assert.Equal(t, "file.txt", r.Path)
assert.Equal(t, mfer.RelFilePath("file.txt"), r.Path)
}
func TestCheckHashMismatch(t *testing.T) {
@ -212,7 +212,7 @@ func TestCheckHashMismatch(t *testing.T) {
r := <-results
assert.Equal(t, StatusHashMismatch, r.Status)
assert.Equal(t, "file.txt", r.Path)
assert.Equal(t, mfer.RelFilePath("file.txt"), r.Path)
}
func TestCheckWithProgress(t *testing.T) {
@ -246,11 +246,11 @@ func TestCheckWithProgress(t *testing.T) {
assert.NotEmpty(t, progressUpdates)
// Final progress should show all files checked
final := progressUpdates[len(progressUpdates)-1]
assert.Equal(t, int64(2), final.TotalFiles)
assert.Equal(t, int64(2), final.CheckedFiles)
assert.Equal(t, int64(300), final.TotalBytes)
assert.Equal(t, int64(300), final.CheckedBytes)
assert.Equal(t, int64(0), final.Failures)
assert.Equal(t, mfer.FileCount(2), final.TotalFiles)
assert.Equal(t, mfer.FileCount(2), final.CheckedFiles)
assert.Equal(t, mfer.FileSize(300), final.TotalBytes)
assert.Equal(t, mfer.FileSize(300), final.CheckedBytes)
assert.Equal(t, mfer.FileCount(0), final.Failures)
}
func TestCheckContextCancellation(t *testing.T) {
@ -301,7 +301,7 @@ func TestFindExtraFiles(t *testing.T) {
}
assert.Len(t, extras, 1)
assert.Equal(t, "file2.txt", extras[0].Path)
assert.Equal(t, mfer.RelFilePath("file2.txt"), extras[0].Path)
assert.Equal(t, StatusExtra, extras[0].Status)
assert.Equal(t, "not in manifest", extras[0].Message)
}
@ -390,8 +390,8 @@ func TestCheckEmptyManifest(t *testing.T) {
chk, err := NewChecker("/manifest.mf", "/data", fs)
require.NoError(t, err)
assert.Equal(t, int64(0), chk.FileCount())
assert.Equal(t, int64(0), chk.TotalBytes())
assert.Equal(t, mfer.FileCount(0), chk.FileCount())
assert.Equal(t, mfer.FileSize(0), chk.TotalBytes())
results := make(chan Result, 10)
err = chk.Check(context.Background(), results, nil)

View File

@ -113,7 +113,7 @@ func (mfa *CLIApp) freshenManifestOperation(ctx *cli.Context) error {
}
// Handle dotfiles
if !includeDotfiles && pathIsHidden(relPath) {
if !includeDotfiles && mfer.IsHiddenPath(filepath.ToSlash(relPath)) {
if info.IsDir() {
return filepath.SkipDir
}
@ -274,10 +274,14 @@ func (mfa *CLIApp) freshenManifestOperation(ctx *cli.Context) error {
hashedFiles++
// Add to builder with computed hash
addFileToBuilder(builder, e.path, e.size, e.mtime, hash)
if err := addFileToBuilder(builder, e.path, e.size, e.mtime, hash); err != nil {
return fmt.Errorf("failed to add %s: %w", e.path, err)
}
} else {
// Use existing entry
addExistingToBuilder(builder, e.existing)
if err := addExistingToBuilder(builder, e.existing); err != nil {
return fmt.Errorf("failed to add %s: %w", e.path, err)
}
}
}
@ -360,38 +364,15 @@ func hashFile(r io.Reader, size int64, progress func(int64)) ([]byte, int64, err
}
// addFileToBuilder adds a new file entry to the builder
func addFileToBuilder(b *mfer.Builder, path string, size int64, mtime time.Time, hash []byte) {
// Use the builder's internal method indirectly by creating an entry
// Since Builder.AddFile reads from a reader, we need to use a different approach
// We'll access the builder's files directly through a custom method
b.AddFileWithHash(path, size, mtime, hash)
func addFileToBuilder(b *mfer.Builder, path string, size int64, mtime time.Time, hash []byte) error {
return b.AddFileWithHash(mfer.RelFilePath(path), mfer.FileSize(size), mfer.ModTime(mtime), hash)
}
// addExistingToBuilder adds an existing manifest entry to the builder
func addExistingToBuilder(b *mfer.Builder, entry *mfer.MFFilePath) {
func addExistingToBuilder(b *mfer.Builder, entry *mfer.MFFilePath) error {
mtime := time.Unix(entry.Mtime.Seconds, int64(entry.Mtime.Nanos))
if len(entry.Hashes) > 0 {
b.AddFileWithHash(entry.Path, entry.Size, mtime, entry.Hashes[0].MultiHash)
if len(entry.Hashes) == 0 {
return nil
}
}
// pathIsHidden checks if a path contains hidden components
func pathIsHidden(p string) bool {
// "." is not hidden, it's the current directory
if p == "." {
return false
}
// Check each path component
for p != "" && p != "." && p != "/" {
base := filepath.Base(p)
if len(base) > 0 && base[0] == '.' {
return true
}
parent := filepath.Dir(p)
if parent == p {
break
}
p = parent
}
return false
return b.AddFileWithHash(mfer.RelFilePath(entry.Path), mfer.FileSize(entry.Size), mfer.ModTime(mtime), entry.Hashes[0].MultiHash)
}

View File

@ -13,29 +13,29 @@ import (
"github.com/spf13/afero"
"github.com/urfave/cli/v2"
"sneak.berlin/go/mfer/internal/log"
"sneak.berlin/go/mfer/internal/scanner"
"sneak.berlin/go/mfer/mfer"
)
func (mfa *CLIApp) generateManifestOperation(ctx *cli.Context) error {
log.Debug("generateManifestOperation()")
opts := &scanner.Options{
opts := &mfer.ScannerOptions{
IncludeDotfiles: ctx.Bool("IncludeDotfiles"),
FollowSymLinks: ctx.Bool("FollowSymLinks"),
Fs: mfa.Fs,
}
s := scanner.NewWithOptions(opts)
s := mfer.NewScannerWithOptions(opts)
// Phase 1: Enumeration - collect paths and stat files
args := ctx.Args()
showProgress := ctx.Bool("progress")
// Set up enumeration progress reporting
var enumProgress chan scanner.EnumerateStatus
var enumProgress chan mfer.EnumerateStatus
var enumWg sync.WaitGroup
if showProgress {
enumProgress = make(chan scanner.EnumerateStatus, 1)
enumProgress = make(chan mfer.EnumerateStatus, 1)
enumWg.Add(1)
go func() {
defer enumWg.Done()
@ -117,10 +117,10 @@ func (mfa *CLIApp) generateManifestOperation(ctx *cli.Context) error {
}()
// Phase 2: Scan - read file contents and generate manifest
var scanProgress chan scanner.ScanStatus
var scanProgress chan mfer.ScanStatus
var scanWg sync.WaitGroup
if showProgress {
scanProgress = make(chan scanner.ScanStatus, 1)
scanProgress = make(chan mfer.ScanStatus, 1)
scanWg.Add(1)
go func() {
defer scanWg.Done()

View File

@ -2,6 +2,7 @@ package mfer
import (
"crypto/sha256"
"errors"
"io"
"sync"
"time"
@ -9,9 +10,42 @@ import (
"github.com/multiformats/go-multihash"
)
// Named types used throughout the package so that paths, sizes, counts and
// timestamps cannot be mixed up with one another or with plain strings and
// integers without an explicit conversion.
type (
	// RelFilePath is a file path relative to the manifest it belongs to.
	RelFilePath string

	// AbsFilePath is an absolute file path on the filesystem.
	AbsFilePath string

	// FileSize is a file size measured in bytes.
	FileSize int64

	// FileCount is a number of files.
	FileCount int64

	// ModTime is the modification time of a file.
	ModTime time.Time

	// UnixSeconds is a number of seconds since the Unix epoch.
	UnixSeconds int64

	// UnixNanos is the nanosecond component of a timestamp (0-999999999).
	UnixNanos int32
)
// Timestamp returns a newly allocated protobuf Timestamp for m.
// Seconds is the whole number of seconds since the Unix epoch and Nanos is
// the nanosecond offset within that second.
func (m ModTime) Timestamp() *Timestamp {
	goTime := time.Time(m)

	ts := new(Timestamp)
	ts.Seconds = goTime.Unix()
	ts.Nanos = int32(goTime.Nanosecond())
	return ts
}
// Multihash represents a multihash-encoded file hash (typically SHA2-256).
// Its underlying type is []byte, so len and slicing work on it directly.
type Multihash []byte
// FileHashProgress reports progress during file hashing.
type FileHashProgress struct {
BytesRead int64 // Total bytes read so far for the current file
BytesRead FileSize // Total bytes read so far for the current file
}
// Builder constructs a manifest by adding files one at a time.
@ -33,24 +67,24 @@ func NewBuilder() *Builder {
// Progress updates are sent to the progress channel (if non-nil) without blocking.
// Returns the number of bytes read.
func (b *Builder) AddFile(
path string,
size int64,
mtime time.Time,
path RelFilePath,
size FileSize,
mtime ModTime,
reader io.Reader,
progress chan<- FileHashProgress,
) (int64, error) {
) (FileSize, error) {
// Create hash writer
h := sha256.New()
// Read file in chunks, updating hash and progress
var totalRead int64
var totalRead FileSize
buf := make([]byte, 64*1024) // 64KB chunks
for {
n, err := reader.Read(buf)
if n > 0 {
h.Write(buf[:n])
totalRead += int64(n)
totalRead += FileSize(n)
sendFileHashProgress(progress, FileHashProgress{BytesRead: totalRead})
}
if err == io.EOF {
@ -69,12 +103,12 @@ func (b *Builder) AddFile(
// Create file entry
entry := &MFFilePath{
Path: path,
Size: size,
Path: string(path),
Size: int64(size),
Hashes: []*MFFileChecksum{
{MultiHash: mh},
},
Mtime: newTimestampFromTime(mtime),
Mtime: mtime.Timestamp(),
}
b.mu.Lock()
@ -104,19 +138,31 @@ func (b *Builder) FileCount() int {
// AddFileWithHash adds a file entry with a pre-computed hash.
// This is useful when the hash is already known (e.g., from an existing manifest).
func (b *Builder) AddFileWithHash(path string, size int64, mtime time.Time, hash []byte) {
// Returns an error if path is empty, size is negative, or hash is nil/empty.
func (b *Builder) AddFileWithHash(path RelFilePath, size FileSize, mtime ModTime, hash Multihash) error {
if path == "" {
return errors.New("path cannot be empty")
}
if size < 0 {
return errors.New("size cannot be negative")
}
if len(hash) == 0 {
return errors.New("hash cannot be nil or empty")
}
entry := &MFFilePath{
Path: path,
Size: size,
Path: string(path),
Size: int64(size),
Hashes: []*MFFileChecksum{
{MultiHash: hash},
},
Mtime: newTimestampFromTime(mtime),
Mtime: mtime.Timestamp(),
}
b.mu.Lock()
b.files = append(b.files, entry)
b.mu.Unlock()
return nil
}
// Build finalizes the manifest and writes it to the writer.

104
mfer/builder_test.go Normal file
View File

@ -0,0 +1,104 @@
package mfer
import (
"bytes"
"strings"
"testing"
"time"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)
// TestNewBuilder verifies that NewBuilder returns a non-nil builder that
// reports zero files before anything has been added.
func TestNewBuilder(t *testing.T) {
	builder := NewBuilder()

	assert.NotNil(t, builder)
	assert.Equal(t, 0, builder.FileCount())
}
func TestBuilderAddFile(t *testing.T) {
b := NewBuilder()
content := []byte("test content")
reader := bytes.NewReader(content)
bytesRead, err := b.AddFile("test.txt", FileSize(len(content)), ModTime(time.Now()), reader, nil)
require.NoError(t, err)
assert.Equal(t, FileSize(len(content)), bytesRead)
assert.Equal(t, 1, b.FileCount())
}
// TestBuilderAddFileWithHash checks that an entry with a pre-computed hash is
// accepted and counted by the builder.
func TestBuilderAddFileWithHash(t *testing.T) {
	// A SHA256 multihash is 34 bytes long; the contents do not matter here.
	digest := make([]byte, 34)
	builder := NewBuilder()

	require.NoError(t, builder.AddFileWithHash("test.txt", 100, ModTime(time.Now()), digest))
	assert.Equal(t, 1, builder.FileCount())
}
func TestBuilderAddFileWithHashValidation(t *testing.T) {
t.Run("empty path", func(t *testing.T) {
b := NewBuilder()
hash := make([]byte, 34)
err := b.AddFileWithHash("", 100, ModTime(time.Now()), hash)
assert.Error(t, err)
assert.Contains(t, err.Error(), "path")
})
t.Run("negative size", func(t *testing.T) {
b := NewBuilder()
hash := make([]byte, 34)
err := b.AddFileWithHash("test.txt", -1, ModTime(time.Now()), hash)
assert.Error(t, err)
assert.Contains(t, err.Error(), "size")
})
t.Run("nil hash", func(t *testing.T) {
b := NewBuilder()
err := b.AddFileWithHash("test.txt", 100, ModTime(time.Now()), nil)
assert.Error(t, err)
assert.Contains(t, err.Error(), "hash")
})
t.Run("empty hash", func(t *testing.T) {
b := NewBuilder()
err := b.AddFileWithHash("test.txt", 100, ModTime(time.Now()), []byte{})
assert.Error(t, err)
assert.Contains(t, err.Error(), "hash")
})
t.Run("valid inputs", func(t *testing.T) {
b := NewBuilder()
hash := make([]byte, 34)
err := b.AddFileWithHash("test.txt", 100, ModTime(time.Now()), hash)
assert.NoError(t, err)
assert.Equal(t, 1, b.FileCount())
})
}
// TestBuilderBuild adds one file to a builder, serializes the manifest into
// an in-memory buffer, and checks that the output begins with MAGIC.
func TestBuilderBuild(t *testing.T) {
	payload := []byte("test content")
	builder := NewBuilder()

	_, err := builder.AddFile("test.txt", FileSize(len(payload)), ModTime(time.Now()), bytes.NewReader(payload), nil)
	require.NoError(t, err)

	var out bytes.Buffer
	require.NoError(t, builder.Build(&out))

	// Should have magic bytes
	serialized := out.String()
	assert.True(t, strings.HasPrefix(serialized, MAGIC))
}
// TestBuilderBuildEmpty checks that a builder with no files still builds
// without error and that the output begins with MAGIC.
func TestBuilderBuildEmpty(t *testing.T) {
	var out bytes.Buffer
	builder := NewBuilder()

	require.NoError(t, builder.Build(&out))

	// Should still produce valid manifest with 0 files
	serialized := out.String()
	assert.True(t, strings.HasPrefix(serialized, MAGIC))
}

View File

@ -1,4 +1,4 @@
package scanner
package mfer
import (
"context"
@ -13,7 +13,6 @@ import (
"github.com/dustin/go-humanize"
"github.com/spf13/afero"
"sneak.berlin/go/mfer/internal/log"
"sneak.berlin/go/mfer/mfer"
)
// Phase 1: Enumeration
@ -23,8 +22,8 @@ import (
// EnumerateStatus contains progress information for the enumeration phase.
type EnumerateStatus struct {
FilesFound int64 // Number of files discovered so far
BytesFound int64 // Total size of discovered files (from stat)
FilesFound FileCount // Number of files discovered so far
BytesFound FileSize // Total size of discovered files (from stat)
}
// Phase 2: Scan (ToManifest)
@ -34,16 +33,16 @@ type EnumerateStatus struct {
// ScanStatus contains progress information for the scan phase.
type ScanStatus struct {
TotalFiles int64 // Total number of files to scan
ScannedFiles int64 // Number of files scanned so far
TotalBytes int64 // Total bytes to read (sum of all file sizes)
ScannedBytes int64 // Bytes read so far
TotalFiles FileCount // Total number of files to scan
ScannedFiles FileCount // Number of files scanned so far
TotalBytes FileSize // Total bytes to read (sum of all file sizes)
ScannedBytes FileSize // Bytes read so far
BytesPerSec float64 // Current throughput rate
ETA time.Duration // Estimated time to completion
}
// Options configures scanner behavior.
type Options struct {
// ScannerOptions configures scanner behavior.
type ScannerOptions struct {
IncludeDotfiles bool // Include files and directories starting with a dot (default: exclude)
FollowSymLinks bool // Resolve symlinks instead of skipping them
Fs afero.Fs // Filesystem to use, defaults to OsFs if nil
@ -51,10 +50,10 @@ type Options struct {
// FileEntry represents a file that has been enumerated.
type FileEntry struct {
Path string // Relative path (used in manifest)
AbsPath string // Absolute path (used for reading file content)
Size int64 // File size in bytes
Mtime time.Time // Last modification time
Path RelFilePath // Relative path (used in manifest)
AbsPath AbsFilePath // Absolute path (used for reading file content)
Size FileSize // File size in bytes
Mtime ModTime // Last modification time
Ctime time.Time // Creation time (platform-dependent)
}
@ -62,19 +61,20 @@ type FileEntry struct {
type Scanner struct {
mu sync.RWMutex
files []*FileEntry
options *Options
totalBytes FileSize // cached sum of all file sizes
options *ScannerOptions
fs afero.Fs
}
// New creates a new Scanner with default options.
func New() *Scanner {
return NewWithOptions(nil)
// NewScanner creates a new Scanner with default options.
func NewScanner() *Scanner {
return NewScannerWithOptions(nil)
}
// NewWithOptions creates a new Scanner with the given options.
func NewWithOptions(opts *Options) *Scanner {
// NewScannerWithOptions creates a new Scanner with the given options.
func NewScannerWithOptions(opts *ScannerOptions) *Scanner {
if opts == nil {
opts = &Options{}
opts = &ScannerOptions{}
}
fs := opts.Fs
if fs == nil {
@ -154,7 +154,7 @@ func (s *Scanner) enumerateFS(afs afero.Fs, basePath string, progress chan<- Enu
if err != nil {
return err
}
if !s.options.IncludeDotfiles && pathIsHidden(p) {
if !s.options.IncludeDotfiles && IsHiddenPath(p) {
if info.IsDir() {
return filepath.SkipDir
}
@ -206,21 +206,19 @@ func (s *Scanner) enumerateFileWithInfo(filePath string, basePath string, info f
}
entry := &FileEntry{
Path: cleanPath,
AbsPath: absPath,
Size: info.Size(),
Mtime: info.ModTime(),
Path: RelFilePath(cleanPath),
AbsPath: AbsFilePath(absPath),
Size: FileSize(info.Size()),
Mtime: ModTime(info.ModTime()),
// Note: Ctime not available from fs.FileInfo on all platforms
// Will need platform-specific code to extract it
}
s.mu.Lock()
s.files = append(s.files, entry)
filesFound := int64(len(s.files))
var bytesFound int64
for _, f := range s.files {
bytesFound += f.Size
}
s.totalBytes += entry.Size
filesFound := FileCount(len(s.files))
bytesFound := s.totalBytes
s.mu.Unlock()
sendEnumerateStatus(progress, EnumerateStatus{
@ -241,21 +239,17 @@ func (s *Scanner) Files() []*FileEntry {
}
// FileCount returns the number of files in the scanner.
func (s *Scanner) FileCount() int64 {
func (s *Scanner) FileCount() FileCount {
s.mu.RLock()
defer s.mu.RUnlock()
return int64(len(s.files))
return FileCount(len(s.files))
}
// TotalBytes returns the total size of all files in the scanner.
func (s *Scanner) TotalBytes() int64 {
func (s *Scanner) TotalBytes() FileSize {
s.mu.RLock()
defer s.mu.RUnlock()
var total int64
for _, f := range s.files {
total += f.Size
}
return total
return s.totalBytes
}
// ToManifest reads all file contents, computes hashes, and generates a manifest.
@ -270,17 +264,17 @@ func (s *Scanner) ToManifest(ctx context.Context, w io.Writer, progress chan<- S
s.mu.RLock()
files := make([]*FileEntry, len(s.files))
copy(files, s.files)
totalFiles := int64(len(files))
var totalBytes int64
totalFiles := FileCount(len(files))
var totalBytes FileSize
for _, f := range files {
totalBytes += f.Size
}
s.mu.RUnlock()
builder := mfer.NewBuilder()
builder := NewBuilder()
var scannedFiles int64
var scannedBytes int64
var scannedFiles FileCount
var scannedBytes FileSize
lastProgressTime := time.Now()
startTime := time.Now()
@ -293,18 +287,18 @@ func (s *Scanner) ToManifest(ctx context.Context, w io.Writer, progress chan<- S
}
// Open file
f, err := s.fs.Open(entry.AbsPath)
f, err := s.fs.Open(string(entry.AbsPath))
if err != nil {
return err
}
// Create progress channel for this file
var fileProgress chan mfer.FileHashProgress
var fileProgress chan FileHashProgress
var wg sync.WaitGroup
if progress != nil {
fileProgress = make(chan mfer.FileHashProgress, 1)
fileProgress = make(chan FileHashProgress, 1)
wg.Add(1)
go func(baseScannedBytes int64) {
go func(baseScannedBytes FileSize) {
defer wg.Done()
for p := range fileProgress {
// Send progress at most once per second
@ -382,9 +376,10 @@ func (s *Scanner) ToManifest(ctx context.Context, w io.Writer, progress chan<- S
return builder.Build(w)
}
// pathIsHidden returns true if the path or any of its parent directories
// IsHiddenPath returns true if the path or any of its parent directories
// start with a dot (hidden files/directories).
func pathIsHidden(p string) bool {
// The path should use forward slashes.
func IsHiddenPath(p string) bool {
tp := path.Clean(p)
if strings.HasPrefix(tp, ".") {
return true

View File

@ -1,4 +1,4 @@
package scanner
package mfer
import (
"bytes"
@ -11,77 +11,77 @@ import (
"github.com/stretchr/testify/require"
)
func TestNew(t *testing.T) {
s := New()
func TestNewScanner(t *testing.T) {
s := NewScanner()
assert.NotNil(t, s)
assert.Equal(t, int64(0), s.FileCount())
assert.Equal(t, int64(0), s.TotalBytes())
assert.Equal(t, FileCount(0), s.FileCount())
assert.Equal(t, FileSize(0), s.TotalBytes())
}
func TestNewWithOptions(t *testing.T) {
func TestNewScannerWithOptions(t *testing.T) {
t.Run("nil options", func(t *testing.T) {
s := NewWithOptions(nil)
s := NewScannerWithOptions(nil)
assert.NotNil(t, s)
})
t.Run("with options", func(t *testing.T) {
fs := afero.NewMemMapFs()
opts := &Options{
opts := &ScannerOptions{
IncludeDotfiles: true,
FollowSymLinks: true,
Fs: fs,
}
s := NewWithOptions(opts)
s := NewScannerWithOptions(opts)
assert.NotNil(t, s)
})
}
func TestEnumerateFile(t *testing.T) {
func TestScannerEnumerateFile(t *testing.T) {
fs := afero.NewMemMapFs()
require.NoError(t, afero.WriteFile(fs, "/test.txt", []byte("hello world"), 0644))
s := NewWithOptions(&Options{Fs: fs})
s := NewScannerWithOptions(&ScannerOptions{Fs: fs})
err := s.EnumerateFile("/test.txt")
require.NoError(t, err)
assert.Equal(t, int64(1), s.FileCount())
assert.Equal(t, int64(11), s.TotalBytes())
assert.Equal(t, FileCount(1), s.FileCount())
assert.Equal(t, FileSize(11), s.TotalBytes())
files := s.Files()
require.Len(t, files, 1)
assert.Equal(t, "test.txt", files[0].Path)
assert.Equal(t, int64(11), files[0].Size)
assert.Equal(t, RelFilePath("test.txt"), files[0].Path)
assert.Equal(t, FileSize(11), files[0].Size)
}
func TestEnumerateFileMissing(t *testing.T) {
func TestScannerEnumerateFileMissing(t *testing.T) {
fs := afero.NewMemMapFs()
s := NewWithOptions(&Options{Fs: fs})
s := NewScannerWithOptions(&ScannerOptions{Fs: fs})
err := s.EnumerateFile("/nonexistent.txt")
assert.Error(t, err)
}
func TestEnumeratePath(t *testing.T) {
func TestScannerEnumeratePath(t *testing.T) {
fs := afero.NewMemMapFs()
require.NoError(t, fs.MkdirAll("/testdir/subdir", 0755))
require.NoError(t, afero.WriteFile(fs, "/testdir/file1.txt", []byte("one"), 0644))
require.NoError(t, afero.WriteFile(fs, "/testdir/file2.txt", []byte("two"), 0644))
require.NoError(t, afero.WriteFile(fs, "/testdir/subdir/file3.txt", []byte("three"), 0644))
s := NewWithOptions(&Options{Fs: fs})
s := NewScannerWithOptions(&ScannerOptions{Fs: fs})
err := s.EnumeratePath("/testdir", nil)
require.NoError(t, err)
assert.Equal(t, int64(3), s.FileCount())
assert.Equal(t, int64(3+3+5), s.TotalBytes())
assert.Equal(t, FileCount(3), s.FileCount())
assert.Equal(t, FileSize(3+3+5), s.TotalBytes())
}
func TestEnumeratePathWithProgress(t *testing.T) {
func TestScannerEnumeratePathWithProgress(t *testing.T) {
fs := afero.NewMemMapFs()
require.NoError(t, fs.MkdirAll("/testdir", 0755))
require.NoError(t, afero.WriteFile(fs, "/testdir/file1.txt", []byte("one"), 0644))
require.NoError(t, afero.WriteFile(fs, "/testdir/file2.txt", []byte("two"), 0644))
s := NewWithOptions(&Options{Fs: fs})
s := NewScannerWithOptions(&ScannerOptions{Fs: fs})
progress := make(chan EnumerateStatus, 10)
err := s.EnumeratePath("/testdir", progress)
@ -95,25 +95,25 @@ func TestEnumeratePathWithProgress(t *testing.T) {
assert.NotEmpty(t, updates)
// Final update should show all files
final := updates[len(updates)-1]
assert.Equal(t, int64(2), final.FilesFound)
assert.Equal(t, int64(6), final.BytesFound)
assert.Equal(t, FileCount(2), final.FilesFound)
assert.Equal(t, FileSize(6), final.BytesFound)
}
func TestEnumeratePaths(t *testing.T) {
func TestScannerEnumeratePaths(t *testing.T) {
fs := afero.NewMemMapFs()
require.NoError(t, fs.MkdirAll("/dir1", 0755))
require.NoError(t, fs.MkdirAll("/dir2", 0755))
require.NoError(t, afero.WriteFile(fs, "/dir1/a.txt", []byte("aaa"), 0644))
require.NoError(t, afero.WriteFile(fs, "/dir2/b.txt", []byte("bbb"), 0644))
s := NewWithOptions(&Options{Fs: fs})
s := NewScannerWithOptions(&ScannerOptions{Fs: fs})
err := s.EnumeratePaths(nil, "/dir1", "/dir2")
require.NoError(t, err)
assert.Equal(t, int64(2), s.FileCount())
assert.Equal(t, FileCount(2), s.FileCount())
}
func TestExcludeDotfiles(t *testing.T) {
func TestScannerExcludeDotfiles(t *testing.T) {
fs := afero.NewMemMapFs()
require.NoError(t, fs.MkdirAll("/testdir/.hidden", 0755))
require.NoError(t, afero.WriteFile(fs, "/testdir/visible.txt", []byte("visible"), 0644))
@ -121,54 +121,31 @@ func TestExcludeDotfiles(t *testing.T) {
require.NoError(t, afero.WriteFile(fs, "/testdir/.hidden/inside.txt", []byte("inside"), 0644))
t.Run("exclude by default", func(t *testing.T) {
s := NewWithOptions(&Options{Fs: fs, IncludeDotfiles: false})
s := NewScannerWithOptions(&ScannerOptions{Fs: fs, IncludeDotfiles: false})
err := s.EnumeratePath("/testdir", nil)
require.NoError(t, err)
assert.Equal(t, int64(1), s.FileCount())
assert.Equal(t, FileCount(1), s.FileCount())
files := s.Files()
assert.Equal(t, "visible.txt", files[0].Path)
assert.Equal(t, RelFilePath("visible.txt"), files[0].Path)
})
t.Run("include when enabled", func(t *testing.T) {
s := NewWithOptions(&Options{Fs: fs, IncludeDotfiles: true})
s := NewScannerWithOptions(&ScannerOptions{Fs: fs, IncludeDotfiles: true})
err := s.EnumeratePath("/testdir", nil)
require.NoError(t, err)
assert.Equal(t, int64(3), s.FileCount())
assert.Equal(t, FileCount(3), s.FileCount())
})
}
func TestPathIsHidden(t *testing.T) {
tests := []struct {
path string
hidden bool
}{
{"file.txt", false},
{".hidden", true},
{"dir/file.txt", false},
{"dir/.hidden", true},
{".dir/file.txt", true},
{"/absolute/path", false},
{"/absolute/.hidden", true},
{"./relative", false}, // path.Clean removes leading ./
{"a/b/c/.d/e", true},
}
for _, tt := range tests {
t.Run(tt.path, func(t *testing.T) {
assert.Equal(t, tt.hidden, pathIsHidden(tt.path), "pathIsHidden(%q)", tt.path)
})
}
}
func TestToManifest(t *testing.T) {
func TestScannerToManifest(t *testing.T) {
fs := afero.NewMemMapFs()
require.NoError(t, fs.MkdirAll("/testdir", 0755))
require.NoError(t, afero.WriteFile(fs, "/testdir/file1.txt", []byte("content one"), 0644))
require.NoError(t, afero.WriteFile(fs, "/testdir/file2.txt", []byte("content two"), 0644))
s := NewWithOptions(&Options{Fs: fs})
s := NewScannerWithOptions(&ScannerOptions{Fs: fs})
err := s.EnumeratePath("/testdir", nil)
require.NoError(t, err)
@ -178,15 +155,15 @@ func TestToManifest(t *testing.T) {
// Manifest should have magic bytes
assert.True(t, buf.Len() > 0)
assert.Equal(t, "ZNAVSRFG", string(buf.Bytes()[:8]))
assert.Equal(t, MAGIC, string(buf.Bytes()[:8]))
}
func TestToManifestWithProgress(t *testing.T) {
func TestScannerToManifestWithProgress(t *testing.T) {
fs := afero.NewMemMapFs()
require.NoError(t, fs.MkdirAll("/testdir", 0755))
require.NoError(t, afero.WriteFile(fs, "/testdir/file.txt", bytes.Repeat([]byte("x"), 1000), 0644))
s := NewWithOptions(&Options{Fs: fs})
s := NewScannerWithOptions(&ScannerOptions{Fs: fs})
err := s.EnumeratePath("/testdir", nil)
require.NoError(t, err)
@ -204,13 +181,13 @@ func TestToManifestWithProgress(t *testing.T) {
assert.NotEmpty(t, updates)
// Final update should show completion
final := updates[len(updates)-1]
assert.Equal(t, int64(1), final.TotalFiles)
assert.Equal(t, int64(1), final.ScannedFiles)
assert.Equal(t, int64(1000), final.TotalBytes)
assert.Equal(t, int64(1000), final.ScannedBytes)
assert.Equal(t, FileCount(1), final.TotalFiles)
assert.Equal(t, FileCount(1), final.ScannedFiles)
assert.Equal(t, FileSize(1000), final.TotalBytes)
assert.Equal(t, FileSize(1000), final.ScannedBytes)
}
func TestToManifestContextCancellation(t *testing.T) {
func TestScannerToManifestContextCancellation(t *testing.T) {
fs := afero.NewMemMapFs()
require.NoError(t, fs.MkdirAll("/testdir", 0755))
// Create many files to ensure we have time to cancel
@ -219,7 +196,7 @@ func TestToManifestContextCancellation(t *testing.T) {
require.NoError(t, afero.WriteFile(fs, "/testdir/"+name, bytes.Repeat([]byte("x"), 100), 0644))
}
s := NewWithOptions(&Options{Fs: fs})
s := NewScannerWithOptions(&ScannerOptions{Fs: fs})
err := s.EnumeratePath("/testdir", nil)
require.NoError(t, err)
@ -231,9 +208,9 @@ func TestToManifestContextCancellation(t *testing.T) {
assert.ErrorIs(t, err, context.Canceled)
}
func TestToManifestEmptyScanner(t *testing.T) {
func TestScannerToManifestEmptyScanner(t *testing.T) {
fs := afero.NewMemMapFs()
s := NewWithOptions(&Options{Fs: fs})
s := NewScannerWithOptions(&ScannerOptions{Fs: fs})
var buf bytes.Buffer
err := s.ToManifest(context.Background(), &buf, nil)
@ -241,14 +218,14 @@ func TestToManifestEmptyScanner(t *testing.T) {
// Should still produce a valid manifest
assert.True(t, buf.Len() > 0)
assert.Equal(t, "ZNAVSRFG", string(buf.Bytes()[:8]))
assert.Equal(t, MAGIC, string(buf.Bytes()[:8]))
}
func TestFilesCopiesSlice(t *testing.T) {
func TestScannerFilesCopiesSlice(t *testing.T) {
fs := afero.NewMemMapFs()
require.NoError(t, afero.WriteFile(fs, "/test.txt", []byte("hello"), 0644))
s := NewWithOptions(&Options{Fs: fs})
s := NewScannerWithOptions(&ScannerOptions{Fs: fs})
require.NoError(t, s.EnumerateFile("/test.txt"))
files1 := s.Files()
@ -258,7 +235,7 @@ func TestFilesCopiesSlice(t *testing.T) {
assert.NotSame(t, &files1[0], &files2[0])
}
func TestEnumerateFS(t *testing.T) {
func TestScannerEnumerateFS(t *testing.T) {
fs := afero.NewMemMapFs()
require.NoError(t, fs.MkdirAll("/testdir/sub", 0755))
require.NoError(t, afero.WriteFile(fs, "/testdir/file.txt", []byte("hello"), 0644))
@ -267,11 +244,11 @@ func TestEnumerateFS(t *testing.T) {
// Create a basepath filesystem
baseFs := afero.NewBasePathFs(fs, "/testdir")
s := NewWithOptions(&Options{Fs: fs})
s := NewScannerWithOptions(&ScannerOptions{Fs: fs})
err := s.EnumerateFS(baseFs, "/testdir", nil)
require.NoError(t, err)
assert.Equal(t, int64(2), s.FileCount())
assert.Equal(t, FileCount(2), s.FileCount())
}
func TestSendEnumerateStatusNonBlocking(t *testing.T) {
@ -317,27 +294,27 @@ func TestSendStatusNilChannel(t *testing.T) {
sendScanStatus(nil, ScanStatus{})
}
func TestFileEntryFields(t *testing.T) {
func TestScannerFileEntryFields(t *testing.T) {
fs := afero.NewMemMapFs()
now := time.Now().Truncate(time.Second)
require.NoError(t, afero.WriteFile(fs, "/test.txt", []byte("content"), 0644))
require.NoError(t, fs.Chtimes("/test.txt", now, now))
s := NewWithOptions(&Options{Fs: fs})
s := NewScannerWithOptions(&ScannerOptions{Fs: fs})
require.NoError(t, s.EnumerateFile("/test.txt"))
files := s.Files()
require.Len(t, files, 1)
entry := files[0]
assert.Equal(t, "test.txt", entry.Path)
assert.Contains(t, entry.AbsPath, "test.txt")
assert.Equal(t, int64(7), entry.Size)
assert.Equal(t, RelFilePath("test.txt"), entry.Path)
assert.Contains(t, string(entry.AbsPath), "test.txt")
assert.Equal(t, FileSize(7), entry.Size)
// Mtime should be set (within a second of now)
assert.WithinDuration(t, now, entry.Mtime, 2*time.Second)
assert.WithinDuration(t, now, time.Time(entry.Mtime), 2*time.Second)
}
func TestLargeFileEnumeration(t *testing.T) {
func TestScannerLargeFileEnumeration(t *testing.T) {
fs := afero.NewMemMapFs()
require.NoError(t, fs.MkdirAll("/testdir", 0755))
@ -347,7 +324,7 @@ func TestLargeFileEnumeration(t *testing.T) {
require.NoError(t, afero.WriteFile(fs, name, []byte("data"), 0644))
}
s := NewWithOptions(&Options{Fs: fs})
s := NewScannerWithOptions(&ScannerOptions{Fs: fs})
progress := make(chan EnumerateStatus, 200)
err := s.EnumeratePath("/testdir", progress)
@ -357,6 +334,29 @@ func TestLargeFileEnumeration(t *testing.T) {
for range progress {
}
assert.Equal(t, int64(100), s.FileCount())
assert.Equal(t, int64(400), s.TotalBytes()) // 100 * 4 bytes
assert.Equal(t, FileCount(100), s.FileCount())
assert.Equal(t, FileSize(400), s.TotalBytes()) // 100 * 4 bytes
}
func TestIsHiddenPath(t *testing.T) {
tests := []struct {
path string
hidden bool
}{
{"file.txt", false},
{".hidden", true},
{"dir/file.txt", false},
{"dir/.hidden", true},
{".dir/file.txt", true},
{"/absolute/path", false},
{"/absolute/.hidden", true},
{"./relative", false}, // path.Clean removes leading ./
{"a/b/c/.d/e", true},
}
for _, tt := range tests {
t.Run(tt.path, func(t *testing.T) {
assert.Equal(t, tt.hidden, IsHiddenPath(tt.path), "IsHiddenPath(%q)", tt.path)
})
}
}

53
mfer/url.go Normal file
View File

@ -0,0 +1,53 @@
package mfer
import (
"net/url"
"strings"
)
// Distinct string types for the different roles a URL can play. Because they
// are separate named types, a value of one cannot be passed where another is
// expected without an explicit conversion.
type (
	// ManifestURL is the URL of a manifest file.
	ManifestURL string

	// FileURL is the URL of a single file to be fetched.
	FileURL string

	// BaseURL is the URL prefix from which file URLs are constructed.
	BaseURL string
)
// JoinPath joins a relative file path onto a base URL and returns the
// resulting file URL.
//
// The base is treated as a directory: a trailing "/" is added to its path if
// missing, so "https://host/files" and "https://host/files/" behave the same.
//
// The relative path is split on "/" and every segment is percent-encoded on
// its own with url.PathEscape. Directory separators therefore survive as real
// "/" characters, while characters such as "?", "#", "%", ";" or spaces
// inside a file name are encoded instead of being interpreted as URL syntax.
//
// An error is returned when the base URL cannot be parsed, or when path
// starts with "/" or contains a ".." segment; such a path would resolve
// outside the base URL.
func (b BaseURL) JoinPath(path RelFilePath) (FileURL, error) {
	base, err := url.Parse(string(b))
	if err != nil {
		return "", err
	}

	// Ensure the base path ends with "/" so the reference resolves below it
	// rather than replacing its last segment. RawPath, when set, must be
	// extended too, otherwise it no longer matches Path and the base's
	// original encoding is discarded.
	if !strings.HasSuffix(base.Path, "/") {
		base.Path += "/"
		if base.RawPath != "" {
			base.RawPath += "/"
		}
	}

	rel := string(path)
	// An absolute reference would replace the base path entirely.
	if strings.HasPrefix(rel, "/") {
		return "", unsafeRelPathError(rel)
	}

	segments := strings.Split(rel, "/")
	for i, seg := range segments {
		// ".." would be resolved against the base and climb out of it.
		if seg == ".." {
			return "", unsafeRelPathError(rel)
		}
		segments[i] = url.PathEscape(seg)
	}

	// Build the reference directly instead of parsing a string: parsing would
	// mistake a first segment such as "notes:2024.txt" for a URL scheme.
	// RawPath carries the per-segment encoding through to the final URL.
	ref := &url.URL{Path: rel, RawPath: strings.Join(segments, "/")}

	return FileURL(base.ResolveReference(ref).String()), nil
}

// unsafeRelPathError is the error JoinPath returns for a relative path that
// is absolute or contains a ".." segment. Its value is the offending path.
type unsafeRelPathError string

// Error implements the error interface.
func (e unsafeRelPathError) Error() string {
	return "relative path escapes base URL: " + string(e)
}
// String returns the base URL as a plain string. The value is converted
// as-is; it is neither parsed nor validated.
func (b BaseURL) String() string {
return string(b)
}
// String returns the file URL as a plain string. The value is converted
// as-is; it is neither parsed nor validated.
func (u FileURL) String() string {
	return string(u)
}
// String returns the manifest URL as a plain string. The value is converted
// as-is; it is neither parsed nor validated.
func (u ManifestURL) String() string {
	return string(u)
}