Add custom types for type safety throughout codebase

- Add FileCount, FileSize, RelFilePath, AbsFilePath, ModTime, Multihash types
- Add UnixSeconds and UnixNanos types for timestamp handling
- Add URL types (ManifestURL, FileURL, BaseURL) with safe path joining
- Consolidate scanner package into mfer package
- Update checker to use custom types in Result and CheckStatus
- Add ModTime.Timestamp() method for protobuf conversion
- Update all tests to use proper custom types
This commit is contained in:
Jeffrey Paul 2025-12-18 01:01:18 -08:00
parent a9f0d2abe4
commit dc115c5ba2
9 changed files with 428 additions and 247 deletions

View File

@ -17,7 +17,7 @@ import (
// Result represents the outcome of checking a single file. // Result represents the outcome of checking a single file.
type Result struct { type Result struct {
Path string // Relative path from manifest Path mfer.RelFilePath // Relative path from manifest
Status Status // Verification result status Status Status // Verification result status
Message string // Human-readable description of the result Message string // Human-readable description of the result
} }
@ -55,22 +55,22 @@ func (s Status) String() string {
// CheckStatus contains progress information for the check operation. // CheckStatus contains progress information for the check operation.
type CheckStatus struct { type CheckStatus struct {
TotalFiles int64 // Total number of files in manifest TotalFiles mfer.FileCount // Total number of files in manifest
CheckedFiles int64 // Number of files checked so far CheckedFiles mfer.FileCount // Number of files checked so far
TotalBytes int64 // Total bytes to verify (sum of all file sizes) TotalBytes mfer.FileSize // Total bytes to verify (sum of all file sizes)
CheckedBytes int64 // Bytes verified so far CheckedBytes mfer.FileSize // Bytes verified so far
BytesPerSec float64 // Current throughput rate BytesPerSec float64 // Current throughput rate
ETA time.Duration // Estimated time to completion ETA time.Duration // Estimated time to completion
Failures int64 // Number of verification failures encountered Failures mfer.FileCount // Number of verification failures encountered
} }
// Checker verifies files against a manifest. // Checker verifies files against a manifest.
type Checker struct { type Checker struct {
basePath string basePath mfer.AbsFilePath
files []*mfer.MFFilePath files []*mfer.MFFilePath
fs afero.Fs fs afero.Fs
// manifestPaths is a set of paths in the manifest for quick lookup // manifestPaths is a set of paths in the manifest for quick lookup
manifestPaths map[string]struct{} manifestPaths map[mfer.RelFilePath]struct{}
} }
// NewChecker creates a new Checker for the given manifest, base path, and filesystem. // NewChecker creates a new Checker for the given manifest, base path, and filesystem.
@ -92,13 +92,13 @@ func NewChecker(manifestPath string, basePath string, fs afero.Fs) (*Checker, er
} }
files := m.Files() files := m.Files()
manifestPaths := make(map[string]struct{}, len(files)) manifestPaths := make(map[mfer.RelFilePath]struct{}, len(files))
for _, f := range files { for _, f := range files {
manifestPaths[f.Path] = struct{}{} manifestPaths[mfer.RelFilePath(f.Path)] = struct{}{}
} }
return &Checker{ return &Checker{
basePath: abs, basePath: mfer.AbsFilePath(abs),
files: files, files: files,
fs: fs, fs: fs,
manifestPaths: manifestPaths, manifestPaths: manifestPaths,
@ -106,15 +106,15 @@ func NewChecker(manifestPath string, basePath string, fs afero.Fs) (*Checker, er
} }
// FileCount returns the number of files in the manifest. // FileCount returns the number of files in the manifest.
func (c *Checker) FileCount() int64 { func (c *Checker) FileCount() mfer.FileCount {
return int64(len(c.files)) return mfer.FileCount(len(c.files))
} }
// TotalBytes returns the total size of all files in the manifest. // TotalBytes returns the total size of all files in the manifest.
func (c *Checker) TotalBytes() int64 { func (c *Checker) TotalBytes() mfer.FileSize {
var total int64 var total mfer.FileSize
for _, f := range c.files { for _, f := range c.files {
total += f.Size total += mfer.FileSize(f.Size)
} }
return total return total
} }
@ -131,12 +131,12 @@ func (c *Checker) Check(ctx context.Context, results chan<- Result, progress cha
defer close(progress) defer close(progress)
} }
totalFiles := int64(len(c.files)) totalFiles := mfer.FileCount(len(c.files))
totalBytes := c.TotalBytes() totalBytes := c.TotalBytes()
var checkedFiles int64 var checkedFiles mfer.FileCount
var checkedBytes int64 var checkedBytes mfer.FileSize
var failures int64 var failures mfer.FileCount
startTime := time.Now() startTime := time.Now()
@ -186,28 +186,29 @@ func (c *Checker) Check(ctx context.Context, results chan<- Result, progress cha
return nil return nil
} }
func (c *Checker) checkFile(entry *mfer.MFFilePath, checkedBytes *int64) Result { func (c *Checker) checkFile(entry *mfer.MFFilePath, checkedBytes *mfer.FileSize) Result {
absPath := filepath.Join(c.basePath, entry.Path) absPath := filepath.Join(string(c.basePath), entry.Path)
relPath := mfer.RelFilePath(entry.Path)
// Check if file exists // Check if file exists
info, err := c.fs.Stat(absPath) info, err := c.fs.Stat(absPath)
if err != nil { if err != nil {
if errors.Is(err, afero.ErrFileNotFound) || errors.Is(err, errors.New("file does not exist")) { if errors.Is(err, afero.ErrFileNotFound) || errors.Is(err, errors.New("file does not exist")) {
return Result{Path: entry.Path, Status: StatusMissing, Message: "file not found"} return Result{Path: relPath, Status: StatusMissing, Message: "file not found"}
} }
// Check for "file does not exist" style errors // Check for "file does not exist" style errors
exists, _ := afero.Exists(c.fs, absPath) exists, _ := afero.Exists(c.fs, absPath)
if !exists { if !exists {
return Result{Path: entry.Path, Status: StatusMissing, Message: "file not found"} return Result{Path: relPath, Status: StatusMissing, Message: "file not found"}
} }
return Result{Path: entry.Path, Status: StatusError, Message: err.Error()} return Result{Path: relPath, Status: StatusError, Message: err.Error()}
} }
// Check size // Check size
if info.Size() != entry.Size { if info.Size() != entry.Size {
*checkedBytes += info.Size() *checkedBytes += mfer.FileSize(info.Size())
return Result{ return Result{
Path: entry.Path, Path: relPath,
Status: StatusSizeMismatch, Status: StatusSizeMismatch,
Message: "size mismatch", Message: "size mismatch",
} }
@ -216,31 +217,31 @@ func (c *Checker) checkFile(entry *mfer.MFFilePath, checkedBytes *int64) Result
// Open and hash file // Open and hash file
f, err := c.fs.Open(absPath) f, err := c.fs.Open(absPath)
if err != nil { if err != nil {
return Result{Path: entry.Path, Status: StatusError, Message: err.Error()} return Result{Path: relPath, Status: StatusError, Message: err.Error()}
} }
defer func() { _ = f.Close() }() defer func() { _ = f.Close() }()
h := sha256.New() h := sha256.New()
n, err := io.Copy(h, f) n, err := io.Copy(h, f)
if err != nil { if err != nil {
return Result{Path: entry.Path, Status: StatusError, Message: err.Error()} return Result{Path: relPath, Status: StatusError, Message: err.Error()}
} }
*checkedBytes += n *checkedBytes += mfer.FileSize(n)
// Encode as multihash and compare // Encode as multihash and compare
computed, err := multihash.Encode(h.Sum(nil), multihash.SHA2_256) computed, err := multihash.Encode(h.Sum(nil), multihash.SHA2_256)
if err != nil { if err != nil {
return Result{Path: entry.Path, Status: StatusError, Message: err.Error()} return Result{Path: relPath, Status: StatusError, Message: err.Error()}
} }
// Check against all hashes in manifest (at least one must match) // Check against all hashes in manifest (at least one must match)
for _, hash := range entry.Hashes { for _, hash := range entry.Hashes {
if bytes.Equal(computed, hash.MultiHash) { if bytes.Equal(computed, hash.MultiHash) {
return Result{Path: entry.Path, Status: StatusOK} return Result{Path: relPath, Status: StatusOK}
} }
} }
return Result{Path: entry.Path, Status: StatusHashMismatch, Message: "hash mismatch"} return Result{Path: relPath, Status: StatusHashMismatch, Message: "hash mismatch"}
} }
// FindExtraFiles walks the filesystem and reports files not in the manifest. // FindExtraFiles walks the filesystem and reports files not in the manifest.
@ -250,7 +251,7 @@ func (c *Checker) FindExtraFiles(ctx context.Context, results chan<- Result) err
defer close(results) defer close(results)
} }
return afero.Walk(c.fs, c.basePath, func(path string, info os.FileInfo, err error) error { return afero.Walk(c.fs, string(c.basePath), func(path string, info os.FileInfo, err error) error {
if err != nil { if err != nil {
return err return err
} }
@ -267,10 +268,11 @@ func (c *Checker) FindExtraFiles(ctx context.Context, results chan<- Result) err
} }
// Get relative path // Get relative path
relPath, err := filepath.Rel(c.basePath, path) rel, err := filepath.Rel(string(c.basePath), path)
if err != nil { if err != nil {
return err return err
} }
relPath := mfer.RelFilePath(rel)
// Check if path is in manifest // Check if path is in manifest
if _, exists := c.manifestPaths[relPath]; !exists { if _, exists := c.manifestPaths[relPath]; !exists {

View File

@ -40,7 +40,7 @@ func createTestManifest(t *testing.T, fs afero.Fs, manifestPath string, files ma
builder := mfer.NewBuilder() builder := mfer.NewBuilder()
for path, content := range files { for path, content := range files {
reader := bytes.NewReader(content) reader := bytes.NewReader(content)
_, err := builder.AddFile(path, int64(len(content)), time.Now(), reader, nil) _, err := builder.AddFile(mfer.RelFilePath(path), mfer.FileSize(len(content)), mfer.ModTime(time.Now()), reader, nil)
require.NoError(t, err) require.NoError(t, err)
} }
@ -72,7 +72,7 @@ func TestNewChecker(t *testing.T) {
chk, err := NewChecker("/manifest.mf", "/", fs) chk, err := NewChecker("/manifest.mf", "/", fs)
require.NoError(t, err) require.NoError(t, err)
assert.NotNil(t, chk) assert.NotNil(t, chk)
assert.Equal(t, int64(2), chk.FileCount()) assert.Equal(t, mfer.FileCount(2), chk.FileCount())
}) })
t.Run("missing manifest", func(t *testing.T) { t.Run("missing manifest", func(t *testing.T) {
@ -101,8 +101,8 @@ func TestCheckerFileCountAndTotalBytes(t *testing.T) {
chk, err := NewChecker("/manifest.mf", "/", fs) chk, err := NewChecker("/manifest.mf", "/", fs)
require.NoError(t, err) require.NoError(t, err)
assert.Equal(t, int64(3), chk.FileCount()) assert.Equal(t, mfer.FileCount(3), chk.FileCount())
assert.Equal(t, int64(2+11+1000), chk.TotalBytes()) assert.Equal(t, mfer.FileSize(2+11+1000), chk.TotalBytes())
} }
func TestCheckAllFilesOK(t *testing.T) { func TestCheckAllFilesOK(t *testing.T) {
@ -158,7 +158,7 @@ func TestCheckMissingFile(t *testing.T) {
okCount++ okCount++
case StatusMissing: case StatusMissing:
missingCount++ missingCount++
assert.Equal(t, "missing.txt", r.Path) assert.Equal(t, mfer.RelFilePath("missing.txt"), r.Path)
} }
} }
@ -186,7 +186,7 @@ func TestCheckSizeMismatch(t *testing.T) {
r := <-results r := <-results
assert.Equal(t, StatusSizeMismatch, r.Status) assert.Equal(t, StatusSizeMismatch, r.Status)
assert.Equal(t, "file.txt", r.Path) assert.Equal(t, mfer.RelFilePath("file.txt"), r.Path)
} }
func TestCheckHashMismatch(t *testing.T) { func TestCheckHashMismatch(t *testing.T) {
@ -212,7 +212,7 @@ func TestCheckHashMismatch(t *testing.T) {
r := <-results r := <-results
assert.Equal(t, StatusHashMismatch, r.Status) assert.Equal(t, StatusHashMismatch, r.Status)
assert.Equal(t, "file.txt", r.Path) assert.Equal(t, mfer.RelFilePath("file.txt"), r.Path)
} }
func TestCheckWithProgress(t *testing.T) { func TestCheckWithProgress(t *testing.T) {
@ -246,11 +246,11 @@ func TestCheckWithProgress(t *testing.T) {
assert.NotEmpty(t, progressUpdates) assert.NotEmpty(t, progressUpdates)
// Final progress should show all files checked // Final progress should show all files checked
final := progressUpdates[len(progressUpdates)-1] final := progressUpdates[len(progressUpdates)-1]
assert.Equal(t, int64(2), final.TotalFiles) assert.Equal(t, mfer.FileCount(2), final.TotalFiles)
assert.Equal(t, int64(2), final.CheckedFiles) assert.Equal(t, mfer.FileCount(2), final.CheckedFiles)
assert.Equal(t, int64(300), final.TotalBytes) assert.Equal(t, mfer.FileSize(300), final.TotalBytes)
assert.Equal(t, int64(300), final.CheckedBytes) assert.Equal(t, mfer.FileSize(300), final.CheckedBytes)
assert.Equal(t, int64(0), final.Failures) assert.Equal(t, mfer.FileCount(0), final.Failures)
} }
func TestCheckContextCancellation(t *testing.T) { func TestCheckContextCancellation(t *testing.T) {
@ -301,7 +301,7 @@ func TestFindExtraFiles(t *testing.T) {
} }
assert.Len(t, extras, 1) assert.Len(t, extras, 1)
assert.Equal(t, "file2.txt", extras[0].Path) assert.Equal(t, mfer.RelFilePath("file2.txt"), extras[0].Path)
assert.Equal(t, StatusExtra, extras[0].Status) assert.Equal(t, StatusExtra, extras[0].Status)
assert.Equal(t, "not in manifest", extras[0].Message) assert.Equal(t, "not in manifest", extras[0].Message)
} }
@ -390,8 +390,8 @@ func TestCheckEmptyManifest(t *testing.T) {
chk, err := NewChecker("/manifest.mf", "/data", fs) chk, err := NewChecker("/manifest.mf", "/data", fs)
require.NoError(t, err) require.NoError(t, err)
assert.Equal(t, int64(0), chk.FileCount()) assert.Equal(t, mfer.FileCount(0), chk.FileCount())
assert.Equal(t, int64(0), chk.TotalBytes()) assert.Equal(t, mfer.FileSize(0), chk.TotalBytes())
results := make(chan Result, 10) results := make(chan Result, 10)
err = chk.Check(context.Background(), results, nil) err = chk.Check(context.Background(), results, nil)

View File

@ -113,7 +113,7 @@ func (mfa *CLIApp) freshenManifestOperation(ctx *cli.Context) error {
} }
// Handle dotfiles // Handle dotfiles
if !includeDotfiles && pathIsHidden(relPath) { if !includeDotfiles && mfer.IsHiddenPath(filepath.ToSlash(relPath)) {
if info.IsDir() { if info.IsDir() {
return filepath.SkipDir return filepath.SkipDir
} }
@ -274,10 +274,14 @@ func (mfa *CLIApp) freshenManifestOperation(ctx *cli.Context) error {
hashedFiles++ hashedFiles++
// Add to builder with computed hash // Add to builder with computed hash
addFileToBuilder(builder, e.path, e.size, e.mtime, hash) if err := addFileToBuilder(builder, e.path, e.size, e.mtime, hash); err != nil {
return fmt.Errorf("failed to add %s: %w", e.path, err)
}
} else { } else {
// Use existing entry // Use existing entry
addExistingToBuilder(builder, e.existing) if err := addExistingToBuilder(builder, e.existing); err != nil {
return fmt.Errorf("failed to add %s: %w", e.path, err)
}
} }
} }
@ -360,38 +364,15 @@ func hashFile(r io.Reader, size int64, progress func(int64)) ([]byte, int64, err
} }
// addFileToBuilder adds a new file entry to the builder // addFileToBuilder adds a new file entry to the builder
func addFileToBuilder(b *mfer.Builder, path string, size int64, mtime time.Time, hash []byte) { func addFileToBuilder(b *mfer.Builder, path string, size int64, mtime time.Time, hash []byte) error {
// Use the builder's internal method indirectly by creating an entry return b.AddFileWithHash(mfer.RelFilePath(path), mfer.FileSize(size), mfer.ModTime(mtime), hash)
// Since Builder.AddFile reads from a reader, we need to use a different approach
// We'll access the builder's files directly through a custom method
b.AddFileWithHash(path, size, mtime, hash)
} }
// addExistingToBuilder adds an existing manifest entry to the builder // addExistingToBuilder adds an existing manifest entry to the builder
func addExistingToBuilder(b *mfer.Builder, entry *mfer.MFFilePath) { func addExistingToBuilder(b *mfer.Builder, entry *mfer.MFFilePath) error {
mtime := time.Unix(entry.Mtime.Seconds, int64(entry.Mtime.Nanos)) mtime := time.Unix(entry.Mtime.Seconds, int64(entry.Mtime.Nanos))
if len(entry.Hashes) > 0 { if len(entry.Hashes) == 0 {
b.AddFileWithHash(entry.Path, entry.Size, mtime, entry.Hashes[0].MultiHash) return nil
} }
} return b.AddFileWithHash(mfer.RelFilePath(entry.Path), mfer.FileSize(entry.Size), mfer.ModTime(mtime), entry.Hashes[0].MultiHash)
// pathIsHidden checks if a path contains hidden components
func pathIsHidden(p string) bool {
// "." is not hidden, it's the current directory
if p == "." {
return false
}
// Check each path component
for p != "" && p != "." && p != "/" {
base := filepath.Base(p)
if len(base) > 0 && base[0] == '.' {
return true
}
parent := filepath.Dir(p)
if parent == p {
break
}
p = parent
}
return false
} }

View File

@ -13,29 +13,29 @@ import (
"github.com/spf13/afero" "github.com/spf13/afero"
"github.com/urfave/cli/v2" "github.com/urfave/cli/v2"
"sneak.berlin/go/mfer/internal/log" "sneak.berlin/go/mfer/internal/log"
"sneak.berlin/go/mfer/internal/scanner" "sneak.berlin/go/mfer/mfer"
) )
func (mfa *CLIApp) generateManifestOperation(ctx *cli.Context) error { func (mfa *CLIApp) generateManifestOperation(ctx *cli.Context) error {
log.Debug("generateManifestOperation()") log.Debug("generateManifestOperation()")
opts := &scanner.Options{ opts := &mfer.ScannerOptions{
IncludeDotfiles: ctx.Bool("IncludeDotfiles"), IncludeDotfiles: ctx.Bool("IncludeDotfiles"),
FollowSymLinks: ctx.Bool("FollowSymLinks"), FollowSymLinks: ctx.Bool("FollowSymLinks"),
Fs: mfa.Fs, Fs: mfa.Fs,
} }
s := scanner.NewWithOptions(opts) s := mfer.NewScannerWithOptions(opts)
// Phase 1: Enumeration - collect paths and stat files // Phase 1: Enumeration - collect paths and stat files
args := ctx.Args() args := ctx.Args()
showProgress := ctx.Bool("progress") showProgress := ctx.Bool("progress")
// Set up enumeration progress reporting // Set up enumeration progress reporting
var enumProgress chan scanner.EnumerateStatus var enumProgress chan mfer.EnumerateStatus
var enumWg sync.WaitGroup var enumWg sync.WaitGroup
if showProgress { if showProgress {
enumProgress = make(chan scanner.EnumerateStatus, 1) enumProgress = make(chan mfer.EnumerateStatus, 1)
enumWg.Add(1) enumWg.Add(1)
go func() { go func() {
defer enumWg.Done() defer enumWg.Done()
@ -117,10 +117,10 @@ func (mfa *CLIApp) generateManifestOperation(ctx *cli.Context) error {
}() }()
// Phase 2: Scan - read file contents and generate manifest // Phase 2: Scan - read file contents and generate manifest
var scanProgress chan scanner.ScanStatus var scanProgress chan mfer.ScanStatus
var scanWg sync.WaitGroup var scanWg sync.WaitGroup
if showProgress { if showProgress {
scanProgress = make(chan scanner.ScanStatus, 1) scanProgress = make(chan mfer.ScanStatus, 1)
scanWg.Add(1) scanWg.Add(1)
go func() { go func() {
defer scanWg.Done() defer scanWg.Done()

View File

@ -2,6 +2,7 @@ package mfer
import ( import (
"crypto/sha256" "crypto/sha256"
"errors"
"io" "io"
"sync" "sync"
"time" "time"
@ -9,9 +10,42 @@ import (
"github.com/multiformats/go-multihash" "github.com/multiformats/go-multihash"
) )
// Defined types that give manifest quantities distinct identities, so that
// paths, sizes, counts and timestamps cannot be mixed up without an
// explicit conversion.
type (
	// RelFilePath is a file path relative to the manifest.
	RelFilePath string

	// AbsFilePath is an absolute path on the filesystem.
	AbsFilePath string

	// FileSize is a file size measured in bytes.
	FileSize int64

	// FileCount is a number of files.
	FileCount int64

	// ModTime is a file's modification time. As a defined type over
	// time.Time it does not carry time.Time's methods; convert with
	// time.Time(m) to use them.
	ModTime time.Time

	// UnixSeconds is a number of seconds since the Unix epoch.
	UnixSeconds int64

	// UnixNanos is the nanosecond component of a timestamp (0-999999999).
	UnixNanos int32
)
// Timestamp returns the modification time as a protobuf Timestamp, split
// into whole seconds since the Unix epoch and the nanosecond remainder.
func (m ModTime) Timestamp() *Timestamp {
	asTime := time.Time(m)
	ts := new(Timestamp)
	ts.Seconds = asTime.Unix()
	ts.Nanos = int32(asTime.Nanosecond())
	return ts
}
// Multihash represents a multihash-encoded file hash (typically SHA2-256).
// It is a defined type over []byte, so plain byte slices can be passed
// where a Multihash is expected.
type Multihash []byte
// FileHashProgress reports progress during file hashing. // FileHashProgress reports progress during file hashing.
type FileHashProgress struct { type FileHashProgress struct {
BytesRead int64 // Total bytes read so far for the current file BytesRead FileSize // Total bytes read so far for the current file
} }
// Builder constructs a manifest by adding files one at a time. // Builder constructs a manifest by adding files one at a time.
@ -33,24 +67,24 @@ func NewBuilder() *Builder {
// Progress updates are sent to the progress channel (if non-nil) without blocking. // Progress updates are sent to the progress channel (if non-nil) without blocking.
// Returns the number of bytes read. // Returns the number of bytes read.
func (b *Builder) AddFile( func (b *Builder) AddFile(
path string, path RelFilePath,
size int64, size FileSize,
mtime time.Time, mtime ModTime,
reader io.Reader, reader io.Reader,
progress chan<- FileHashProgress, progress chan<- FileHashProgress,
) (int64, error) { ) (FileSize, error) {
// Create hash writer // Create hash writer
h := sha256.New() h := sha256.New()
// Read file in chunks, updating hash and progress // Read file in chunks, updating hash and progress
var totalRead int64 var totalRead FileSize
buf := make([]byte, 64*1024) // 64KB chunks buf := make([]byte, 64*1024) // 64KB chunks
for { for {
n, err := reader.Read(buf) n, err := reader.Read(buf)
if n > 0 { if n > 0 {
h.Write(buf[:n]) h.Write(buf[:n])
totalRead += int64(n) totalRead += FileSize(n)
sendFileHashProgress(progress, FileHashProgress{BytesRead: totalRead}) sendFileHashProgress(progress, FileHashProgress{BytesRead: totalRead})
} }
if err == io.EOF { if err == io.EOF {
@ -69,12 +103,12 @@ func (b *Builder) AddFile(
// Create file entry // Create file entry
entry := &MFFilePath{ entry := &MFFilePath{
Path: path, Path: string(path),
Size: size, Size: int64(size),
Hashes: []*MFFileChecksum{ Hashes: []*MFFileChecksum{
{MultiHash: mh}, {MultiHash: mh},
}, },
Mtime: newTimestampFromTime(mtime), Mtime: mtime.Timestamp(),
} }
b.mu.Lock() b.mu.Lock()
@ -104,19 +138,31 @@ func (b *Builder) FileCount() int {
// AddFileWithHash adds a file entry with a pre-computed hash. // AddFileWithHash adds a file entry with a pre-computed hash.
// This is useful when the hash is already known (e.g., from an existing manifest). // This is useful when the hash is already known (e.g., from an existing manifest).
func (b *Builder) AddFileWithHash(path string, size int64, mtime time.Time, hash []byte) { // Returns an error if path is empty, size is negative, or hash is nil/empty.
func (b *Builder) AddFileWithHash(path RelFilePath, size FileSize, mtime ModTime, hash Multihash) error {
if path == "" {
return errors.New("path cannot be empty")
}
if size < 0 {
return errors.New("size cannot be negative")
}
if len(hash) == 0 {
return errors.New("hash cannot be nil or empty")
}
entry := &MFFilePath{ entry := &MFFilePath{
Path: path, Path: string(path),
Size: size, Size: int64(size),
Hashes: []*MFFileChecksum{ Hashes: []*MFFileChecksum{
{MultiHash: hash}, {MultiHash: hash},
}, },
Mtime: newTimestampFromTime(mtime), Mtime: mtime.Timestamp(),
} }
b.mu.Lock() b.mu.Lock()
b.files = append(b.files, entry) b.files = append(b.files, entry)
b.mu.Unlock() b.mu.Unlock()
return nil
} }
// Build finalizes the manifest and writes it to the writer. // Build finalizes the manifest and writes it to the writer.

104
mfer/builder_test.go Normal file
View File

@ -0,0 +1,104 @@
package mfer
import (
"bytes"
"strings"
"testing"
"time"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)
// TestNewBuilder checks that NewBuilder returns a usable builder that
// starts out with no files.
func TestNewBuilder(t *testing.T) {
	b := NewBuilder()
	// require (not assert) so a nil builder stops the test here instead of
	// panicking on the method call below.
	require.NotNil(t, b)
	assert.Equal(t, 0, b.FileCount())
}
// TestBuilderAddFile hashes a small in-memory file through AddFile and
// checks that the reported byte count matches the content length and that
// the builder then holds exactly one file.
func TestBuilderAddFile(t *testing.T) {
	payload := []byte("test content")
	builder := NewBuilder()

	n, err := builder.AddFile(
		"test.txt",
		FileSize(len(payload)),
		ModTime(time.Now()),
		bytes.NewReader(payload),
		nil, // no progress channel
	)

	require.NoError(t, err)
	assert.Equal(t, FileSize(len(payload)), n)
	assert.Equal(t, 1, builder.FileCount())
}
// TestBuilderAddFileWithHash adds one entry with a pre-computed hash and
// checks that it is accepted and counted.
func TestBuilderAddFileWithHash(t *testing.T) {
	// SHA256 multihash is 34 bytes
	precomputed := make([]byte, 34)
	builder := NewBuilder()

	require.NoError(t, builder.AddFileWithHash("test.txt", 100, ModTime(time.Now()), precomputed))
	assert.Equal(t, 1, builder.FileCount())
}
func TestBuilderAddFileWithHashValidation(t *testing.T) {
t.Run("empty path", func(t *testing.T) {
b := NewBuilder()
hash := make([]byte, 34)
err := b.AddFileWithHash("", 100, ModTime(time.Now()), hash)
assert.Error(t, err)
assert.Contains(t, err.Error(), "path")
})
t.Run("negative size", func(t *testing.T) {
b := NewBuilder()
hash := make([]byte, 34)
err := b.AddFileWithHash("test.txt", -1, ModTime(time.Now()), hash)
assert.Error(t, err)
assert.Contains(t, err.Error(), "size")
})
t.Run("nil hash", func(t *testing.T) {
b := NewBuilder()
err := b.AddFileWithHash("test.txt", 100, ModTime(time.Now()), nil)
assert.Error(t, err)
assert.Contains(t, err.Error(), "hash")
})
t.Run("empty hash", func(t *testing.T) {
b := NewBuilder()
err := b.AddFileWithHash("test.txt", 100, ModTime(time.Now()), []byte{})
assert.Error(t, err)
assert.Contains(t, err.Error(), "hash")
})
t.Run("valid inputs", func(t *testing.T) {
b := NewBuilder()
hash := make([]byte, 34)
err := b.AddFileWithHash("test.txt", 100, ModTime(time.Now()), hash)
assert.NoError(t, err)
assert.Equal(t, 1, b.FileCount())
})
}
// TestBuilderBuild serializes a one-file manifest and checks that the
// output starts with the manifest magic bytes.
func TestBuilderBuild(t *testing.T) {
	payload := []byte("test content")
	builder := NewBuilder()

	_, addErr := builder.AddFile("test.txt", FileSize(len(payload)), ModTime(time.Now()), bytes.NewReader(payload), nil)
	require.NoError(t, addErr)

	out := new(bytes.Buffer)
	require.NoError(t, builder.Build(out))

	// Should have magic bytes
	hasMagic := strings.HasPrefix(out.String(), MAGIC)
	assert.True(t, hasMagic)
}
// TestBuilderBuildEmpty checks that a builder with no files still
// serializes successfully and emits the manifest magic bytes.
func TestBuilderBuildEmpty(t *testing.T) {
	out := new(bytes.Buffer)

	buildErr := NewBuilder().Build(out)
	require.NoError(t, buildErr)

	// Should still produce valid manifest with 0 files
	hasMagic := strings.HasPrefix(out.String(), MAGIC)
	assert.True(t, hasMagic)
}

View File

@ -1,4 +1,4 @@
package scanner package mfer
import ( import (
"context" "context"
@ -13,7 +13,6 @@ import (
"github.com/dustin/go-humanize" "github.com/dustin/go-humanize"
"github.com/spf13/afero" "github.com/spf13/afero"
"sneak.berlin/go/mfer/internal/log" "sneak.berlin/go/mfer/internal/log"
"sneak.berlin/go/mfer/mfer"
) )
// Phase 1: Enumeration // Phase 1: Enumeration
@ -23,8 +22,8 @@ import (
// EnumerateStatus contains progress information for the enumeration phase. // EnumerateStatus contains progress information for the enumeration phase.
type EnumerateStatus struct { type EnumerateStatus struct {
FilesFound int64 // Number of files discovered so far FilesFound FileCount // Number of files discovered so far
BytesFound int64 // Total size of discovered files (from stat) BytesFound FileSize // Total size of discovered files (from stat)
} }
// Phase 2: Scan (ToManifest) // Phase 2: Scan (ToManifest)
@ -34,16 +33,16 @@ type EnumerateStatus struct {
// ScanStatus contains progress information for the scan phase. // ScanStatus contains progress information for the scan phase.
type ScanStatus struct { type ScanStatus struct {
TotalFiles int64 // Total number of files to scan TotalFiles FileCount // Total number of files to scan
ScannedFiles int64 // Number of files scanned so far ScannedFiles FileCount // Number of files scanned so far
TotalBytes int64 // Total bytes to read (sum of all file sizes) TotalBytes FileSize // Total bytes to read (sum of all file sizes)
ScannedBytes int64 // Bytes read so far ScannedBytes FileSize // Bytes read so far
BytesPerSec float64 // Current throughput rate BytesPerSec float64 // Current throughput rate
ETA time.Duration // Estimated time to completion ETA time.Duration // Estimated time to completion
} }
// Options configures scanner behavior. // ScannerOptions configures scanner behavior.
type Options struct { type ScannerOptions struct {
IncludeDotfiles bool // Include files and directories starting with a dot (default: exclude) IncludeDotfiles bool // Include files and directories starting with a dot (default: exclude)
FollowSymLinks bool // Resolve symlinks instead of skipping them FollowSymLinks bool // Resolve symlinks instead of skipping them
Fs afero.Fs // Filesystem to use, defaults to OsFs if nil Fs afero.Fs // Filesystem to use, defaults to OsFs if nil
@ -51,10 +50,10 @@ type Options struct {
// FileEntry represents a file that has been enumerated. // FileEntry represents a file that has been enumerated.
type FileEntry struct { type FileEntry struct {
Path string // Relative path (used in manifest) Path RelFilePath // Relative path (used in manifest)
AbsPath string // Absolute path (used for reading file content) AbsPath AbsFilePath // Absolute path (used for reading file content)
Size int64 // File size in bytes Size FileSize // File size in bytes
Mtime time.Time // Last modification time Mtime ModTime // Last modification time
Ctime time.Time // Creation time (platform-dependent) Ctime time.Time // Creation time (platform-dependent)
} }
@ -62,19 +61,20 @@ type FileEntry struct {
type Scanner struct { type Scanner struct {
mu sync.RWMutex mu sync.RWMutex
files []*FileEntry files []*FileEntry
options *Options totalBytes FileSize // cached sum of all file sizes
options *ScannerOptions
fs afero.Fs fs afero.Fs
} }
// New creates a new Scanner with default options. // NewScanner creates a new Scanner with default options.
func New() *Scanner { func NewScanner() *Scanner {
return NewWithOptions(nil) return NewScannerWithOptions(nil)
} }
// NewWithOptions creates a new Scanner with the given options. // NewScannerWithOptions creates a new Scanner with the given options.
func NewWithOptions(opts *Options) *Scanner { func NewScannerWithOptions(opts *ScannerOptions) *Scanner {
if opts == nil { if opts == nil {
opts = &Options{} opts = &ScannerOptions{}
} }
fs := opts.Fs fs := opts.Fs
if fs == nil { if fs == nil {
@ -154,7 +154,7 @@ func (s *Scanner) enumerateFS(afs afero.Fs, basePath string, progress chan<- Enu
if err != nil { if err != nil {
return err return err
} }
if !s.options.IncludeDotfiles && pathIsHidden(p) { if !s.options.IncludeDotfiles && IsHiddenPath(p) {
if info.IsDir() { if info.IsDir() {
return filepath.SkipDir return filepath.SkipDir
} }
@ -206,21 +206,19 @@ func (s *Scanner) enumerateFileWithInfo(filePath string, basePath string, info f
} }
entry := &FileEntry{ entry := &FileEntry{
Path: cleanPath, Path: RelFilePath(cleanPath),
AbsPath: absPath, AbsPath: AbsFilePath(absPath),
Size: info.Size(), Size: FileSize(info.Size()),
Mtime: info.ModTime(), Mtime: ModTime(info.ModTime()),
// Note: Ctime not available from fs.FileInfo on all platforms // Note: Ctime not available from fs.FileInfo on all platforms
// Will need platform-specific code to extract it // Will need platform-specific code to extract it
} }
s.mu.Lock() s.mu.Lock()
s.files = append(s.files, entry) s.files = append(s.files, entry)
filesFound := int64(len(s.files)) s.totalBytes += entry.Size
var bytesFound int64 filesFound := FileCount(len(s.files))
for _, f := range s.files { bytesFound := s.totalBytes
bytesFound += f.Size
}
s.mu.Unlock() s.mu.Unlock()
sendEnumerateStatus(progress, EnumerateStatus{ sendEnumerateStatus(progress, EnumerateStatus{
@ -241,21 +239,17 @@ func (s *Scanner) Files() []*FileEntry {
} }
// FileCount returns the number of files in the scanner. // FileCount returns the number of files in the scanner.
func (s *Scanner) FileCount() int64 { func (s *Scanner) FileCount() FileCount {
s.mu.RLock() s.mu.RLock()
defer s.mu.RUnlock() defer s.mu.RUnlock()
return int64(len(s.files)) return FileCount(len(s.files))
} }
// TotalBytes returns the total size of all files in the scanner. // TotalBytes returns the total size of all files in the scanner.
func (s *Scanner) TotalBytes() int64 { func (s *Scanner) TotalBytes() FileSize {
s.mu.RLock() s.mu.RLock()
defer s.mu.RUnlock() defer s.mu.RUnlock()
var total int64 return s.totalBytes
for _, f := range s.files {
total += f.Size
}
return total
} }
// ToManifest reads all file contents, computes hashes, and generates a manifest. // ToManifest reads all file contents, computes hashes, and generates a manifest.
@ -270,17 +264,17 @@ func (s *Scanner) ToManifest(ctx context.Context, w io.Writer, progress chan<- S
s.mu.RLock() s.mu.RLock()
files := make([]*FileEntry, len(s.files)) files := make([]*FileEntry, len(s.files))
copy(files, s.files) copy(files, s.files)
totalFiles := int64(len(files)) totalFiles := FileCount(len(files))
var totalBytes int64 var totalBytes FileSize
for _, f := range files { for _, f := range files {
totalBytes += f.Size totalBytes += f.Size
} }
s.mu.RUnlock() s.mu.RUnlock()
builder := mfer.NewBuilder() builder := NewBuilder()
var scannedFiles int64 var scannedFiles FileCount
var scannedBytes int64 var scannedBytes FileSize
lastProgressTime := time.Now() lastProgressTime := time.Now()
startTime := time.Now() startTime := time.Now()
@ -293,18 +287,18 @@ func (s *Scanner) ToManifest(ctx context.Context, w io.Writer, progress chan<- S
} }
// Open file // Open file
f, err := s.fs.Open(entry.AbsPath) f, err := s.fs.Open(string(entry.AbsPath))
if err != nil { if err != nil {
return err return err
} }
// Create progress channel for this file // Create progress channel for this file
var fileProgress chan mfer.FileHashProgress var fileProgress chan FileHashProgress
var wg sync.WaitGroup var wg sync.WaitGroup
if progress != nil { if progress != nil {
fileProgress = make(chan mfer.FileHashProgress, 1) fileProgress = make(chan FileHashProgress, 1)
wg.Add(1) wg.Add(1)
go func(baseScannedBytes int64) { go func(baseScannedBytes FileSize) {
defer wg.Done() defer wg.Done()
for p := range fileProgress { for p := range fileProgress {
// Send progress at most once per second // Send progress at most once per second
@ -382,9 +376,10 @@ func (s *Scanner) ToManifest(ctx context.Context, w io.Writer, progress chan<- S
return builder.Build(w) return builder.Build(w)
} }
// pathIsHidden returns true if the path or any of its parent directories // IsHiddenPath returns true if the path or any of its parent directories
// start with a dot (hidden files/directories). // start with a dot (hidden files/directories).
func pathIsHidden(p string) bool { // The path should use forward slashes.
func IsHiddenPath(p string) bool {
tp := path.Clean(p) tp := path.Clean(p)
if strings.HasPrefix(tp, ".") { if strings.HasPrefix(tp, ".") {
return true return true

View File

@ -1,4 +1,4 @@
package scanner package mfer
import ( import (
"bytes" "bytes"
@ -11,77 +11,77 @@ import (
"github.com/stretchr/testify/require" "github.com/stretchr/testify/require"
) )
func TestNew(t *testing.T) { func TestNewScanner(t *testing.T) {
s := New() s := NewScanner()
assert.NotNil(t, s) assert.NotNil(t, s)
assert.Equal(t, int64(0), s.FileCount()) assert.Equal(t, FileCount(0), s.FileCount())
assert.Equal(t, int64(0), s.TotalBytes()) assert.Equal(t, FileSize(0), s.TotalBytes())
} }
func TestNewWithOptions(t *testing.T) { func TestNewScannerWithOptions(t *testing.T) {
t.Run("nil options", func(t *testing.T) { t.Run("nil options", func(t *testing.T) {
s := NewWithOptions(nil) s := NewScannerWithOptions(nil)
assert.NotNil(t, s) assert.NotNil(t, s)
}) })
t.Run("with options", func(t *testing.T) { t.Run("with options", func(t *testing.T) {
fs := afero.NewMemMapFs() fs := afero.NewMemMapFs()
opts := &Options{ opts := &ScannerOptions{
IncludeDotfiles: true, IncludeDotfiles: true,
FollowSymLinks: true, FollowSymLinks: true,
Fs: fs, Fs: fs,
} }
s := NewWithOptions(opts) s := NewScannerWithOptions(opts)
assert.NotNil(t, s) assert.NotNil(t, s)
}) })
} }
func TestEnumerateFile(t *testing.T) { func TestScannerEnumerateFile(t *testing.T) {
fs := afero.NewMemMapFs() fs := afero.NewMemMapFs()
require.NoError(t, afero.WriteFile(fs, "/test.txt", []byte("hello world"), 0644)) require.NoError(t, afero.WriteFile(fs, "/test.txt", []byte("hello world"), 0644))
s := NewWithOptions(&Options{Fs: fs}) s := NewScannerWithOptions(&ScannerOptions{Fs: fs})
err := s.EnumerateFile("/test.txt") err := s.EnumerateFile("/test.txt")
require.NoError(t, err) require.NoError(t, err)
assert.Equal(t, int64(1), s.FileCount()) assert.Equal(t, FileCount(1), s.FileCount())
assert.Equal(t, int64(11), s.TotalBytes()) assert.Equal(t, FileSize(11), s.TotalBytes())
files := s.Files() files := s.Files()
require.Len(t, files, 1) require.Len(t, files, 1)
assert.Equal(t, "test.txt", files[0].Path) assert.Equal(t, RelFilePath("test.txt"), files[0].Path)
assert.Equal(t, int64(11), files[0].Size) assert.Equal(t, FileSize(11), files[0].Size)
} }
func TestEnumerateFileMissing(t *testing.T) { func TestScannerEnumerateFileMissing(t *testing.T) {
fs := afero.NewMemMapFs() fs := afero.NewMemMapFs()
s := NewWithOptions(&Options{Fs: fs}) s := NewScannerWithOptions(&ScannerOptions{Fs: fs})
err := s.EnumerateFile("/nonexistent.txt") err := s.EnumerateFile("/nonexistent.txt")
assert.Error(t, err) assert.Error(t, err)
} }
func TestEnumeratePath(t *testing.T) { func TestScannerEnumeratePath(t *testing.T) {
fs := afero.NewMemMapFs() fs := afero.NewMemMapFs()
require.NoError(t, fs.MkdirAll("/testdir/subdir", 0755)) require.NoError(t, fs.MkdirAll("/testdir/subdir", 0755))
require.NoError(t, afero.WriteFile(fs, "/testdir/file1.txt", []byte("one"), 0644)) require.NoError(t, afero.WriteFile(fs, "/testdir/file1.txt", []byte("one"), 0644))
require.NoError(t, afero.WriteFile(fs, "/testdir/file2.txt", []byte("two"), 0644)) require.NoError(t, afero.WriteFile(fs, "/testdir/file2.txt", []byte("two"), 0644))
require.NoError(t, afero.WriteFile(fs, "/testdir/subdir/file3.txt", []byte("three"), 0644)) require.NoError(t, afero.WriteFile(fs, "/testdir/subdir/file3.txt", []byte("three"), 0644))
s := NewWithOptions(&Options{Fs: fs}) s := NewScannerWithOptions(&ScannerOptions{Fs: fs})
err := s.EnumeratePath("/testdir", nil) err := s.EnumeratePath("/testdir", nil)
require.NoError(t, err) require.NoError(t, err)
assert.Equal(t, int64(3), s.FileCount()) assert.Equal(t, FileCount(3), s.FileCount())
assert.Equal(t, int64(3+3+5), s.TotalBytes()) assert.Equal(t, FileSize(3+3+5), s.TotalBytes())
} }
func TestEnumeratePathWithProgress(t *testing.T) { func TestScannerEnumeratePathWithProgress(t *testing.T) {
fs := afero.NewMemMapFs() fs := afero.NewMemMapFs()
require.NoError(t, fs.MkdirAll("/testdir", 0755)) require.NoError(t, fs.MkdirAll("/testdir", 0755))
require.NoError(t, afero.WriteFile(fs, "/testdir/file1.txt", []byte("one"), 0644)) require.NoError(t, afero.WriteFile(fs, "/testdir/file1.txt", []byte("one"), 0644))
require.NoError(t, afero.WriteFile(fs, "/testdir/file2.txt", []byte("two"), 0644)) require.NoError(t, afero.WriteFile(fs, "/testdir/file2.txt", []byte("two"), 0644))
s := NewWithOptions(&Options{Fs: fs}) s := NewScannerWithOptions(&ScannerOptions{Fs: fs})
progress := make(chan EnumerateStatus, 10) progress := make(chan EnumerateStatus, 10)
err := s.EnumeratePath("/testdir", progress) err := s.EnumeratePath("/testdir", progress)
@ -95,25 +95,25 @@ func TestEnumeratePathWithProgress(t *testing.T) {
assert.NotEmpty(t, updates) assert.NotEmpty(t, updates)
// Final update should show all files // Final update should show all files
final := updates[len(updates)-1] final := updates[len(updates)-1]
assert.Equal(t, int64(2), final.FilesFound) assert.Equal(t, FileCount(2), final.FilesFound)
assert.Equal(t, int64(6), final.BytesFound) assert.Equal(t, FileSize(6), final.BytesFound)
} }
func TestEnumeratePaths(t *testing.T) { func TestScannerEnumeratePaths(t *testing.T) {
fs := afero.NewMemMapFs() fs := afero.NewMemMapFs()
require.NoError(t, fs.MkdirAll("/dir1", 0755)) require.NoError(t, fs.MkdirAll("/dir1", 0755))
require.NoError(t, fs.MkdirAll("/dir2", 0755)) require.NoError(t, fs.MkdirAll("/dir2", 0755))
require.NoError(t, afero.WriteFile(fs, "/dir1/a.txt", []byte("aaa"), 0644)) require.NoError(t, afero.WriteFile(fs, "/dir1/a.txt", []byte("aaa"), 0644))
require.NoError(t, afero.WriteFile(fs, "/dir2/b.txt", []byte("bbb"), 0644)) require.NoError(t, afero.WriteFile(fs, "/dir2/b.txt", []byte("bbb"), 0644))
s := NewWithOptions(&Options{Fs: fs}) s := NewScannerWithOptions(&ScannerOptions{Fs: fs})
err := s.EnumeratePaths(nil, "/dir1", "/dir2") err := s.EnumeratePaths(nil, "/dir1", "/dir2")
require.NoError(t, err) require.NoError(t, err)
assert.Equal(t, int64(2), s.FileCount()) assert.Equal(t, FileCount(2), s.FileCount())
} }
func TestExcludeDotfiles(t *testing.T) { func TestScannerExcludeDotfiles(t *testing.T) {
fs := afero.NewMemMapFs() fs := afero.NewMemMapFs()
require.NoError(t, fs.MkdirAll("/testdir/.hidden", 0755)) require.NoError(t, fs.MkdirAll("/testdir/.hidden", 0755))
require.NoError(t, afero.WriteFile(fs, "/testdir/visible.txt", []byte("visible"), 0644)) require.NoError(t, afero.WriteFile(fs, "/testdir/visible.txt", []byte("visible"), 0644))
@ -121,54 +121,31 @@ func TestExcludeDotfiles(t *testing.T) {
require.NoError(t, afero.WriteFile(fs, "/testdir/.hidden/inside.txt", []byte("inside"), 0644)) require.NoError(t, afero.WriteFile(fs, "/testdir/.hidden/inside.txt", []byte("inside"), 0644))
t.Run("exclude by default", func(t *testing.T) { t.Run("exclude by default", func(t *testing.T) {
s := NewWithOptions(&Options{Fs: fs, IncludeDotfiles: false}) s := NewScannerWithOptions(&ScannerOptions{Fs: fs, IncludeDotfiles: false})
err := s.EnumeratePath("/testdir", nil) err := s.EnumeratePath("/testdir", nil)
require.NoError(t, err) require.NoError(t, err)
assert.Equal(t, int64(1), s.FileCount()) assert.Equal(t, FileCount(1), s.FileCount())
files := s.Files() files := s.Files()
assert.Equal(t, "visible.txt", files[0].Path) assert.Equal(t, RelFilePath("visible.txt"), files[0].Path)
}) })
t.Run("include when enabled", func(t *testing.T) { t.Run("include when enabled", func(t *testing.T) {
s := NewWithOptions(&Options{Fs: fs, IncludeDotfiles: true}) s := NewScannerWithOptions(&ScannerOptions{Fs: fs, IncludeDotfiles: true})
err := s.EnumeratePath("/testdir", nil) err := s.EnumeratePath("/testdir", nil)
require.NoError(t, err) require.NoError(t, err)
assert.Equal(t, int64(3), s.FileCount()) assert.Equal(t, FileCount(3), s.FileCount())
}) })
} }
func TestPathIsHidden(t *testing.T) { func TestScannerToManifest(t *testing.T) {
tests := []struct {
path string
hidden bool
}{
{"file.txt", false},
{".hidden", true},
{"dir/file.txt", false},
{"dir/.hidden", true},
{".dir/file.txt", true},
{"/absolute/path", false},
{"/absolute/.hidden", true},
{"./relative", false}, // path.Clean removes leading ./
{"a/b/c/.d/e", true},
}
for _, tt := range tests {
t.Run(tt.path, func(t *testing.T) {
assert.Equal(t, tt.hidden, pathIsHidden(tt.path), "pathIsHidden(%q)", tt.path)
})
}
}
func TestToManifest(t *testing.T) {
fs := afero.NewMemMapFs() fs := afero.NewMemMapFs()
require.NoError(t, fs.MkdirAll("/testdir", 0755)) require.NoError(t, fs.MkdirAll("/testdir", 0755))
require.NoError(t, afero.WriteFile(fs, "/testdir/file1.txt", []byte("content one"), 0644)) require.NoError(t, afero.WriteFile(fs, "/testdir/file1.txt", []byte("content one"), 0644))
require.NoError(t, afero.WriteFile(fs, "/testdir/file2.txt", []byte("content two"), 0644)) require.NoError(t, afero.WriteFile(fs, "/testdir/file2.txt", []byte("content two"), 0644))
s := NewWithOptions(&Options{Fs: fs}) s := NewScannerWithOptions(&ScannerOptions{Fs: fs})
err := s.EnumeratePath("/testdir", nil) err := s.EnumeratePath("/testdir", nil)
require.NoError(t, err) require.NoError(t, err)
@ -178,15 +155,15 @@ func TestToManifest(t *testing.T) {
// Manifest should have magic bytes // Manifest should have magic bytes
assert.True(t, buf.Len() > 0) assert.True(t, buf.Len() > 0)
assert.Equal(t, "ZNAVSRFG", string(buf.Bytes()[:8])) assert.Equal(t, MAGIC, string(buf.Bytes()[:8]))
} }
func TestToManifestWithProgress(t *testing.T) { func TestScannerToManifestWithProgress(t *testing.T) {
fs := afero.NewMemMapFs() fs := afero.NewMemMapFs()
require.NoError(t, fs.MkdirAll("/testdir", 0755)) require.NoError(t, fs.MkdirAll("/testdir", 0755))
require.NoError(t, afero.WriteFile(fs, "/testdir/file.txt", bytes.Repeat([]byte("x"), 1000), 0644)) require.NoError(t, afero.WriteFile(fs, "/testdir/file.txt", bytes.Repeat([]byte("x"), 1000), 0644))
s := NewWithOptions(&Options{Fs: fs}) s := NewScannerWithOptions(&ScannerOptions{Fs: fs})
err := s.EnumeratePath("/testdir", nil) err := s.EnumeratePath("/testdir", nil)
require.NoError(t, err) require.NoError(t, err)
@ -204,13 +181,13 @@ func TestToManifestWithProgress(t *testing.T) {
assert.NotEmpty(t, updates) assert.NotEmpty(t, updates)
// Final update should show completion // Final update should show completion
final := updates[len(updates)-1] final := updates[len(updates)-1]
assert.Equal(t, int64(1), final.TotalFiles) assert.Equal(t, FileCount(1), final.TotalFiles)
assert.Equal(t, int64(1), final.ScannedFiles) assert.Equal(t, FileCount(1), final.ScannedFiles)
assert.Equal(t, int64(1000), final.TotalBytes) assert.Equal(t, FileSize(1000), final.TotalBytes)
assert.Equal(t, int64(1000), final.ScannedBytes) assert.Equal(t, FileSize(1000), final.ScannedBytes)
} }
func TestToManifestContextCancellation(t *testing.T) { func TestScannerToManifestContextCancellation(t *testing.T) {
fs := afero.NewMemMapFs() fs := afero.NewMemMapFs()
require.NoError(t, fs.MkdirAll("/testdir", 0755)) require.NoError(t, fs.MkdirAll("/testdir", 0755))
// Create many files to ensure we have time to cancel // Create many files to ensure we have time to cancel
@ -219,7 +196,7 @@ func TestToManifestContextCancellation(t *testing.T) {
require.NoError(t, afero.WriteFile(fs, "/testdir/"+name, bytes.Repeat([]byte("x"), 100), 0644)) require.NoError(t, afero.WriteFile(fs, "/testdir/"+name, bytes.Repeat([]byte("x"), 100), 0644))
} }
s := NewWithOptions(&Options{Fs: fs}) s := NewScannerWithOptions(&ScannerOptions{Fs: fs})
err := s.EnumeratePath("/testdir", nil) err := s.EnumeratePath("/testdir", nil)
require.NoError(t, err) require.NoError(t, err)
@ -231,9 +208,9 @@ func TestToManifestContextCancellation(t *testing.T) {
assert.ErrorIs(t, err, context.Canceled) assert.ErrorIs(t, err, context.Canceled)
} }
func TestToManifestEmptyScanner(t *testing.T) { func TestScannerToManifestEmptyScanner(t *testing.T) {
fs := afero.NewMemMapFs() fs := afero.NewMemMapFs()
s := NewWithOptions(&Options{Fs: fs}) s := NewScannerWithOptions(&ScannerOptions{Fs: fs})
var buf bytes.Buffer var buf bytes.Buffer
err := s.ToManifest(context.Background(), &buf, nil) err := s.ToManifest(context.Background(), &buf, nil)
@ -241,14 +218,14 @@ func TestToManifestEmptyScanner(t *testing.T) {
// Should still produce a valid manifest // Should still produce a valid manifest
assert.True(t, buf.Len() > 0) assert.True(t, buf.Len() > 0)
assert.Equal(t, "ZNAVSRFG", string(buf.Bytes()[:8])) assert.Equal(t, MAGIC, string(buf.Bytes()[:8]))
} }
func TestFilesCopiesSlice(t *testing.T) { func TestScannerFilesCopiesSlice(t *testing.T) {
fs := afero.NewMemMapFs() fs := afero.NewMemMapFs()
require.NoError(t, afero.WriteFile(fs, "/test.txt", []byte("hello"), 0644)) require.NoError(t, afero.WriteFile(fs, "/test.txt", []byte("hello"), 0644))
s := NewWithOptions(&Options{Fs: fs}) s := NewScannerWithOptions(&ScannerOptions{Fs: fs})
require.NoError(t, s.EnumerateFile("/test.txt")) require.NoError(t, s.EnumerateFile("/test.txt"))
files1 := s.Files() files1 := s.Files()
@ -258,7 +235,7 @@ func TestFilesCopiesSlice(t *testing.T) {
assert.NotSame(t, &files1[0], &files2[0]) assert.NotSame(t, &files1[0], &files2[0])
} }
func TestEnumerateFS(t *testing.T) { func TestScannerEnumerateFS(t *testing.T) {
fs := afero.NewMemMapFs() fs := afero.NewMemMapFs()
require.NoError(t, fs.MkdirAll("/testdir/sub", 0755)) require.NoError(t, fs.MkdirAll("/testdir/sub", 0755))
require.NoError(t, afero.WriteFile(fs, "/testdir/file.txt", []byte("hello"), 0644)) require.NoError(t, afero.WriteFile(fs, "/testdir/file.txt", []byte("hello"), 0644))
@ -267,11 +244,11 @@ func TestEnumerateFS(t *testing.T) {
// Create a basepath filesystem // Create a basepath filesystem
baseFs := afero.NewBasePathFs(fs, "/testdir") baseFs := afero.NewBasePathFs(fs, "/testdir")
s := NewWithOptions(&Options{Fs: fs}) s := NewScannerWithOptions(&ScannerOptions{Fs: fs})
err := s.EnumerateFS(baseFs, "/testdir", nil) err := s.EnumerateFS(baseFs, "/testdir", nil)
require.NoError(t, err) require.NoError(t, err)
assert.Equal(t, int64(2), s.FileCount()) assert.Equal(t, FileCount(2), s.FileCount())
} }
func TestSendEnumerateStatusNonBlocking(t *testing.T) { func TestSendEnumerateStatusNonBlocking(t *testing.T) {
@ -317,27 +294,27 @@ func TestSendStatusNilChannel(t *testing.T) {
sendScanStatus(nil, ScanStatus{}) sendScanStatus(nil, ScanStatus{})
} }
func TestFileEntryFields(t *testing.T) { func TestScannerFileEntryFields(t *testing.T) {
fs := afero.NewMemMapFs() fs := afero.NewMemMapFs()
now := time.Now().Truncate(time.Second) now := time.Now().Truncate(time.Second)
require.NoError(t, afero.WriteFile(fs, "/test.txt", []byte("content"), 0644)) require.NoError(t, afero.WriteFile(fs, "/test.txt", []byte("content"), 0644))
require.NoError(t, fs.Chtimes("/test.txt", now, now)) require.NoError(t, fs.Chtimes("/test.txt", now, now))
s := NewWithOptions(&Options{Fs: fs}) s := NewScannerWithOptions(&ScannerOptions{Fs: fs})
require.NoError(t, s.EnumerateFile("/test.txt")) require.NoError(t, s.EnumerateFile("/test.txt"))
files := s.Files() files := s.Files()
require.Len(t, files, 1) require.Len(t, files, 1)
entry := files[0] entry := files[0]
assert.Equal(t, "test.txt", entry.Path) assert.Equal(t, RelFilePath("test.txt"), entry.Path)
assert.Contains(t, entry.AbsPath, "test.txt") assert.Contains(t, string(entry.AbsPath), "test.txt")
assert.Equal(t, int64(7), entry.Size) assert.Equal(t, FileSize(7), entry.Size)
// Mtime should be set (within a second of now) // Mtime should be set (within a second of now)
assert.WithinDuration(t, now, entry.Mtime, 2*time.Second) assert.WithinDuration(t, now, time.Time(entry.Mtime), 2*time.Second)
} }
func TestLargeFileEnumeration(t *testing.T) { func TestScannerLargeFileEnumeration(t *testing.T) {
fs := afero.NewMemMapFs() fs := afero.NewMemMapFs()
require.NoError(t, fs.MkdirAll("/testdir", 0755)) require.NoError(t, fs.MkdirAll("/testdir", 0755))
@ -347,7 +324,7 @@ func TestLargeFileEnumeration(t *testing.T) {
require.NoError(t, afero.WriteFile(fs, name, []byte("data"), 0644)) require.NoError(t, afero.WriteFile(fs, name, []byte("data"), 0644))
} }
s := NewWithOptions(&Options{Fs: fs}) s := NewScannerWithOptions(&ScannerOptions{Fs: fs})
progress := make(chan EnumerateStatus, 200) progress := make(chan EnumerateStatus, 200)
err := s.EnumeratePath("/testdir", progress) err := s.EnumeratePath("/testdir", progress)
@ -357,6 +334,29 @@ func TestLargeFileEnumeration(t *testing.T) {
for range progress { for range progress {
} }
assert.Equal(t, int64(100), s.FileCount()) assert.Equal(t, FileCount(100), s.FileCount())
assert.Equal(t, int64(400), s.TotalBytes()) // 100 * 4 bytes assert.Equal(t, FileSize(400), s.TotalBytes()) // 100 * 4 bytes
}
func TestIsHiddenPath(t *testing.T) {
tests := []struct {
path string
hidden bool
}{
{"file.txt", false},
{".hidden", true},
{"dir/file.txt", false},
{"dir/.hidden", true},
{".dir/file.txt", true},
{"/absolute/path", false},
{"/absolute/.hidden", true},
{"./relative", false}, // path.Clean removes leading ./
{"a/b/c/.d/e", true},
}
for _, tt := range tests {
t.Run(tt.path, func(t *testing.T) {
assert.Equal(t, tt.hidden, IsHiddenPath(tt.path), "IsHiddenPath(%q)", tt.path)
})
}
} }

53
mfer/url.go Normal file
View File

@ -0,0 +1,53 @@
package mfer
import (
	"fmt"
	"net/url"
	"strings"
)
// String-backed URL types. Giving each role its own named type keeps a
// manifest location, a file location and a base location from being passed
// in place of one another without an explicit conversion.
type (
	// ManifestURL is the URL of a manifest file.
	ManifestURL string

	// FileURL is the URL of a single file to be fetched.
	FileURL string

	// BaseURL is the URL that file URLs are built from; see JoinPath.
	BaseURL string
)
// JoinPath appends a relative file path to the base URL and returns the
// resulting file URL.
//
// The path is split on "/" and every segment is percent-encoded on its own,
// so directory separators stay real separators ("dir/a b.txt" becomes
// ".../dir/a%20b.txt") while characters such as "?", "#", "%" and spaces
// inside a segment cannot alter the URL structure.
//
// To keep the result underneath the base URL, JoinPath returns an error when
// path is absolute (starts with "/") or contains a "." or ".." segment. It
// also returns an error when the base URL itself cannot be parsed. An empty
// path yields the base URL with a trailing slash.
func (b BaseURL) JoinPath(path RelFilePath) (FileURL, error) {
	base, err := url.Parse(string(b))
	if err != nil {
		return "", err
	}

	rel := string(path)
	if strings.HasPrefix(rel, "/") {
		return "", fmt.Errorf("joining %q to base URL: path must be relative", rel)
	}

	// Reference resolution replaces everything after the last "/" of the
	// base path, so make sure the final base segment is terminated. RawPath
	// (when set) must receive the same suffix or it stops matching Path and
	// the base URL's original encoding is silently dropped.
	if !strings.HasSuffix(base.Path, "/") {
		base.Path += "/"
		if base.RawPath != "" {
			base.RawPath += "/"
		}
	}

	// Escape segment by segment: escaping the whole path at once would turn
	// every "/" into "%2F" and flatten nested paths into one segment.
	segments := strings.Split(rel, "/")
	for i, seg := range segments {
		if seg == "." || seg == ".." {
			return "", fmt.Errorf("joining %q to base URL: dot segment %q not allowed", rel, seg)
		}
		segments[i] = url.PathEscape(seg)
	}

	// Build the reference directly instead of re-parsing the escaped text;
	// parsing would mistake a first segment such as "a:b.txt" for a scheme.
	ref := &url.URL{
		Path:    rel,
		RawPath: strings.Join(segments, "/"),
	}

	return FileURL(base.ResolveReference(ref).String()), nil
}
// String returns the base URL's underlying string unchanged; no parsing or
// normalization is performed. It makes BaseURL satisfy fmt.Stringer.
func (b BaseURL) String() string {
return string(b)
}
// String returns the file URL's underlying string unchanged; no parsing or
// normalization is performed. It makes FileURL satisfy fmt.Stringer.
func (f FileURL) String() string {
return string(f)
}
// String returns the manifest URL's underlying string unchanged; no parsing
// or normalization is performed. It makes ManifestURL satisfy fmt.Stringer.
func (m ManifestURL) String() string {
return string(m)
}