Add custom types for type safety throughout codebase

- Add FileCount, FileSize, RelFilePath, AbsFilePath, ModTime, Multihash types
- Add UnixSeconds and UnixNanos types for timestamp handling
- Add URL types (ManifestURL, FileURL, BaseURL) with safe path joining
- Consolidate scanner package into mfer package
- Update checker to use custom types in Result and CheckStatus
- Add ModTime.Timestamp() method for protobuf conversion
- Update all tests to use proper custom types
This commit is contained in:
2025-12-18 01:01:18 -08:00
parent a9f0d2abe4
commit dc115c5ba2
9 changed files with 428 additions and 247 deletions

View File

@@ -17,9 +17,9 @@ import (
// Result represents the outcome of checking a single file.
type Result struct {
Path string // Relative path from manifest
Status Status // Verification result status
Message string // Human-readable description of the result
Path mfer.RelFilePath // Relative path from manifest
Status Status // Verification result status
Message string // Human-readable description of the result
}
// Status represents the verification status of a file.
@@ -55,22 +55,22 @@ func (s Status) String() string {
// CheckStatus contains progress information for the check operation.
type CheckStatus struct {
TotalFiles int64 // Total number of files in manifest
CheckedFiles int64 // Number of files checked so far
TotalBytes int64 // Total bytes to verify (sum of all file sizes)
CheckedBytes int64 // Bytes verified so far
BytesPerSec float64 // Current throughput rate
ETA time.Duration // Estimated time to completion
Failures int64 // Number of verification failures encountered
TotalFiles mfer.FileCount // Total number of files in manifest
CheckedFiles mfer.FileCount // Number of files checked so far
TotalBytes mfer.FileSize // Total bytes to verify (sum of all file sizes)
CheckedBytes mfer.FileSize // Bytes verified so far
BytesPerSec float64 // Current throughput rate
ETA time.Duration // Estimated time to completion
Failures mfer.FileCount // Number of verification failures encountered
}
// Checker verifies files against a manifest.
type Checker struct {
basePath string
basePath mfer.AbsFilePath
files []*mfer.MFFilePath
fs afero.Fs
// manifestPaths is a set of paths in the manifest for quick lookup
manifestPaths map[string]struct{}
manifestPaths map[mfer.RelFilePath]struct{}
}
// NewChecker creates a new Checker for the given manifest, base path, and filesystem.
@@ -92,13 +92,13 @@ func NewChecker(manifestPath string, basePath string, fs afero.Fs) (*Checker, er
}
files := m.Files()
manifestPaths := make(map[string]struct{}, len(files))
manifestPaths := make(map[mfer.RelFilePath]struct{}, len(files))
for _, f := range files {
manifestPaths[f.Path] = struct{}{}
manifestPaths[mfer.RelFilePath(f.Path)] = struct{}{}
}
return &Checker{
basePath: abs,
basePath: mfer.AbsFilePath(abs),
files: files,
fs: fs,
manifestPaths: manifestPaths,
@@ -106,15 +106,15 @@ func NewChecker(manifestPath string, basePath string, fs afero.Fs) (*Checker, er
}
// FileCount returns the number of files in the manifest.
func (c *Checker) FileCount() int64 {
return int64(len(c.files))
func (c *Checker) FileCount() mfer.FileCount {
return mfer.FileCount(len(c.files))
}
// TotalBytes returns the total size of all files in the manifest.
func (c *Checker) TotalBytes() int64 {
var total int64
func (c *Checker) TotalBytes() mfer.FileSize {
var total mfer.FileSize
for _, f := range c.files {
total += f.Size
total += mfer.FileSize(f.Size)
}
return total
}
@@ -131,12 +131,12 @@ func (c *Checker) Check(ctx context.Context, results chan<- Result, progress cha
defer close(progress)
}
totalFiles := int64(len(c.files))
totalFiles := mfer.FileCount(len(c.files))
totalBytes := c.TotalBytes()
var checkedFiles int64
var checkedBytes int64
var failures int64
var checkedFiles mfer.FileCount
var checkedBytes mfer.FileSize
var failures mfer.FileCount
startTime := time.Now()
@@ -186,28 +186,29 @@ func (c *Checker) Check(ctx context.Context, results chan<- Result, progress cha
return nil
}
func (c *Checker) checkFile(entry *mfer.MFFilePath, checkedBytes *int64) Result {
absPath := filepath.Join(c.basePath, entry.Path)
func (c *Checker) checkFile(entry *mfer.MFFilePath, checkedBytes *mfer.FileSize) Result {
absPath := filepath.Join(string(c.basePath), entry.Path)
relPath := mfer.RelFilePath(entry.Path)
// Check if file exists
info, err := c.fs.Stat(absPath)
if err != nil {
if errors.Is(err, afero.ErrFileNotFound) || errors.Is(err, errors.New("file does not exist")) {
return Result{Path: entry.Path, Status: StatusMissing, Message: "file not found"}
return Result{Path: relPath, Status: StatusMissing, Message: "file not found"}
}
// Check for "file does not exist" style errors
exists, _ := afero.Exists(c.fs, absPath)
if !exists {
return Result{Path: entry.Path, Status: StatusMissing, Message: "file not found"}
return Result{Path: relPath, Status: StatusMissing, Message: "file not found"}
}
return Result{Path: entry.Path, Status: StatusError, Message: err.Error()}
return Result{Path: relPath, Status: StatusError, Message: err.Error()}
}
// Check size
if info.Size() != entry.Size {
*checkedBytes += info.Size()
*checkedBytes += mfer.FileSize(info.Size())
return Result{
Path: entry.Path,
Path: relPath,
Status: StatusSizeMismatch,
Message: "size mismatch",
}
@@ -216,31 +217,31 @@ func (c *Checker) checkFile(entry *mfer.MFFilePath, checkedBytes *int64) Result
// Open and hash file
f, err := c.fs.Open(absPath)
if err != nil {
return Result{Path: entry.Path, Status: StatusError, Message: err.Error()}
return Result{Path: relPath, Status: StatusError, Message: err.Error()}
}
defer func() { _ = f.Close() }()
h := sha256.New()
n, err := io.Copy(h, f)
if err != nil {
return Result{Path: entry.Path, Status: StatusError, Message: err.Error()}
return Result{Path: relPath, Status: StatusError, Message: err.Error()}
}
*checkedBytes += n
*checkedBytes += mfer.FileSize(n)
// Encode as multihash and compare
computed, err := multihash.Encode(h.Sum(nil), multihash.SHA2_256)
if err != nil {
return Result{Path: entry.Path, Status: StatusError, Message: err.Error()}
return Result{Path: relPath, Status: StatusError, Message: err.Error()}
}
// Check against all hashes in manifest (at least one must match)
for _, hash := range entry.Hashes {
if bytes.Equal(computed, hash.MultiHash) {
return Result{Path: entry.Path, Status: StatusOK}
return Result{Path: relPath, Status: StatusOK}
}
}
return Result{Path: entry.Path, Status: StatusHashMismatch, Message: "hash mismatch"}
return Result{Path: relPath, Status: StatusHashMismatch, Message: "hash mismatch"}
}
// FindExtraFiles walks the filesystem and reports files not in the manifest.
@@ -250,7 +251,7 @@ func (c *Checker) FindExtraFiles(ctx context.Context, results chan<- Result) err
defer close(results)
}
return afero.Walk(c.fs, c.basePath, func(path string, info os.FileInfo, err error) error {
return afero.Walk(c.fs, string(c.basePath), func(path string, info os.FileInfo, err error) error {
if err != nil {
return err
}
@@ -267,10 +268,11 @@ func (c *Checker) FindExtraFiles(ctx context.Context, results chan<- Result) err
}
// Get relative path
relPath, err := filepath.Rel(c.basePath, path)
rel, err := filepath.Rel(string(c.basePath), path)
if err != nil {
return err
}
relPath := mfer.RelFilePath(rel)
// Check if path is in manifest
if _, exists := c.manifestPaths[relPath]; !exists {

View File

@@ -40,7 +40,7 @@ func createTestManifest(t *testing.T, fs afero.Fs, manifestPath string, files ma
builder := mfer.NewBuilder()
for path, content := range files {
reader := bytes.NewReader(content)
_, err := builder.AddFile(path, int64(len(content)), time.Now(), reader, nil)
_, err := builder.AddFile(mfer.RelFilePath(path), mfer.FileSize(len(content)), mfer.ModTime(time.Now()), reader, nil)
require.NoError(t, err)
}
@@ -72,7 +72,7 @@ func TestNewChecker(t *testing.T) {
chk, err := NewChecker("/manifest.mf", "/", fs)
require.NoError(t, err)
assert.NotNil(t, chk)
assert.Equal(t, int64(2), chk.FileCount())
assert.Equal(t, mfer.FileCount(2), chk.FileCount())
})
t.Run("missing manifest", func(t *testing.T) {
@@ -101,8 +101,8 @@ func TestCheckerFileCountAndTotalBytes(t *testing.T) {
chk, err := NewChecker("/manifest.mf", "/", fs)
require.NoError(t, err)
assert.Equal(t, int64(3), chk.FileCount())
assert.Equal(t, int64(2+11+1000), chk.TotalBytes())
assert.Equal(t, mfer.FileCount(3), chk.FileCount())
assert.Equal(t, mfer.FileSize(2+11+1000), chk.TotalBytes())
}
func TestCheckAllFilesOK(t *testing.T) {
@@ -158,7 +158,7 @@ func TestCheckMissingFile(t *testing.T) {
okCount++
case StatusMissing:
missingCount++
assert.Equal(t, "missing.txt", r.Path)
assert.Equal(t, mfer.RelFilePath("missing.txt"), r.Path)
}
}
@@ -186,7 +186,7 @@ func TestCheckSizeMismatch(t *testing.T) {
r := <-results
assert.Equal(t, StatusSizeMismatch, r.Status)
assert.Equal(t, "file.txt", r.Path)
assert.Equal(t, mfer.RelFilePath("file.txt"), r.Path)
}
func TestCheckHashMismatch(t *testing.T) {
@@ -212,7 +212,7 @@ func TestCheckHashMismatch(t *testing.T) {
r := <-results
assert.Equal(t, StatusHashMismatch, r.Status)
assert.Equal(t, "file.txt", r.Path)
assert.Equal(t, mfer.RelFilePath("file.txt"), r.Path)
}
func TestCheckWithProgress(t *testing.T) {
@@ -246,11 +246,11 @@ func TestCheckWithProgress(t *testing.T) {
assert.NotEmpty(t, progressUpdates)
// Final progress should show all files checked
final := progressUpdates[len(progressUpdates)-1]
assert.Equal(t, int64(2), final.TotalFiles)
assert.Equal(t, int64(2), final.CheckedFiles)
assert.Equal(t, int64(300), final.TotalBytes)
assert.Equal(t, int64(300), final.CheckedBytes)
assert.Equal(t, int64(0), final.Failures)
assert.Equal(t, mfer.FileCount(2), final.TotalFiles)
assert.Equal(t, mfer.FileCount(2), final.CheckedFiles)
assert.Equal(t, mfer.FileSize(300), final.TotalBytes)
assert.Equal(t, mfer.FileSize(300), final.CheckedBytes)
assert.Equal(t, mfer.FileCount(0), final.Failures)
}
func TestCheckContextCancellation(t *testing.T) {
@@ -301,7 +301,7 @@ func TestFindExtraFiles(t *testing.T) {
}
assert.Len(t, extras, 1)
assert.Equal(t, "file2.txt", extras[0].Path)
assert.Equal(t, mfer.RelFilePath("file2.txt"), extras[0].Path)
assert.Equal(t, StatusExtra, extras[0].Status)
assert.Equal(t, "not in manifest", extras[0].Message)
}
@@ -390,8 +390,8 @@ func TestCheckEmptyManifest(t *testing.T) {
chk, err := NewChecker("/manifest.mf", "/data", fs)
require.NoError(t, err)
assert.Equal(t, int64(0), chk.FileCount())
assert.Equal(t, int64(0), chk.TotalBytes())
assert.Equal(t, mfer.FileCount(0), chk.FileCount())
assert.Equal(t, mfer.FileSize(0), chk.TotalBytes())
results := make(chan Result, 10)
err = chk.Check(context.Background(), results, nil)

View File

@@ -113,7 +113,7 @@ func (mfa *CLIApp) freshenManifestOperation(ctx *cli.Context) error {
}
// Handle dotfiles
if !includeDotfiles && pathIsHidden(relPath) {
if !includeDotfiles && mfer.IsHiddenPath(filepath.ToSlash(relPath)) {
if info.IsDir() {
return filepath.SkipDir
}
@@ -274,10 +274,14 @@ func (mfa *CLIApp) freshenManifestOperation(ctx *cli.Context) error {
hashedFiles++
// Add to builder with computed hash
addFileToBuilder(builder, e.path, e.size, e.mtime, hash)
if err := addFileToBuilder(builder, e.path, e.size, e.mtime, hash); err != nil {
return fmt.Errorf("failed to add %s: %w", e.path, err)
}
} else {
// Use existing entry
addExistingToBuilder(builder, e.existing)
if err := addExistingToBuilder(builder, e.existing); err != nil {
return fmt.Errorf("failed to add %s: %w", e.path, err)
}
}
}
@@ -360,38 +364,15 @@ func hashFile(r io.Reader, size int64, progress func(int64)) ([]byte, int64, err
}
// addFileToBuilder adds a new file entry to the builder
func addFileToBuilder(b *mfer.Builder, path string, size int64, mtime time.Time, hash []byte) {
// Use the builder's internal method indirectly by creating an entry
// Since Builder.AddFile reads from a reader, we need to use a different approach
// We'll access the builder's files directly through a custom method
b.AddFileWithHash(path, size, mtime, hash)
func addFileToBuilder(b *mfer.Builder, path string, size int64, mtime time.Time, hash []byte) error {
return b.AddFileWithHash(mfer.RelFilePath(path), mfer.FileSize(size), mfer.ModTime(mtime), hash)
}
// addExistingToBuilder adds an existing manifest entry to the builder
func addExistingToBuilder(b *mfer.Builder, entry *mfer.MFFilePath) {
func addExistingToBuilder(b *mfer.Builder, entry *mfer.MFFilePath) error {
mtime := time.Unix(entry.Mtime.Seconds, int64(entry.Mtime.Nanos))
if len(entry.Hashes) > 0 {
b.AddFileWithHash(entry.Path, entry.Size, mtime, entry.Hashes[0].MultiHash)
if len(entry.Hashes) == 0 {
return nil
}
}
// pathIsHidden checks if a path contains hidden components
func pathIsHidden(p string) bool {
// "." is not hidden, it's the current directory
if p == "." {
return false
}
// Check each path component
for p != "" && p != "." && p != "/" {
base := filepath.Base(p)
if len(base) > 0 && base[0] == '.' {
return true
}
parent := filepath.Dir(p)
if parent == p {
break
}
p = parent
}
return false
return b.AddFileWithHash(mfer.RelFilePath(entry.Path), mfer.FileSize(entry.Size), mfer.ModTime(mtime), entry.Hashes[0].MultiHash)
}

View File

@@ -13,29 +13,29 @@ import (
"github.com/spf13/afero"
"github.com/urfave/cli/v2"
"sneak.berlin/go/mfer/internal/log"
"sneak.berlin/go/mfer/internal/scanner"
"sneak.berlin/go/mfer/mfer"
)
func (mfa *CLIApp) generateManifestOperation(ctx *cli.Context) error {
log.Debug("generateManifestOperation()")
opts := &scanner.Options{
opts := &mfer.ScannerOptions{
IncludeDotfiles: ctx.Bool("IncludeDotfiles"),
FollowSymLinks: ctx.Bool("FollowSymLinks"),
Fs: mfa.Fs,
}
s := scanner.NewWithOptions(opts)
s := mfer.NewScannerWithOptions(opts)
// Phase 1: Enumeration - collect paths and stat files
args := ctx.Args()
showProgress := ctx.Bool("progress")
// Set up enumeration progress reporting
var enumProgress chan scanner.EnumerateStatus
var enumProgress chan mfer.EnumerateStatus
var enumWg sync.WaitGroup
if showProgress {
enumProgress = make(chan scanner.EnumerateStatus, 1)
enumProgress = make(chan mfer.EnumerateStatus, 1)
enumWg.Add(1)
go func() {
defer enumWg.Done()
@@ -117,10 +117,10 @@ func (mfa *CLIApp) generateManifestOperation(ctx *cli.Context) error {
}()
// Phase 2: Scan - read file contents and generate manifest
var scanProgress chan scanner.ScanStatus
var scanProgress chan mfer.ScanStatus
var scanWg sync.WaitGroup
if showProgress {
scanProgress = make(chan scanner.ScanStatus, 1)
scanProgress = make(chan mfer.ScanStatus, 1)
scanWg.Add(1)
go func() {
defer scanWg.Done()

View File

@@ -1,428 +0,0 @@
package scanner
import (
"context"
"io"
"io/fs"
"path"
"path/filepath"
"strings"
"sync"
"time"
"github.com/dustin/go-humanize"
"github.com/spf13/afero"
"sneak.berlin/go/mfer/internal/log"
"sneak.berlin/go/mfer/mfer"
)
// Phase 1: Enumeration
// ---------------------
// Walking directories and calling stat() on files to collect metadata.
// Builds the list of files to be scanned. Relatively fast (metadata only).
// EnumerateStatus contains progress information for the enumeration phase.
// Both fields are cumulative: they describe every file the Scanner holds at
// the moment the update is produced, not just the most recent file.
type EnumerateStatus struct {
	FilesFound int64 // Number of files discovered so far
	BytesFound int64 // Sum of the sizes of all discovered files (from stat, no content is read)
}
// Phase 2: Scan (ToManifest)
// --------------------------
// Reading file contents and computing hashes for manifest generation.
// This is the expensive phase that reads all file data.
// ScanStatus contains progress information for the scan phase.
// Intermediate updates are throttled to at most one per second; a final
// update with ETA set to zero is sent once every file has been processed.
type ScanStatus struct {
	TotalFiles   int64         // Total number of files to scan
	ScannedFiles int64         // Number of files fully scanned so far
	TotalBytes   int64         // Total bytes to read (sum of all file sizes)
	ScannedBytes int64         // Bytes read so far, including the file currently being hashed
	BytesPerSec  float64       // Average throughput since the scan started (bytes read / elapsed seconds)
	ETA          time.Duration // Estimated time to completion, derived from BytesPerSec
}
// Options configures scanner behavior.
// The zero value excludes dotfiles, skips symlinks, and (via NewWithOptions)
// falls back to the operating system filesystem.
type Options struct {
	IncludeDotfiles bool     // Include files and directories starting with a dot (default: exclude)
	FollowSymLinks  bool     // Resolve symlinks to regular files instead of skipping them
	Fs              afero.Fs // Filesystem to use, defaults to OsFs if nil
}
// FileEntry represents a file that has been enumerated.
// Entries are created during enumeration from stat metadata only; file
// contents are not read until ToManifest runs.
type FileEntry struct {
	Path    string    // Relative path (used in manifest), without a leading slash
	AbsPath string    // Absolute path (used for reading file content); the resolved target for followed symlinks
	Size    int64     // File size in bytes
	Mtime   time.Time // Last modification time
	Ctime   time.Time // Creation time (platform-dependent); not populated by the enumeration code in this file
}
// Scanner accumulates files and generates manifests from them.
// Files are added by the Enumerate* methods and turned into a manifest by
// ToManifest.
type Scanner struct {
	mu      sync.RWMutex // guards files
	files   []*FileEntry // enumerated entries, in discovery order
	options *Options     // never nil after NewWithOptions
	fs      afero.Fs     // filesystem used for stat and open; never nil after NewWithOptions
}
// New returns a Scanner configured with the zero-value Options: dotfiles
// excluded, symlinks skipped, and the operating system filesystem.
func New() *Scanner {
	return NewWithOptions(&Options{})
}
// NewWithOptions builds a Scanner from opts. A nil opts is treated as the
// zero-value Options, and a nil opts.Fs selects the operating system
// filesystem.
func NewWithOptions(opts *Options) *Scanner {
	if opts == nil {
		opts = &Options{}
	}

	sc := &Scanner{
		files:   make([]*FileEntry, 0),
		options: opts,
		fs:      opts.Fs,
	}
	if sc.fs == nil {
		sc.fs = afero.NewOsFs()
	}
	return sc
}
// EnumerateFile stats a single file and records it in the scanner.
// The manifest path of the entry is the bare filename; the file's parent
// directory serves as the base path for reading it later.
func (s *Scanner) EnumerateFile(filePath string) error {
	absPath, err := filepath.Abs(filePath)
	if err != nil {
		return err
	}

	fi, statErr := s.fs.Stat(absPath)
	if statErr != nil {
		return statErr
	}

	name, parentDir := filepath.Base(absPath), filepath.Dir(absPath)
	return s.enumerateFileWithInfo(name, parentDir, fi, nil)
}
// EnumeratePath walks one directory tree and records every file found.
// When progress is non-nil it receives status updates during the walk and is
// closed before the method returns.
func (s *Scanner) EnumeratePath(inputPath string, progress chan<- EnumerateStatus) error {
	// The single-path walk is the one-element case of EnumeratePaths, which
	// also takes care of closing the progress channel.
	return s.EnumeratePaths(progress, inputPath)
}
// EnumeratePaths walks each directory tree in inputPaths, in order, and
// records every file found. The walk stops at the first error.
// When progress is non-nil it receives status updates during the walks and is
// closed before the method returns.
func (s *Scanner) EnumeratePaths(progress chan<- EnumerateStatus, inputPaths ...string) error {
	if progress != nil {
		defer close(progress)
	}

	for i := range inputPaths {
		root, err := filepath.Abs(inputPaths[i])
		if err != nil {
			return err
		}

		// Present the tree rooted at "/" through a read-only view.
		rooted := afero.NewBasePathFs(s.fs, root)
		walkErr := s.enumerateFS(afero.NewReadOnlyFs(rooted), root, progress)
		if walkErr != nil {
			return walkErr
		}
	}
	return nil
}
// EnumerateFS walks afs from its root and records every file found.
// basePath is joined with each walked path to form the absolute path later
// used to read the file. When progress is non-nil it receives status updates
// during the walk and is closed before the method returns.
func (s *Scanner) EnumerateFS(afs afero.Fs, basePath string, progress chan<- EnumerateStatus) error {
	err := s.enumerateFS(afs, basePath, progress)
	if progress != nil {
		close(progress)
	}
	return err
}
// enumerateFS walks afs starting at "/" and records each visited file.
// Unlike the exported wrappers it leaves the progress channel open, so it can
// be called repeatedly with the same channel.
func (s *Scanner) enumerateFS(afs afero.Fs, basePath string, progress chan<- EnumerateStatus) error {
	visit := func(p string, info fs.FileInfo, walkErr error) error {
		switch {
		case walkErr != nil:
			return walkErr
		case s.options.IncludeDotfiles || !pathIsHidden(p):
			return s.enumerateFileWithInfo(p, basePath, info, progress)
		case info.IsDir():
			// Hidden directory: prune the whole subtree.
			return filepath.SkipDir
		default:
			// Hidden file: ignore it.
			return nil
		}
	}
	return afero.Walk(afs, "/", visit)
}
// enumerateFileWithInfo records one file whose metadata is already known.
// Directories are ignored. Symlinks are skipped unless FollowSymLinks is set;
// a followed link that is broken or resolves to a directory is skipped too.
// After the entry is stored, a cumulative status update is offered on
// progress without blocking. The function always returns nil.
func (s *Scanner) enumerateFileWithInfo(filePath string, basePath string, info fs.FileInfo, progress chan<- EnumerateStatus) error {
	// Manifests list files only; directories are implied by the paths.
	if info.IsDir() {
		return nil
	}

	// Manifest paths are relative, so drop a single leading slash.
	relPath := strings.TrimPrefix(filePath, "/")
	// Location on disk that will be opened when hashing.
	readPath := filepath.Join(basePath, relPath)

	if isLink := info.Mode()&fs.ModeSymlink != 0; isLink {
		if !s.options.FollowSymLinks {
			return nil
		}
		target, err := filepath.EvalSymlinks(readPath)
		if err != nil {
			// Broken link.
			return nil
		}
		targetInfo, err := s.fs.Stat(target)
		if err != nil || targetInfo.IsDir() {
			return nil
		}
		// Read from the resolved target, but keep the link's own path in
		// the manifest.
		readPath, info = target, targetInfo
	}

	// Ctime is left unset: fs.FileInfo does not expose it portably, so it
	// would need platform-specific code.
	entry := &FileEntry{
		Path:    relPath,
		AbsPath: readPath,
		Size:    info.Size(),
		Mtime:   info.ModTime(),
	}

	s.mu.Lock()
	s.files = append(s.files, entry)
	status := EnumerateStatus{FilesFound: int64(len(s.files))}
	for i := range s.files {
		status.BytesFound += s.files[i].Size
	}
	s.mu.Unlock()

	sendEnumerateStatus(progress, status)
	return nil
}
// Files returns a snapshot of the scanner's file list. The returned slice is
// a fresh copy, but its elements are the same *FileEntry pointers the scanner
// holds.
func (s *Scanner) Files() []*FileEntry {
	s.mu.RLock()
	snapshot := make([]*FileEntry, len(s.files))
	copy(snapshot, s.files)
	s.mu.RUnlock()
	return snapshot
}
// FileCount reports how many files the scanner currently holds.
func (s *Scanner) FileCount() int64 {
	s.mu.RLock()
	count := len(s.files)
	s.mu.RUnlock()
	return int64(count)
}
// TotalBytes reports the combined size, in bytes, of every file the scanner
// currently holds.
func (s *Scanner) TotalBytes() int64 {
	s.mu.RLock()
	defer s.mu.RUnlock()

	sum := int64(0)
	for i := range s.files {
		sum += s.files[i].Size
	}
	return sum
}
// ToManifest reads all file contents, computes hashes, and generates a manifest.
// The manifest is written to the provided io.Writer.
//
// If progress is non-nil, status updates are sent at most once per second
// while files are being hashed, followed by one final update with ETA 0.
// Updates are sent without blocking, so a slow receiver may miss some.
// The progress channel is closed when the method returns.
//
// Cancellation of ctx is checked between files; on cancellation ctx.Err() is
// returned. Any error opening or adding a file aborts the scan and is
// returned as-is, and no manifest is written in either case.
func (s *Scanner) ToManifest(ctx context.Context, w io.Writer, progress chan<- ScanStatus) error {
	if progress != nil {
		defer close(progress)
	}

	// Snapshot the file list and totals under the read lock so the scan
	// works on a stable copy.
	s.mu.RLock()
	files := make([]*FileEntry, len(s.files))
	copy(files, s.files)
	totalFiles := int64(len(files))
	var totalBytes int64
	for _, f := range files {
		totalBytes += f.Size
	}
	s.mu.RUnlock()

	builder := mfer.NewBuilder()
	var scannedFiles int64
	var scannedBytes int64
	// lastProgressTime throttles intermediate updates across all files.
	lastProgressTime := time.Now()
	startTime := time.Now()

	for _, entry := range files {
		// Check for cancellation
		select {
		case <-ctx.Done():
			return ctx.Err()
		default:
		}

		// Open file
		f, err := s.fs.Open(entry.AbsPath)
		if err != nil {
			return err
		}

		// Create progress channel for this file. A helper goroutine turns the
		// per-file byte counts into overall ScanStatus updates.
		var fileProgress chan mfer.FileHashProgress
		var wg sync.WaitGroup
		if progress != nil {
			fileProgress = make(chan mfer.FileHashProgress, 1)
			wg.Add(1)
			// baseScannedBytes is the total from files completed before this one.
			go func(baseScannedBytes int64) {
				defer wg.Done()
				for p := range fileProgress {
					// Send progress at most once per second
					now := time.Now()
					if now.Sub(lastProgressTime) >= time.Second {
						elapsed := now.Sub(startTime).Seconds()
						currentBytes := baseScannedBytes + p.BytesRead
						var rate float64
						var eta time.Duration
						if elapsed > 0 && currentBytes > 0 {
							rate = float64(currentBytes) / elapsed
							remainingBytes := totalBytes - currentBytes
							if rate > 0 {
								// The conversion truncates to whole seconds
								// before scaling to a Duration.
								eta = time.Duration(float64(remainingBytes)/rate) * time.Second
							}
						}
						sendScanStatus(progress, ScanStatus{
							TotalFiles:   totalFiles,
							ScannedFiles: scannedFiles,
							TotalBytes:   totalBytes,
							ScannedBytes: currentBytes,
							BytesPerSec:  rate,
							ETA:          eta,
						})
						lastProgressTime = now
					}
				}
			}(scannedBytes)
		}

		// Add to manifest with progress channel
		bytesRead, err := builder.AddFile(
			entry.Path,
			entry.Size,
			entry.Mtime,
			f,
			fileProgress,
		)
		// The file is closed whether or not AddFile succeeded; the close
		// error is deliberately ignored.
		_ = f.Close()

		// Close channel and wait for goroutine to finish. Waiting here means
		// the goroutine has exited before the counters below are modified.
		if fileProgress != nil {
			close(fileProgress)
			wg.Wait()
		}

		if err != nil {
			return err
		}

		log.Verbosef("+ %s (%s)", entry.Path, humanize.IBytes(uint64(bytesRead)))
		scannedFiles++
		scannedBytes += bytesRead
	}

	// Send final progress (ETA is 0 at completion)
	if progress != nil {
		elapsed := time.Since(startTime).Seconds()
		var rate float64
		if elapsed > 0 {
			rate = float64(scannedBytes) / elapsed
		}
		sendScanStatus(progress, ScanStatus{
			TotalFiles:   totalFiles,
			ScannedFiles: scannedFiles,
			TotalBytes:   totalBytes,
			ScannedBytes: scannedBytes,
			BytesPerSec:  rate,
			ETA:          0,
		})
	}

	// Build and write manifest
	return builder.Build(w)
}
// pathIsHidden reports whether p, or any directory on the way to it, is
// hidden, meaning the name of that component starts with a dot.
//
// p is a slash-separated path and is normalized with path.Clean first. The
// special components "." (current directory) and ".." (parent directory) are
// navigation markers rather than dotfiles and never count as hidden, so ".",
// the empty path, and "../file" are all reported as not hidden.
func pathIsHidden(p string) bool {
	for _, name := range strings.Split(path.Clean(p), "/") {
		if name == "." || name == ".." {
			continue
		}
		// An absolute path yields an empty first component, which has no
		// dot prefix and is therefore skipped naturally.
		if strings.HasPrefix(name, ".") {
			return true
		}
	}
	return false
}
// sendEnumerateStatus offers status on ch without ever blocking.
// A nil channel is ignored, and when the channel cannot accept the value
// immediately the update is discarded.
func sendEnumerateStatus(ch chan<- EnumerateStatus, status EnumerateStatus) {
	if ch != nil {
		select {
		case ch <- status:
		default:
			// Receiver is not ready; drop this update.
		}
	}
}
// sendScanStatus offers status on ch without ever blocking.
// A nil channel is ignored, and when the channel cannot accept the value
// immediately the update is discarded.
func sendScanStatus(ch chan<- ScanStatus, status ScanStatus) {
	if ch != nil {
		select {
		case ch <- status:
		default:
			// Receiver is not ready; drop this update.
		}
	}
}

View File

@@ -1,362 +0,0 @@
package scanner
import (
"bytes"
"context"
"testing"
"time"
"github.com/spf13/afero"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)
// TestNew checks that a default scanner starts out empty.
func TestNew(t *testing.T) {
	sc := New()

	assert.NotNil(t, sc)
	assert.Equal(t, int64(0), sc.FileCount())
	assert.Equal(t, int64(0), sc.TotalBytes())
}
func TestNewWithOptions(t *testing.T) {
t.Run("nil options", func(t *testing.T) {
s := NewWithOptions(nil)
assert.NotNil(t, s)
})
t.Run("with options", func(t *testing.T) {
fs := afero.NewMemMapFs()
opts := &Options{
IncludeDotfiles: true,
FollowSymLinks: true,
Fs: fs,
}
s := NewWithOptions(opts)
assert.NotNil(t, s)
})
}
// TestEnumerateFile checks that a single file is recorded under its bare
// filename with the correct size.
func TestEnumerateFile(t *testing.T) {
	memFs := afero.NewMemMapFs()
	require.NoError(t, afero.WriteFile(memFs, "/test.txt", []byte("hello world"), 0644))

	sc := NewWithOptions(&Options{Fs: memFs})
	require.NoError(t, sc.EnumerateFile("/test.txt"))

	assert.Equal(t, int64(1), sc.FileCount())
	assert.Equal(t, int64(11), sc.TotalBytes())

	entries := sc.Files()
	require.Len(t, entries, 1)
	only := entries[0]
	assert.Equal(t, "test.txt", only.Path)
	assert.Equal(t, int64(11), only.Size)
}
func TestEnumerateFileMissing(t *testing.T) {
fs := afero.NewMemMapFs()
s := NewWithOptions(&Options{Fs: fs})
err := s.EnumerateFile("/nonexistent.txt")
assert.Error(t, err)
}
// TestEnumeratePath checks that a directory walk picks up files at every
// depth and totals their sizes.
func TestEnumeratePath(t *testing.T) {
	memFs := afero.NewMemMapFs()
	require.NoError(t, memFs.MkdirAll("/testdir/subdir", 0755))
	fixtures := []struct {
		name, body string
	}{
		{"/testdir/file1.txt", "one"},
		{"/testdir/file2.txt", "two"},
		{"/testdir/subdir/file3.txt", "three"},
	}
	for _, fx := range fixtures {
		require.NoError(t, afero.WriteFile(memFs, fx.name, []byte(fx.body), 0644))
	}

	sc := NewWithOptions(&Options{Fs: memFs})
	require.NoError(t, sc.EnumeratePath("/testdir", nil))

	assert.Equal(t, int64(3), sc.FileCount())
	assert.Equal(t, int64(3+3+5), sc.TotalBytes())
}
// TestEnumeratePathWithProgress checks that status updates are delivered and
// that the last one reflects every discovered file.
func TestEnumeratePathWithProgress(t *testing.T) {
	memFs := afero.NewMemMapFs()
	require.NoError(t, memFs.MkdirAll("/testdir", 0755))
	require.NoError(t, afero.WriteFile(memFs, "/testdir/file1.txt", []byte("one"), 0644))
	require.NoError(t, afero.WriteFile(memFs, "/testdir/file2.txt", []byte("two"), 0644))

	sc := NewWithOptions(&Options{Fs: memFs})
	statusCh := make(chan EnumerateStatus, 10)
	require.NoError(t, sc.EnumeratePath("/testdir", statusCh))

	// EnumeratePath closes the channel, so ranging over it terminates.
	var seen []EnumerateStatus
	for st := range statusCh {
		seen = append(seen, st)
	}
	assert.NotEmpty(t, seen)

	// The last update must account for all files.
	last := seen[len(seen)-1]
	assert.Equal(t, int64(2), last.FilesFound)
	assert.Equal(t, int64(6), last.BytesFound)
}
// TestEnumeratePaths checks that files from several roots accumulate in one
// scanner.
func TestEnumeratePaths(t *testing.T) {
	memFs := afero.NewMemMapFs()
	for _, dir := range []string{"/dir1", "/dir2"} {
		require.NoError(t, memFs.MkdirAll(dir, 0755))
	}
	require.NoError(t, afero.WriteFile(memFs, "/dir1/a.txt", []byte("aaa"), 0644))
	require.NoError(t, afero.WriteFile(memFs, "/dir2/b.txt", []byte("bbb"), 0644))

	sc := NewWithOptions(&Options{Fs: memFs})
	require.NoError(t, sc.EnumeratePaths(nil, "/dir1", "/dir2"))

	assert.Equal(t, int64(2), sc.FileCount())
}
// TestExcludeDotfiles checks that hidden files and directories are skipped
// unless IncludeDotfiles is set.
func TestExcludeDotfiles(t *testing.T) {
	fs := afero.NewMemMapFs()
	require.NoError(t, fs.MkdirAll("/testdir/.hidden", 0755))
	require.NoError(t, afero.WriteFile(fs, "/testdir/visible.txt", []byte("visible"), 0644))
	require.NoError(t, afero.WriteFile(fs, "/testdir/.hidden.txt", []byte("hidden"), 0644))
	require.NoError(t, afero.WriteFile(fs, "/testdir/.hidden/inside.txt", []byte("inside"), 0644))

	t.Run("exclude by default", func(t *testing.T) {
		s := NewWithOptions(&Options{Fs: fs, IncludeDotfiles: false})
		err := s.EnumeratePath("/testdir", nil)
		require.NoError(t, err)

		assert.Equal(t, int64(1), s.FileCount())
		files := s.Files()
		// Stop here on a wrong count: indexing an empty slice below would
		// panic and hide the real failure.
		require.Len(t, files, 1)
		assert.Equal(t, "visible.txt", files[0].Path)
	})

	t.Run("include when enabled", func(t *testing.T) {
		s := NewWithOptions(&Options{Fs: fs, IncludeDotfiles: true})
		err := s.EnumeratePath("/testdir", nil)
		require.NoError(t, err)

		assert.Equal(t, int64(3), s.FileCount())
	})
}
func TestPathIsHidden(t *testing.T) {
tests := []struct {
path string
hidden bool
}{
{"file.txt", false},
{".hidden", true},
{"dir/file.txt", false},
{"dir/.hidden", true},
{".dir/file.txt", true},
{"/absolute/path", false},
{"/absolute/.hidden", true},
{"./relative", false}, // path.Clean removes leading ./
{"a/b/c/.d/e", true},
}
for _, tt := range tests {
t.Run(tt.path, func(t *testing.T) {
assert.Equal(t, tt.hidden, pathIsHidden(tt.path), "pathIsHidden(%q)", tt.path)
})
}
}
// TestToManifest checks that scanning a small tree produces output that
// begins with the manifest magic bytes.
func TestToManifest(t *testing.T) {
	fs := afero.NewMemMapFs()
	require.NoError(t, fs.MkdirAll("/testdir", 0755))
	require.NoError(t, afero.WriteFile(fs, "/testdir/file1.txt", []byte("content one"), 0644))
	require.NoError(t, afero.WriteFile(fs, "/testdir/file2.txt", []byte("content two"), 0644))

	s := NewWithOptions(&Options{Fs: fs})
	err := s.EnumeratePath("/testdir", nil)
	require.NoError(t, err)

	var buf bytes.Buffer
	err = s.ToManifest(context.Background(), &buf, nil)
	require.NoError(t, err)

	// Manifest should have magic bytes. A prefix check is used instead of
	// slicing [:8] so that a short buffer fails the test rather than panics.
	assert.True(t, buf.Len() > 0)
	assert.True(t, bytes.HasPrefix(buf.Bytes(), []byte("ZNAVSRFG")), "manifest does not start with magic bytes")
}
// TestToManifestWithProgress checks that scan progress is reported and that
// the final update shows the scan as complete.
func TestToManifestWithProgress(t *testing.T) {
	memFs := afero.NewMemMapFs()
	require.NoError(t, memFs.MkdirAll("/testdir", 0755))
	payload := bytes.Repeat([]byte("x"), 1000)
	require.NoError(t, afero.WriteFile(memFs, "/testdir/file.txt", payload, 0644))

	sc := NewWithOptions(&Options{Fs: memFs})
	require.NoError(t, sc.EnumeratePath("/testdir", nil))

	var out bytes.Buffer
	statusCh := make(chan ScanStatus, 10)
	require.NoError(t, sc.ToManifest(context.Background(), &out, statusCh))

	// ToManifest closes the channel, so ranging over it terminates.
	var seen []ScanStatus
	for st := range statusCh {
		seen = append(seen, st)
	}
	assert.NotEmpty(t, seen)

	// The last update must describe a finished scan.
	last := seen[len(seen)-1]
	assert.Equal(t, int64(1), last.TotalFiles)
	assert.Equal(t, int64(1), last.ScannedFiles)
	assert.Equal(t, int64(1000), last.TotalBytes)
	assert.Equal(t, int64(1000), last.ScannedBytes)
}
// TestToManifestContextCancellation checks that ToManifest reports
// context.Canceled when its context is already cancelled.
func TestToManifestContextCancellation(t *testing.T) {
	memFs := afero.NewMemMapFs()
	require.NoError(t, memFs.MkdirAll("/testdir", 0755))

	// Populate the tree with 100 distinctly named files.
	body := bytes.Repeat([]byte("x"), 100)
	for i := 0; i < 100; i++ {
		letter := string(rune('a' + i%26))
		digit := string(rune('0' + i/26))
		name := letter + digit + ".txt"
		require.NoError(t, afero.WriteFile(memFs, "/testdir/"+name, body, 0644))
	}

	sc := NewWithOptions(&Options{Fs: memFs})
	require.NoError(t, sc.EnumeratePath("/testdir", nil))

	// Cancel before the scan starts so the first cancellation check trips.
	ctx, cancel := context.WithCancel(context.Background())
	cancel()

	var out bytes.Buffer
	scanErr := sc.ToManifest(ctx, &out, nil)
	assert.ErrorIs(t, scanErr, context.Canceled)
}
// TestToManifestEmptyScanner checks that a scanner with no files still
// produces output that begins with the manifest magic bytes.
func TestToManifestEmptyScanner(t *testing.T) {
	fs := afero.NewMemMapFs()
	s := NewWithOptions(&Options{Fs: fs})

	var buf bytes.Buffer
	err := s.ToManifest(context.Background(), &buf, nil)
	require.NoError(t, err)

	// Should still produce a valid manifest. A prefix check is used instead
	// of slicing [:8] so that a short buffer fails the test rather than panics.
	assert.True(t, buf.Len() > 0)
	assert.True(t, bytes.HasPrefix(buf.Bytes(), []byte("ZNAVSRFG")), "manifest does not start with magic bytes")
}
// TestFilesCopiesSlice checks that each call to Files returns a slice with
// its own backing storage.
func TestFilesCopiesSlice(t *testing.T) {
	memFs := afero.NewMemMapFs()
	require.NoError(t, afero.WriteFile(memFs, "/test.txt", []byte("hello"), 0644))

	sc := NewWithOptions(&Options{Fs: memFs})
	require.NoError(t, sc.EnumerateFile("/test.txt"))

	first, second := sc.Files(), sc.Files()

	// The first elements must live at different addresses.
	assert.NotSame(t, &first[0], &second[0])
}
// TestEnumerateFS checks that walking a caller-supplied base-path filesystem
// records the files beneath it.
func TestEnumerateFS(t *testing.T) {
	memFs := afero.NewMemMapFs()
	require.NoError(t, memFs.MkdirAll("/testdir/sub", 0755))
	require.NoError(t, afero.WriteFile(memFs, "/testdir/file.txt", []byte("hello"), 0644))
	require.NoError(t, afero.WriteFile(memFs, "/testdir/sub/nested.txt", []byte("world"), 0644))

	// View of the tree rooted at /testdir.
	rooted := afero.NewBasePathFs(memFs, "/testdir")

	sc := NewWithOptions(&Options{Fs: memFs})
	require.NoError(t, sc.EnumerateFS(rooted, "/testdir", nil))

	assert.Equal(t, int64(2), sc.FileCount())
}
// TestSendEnumerateStatusNonBlocking checks that sending on an unbuffered
// channel with no receiver returns promptly instead of blocking.
func TestSendEnumerateStatusNonBlocking(t *testing.T) {
	// Unbuffered and never read: a blocking send would hang forever.
	statusCh := make(chan EnumerateStatus)

	finished := make(chan bool)
	go func() {
		sendEnumerateStatus(statusCh, EnumerateStatus{FilesFound: 1})
		finished <- true
	}()

	deadline := time.After(100 * time.Millisecond)
	select {
	case <-deadline:
		t.Fatal("sendEnumerateStatus blocked on full channel")
	case <-finished:
		// The send returned without blocking.
	}
}
// TestSendScanStatusNonBlocking checks that sending on an unbuffered channel
// with no receiver returns promptly instead of blocking.
func TestSendScanStatusNonBlocking(t *testing.T) {
	// Unbuffered and never read: a blocking send would hang forever.
	statusCh := make(chan ScanStatus)

	finished := make(chan bool)
	go func() {
		sendScanStatus(statusCh, ScanStatus{ScannedFiles: 1})
		finished <- true
	}()

	deadline := time.After(100 * time.Millisecond)
	select {
	case <-deadline:
		t.Fatal("sendScanStatus blocked on full channel")
	case <-finished:
		// The send returned without blocking.
	}
}
func TestSendStatusNilChannel(t *testing.T) {
// Should not panic with nil channel
sendEnumerateStatus(nil, EnumerateStatus{})
sendScanStatus(nil, ScanStatus{})
}
// TestFileEntryFields checks that an enumerated entry carries the expected
// path, absolute path, size and modification time.
func TestFileEntryFields(t *testing.T) {
	memFs := afero.NewMemMapFs()
	stamp := time.Now().Truncate(time.Second)
	require.NoError(t, afero.WriteFile(memFs, "/test.txt", []byte("content"), 0644))
	require.NoError(t, memFs.Chtimes("/test.txt", stamp, stamp))

	sc := NewWithOptions(&Options{Fs: memFs})
	require.NoError(t, sc.EnumerateFile("/test.txt"))

	entries := sc.Files()
	require.Len(t, entries, 1)
	got := entries[0]

	assert.Equal(t, "test.txt", got.Path)
	assert.Contains(t, got.AbsPath, "test.txt")
	assert.Equal(t, int64(7), got.Size)
	// Mtime must be within two seconds of the timestamp set above.
	assert.WithinDuration(t, stamp, got.Mtime, 2*time.Second)
}
// TestLargeFileEnumeration checks counts and byte totals when enumerating a
// directory that holds 100 small files.
func TestLargeFileEnumeration(t *testing.T) {
	memFs := afero.NewMemMapFs()
	require.NoError(t, memFs.MkdirAll("/testdir", 0755))

	// 100 four-byte files with distinct letter+digit names.
	for i := 0; i < 100; i++ {
		letter := string(rune('a' + i%26))
		digit := string(rune('0' + i/26%10))
		require.NoError(t, afero.WriteFile(memFs, "/testdir/"+letter+digit+".txt", []byte("data"), 0644))
	}

	sc := NewWithOptions(&Options{Fs: memFs})
	statusCh := make(chan EnumerateStatus, 200)
	require.NoError(t, sc.EnumeratePath("/testdir", statusCh))

	// Discard the buffered updates; the channel is already closed.
	for range statusCh {
	}

	assert.Equal(t, int64(100), sc.FileCount())
	assert.Equal(t, int64(400), sc.TotalBytes()) // 100 * 4 bytes
}