diff --git a/internal/checker/checker.go b/internal/checker/checker.go index a113e2b..f822fa2 100644 --- a/internal/checker/checker.go +++ b/internal/checker/checker.go @@ -17,9 +17,9 @@ import ( // Result represents the outcome of checking a single file. type Result struct { - Path string // Relative path from manifest - Status Status // Verification result status - Message string // Human-readable description of the result + Path mfer.RelFilePath // Relative path from manifest + Status Status // Verification result status + Message string // Human-readable description of the result } // Status represents the verification status of a file. @@ -55,22 +55,22 @@ func (s Status) String() string { // CheckStatus contains progress information for the check operation. type CheckStatus struct { - TotalFiles int64 // Total number of files in manifest - CheckedFiles int64 // Number of files checked so far - TotalBytes int64 // Total bytes to verify (sum of all file sizes) - CheckedBytes int64 // Bytes verified so far - BytesPerSec float64 // Current throughput rate - ETA time.Duration // Estimated time to completion - Failures int64 // Number of verification failures encountered + TotalFiles mfer.FileCount // Total number of files in manifest + CheckedFiles mfer.FileCount // Number of files checked so far + TotalBytes mfer.FileSize // Total bytes to verify (sum of all file sizes) + CheckedBytes mfer.FileSize // Bytes verified so far + BytesPerSec float64 // Current throughput rate + ETA time.Duration // Estimated time to completion + Failures mfer.FileCount // Number of verification failures encountered } // Checker verifies files against a manifest. type Checker struct { - basePath string + basePath mfer.AbsFilePath files []*mfer.MFFilePath fs afero.Fs // manifestPaths is a set of paths in the manifest for quick lookup - manifestPaths map[string]struct{} + manifestPaths map[mfer.RelFilePath]struct{} } // NewChecker creates a new Checker for the given manifest, base path, and filesystem. @@ -92,13 +92,13 @@ func NewChecker(manifestPath string, basePath string, fs afero.Fs) (*Checker, er } files := m.Files() - manifestPaths := make(map[string]struct{}, len(files)) + manifestPaths := make(map[mfer.RelFilePath]struct{}, len(files)) for _, f := range files { - manifestPaths[f.Path] = struct{}{} + manifestPaths[mfer.RelFilePath(f.Path)] = struct{}{} } return &Checker{ - basePath: abs, + basePath: mfer.AbsFilePath(abs), files: files, fs: fs, manifestPaths: manifestPaths, @@ -106,15 +106,15 @@ func NewChecker(manifestPath string, basePath string, fs afero.Fs) (*Checker, er } // FileCount returns the number of files in the manifest. -func (c *Checker) FileCount() int64 { - return int64(len(c.files)) +func (c *Checker) FileCount() mfer.FileCount { + return mfer.FileCount(len(c.files)) } // TotalBytes returns the total size of all files in the manifest. -func (c *Checker) TotalBytes() int64 { - var total int64 +func (c *Checker) TotalBytes() mfer.FileSize { + var total mfer.FileSize for _, f := range c.files { - total += f.Size + total += mfer.FileSize(f.Size) } return total } @@ -131,12 +131,12 @@ func (c *Checker) Check(ctx context.Context, results chan<- Result, progress cha defer close(progress) } - totalFiles := int64(len(c.files)) + totalFiles := mfer.FileCount(len(c.files)) totalBytes := c.TotalBytes() - var checkedFiles int64 - var checkedBytes int64 - var failures int64 + var checkedFiles mfer.FileCount + var checkedBytes mfer.FileSize + var failures mfer.FileCount startTime := time.Now() @@ -186,28 +186,29 @@ func (c *Checker) Check(ctx context.Context, results chan<- Result, progress cha return nil } -func (c *Checker) checkFile(entry *mfer.MFFilePath, checkedBytes *int64) Result { - absPath := filepath.Join(c.basePath, entry.Path) +func (c *Checker) checkFile(entry *mfer.MFFilePath, checkedBytes *mfer.FileSize) Result { + absPath := filepath.Join(string(c.basePath), entry.Path) + relPath := mfer.RelFilePath(entry.Path) // Check if file exists info, err := c.fs.Stat(absPath) if err != nil { if errors.Is(err, afero.ErrFileNotFound) || errors.Is(err, errors.New("file does not exist")) { - return Result{Path: entry.Path, Status: StatusMissing, Message: "file not found"} + return Result{Path: relPath, Status: StatusMissing, Message: "file not found"} } // Check for "file does not exist" style errors exists, _ := afero.Exists(c.fs, absPath) if !exists { - return Result{Path: entry.Path, Status: StatusMissing, Message: "file not found"} + return Result{Path: relPath, Status: StatusMissing, Message: "file not found"} } - return Result{Path: entry.Path, Status: StatusError, Message: err.Error()} + return Result{Path: relPath, Status: StatusError, Message: err.Error()} } // Check size if info.Size() != entry.Size { - *checkedBytes += info.Size() + *checkedBytes += mfer.FileSize(info.Size()) return Result{ - Path: entry.Path, + Path: relPath, Status: StatusSizeMismatch, Message: "size mismatch", } @@ -216,31 +217,31 @@ func (c *Checker) checkFile(entry *mfer.MFFilePath, checkedBytes *int64) Result // Open and hash file f, err := c.fs.Open(absPath) if err != nil { - return Result{Path: entry.Path, Status: StatusError, Message: err.Error()} + return Result{Path: relPath, Status: StatusError, Message: err.Error()} } defer func() { _ = f.Close() }() h := sha256.New() n, err := io.Copy(h, f) if err != nil { - return Result{Path: entry.Path, Status: StatusError, Message: err.Error()} + return Result{Path: relPath, Status: StatusError, Message: err.Error()} } - *checkedBytes += n + *checkedBytes += mfer.FileSize(n) // Encode as multihash and compare computed, err := multihash.Encode(h.Sum(nil), multihash.SHA2_256) if err != nil { - return Result{Path: entry.Path, Status: StatusError, Message: err.Error()} + return Result{Path: relPath, Status: StatusError, Message: err.Error()} } // Check against all hashes in manifest (at least one must match) for _, hash := range entry.Hashes { if bytes.Equal(computed, hash.MultiHash) { - return Result{Path: entry.Path, Status: StatusOK} + return Result{Path: relPath, Status: StatusOK} } } - return Result{Path: entry.Path, Status: StatusHashMismatch, Message: "hash mismatch"} + return Result{Path: relPath, Status: StatusHashMismatch, Message: "hash mismatch"} } // FindExtraFiles walks the filesystem and reports files not in the manifest. @@ -250,7 +251,7 @@ func (c *Checker) FindExtraFiles(ctx context.Context, results chan<- Result) err defer close(results) } - return afero.Walk(c.fs, c.basePath, func(path string, info os.FileInfo, err error) error { + return afero.Walk(c.fs, string(c.basePath), func(path string, info os.FileInfo, err error) error { if err != nil { return err } @@ -267,10 +268,11 @@ func (c *Checker) FindExtraFiles(ctx context.Context, results chan<- Result) err } // Get relative path - relPath, err := filepath.Rel(c.basePath, path) + rel, err := filepath.Rel(string(c.basePath), path) if err != nil { return err } + relPath := mfer.RelFilePath(rel) // Check if path is in manifest if _, exists := c.manifestPaths[relPath]; !exists { diff --git a/internal/checker/checker_test.go b/internal/checker/checker_test.go index d6bc462..cba9cde 100644 --- a/internal/checker/checker_test.go +++ b/internal/checker/checker_test.go @@ -40,7 +40,7 @@ func createTestManifest(t *testing.T, fs afero.Fs, manifestPath string, files ma builder := mfer.NewBuilder() for path, content := range files { reader := bytes.NewReader(content) - _, err := builder.AddFile(path, int64(len(content)), time.Now(), reader, nil) + _, err := builder.AddFile(mfer.RelFilePath(path), mfer.FileSize(len(content)), mfer.ModTime(time.Now()), reader, nil) require.NoError(t, err) } @@ -72,7 +72,7 @@ func TestNewChecker(t *testing.T) { chk, err := NewChecker("/manifest.mf", "/", fs) require.NoError(t, err) assert.NotNil(t, chk) - assert.Equal(t, int64(2), chk.FileCount()) + assert.Equal(t, mfer.FileCount(2), chk.FileCount()) }) t.Run("missing manifest", func(t *testing.T) { @@ -101,8 +101,8 @@ func TestCheckerFileCountAndTotalBytes(t *testing.T) { chk, err := NewChecker("/manifest.mf", "/", fs) require.NoError(t, err) - assert.Equal(t, int64(3), chk.FileCount()) - assert.Equal(t, int64(2+11+1000), chk.TotalBytes()) + assert.Equal(t, mfer.FileCount(3), chk.FileCount()) + assert.Equal(t, mfer.FileSize(2+11+1000), chk.TotalBytes()) } func TestCheckAllFilesOK(t *testing.T) { @@ -158,7 +158,7 @@ func TestCheckMissingFile(t *testing.T) { okCount++ case StatusMissing: missingCount++ - assert.Equal(t, "missing.txt", r.Path) + assert.Equal(t, mfer.RelFilePath("missing.txt"), r.Path) } } @@ -186,7 +186,7 @@ func TestCheckSizeMismatch(t *testing.T) { r := <-results assert.Equal(t, StatusSizeMismatch, r.Status) - assert.Equal(t, "file.txt", r.Path) + assert.Equal(t, mfer.RelFilePath("file.txt"), r.Path) } func TestCheckHashMismatch(t *testing.T) { @@ -212,7 +212,7 @@ func TestCheckHashMismatch(t *testing.T) { r := <-results assert.Equal(t, StatusHashMismatch, r.Status) - assert.Equal(t, "file.txt", r.Path) + assert.Equal(t, mfer.RelFilePath("file.txt"), r.Path) } func TestCheckWithProgress(t *testing.T) { @@ -246,11 +246,11 @@ func TestCheckWithProgress(t *testing.T) { assert.NotEmpty(t, progressUpdates) // Final progress should show all files checked final := progressUpdates[len(progressUpdates)-1] - assert.Equal(t, int64(2), final.TotalFiles) - assert.Equal(t, int64(2), final.CheckedFiles) - assert.Equal(t, int64(300), final.TotalBytes) - assert.Equal(t, int64(300), final.CheckedBytes) - assert.Equal(t, int64(0), final.Failures) + assert.Equal(t, mfer.FileCount(2), final.TotalFiles) + assert.Equal(t, mfer.FileCount(2), final.CheckedFiles) + assert.Equal(t, mfer.FileSize(300), final.TotalBytes) + assert.Equal(t, mfer.FileSize(300), final.CheckedBytes) + assert.Equal(t, mfer.FileCount(0), final.Failures) } func TestCheckContextCancellation(t *testing.T) { @@ -301,7 +301,7 @@ func TestFindExtraFiles(t *testing.T) { } assert.Len(t, extras, 1) - assert.Equal(t, "file2.txt", extras[0].Path) + assert.Equal(t, mfer.RelFilePath("file2.txt"), extras[0].Path) assert.Equal(t, StatusExtra, extras[0].Status) assert.Equal(t, "not in manifest", extras[0].Message) } @@ -390,8 +390,8 @@ func TestCheckEmptyManifest(t *testing.T) { chk, err := NewChecker("/manifest.mf", "/data", fs) require.NoError(t, err) - assert.Equal(t, int64(0), chk.FileCount()) - assert.Equal(t, int64(0), chk.TotalBytes()) + assert.Equal(t, mfer.FileCount(0), chk.FileCount()) + assert.Equal(t, mfer.FileSize(0), chk.TotalBytes()) results := make(chan Result, 10) err = chk.Check(context.Background(), results, nil) diff --git a/internal/cli/freshen.go b/internal/cli/freshen.go index 1de2713..ac4c96b 100644 --- a/internal/cli/freshen.go +++ b/internal/cli/freshen.go @@ -113,7 +113,7 @@ func (mfa *CLIApp) freshenManifestOperation(ctx *cli.Context) error { } // Handle dotfiles - if !includeDotfiles && pathIsHidden(relPath) { + if !includeDotfiles && mfer.IsHiddenPath(filepath.ToSlash(relPath)) { if info.IsDir() { return filepath.SkipDir } @@ -274,10 +274,14 @@ func (mfa *CLIApp) freshenManifestOperation(ctx *cli.Context) error { hashedFiles++ // Add to builder with computed hash - addFileToBuilder(builder, e.path, e.size, e.mtime, hash) + if err := addFileToBuilder(builder, e.path, e.size, e.mtime, hash); err != nil { + return fmt.Errorf("failed to add %s: %w", e.path, err) + } } else { // Use existing entry - addExistingToBuilder(builder, e.existing) + if err := addExistingToBuilder(builder, e.existing); err != nil { + return fmt.Errorf("failed to add %s: %w", e.path, err) + } } } @@ -360,38 +364,15 @@ func hashFile(r io.Reader, size int64, progress func(int64)) ([]byte, int64, err } // addFileToBuilder adds a new file entry to the builder -func addFileToBuilder(b *mfer.Builder, path string, size int64, mtime time.Time, hash []byte) { - // Use the builder's internal method indirectly by creating an entry - // Since Builder.AddFile reads from a reader, we need to use a different approach - // We'll access the builder's files directly through a custom method - b.AddFileWithHash(path, size, mtime, hash) +func addFileToBuilder(b *mfer.Builder, path string, size int64, mtime time.Time, hash []byte) error { + return b.AddFileWithHash(mfer.RelFilePath(path), mfer.FileSize(size), mfer.ModTime(mtime), hash) } // addExistingToBuilder adds an existing manifest entry to the builder -func addExistingToBuilder(b *mfer.Builder, entry *mfer.MFFilePath) { +func addExistingToBuilder(b *mfer.Builder, entry *mfer.MFFilePath) error { mtime := time.Unix(entry.Mtime.Seconds, int64(entry.Mtime.Nanos)) - if len(entry.Hashes) > 0 { - b.AddFileWithHash(entry.Path, entry.Size, mtime, entry.Hashes[0].MultiHash) + if len(entry.Hashes) == 0 { + return nil } -} - -// pathIsHidden checks if a path contains hidden components -func pathIsHidden(p string) bool { - // "." is not hidden, it's the current directory - if p == "." { - return false - } - // Check each path component - for p != "" && p != "." && p != "/" { - base := filepath.Base(p) - if len(base) > 0 && base[0] == '.' { - return true - } - parent := filepath.Dir(p) - if parent == p { - break - } - p = parent - } - return false + return b.AddFileWithHash(mfer.RelFilePath(entry.Path), mfer.FileSize(entry.Size), mfer.ModTime(mtime), entry.Hashes[0].MultiHash) } diff --git a/internal/cli/gen.go b/internal/cli/gen.go index f1d87f0..15d8633 100644 --- a/internal/cli/gen.go +++ b/internal/cli/gen.go @@ -13,29 +13,29 @@ import ( "github.com/spf13/afero" "github.com/urfave/cli/v2" "sneak.berlin/go/mfer/internal/log" - "sneak.berlin/go/mfer/internal/scanner" + "sneak.berlin/go/mfer/mfer" ) func (mfa *CLIApp) generateManifestOperation(ctx *cli.Context) error { log.Debug("generateManifestOperation()") - opts := &scanner.Options{ + opts := &mfer.ScannerOptions{ IncludeDotfiles: ctx.Bool("IncludeDotfiles"), FollowSymLinks: ctx.Bool("FollowSymLinks"), Fs: mfa.Fs, } - s := scanner.NewWithOptions(opts) + s := mfer.NewScannerWithOptions(opts) // Phase 1: Enumeration - collect paths and stat files args := ctx.Args() showProgress := ctx.Bool("progress") // Set up enumeration progress reporting - var enumProgress chan scanner.EnumerateStatus + var enumProgress chan mfer.EnumerateStatus var enumWg sync.WaitGroup if showProgress { - enumProgress = make(chan scanner.EnumerateStatus, 1) + enumProgress = make(chan mfer.EnumerateStatus, 1) enumWg.Add(1) go func() { defer enumWg.Done() @@ -117,10 +117,10 @@ func (mfa *CLIApp) generateManifestOperation(ctx *cli.Context) error { }() // Phase 2: Scan - read file contents and generate manifest - var scanProgress chan scanner.ScanStatus + var scanProgress chan mfer.ScanStatus var scanWg sync.WaitGroup if showProgress { - scanProgress = make(chan scanner.ScanStatus, 1) + scanProgress = make(chan mfer.ScanStatus, 1) scanWg.Add(1) go func() { defer scanWg.Done() diff --git a/mfer/builder.go b/mfer/builder.go index 585abc5..df5eca2 100644 --- a/mfer/builder.go +++ b/mfer/builder.go @@ -2,6 +2,7 @@ package mfer import ( "crypto/sha256" + "errors" "io" "sync" "time" @@ -9,9 +10,42 @@ import ( "github.com/multiformats/go-multihash" ) +// RelFilePath represents a relative file path within a manifest. +type RelFilePath string + +// AbsFilePath represents an absolute file path on the filesystem. +type AbsFilePath string + +// FileSize represents the size of a file in bytes. +type FileSize int64 + +// FileCount represents a count of files. +type FileCount int64 + +// ModTime represents a file's modification time. +type ModTime time.Time + +// UnixSeconds represents seconds since Unix epoch. +type UnixSeconds int64 + +// UnixNanos represents the nanosecond component of a timestamp (0-999999999). +type UnixNanos int32 + +// Timestamp converts ModTime to a protobuf Timestamp. +func (m ModTime) Timestamp() *Timestamp { + t := time.Time(m) + return &Timestamp{ + Seconds: t.Unix(), + Nanos: int32(t.Nanosecond()), + } +} + +// Multihash represents a multihash-encoded file hash (typically SHA2-256). +type Multihash []byte + // FileHashProgress reports progress during file hashing. type FileHashProgress struct { - BytesRead int64 // Total bytes read so far for the current file + BytesRead FileSize // Total bytes read so far for the current file } // Builder constructs a manifest by adding files one at a time. @@ -33,24 +67,24 @@ func NewBuilder() *Builder { // Progress updates are sent to the progress channel (if non-nil) without blocking. // Returns the number of bytes read. func (b *Builder) AddFile( - path string, - size int64, - mtime time.Time, + path RelFilePath, + size FileSize, + mtime ModTime, reader io.Reader, progress chan<- FileHashProgress, -) (int64, error) { +) (FileSize, error) { // Create hash writer h := sha256.New() // Read file in chunks, updating hash and progress - var totalRead int64 + var totalRead FileSize buf := make([]byte, 64*1024) // 64KB chunks for { n, err := reader.Read(buf) if n > 0 { h.Write(buf[:n]) - totalRead += int64(n) + totalRead += FileSize(n) sendFileHashProgress(progress, FileHashProgress{BytesRead: totalRead}) } if err == io.EOF { @@ -69,12 +103,12 @@ func (b *Builder) AddFile( // Create file entry entry := &MFFilePath{ - Path: path, - Size: size, + Path: string(path), + Size: int64(size), Hashes: []*MFFileChecksum{ {MultiHash: mh}, }, - Mtime: newTimestampFromTime(mtime), + Mtime: mtime.Timestamp(), } b.mu.Lock() @@ -104,19 +138,31 @@ func (b *Builder) FileCount() int { // AddFileWithHash adds a file entry with a pre-computed hash. // This is useful when the hash is already known (e.g., from an existing manifest). -func (b *Builder) AddFileWithHash(path string, size int64, mtime time.Time, hash []byte) { +// Returns an error if path is empty, size is negative, or hash is nil/empty. +func (b *Builder) AddFileWithHash(path RelFilePath, size FileSize, mtime ModTime, hash Multihash) error { + if path == "" { + return errors.New("path cannot be empty") + } + if size < 0 { + return errors.New("size cannot be negative") + } + if len(hash) == 0 { + return errors.New("hash cannot be nil or empty") + } + entry := &MFFilePath{ - Path: path, - Size: size, + Path: string(path), + Size: int64(size), Hashes: []*MFFileChecksum{ {MultiHash: hash}, }, - Mtime: newTimestampFromTime(mtime), + Mtime: mtime.Timestamp(), } b.mu.Lock() b.files = append(b.files, entry) b.mu.Unlock() + return nil } // Build finalizes the manifest and writes it to the writer. diff --git a/mfer/builder_test.go b/mfer/builder_test.go new file mode 100644 index 0000000..a92994f --- /dev/null +++ b/mfer/builder_test.go @@ -0,0 +1,104 @@ +package mfer + +import ( + "bytes" + "strings" + "testing" + "time" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func TestNewBuilder(t *testing.T) { + b := NewBuilder() + assert.NotNil(t, b) + assert.Equal(t, 0, b.FileCount()) +} + +func TestBuilderAddFile(t *testing.T) { + b := NewBuilder() + content := []byte("test content") + reader := bytes.NewReader(content) + + bytesRead, err := b.AddFile("test.txt", FileSize(len(content)), ModTime(time.Now()), reader, nil) + require.NoError(t, err) + assert.Equal(t, FileSize(len(content)), bytesRead) + assert.Equal(t, 1, b.FileCount()) +} + +func TestBuilderAddFileWithHash(t *testing.T) { + b := NewBuilder() + hash := make([]byte, 34) // SHA256 multihash is 34 bytes + + err := b.AddFileWithHash("test.txt", 100, ModTime(time.Now()), hash) + require.NoError(t, err) + assert.Equal(t, 1, b.FileCount()) +} + +func TestBuilderAddFileWithHashValidation(t *testing.T) { + t.Run("empty path", func(t *testing.T) { + b := NewBuilder() + hash := make([]byte, 34) + err := b.AddFileWithHash("", 100, ModTime(time.Now()), hash) + assert.Error(t, err) + assert.Contains(t, err.Error(), "path") + }) + + t.Run("negative size", func(t *testing.T) { + b := NewBuilder() + hash := make([]byte, 34) + err := b.AddFileWithHash("test.txt", -1, ModTime(time.Now()), hash) + assert.Error(t, err) + assert.Contains(t, err.Error(), "size") + }) + + t.Run("nil hash", func(t *testing.T) { + b := NewBuilder() + err := b.AddFileWithHash("test.txt", 100, ModTime(time.Now()), nil) + assert.Error(t, err) + assert.Contains(t, err.Error(), "hash") + }) + + t.Run("empty hash", func(t *testing.T) { + b := NewBuilder() + err := b.AddFileWithHash("test.txt", 100, ModTime(time.Now()), []byte{}) + assert.Error(t, err) + assert.Contains(t, err.Error(), "hash") + }) + + t.Run("valid inputs", func(t *testing.T) { + b := NewBuilder() + hash := make([]byte, 34) + err := b.AddFileWithHash("test.txt", 100, ModTime(time.Now()), hash) + assert.NoError(t, err) + assert.Equal(t, 1, b.FileCount()) + }) +} + +func TestBuilderBuild(t *testing.T) { + b := NewBuilder() + content := []byte("test content") + reader := bytes.NewReader(content) + + _, err := b.AddFile("test.txt", FileSize(len(content)), ModTime(time.Now()), reader, nil) + require.NoError(t, err) + + var buf bytes.Buffer + err = b.Build(&buf) + require.NoError(t, err) + + // Should have magic bytes + assert.True(t, strings.HasPrefix(buf.String(), MAGIC)) +} + +func TestBuilderBuildEmpty(t *testing.T) { + b := NewBuilder() + + var buf bytes.Buffer + err := b.Build(&buf) + require.NoError(t, err) + + // Should still produce valid manifest with 0 files + assert.True(t, strings.HasPrefix(buf.String(), MAGIC)) +} diff --git a/internal/scanner/scanner.go b/mfer/scanner.go similarity index 82% rename from internal/scanner/scanner.go rename to mfer/scanner.go index f286ebd..645b685 100644 --- a/internal/scanner/scanner.go +++ b/mfer/scanner.go @@ -1,4 +1,4 @@ -package scanner +package mfer import ( "context" @@ -13,7 +13,6 @@ import ( "github.com/dustin/go-humanize" "github.com/spf13/afero" "sneak.berlin/go/mfer/internal/log" - "sneak.berlin/go/mfer/mfer" ) // Phase 1: Enumeration @@ -23,8 +22,8 @@ import ( // EnumerateStatus contains progress information for the enumeration phase. type EnumerateStatus struct { - FilesFound int64 // Number of files discovered so far - BytesFound int64 // Total size of discovered files (from stat) + FilesFound FileCount // Number of files discovered so far + BytesFound FileSize // Total size of discovered files (from stat) } // Phase 2: Scan (ToManifest) @@ -34,16 +33,16 @@ type EnumerateStatus struct { // ScanStatus contains progress information for the scan phase. type ScanStatus struct { - TotalFiles int64 // Total number of files to scan - ScannedFiles int64 // Number of files scanned so far - TotalBytes int64 // Total bytes to read (sum of all file sizes) - ScannedBytes int64 // Bytes read so far + TotalFiles FileCount // Total number of files to scan + ScannedFiles FileCount // Number of files scanned so far + TotalBytes FileSize // Total bytes to read (sum of all file sizes) + ScannedBytes FileSize // Bytes read so far BytesPerSec float64 // Current throughput rate ETA time.Duration // Estimated time to completion } -// Options configures scanner behavior. -type Options struct { +// ScannerOptions configures scanner behavior. +type ScannerOptions struct { IncludeDotfiles bool // Include files and directories starting with a dot (default: exclude) FollowSymLinks bool // Resolve symlinks instead of skipping them Fs afero.Fs // Filesystem to use, defaults to OsFs if nil @@ -51,30 +50,31 @@ type Options struct { // FileEntry represents a file that has been enumerated. type FileEntry struct { - Path string // Relative path (used in manifest) - AbsPath string // Absolute path (used for reading file content) - Size int64 // File size in bytes - Mtime time.Time // Last modification time - Ctime time.Time // Creation time (platform-dependent) + Path RelFilePath // Relative path (used in manifest) + AbsPath AbsFilePath // Absolute path (used for reading file content) + Size FileSize // File size in bytes + Mtime ModTime // Last modification time + Ctime time.Time // Creation time (platform-dependent) } // Scanner accumulates files and generates manifests from them. type Scanner struct { - mu sync.RWMutex - files []*FileEntry - options *Options - fs afero.Fs + mu sync.RWMutex + files []*FileEntry + totalBytes FileSize // cached sum of all file sizes + options *ScannerOptions + fs afero.Fs } -// New creates a new Scanner with default options. -func New() *Scanner { - return NewWithOptions(nil) +// NewScanner creates a new Scanner with default options. +func NewScanner() *Scanner { + return NewScannerWithOptions(nil) } -// NewWithOptions creates a new Scanner with the given options. -func NewWithOptions(opts *Options) *Scanner { +// NewScannerWithOptions creates a new Scanner with the given options. +func NewScannerWithOptions(opts *ScannerOptions) *Scanner { if opts == nil { - opts = &Options{} + opts = &ScannerOptions{} } fs := opts.Fs if fs == nil { @@ -154,7 +154,7 @@ func (s *Scanner) enumerateFS(afs afero.Fs, basePath string, progress chan<- Enu if err != nil { return err } - if !s.options.IncludeDotfiles && pathIsHidden(p) { + if !s.options.IncludeDotfiles && IsHiddenPath(p) { if info.IsDir() { return filepath.SkipDir } @@ -206,21 +206,19 @@ func (s *Scanner) enumerateFileWithInfo(filePath string, basePath string, info f } entry := &FileEntry{ - Path: cleanPath, - AbsPath: absPath, - Size: info.Size(), - Mtime: info.ModTime(), + Path: RelFilePath(cleanPath), + AbsPath: AbsFilePath(absPath), + Size: FileSize(info.Size()), + Mtime: ModTime(info.ModTime()), // Note: Ctime not available from fs.FileInfo on all platforms // Will need platform-specific code to extract it } s.mu.Lock() s.files = append(s.files, entry) - filesFound := int64(len(s.files)) - var bytesFound int64 - for _, f := range s.files { - bytesFound += f.Size - } + s.totalBytes += entry.Size + filesFound := FileCount(len(s.files)) + bytesFound := s.totalBytes s.mu.Unlock() sendEnumerateStatus(progress, EnumerateStatus{ @@ -241,21 +239,17 @@ func (s *Scanner) Files() []*FileEntry { } // FileCount returns the number of files in the scanner. -func (s *Scanner) FileCount() int64 { +func (s *Scanner) FileCount() FileCount { s.mu.RLock() defer s.mu.RUnlock() - return int64(len(s.files)) + return FileCount(len(s.files)) } // TotalBytes returns the total size of all files in the scanner. -func (s *Scanner) TotalBytes() int64 { +func (s *Scanner) TotalBytes() FileSize { s.mu.RLock() defer s.mu.RUnlock() - var total int64 - for _, f := range s.files { - total += f.Size - } - return total + return s.totalBytes } // ToManifest reads all file contents, computes hashes, and generates a manifest. @@ -270,17 +264,17 @@ func (s *Scanner) ToManifest(ctx context.Context, w io.Writer, progress chan<- S s.mu.RLock() files := make([]*FileEntry, len(s.files)) copy(files, s.files) - totalFiles := int64(len(files)) - var totalBytes int64 + totalFiles := FileCount(len(files)) + var totalBytes FileSize for _, f := range files { totalBytes += f.Size } s.mu.RUnlock() - builder := mfer.NewBuilder() + builder := NewBuilder() - var scannedFiles int64 - var scannedBytes int64 + var scannedFiles FileCount + var scannedBytes FileSize lastProgressTime := time.Now() startTime := time.Now() @@ -293,18 +287,18 @@ func (s *Scanner) ToManifest(ctx context.Context, w io.Writer, progress chan<- S } // Open file - f, err := s.fs.Open(entry.AbsPath) + f, err := s.fs.Open(string(entry.AbsPath)) if err != nil { return err } // Create progress channel for this file - var fileProgress chan mfer.FileHashProgress + var fileProgress chan FileHashProgress var wg sync.WaitGroup if progress != nil { - fileProgress = make(chan mfer.FileHashProgress, 1) + fileProgress = make(chan FileHashProgress, 1) wg.Add(1) - go func(baseScannedBytes int64) { + go func(baseScannedBytes FileSize) { defer wg.Done() for p := range fileProgress { // Send progress at most once per second @@ -382,9 +376,10 @@ func (s *Scanner) ToManifest(ctx context.Context, w io.Writer, progress chan<- S return builder.Build(w) } -// pathIsHidden returns true if the path or any of its parent directories +// IsHiddenPath returns true if the path or any of its parent directories // start with a dot (hidden files/directories). -func pathIsHidden(p string) bool { +// The path should use forward slashes. +func IsHiddenPath(p string) bool { tp := path.Clean(p) if strings.HasPrefix(tp, ".") { return true diff --git a/internal/scanner/scanner_test.go b/mfer/scanner_test.go similarity index 69% rename from internal/scanner/scanner_test.go rename to mfer/scanner_test.go index 67edc98..f30c67b 100644 --- a/internal/scanner/scanner_test.go +++ b/mfer/scanner_test.go @@ -1,4 +1,4 @@ -package scanner +package mfer import ( "bytes" @@ -11,77 +11,77 @@ import ( "github.com/stretchr/testify/require" ) -func TestNew(t *testing.T) { - s := New() +func TestNewScanner(t *testing.T) { + s := NewScanner() assert.NotNil(t, s) - assert.Equal(t, int64(0), s.FileCount()) - assert.Equal(t, int64(0), s.TotalBytes()) + assert.Equal(t, FileCount(0), s.FileCount()) + assert.Equal(t, FileSize(0), s.TotalBytes()) } -func TestNewWithOptions(t *testing.T) { +func TestNewScannerWithOptions(t *testing.T) { t.Run("nil options", func(t *testing.T) { - s := NewWithOptions(nil) + s := NewScannerWithOptions(nil) assert.NotNil(t, s) }) t.Run("with options", func(t *testing.T) { fs := afero.NewMemMapFs() - opts := &Options{ + opts := &ScannerOptions{ IncludeDotfiles: true, FollowSymLinks: true, Fs: fs, } - s := NewWithOptions(opts) + s := NewScannerWithOptions(opts) assert.NotNil(t, s) }) } -func TestEnumerateFile(t *testing.T) { +func TestScannerEnumerateFile(t *testing.T) { fs := afero.NewMemMapFs() require.NoError(t, afero.WriteFile(fs, "/test.txt", []byte("hello world"), 0644)) - s := NewWithOptions(&Options{Fs: fs}) + s := NewScannerWithOptions(&ScannerOptions{Fs: fs}) err := s.EnumerateFile("/test.txt") require.NoError(t, err) - assert.Equal(t, int64(1), s.FileCount()) - assert.Equal(t, int64(11), s.TotalBytes()) + assert.Equal(t, FileCount(1), s.FileCount()) + assert.Equal(t, FileSize(11), s.TotalBytes()) files := s.Files() require.Len(t, files, 1) - assert.Equal(t, "test.txt", files[0].Path) - assert.Equal(t, int64(11), files[0].Size) + assert.Equal(t, RelFilePath("test.txt"), files[0].Path) + assert.Equal(t, FileSize(11), files[0].Size) } -func TestEnumerateFileMissing(t *testing.T) { +func TestScannerEnumerateFileMissing(t *testing.T) { fs := afero.NewMemMapFs() - s := NewWithOptions(&Options{Fs: fs}) + s := NewScannerWithOptions(&ScannerOptions{Fs: fs}) err := s.EnumerateFile("/nonexistent.txt") assert.Error(t, err) } -func TestEnumeratePath(t *testing.T) { +func TestScannerEnumeratePath(t *testing.T) { fs := afero.NewMemMapFs() require.NoError(t, fs.MkdirAll("/testdir/subdir", 0755)) require.NoError(t, afero.WriteFile(fs, "/testdir/file1.txt", []byte("one"), 0644)) require.NoError(t, afero.WriteFile(fs, "/testdir/file2.txt", []byte("two"), 0644)) require.NoError(t, afero.WriteFile(fs, "/testdir/subdir/file3.txt", []byte("three"), 0644)) - s := NewWithOptions(&Options{Fs: fs}) + s := NewScannerWithOptions(&ScannerOptions{Fs: fs}) err := s.EnumeratePath("/testdir", nil) require.NoError(t, err) - assert.Equal(t, int64(3), s.FileCount()) - assert.Equal(t, int64(3+3+5), s.TotalBytes()) + assert.Equal(t, FileCount(3), s.FileCount()) + assert.Equal(t, FileSize(3+3+5), s.TotalBytes()) } -func TestEnumeratePathWithProgress(t *testing.T) { +func TestScannerEnumeratePathWithProgress(t *testing.T) { fs := afero.NewMemMapFs() require.NoError(t, fs.MkdirAll("/testdir", 0755)) require.NoError(t, afero.WriteFile(fs, "/testdir/file1.txt", []byte("one"), 0644)) require.NoError(t, afero.WriteFile(fs, "/testdir/file2.txt", []byte("two"), 0644)) - s := NewWithOptions(&Options{Fs: fs}) + s := NewScannerWithOptions(&ScannerOptions{Fs: fs}) progress := make(chan EnumerateStatus, 10) err := s.EnumeratePath("/testdir", progress) @@ -95,25 +95,25 @@ func TestEnumeratePathWithProgress(t *testing.T) { assert.NotEmpty(t, updates) // Final update should show all files final := updates[len(updates)-1] - assert.Equal(t, int64(2), final.FilesFound) - assert.Equal(t, int64(6), final.BytesFound) + assert.Equal(t, FileCount(2), final.FilesFound) + assert.Equal(t, FileSize(6), final.BytesFound) } -func TestEnumeratePaths(t *testing.T) { +func TestScannerEnumeratePaths(t *testing.T) { fs := afero.NewMemMapFs() require.NoError(t, fs.MkdirAll("/dir1", 0755)) require.NoError(t, fs.MkdirAll("/dir2", 0755)) require.NoError(t, afero.WriteFile(fs, "/dir1/a.txt", []byte("aaa"), 0644)) require.NoError(t, afero.WriteFile(fs, "/dir2/b.txt", []byte("bbb"), 0644)) - s := NewWithOptions(&Options{Fs: fs}) + s := NewScannerWithOptions(&ScannerOptions{Fs: fs}) err := s.EnumeratePaths(nil, "/dir1", "/dir2") require.NoError(t, err) - assert.Equal(t, int64(2), s.FileCount()) + assert.Equal(t, FileCount(2), s.FileCount()) } -func TestExcludeDotfiles(t *testing.T) { +func TestScannerExcludeDotfiles(t *testing.T) { fs := afero.NewMemMapFs() require.NoError(t, fs.MkdirAll("/testdir/.hidden", 0755)) require.NoError(t, afero.WriteFile(fs, "/testdir/visible.txt", []byte("visible"), 0644)) @@ -121,54 +121,31 @@ func TestExcludeDotfiles(t *testing.T) { require.NoError(t, afero.WriteFile(fs, "/testdir/.hidden/inside.txt", []byte("inside"), 0644)) t.Run("exclude by default", func(t *testing.T) { - s := NewWithOptions(&Options{Fs: fs, IncludeDotfiles: false}) + s := NewScannerWithOptions(&ScannerOptions{Fs: fs, IncludeDotfiles: false}) err := s.EnumeratePath("/testdir", nil) require.NoError(t, err) - assert.Equal(t, int64(1), s.FileCount()) + assert.Equal(t, FileCount(1), s.FileCount()) files := s.Files() - assert.Equal(t, "visible.txt", files[0].Path) + assert.Equal(t, RelFilePath("visible.txt"), files[0].Path) }) t.Run("include when enabled", func(t *testing.T) { - s := NewWithOptions(&Options{Fs: fs, IncludeDotfiles: true}) + s := NewScannerWithOptions(&ScannerOptions{Fs: fs, IncludeDotfiles: true}) err := s.EnumeratePath("/testdir", nil) require.NoError(t, err) - assert.Equal(t, int64(3), s.FileCount()) + assert.Equal(t, FileCount(3), s.FileCount()) }) } -func TestPathIsHidden(t *testing.T) { - tests := []struct { - path string - hidden bool - }{ - {"file.txt", false}, - {".hidden", true}, - {"dir/file.txt", false}, - {"dir/.hidden", true}, - {".dir/file.txt", true}, - {"/absolute/path", false}, - {"/absolute/.hidden", true}, - {"./relative", false}, // path.Clean removes leading ./ - {"a/b/c/.d/e", true}, - } - - for _, tt := range tests { - t.Run(tt.path, func(t *testing.T) { - assert.Equal(t, tt.hidden, pathIsHidden(tt.path), "pathIsHidden(%q)", tt.path) - }) - } -} - -func TestToManifest(t *testing.T) { +func TestScannerToManifest(t *testing.T) { fs := afero.NewMemMapFs() require.NoError(t, fs.MkdirAll("/testdir", 0755)) require.NoError(t, afero.WriteFile(fs, "/testdir/file1.txt", []byte("content one"), 0644)) require.NoError(t, afero.WriteFile(fs, "/testdir/file2.txt", []byte("content two"), 0644)) - s := NewWithOptions(&Options{Fs: fs}) + s := NewScannerWithOptions(&ScannerOptions{Fs: fs}) err := s.EnumeratePath("/testdir", nil) require.NoError(t, err) @@ -178,15 +155,15 @@ func TestToManifest(t *testing.T) { // Manifest should have magic bytes assert.True(t, buf.Len() > 0) - assert.Equal(t, "ZNAVSRFG", string(buf.Bytes()[:8])) + assert.Equal(t, MAGIC, string(buf.Bytes()[:8])) } -func TestToManifestWithProgress(t *testing.T) { +func TestScannerToManifestWithProgress(t *testing.T) { fs := afero.NewMemMapFs() require.NoError(t, fs.MkdirAll("/testdir", 0755)) require.NoError(t, afero.WriteFile(fs, "/testdir/file.txt", bytes.Repeat([]byte("x"), 1000), 0644)) - s := NewWithOptions(&Options{Fs: fs}) + s := NewScannerWithOptions(&ScannerOptions{Fs: fs}) err := s.EnumeratePath("/testdir", nil) require.NoError(t, err) @@ -204,13 +181,13 @@ func TestToManifestWithProgress(t *testing.T) { assert.NotEmpty(t, updates) // Final update should show completion final := updates[len(updates)-1] - assert.Equal(t, int64(1), final.TotalFiles) - assert.Equal(t, int64(1), final.ScannedFiles) - assert.Equal(t, int64(1000), final.TotalBytes) - assert.Equal(t, int64(1000), final.ScannedBytes) + assert.Equal(t, FileCount(1), final.TotalFiles) + assert.Equal(t, FileCount(1), final.ScannedFiles) + assert.Equal(t, FileSize(1000), final.TotalBytes) + assert.Equal(t, FileSize(1000), final.ScannedBytes) } -func TestToManifestContextCancellation(t *testing.T) { +func TestScannerToManifestContextCancellation(t *testing.T) { fs := afero.NewMemMapFs() require.NoError(t, fs.MkdirAll("/testdir", 0755)) // Create many files to ensure we have time to cancel @@ -219,7 +196,7 @@ func TestToManifestContextCancellation(t *testing.T) { require.NoError(t, afero.WriteFile(fs, "/testdir/"+name, bytes.Repeat([]byte("x"), 100), 0644)) } - s := NewWithOptions(&Options{Fs: fs}) + s := NewScannerWithOptions(&ScannerOptions{Fs: fs}) err := s.EnumeratePath("/testdir", nil) require.NoError(t, err) @@ -231,9 +208,9 @@ func TestToManifestContextCancellation(t *testing.T) { assert.ErrorIs(t, err, context.Canceled) } -func TestToManifestEmptyScanner(t *testing.T) { +func TestScannerToManifestEmptyScanner(t *testing.T) { fs := afero.NewMemMapFs() - s := NewWithOptions(&Options{Fs: fs}) + s := NewScannerWithOptions(&ScannerOptions{Fs: fs}) var buf bytes.Buffer err := s.ToManifest(context.Background(), &buf, nil) @@ -241,14 +218,14 @@ func TestToManifestEmptyScanner(t *testing.T) { // Should still produce a valid manifest assert.True(t, buf.Len() > 0) - assert.Equal(t, "ZNAVSRFG", string(buf.Bytes()[:8])) + assert.Equal(t, MAGIC, string(buf.Bytes()[:8])) } -func TestFilesCopiesSlice(t *testing.T) { +func TestScannerFilesCopiesSlice(t *testing.T) { fs := afero.NewMemMapFs() require.NoError(t, afero.WriteFile(fs, "/test.txt", []byte("hello"), 0644)) - s := NewWithOptions(&Options{Fs: fs}) + s := NewScannerWithOptions(&ScannerOptions{Fs: fs}) require.NoError(t, s.EnumerateFile("/test.txt")) files1 := s.Files() @@ -258,7 +235,7 @@ func TestFilesCopiesSlice(t *testing.T) { assert.NotSame(t, &files1[0], &files2[0]) } -func TestEnumerateFS(t *testing.T) { +func TestScannerEnumerateFS(t *testing.T) { fs := afero.NewMemMapFs() require.NoError(t, fs.MkdirAll("/testdir/sub", 0755)) require.NoError(t, afero.WriteFile(fs, "/testdir/file.txt", []byte("hello"), 0644)) @@ -267,11 +244,11 @@ func TestEnumerateFS(t *testing.T) { // Create a basepath filesystem baseFs := afero.NewBasePathFs(fs, "/testdir") - s := NewWithOptions(&Options{Fs: fs}) + s := NewScannerWithOptions(&ScannerOptions{Fs: fs}) err := s.EnumerateFS(baseFs, "/testdir", nil) require.NoError(t, err) - assert.Equal(t, int64(2), s.FileCount()) + assert.Equal(t, FileCount(2), s.FileCount()) } func TestSendEnumerateStatusNonBlocking(t *testing.T) { @@ -317,27 +294,27 @@ func TestSendStatusNilChannel(t *testing.T) { sendScanStatus(nil, ScanStatus{}) } -func TestFileEntryFields(t *testing.T) { +func TestScannerFileEntryFields(t *testing.T) { fs := afero.NewMemMapFs() now := time.Now().Truncate(time.Second) require.NoError(t, afero.WriteFile(fs, "/test.txt", []byte("content"), 0644)) require.NoError(t, fs.Chtimes("/test.txt", now, now)) - s := NewWithOptions(&Options{Fs: fs}) + s := NewScannerWithOptions(&ScannerOptions{Fs: fs}) require.NoError(t, s.EnumerateFile("/test.txt")) files := s.Files() require.Len(t, files, 1) entry := files[0] - assert.Equal(t, "test.txt", entry.Path) - assert.Contains(t, entry.AbsPath, "test.txt") - assert.Equal(t, int64(7), entry.Size) + assert.Equal(t, RelFilePath("test.txt"), entry.Path) + assert.Contains(t, string(entry.AbsPath), "test.txt") + assert.Equal(t, FileSize(7), entry.Size) // Mtime should be set (within a second of now) - assert.WithinDuration(t, now, entry.Mtime, 2*time.Second) + assert.WithinDuration(t, now, time.Time(entry.Mtime), 2*time.Second) } -func TestLargeFileEnumeration(t *testing.T) { +func TestScannerLargeFileEnumeration(t *testing.T) { fs := afero.NewMemMapFs() require.NoError(t, fs.MkdirAll("/testdir", 0755)) @@ -347,7 +324,7 @@ func TestLargeFileEnumeration(t *testing.T) { require.NoError(t, afero.WriteFile(fs, name, []byte("data"), 0644)) } - s := NewWithOptions(&Options{Fs: fs}) + s := NewScannerWithOptions(&ScannerOptions{Fs: fs}) progress := make(chan EnumerateStatus, 200) err := s.EnumeratePath("/testdir", progress) @@ -357,6 +334,29 @@ func TestLargeFileEnumeration(t *testing.T) { for range progress { } - assert.Equal(t, int64(100), s.FileCount()) - assert.Equal(t, int64(400), s.TotalBytes()) // 100 * 4 bytes + assert.Equal(t, FileCount(100), s.FileCount()) + assert.Equal(t, FileSize(400), s.TotalBytes()) // 100 * 4 bytes +} + +func TestIsHiddenPath(t *testing.T) { + tests := []struct { + path string + hidden bool + }{ + {"file.txt", false}, + {".hidden", true}, + {"dir/file.txt", false}, + {"dir/.hidden", true}, + {".dir/file.txt", true}, + {"/absolute/path", false}, + {"/absolute/.hidden", true}, + {"./relative", false}, // path.Clean removes leading ./ + {"a/b/c/.d/e", true}, + } + + for _, tt := range tests { + t.Run(tt.path, func(t *testing.T) { + assert.Equal(t, tt.hidden, IsHiddenPath(tt.path), "IsHiddenPath(%q)", tt.path) + }) + } } diff --git a/mfer/url.go b/mfer/url.go new file mode 100644 index 0000000..fb1da96 --- /dev/null +++ b/mfer/url.go @@ -0,0 +1,53 @@ +package mfer + +import ( + "net/url" + "strings" +) + +// ManifestURL represents a URL pointing to a manifest file. +type ManifestURL string + +// FileURL represents a URL pointing to a file to be fetched. +type FileURL string + +// BaseURL represents a base URL for constructing file URLs. +type BaseURL string + +// JoinPath safely joins a relative file path to a base URL. +// The path is properly URL-encoded to prevent path traversal. +func (b BaseURL) JoinPath(path RelFilePath) (FileURL, error) { + base, err := url.Parse(string(b)) + if err != nil { + return "", err + } + + // Ensure base path ends with / + if !strings.HasSuffix(base.Path, "/") { + base.Path += "/" + } + + // Parse and encode the relative path + ref, err := url.Parse(url.PathEscape(string(path))) + if err != nil { + return "", err + } + + resolved := base.ResolveReference(ref) + return FileURL(resolved.String()), nil +} + +// String returns the URL as a string. +func (b BaseURL) String() string { + return string(b) +} + +// String returns the URL as a string. +func (f FileURL) String() string { + return string(f) +} + +// String returns the URL as a string. +func (m ManifestURL) String() string { + return string(m) +}