Add testable CLI with dependency injection and new scanner/checker packages

Major changes:
- Refactor CLI to accept injected I/O streams and filesystem (afero.Fs)
  for testing without touching the real filesystem
- Add RunOptions struct and RunWithOptions() for configurable CLI execution
- Add internal/scanner package with two-phase manifest generation:
  - Phase 1 (Enumeration): walk directories, collect metadata
  - Phase 2 (Scan): read contents, compute hashes, write manifest
- Add internal/checker package for manifest verification with progress
  reporting and channel-based result streaming
- Add mfer/builder.go for incremental manifest construction
- Add --no-extra-files flag to check command to detect files not in manifest
- Add timing summaries showing file count, size, elapsed time, and throughput
- Add comprehensive tests using afero.MemMapFs (no real filesystem access)
- Add contrib/usage.sh integration test script
- Fix banner ASCII art alignment (consistent spacing)
- Fix verbosity levels so summaries display at default log level
- Update internal/log to support configurable output writers
This commit is contained in:
2025-12-17 11:00:55 -08:00
parent 13f39d598f
commit dc2ea47f6a
15 changed files with 1744 additions and 81 deletions

281
internal/checker/checker.go Normal file
View File

@@ -0,0 +1,281 @@
package checker
import (
"bytes"
"context"
"crypto/sha256"
"errors"
"io"
"os"
"path/filepath"
"github.com/multiformats/go-multihash"
"github.com/spf13/afero"
"sneak.berlin/go/mfer/mfer"
)
// Result represents the outcome of checking a single file.
type Result struct {
Path string
Status Status
Message string
}
// Status represents the verification status of a file.
type Status int
const (
StatusOK Status = iota
StatusMissing
StatusSizeMismatch
StatusHashMismatch
StatusExtra // File exists on disk but not in manifest
StatusError
)
func (s Status) String() string {
switch s {
case StatusOK:
return "OK"
case StatusMissing:
return "MISSING"
case StatusSizeMismatch:
return "SIZE_MISMATCH"
case StatusHashMismatch:
return "HASH_MISMATCH"
case StatusExtra:
return "EXTRA"
case StatusError:
return "ERROR"
default:
return "UNKNOWN"
}
}
// CheckStatus contains progress information for the check operation.
type CheckStatus struct {
TotalFiles int64
CheckedFiles int64
TotalBytes int64
CheckedBytes int64
BytesPerSec float64
Failures int64
}
// Checker verifies files against a manifest.
type Checker struct {
basePath string
files []*mfer.MFFilePath
fs afero.Fs
// manifestPaths is a set of paths in the manifest for quick lookup
manifestPaths map[string]struct{}
}
// NewChecker creates a new Checker for the given manifest, base path, and filesystem.
// The basePath is the directory relative to which manifest paths are resolved.
// If fs is nil, the real filesystem (OsFs) is used.
func NewChecker(manifestPath string, basePath string, fs afero.Fs) (*Checker, error) {
if fs == nil {
fs = afero.NewOsFs()
}
m, err := mfer.NewManifestFromFile(fs, manifestPath)
if err != nil {
return nil, err
}
abs, err := filepath.Abs(basePath)
if err != nil {
return nil, err
}
files := m.Files()
manifestPaths := make(map[string]struct{}, len(files))
for _, f := range files {
manifestPaths[f.Path] = struct{}{}
}
return &Checker{
basePath: abs,
files: files,
fs: fs,
manifestPaths: manifestPaths,
}, nil
}
// FileCount returns the number of files in the manifest.
func (c *Checker) FileCount() int64 {
return int64(len(c.files))
}
// TotalBytes returns the total size of all files in the manifest.
func (c *Checker) TotalBytes() int64 {
var total int64
for _, f := range c.files {
total += f.Size
}
return total
}
// Check verifies all files against the manifest.
// Results are sent to the results channel as files are checked.
// Progress updates are sent to the progress channel approximately once per second.
// Both channels are closed when the method returns.
func (c *Checker) Check(ctx context.Context, results chan<- Result, progress chan<- CheckStatus) error {
if results != nil {
defer close(results)
}
if progress != nil {
defer close(progress)
}
totalFiles := int64(len(c.files))
totalBytes := c.TotalBytes()
var checkedFiles int64
var checkedBytes int64
var failures int64
for _, entry := range c.files {
select {
case <-ctx.Done():
return ctx.Err()
default:
}
result := c.checkFile(entry, &checkedBytes)
if result.Status != StatusOK {
failures++
}
checkedFiles++
if results != nil {
results <- result
}
// Send progress (simplified - every file for now)
if progress != nil {
sendCheckStatus(progress, CheckStatus{
TotalFiles: totalFiles,
CheckedFiles: checkedFiles,
TotalBytes: totalBytes,
CheckedBytes: checkedBytes,
Failures: failures,
})
}
}
return nil
}
func (c *Checker) checkFile(entry *mfer.MFFilePath, checkedBytes *int64) Result {
absPath := filepath.Join(c.basePath, entry.Path)
// Check if file exists
info, err := c.fs.Stat(absPath)
if err != nil {
if errors.Is(err, afero.ErrFileNotFound) || errors.Is(err, errors.New("file does not exist")) {
return Result{Path: entry.Path, Status: StatusMissing, Message: "file not found"}
}
// Check for "file does not exist" style errors
exists, _ := afero.Exists(c.fs, absPath)
if !exists {
return Result{Path: entry.Path, Status: StatusMissing, Message: "file not found"}
}
return Result{Path: entry.Path, Status: StatusError, Message: err.Error()}
}
// Check size
if info.Size() != entry.Size {
*checkedBytes += info.Size()
return Result{
Path: entry.Path,
Status: StatusSizeMismatch,
Message: "size mismatch",
}
}
// Open and hash file
f, err := c.fs.Open(absPath)
if err != nil {
return Result{Path: entry.Path, Status: StatusError, Message: err.Error()}
}
defer f.Close()
h := sha256.New()
n, err := io.Copy(h, f)
if err != nil {
return Result{Path: entry.Path, Status: StatusError, Message: err.Error()}
}
*checkedBytes += n
// Encode as multihash and compare
computed, err := multihash.Encode(h.Sum(nil), multihash.SHA2_256)
if err != nil {
return Result{Path: entry.Path, Status: StatusError, Message: err.Error()}
}
// Check against all hashes in manifest (at least one must match)
for _, hash := range entry.Hashes {
if bytes.Equal(computed, hash.MultiHash) {
return Result{Path: entry.Path, Status: StatusOK}
}
}
return Result{Path: entry.Path, Status: StatusHashMismatch, Message: "hash mismatch"}
}
// FindExtraFiles walks the filesystem and reports files not in the manifest.
// Results are sent to the results channel. The channel is closed when done.
func (c *Checker) FindExtraFiles(ctx context.Context, results chan<- Result) error {
if results != nil {
defer close(results)
}
return afero.Walk(c.fs, c.basePath, func(path string, info os.FileInfo, err error) error {
if err != nil {
return err
}
select {
case <-ctx.Done():
return ctx.Err()
default:
}
// Skip directories
if info.IsDir() {
return nil
}
// Get relative path
relPath, err := filepath.Rel(c.basePath, path)
if err != nil {
return err
}
// Check if path is in manifest
if _, exists := c.manifestPaths[relPath]; !exists {
if results != nil {
results <- Result{
Path: relPath,
Status: StatusExtra,
Message: "not in manifest",
}
}
}
return nil
})
}
// sendCheckStatus sends a status update without blocking.
func sendCheckStatus(ch chan<- CheckStatus, status CheckStatus) {
if ch == nil {
return
}
select {
case ch <- status:
default:
}
}

View File

@@ -1,13 +1,110 @@
package cli
import (
"errors"
"fmt"
"time"
"github.com/apex/log"
"github.com/urfave/cli/v2"
"sneak.berlin/go/mfer/internal/checker"
"sneak.berlin/go/mfer/internal/log"
)
func (mfa *CLIApp) checkManifestOperation(c *cli.Context) error {
log.WithError(errors.New("unimplemented"))
func (mfa *CLIApp) checkManifestOperation(ctx *cli.Context) error {
log.Debug("checkManifestOperation()")
// Get manifest path from args, default to index.mf
manifestPath := "index.mf"
if ctx.Args().Len() > 0 {
manifestPath = ctx.Args().Get(0)
}
basePath := ctx.String("base")
showProgress := ctx.Bool("progress")
log.Debugf("checking manifest %s with base %s", manifestPath, basePath)
// Create checker
chk, err := checker.NewChecker(manifestPath, basePath, mfa.Fs)
if err != nil {
return fmt.Errorf("failed to load manifest: %w", err)
}
log.Debugf("manifest contains %d files, %d bytes", chk.FileCount(), chk.TotalBytes())
// Set up results channel
results := make(chan checker.Result, 1)
// Set up progress channel
var progress chan checker.CheckStatus
if showProgress {
progress = make(chan checker.CheckStatus, 1)
go func() {
for status := range progress {
log.Progressf("Checking: %d/%d files, %d failures",
status.CheckedFiles,
status.TotalFiles,
status.Failures)
}
log.ProgressDone()
}()
}
// Process results in a goroutine
var failures int64
done := make(chan struct{})
go func() {
for result := range results {
if result.Status != checker.StatusOK {
failures++
log.Infof("%s: %s (%s)", result.Status, result.Path, result.Message)
} else {
log.Debugf("%s: %s", result.Status, result.Path)
}
}
close(done)
}()
// Run check
err = chk.Check(ctx.Context, results, progress)
if err != nil {
return fmt.Errorf("check failed: %w", err)
}
// Wait for results processing to complete
<-done
// Check for extra files if requested
if ctx.Bool("no-extra-files") {
extraResults := make(chan checker.Result, 1)
extraDone := make(chan struct{})
go func() {
for result := range extraResults {
failures++
log.Infof("%s: %s (%s)", result.Status, result.Path, result.Message)
}
close(extraDone)
}()
err = chk.FindExtraFiles(ctx.Context, extraResults)
if err != nil {
return fmt.Errorf("failed to check for extra files: %w", err)
}
<-extraDone
}
if !ctx.Bool("quiet") {
elapsed := time.Since(mfa.startupTime).Seconds()
rate := float64(chk.TotalBytes()) / elapsed / 1e6
if failures == 0 {
log.Infof("checked %d files (%.1f MB) in %.1fs (%.1f MB/s): all OK", chk.FileCount(), float64(chk.TotalBytes())/1e6, elapsed, rate)
} else {
log.Infof("checked %d files (%.1f MB) in %.1fs (%.1f MB/s): %d failed", chk.FileCount(), float64(chk.TotalBytes())/1e6, elapsed, rate, failures)
}
}
if failures > 0 {
mfa.exitCode = 1
}
return nil
}

View File

@@ -1,7 +1,10 @@
package cli
import (
"io"
"os"
"github.com/spf13/afero"
)
var NO_COLOR bool
@@ -13,13 +16,50 @@ func init() {
}
}
func Run(Appname, Version, Gitrev string) int {
m := &CLIApp{}
m.appname = Appname
m.version = Version
m.gitrev = Gitrev
m.exitCode = 0
// RunOptions contains all configuration for running the CLI application.
type RunOptions struct {
Appname string
Version string
Gitrev string
Args []string
Stdin io.Reader
Stdout io.Writer
Stderr io.Writer
Fs afero.Fs
}
m.run()
// DefaultRunOptions returns RunOptions configured for normal CLI execution.
func DefaultRunOptions(appname, version, gitrev string) *RunOptions {
return &RunOptions{
Appname: appname,
Version: version,
Gitrev: gitrev,
Args: os.Args,
Stdin: os.Stdin,
Stdout: os.Stdout,
Stderr: os.Stderr,
Fs: afero.NewOsFs(),
}
}
// Run creates and runs the CLI application with default options.
func Run(appname, version, gitrev string) int {
return RunWithOptions(DefaultRunOptions(appname, version, gitrev))
}
// RunWithOptions creates and runs the CLI application with the given options.
func RunWithOptions(opts *RunOptions) int {
m := &CLIApp{
appname: opts.Appname,
version: opts.Version,
gitrev: opts.Gitrev,
exitCode: 0,
Stdin: opts.Stdin,
Stdout: opts.Stdout,
Stderr: opts.Stderr,
Fs: opts.Fs,
}
m.run(opts.Args)
return m.exitCode
}

View File

@@ -1,12 +1,306 @@
package cli
import (
"bytes"
"testing"
"github.com/spf13/afero"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
urfcli "github.com/urfave/cli/v2"
)
func init() {
// Prevent urfave/cli from calling os.Exit during tests
urfcli.OsExiter = func(code int) {}
}
func TestBuild(t *testing.T) {
m := &CLIApp{}
assert.NotNil(t, m)
}
func testOpts(args []string, fs afero.Fs) *RunOptions {
return &RunOptions{
Appname: "mfer",
Version: "1.0.0",
Gitrev: "abc123",
Args: args,
Stdin: &bytes.Buffer{},
Stdout: &bytes.Buffer{},
Stderr: &bytes.Buffer{},
Fs: fs,
}
}
func TestVersionCommand(t *testing.T) {
fs := afero.NewMemMapFs()
opts := testOpts([]string{"mfer", "version"}, fs)
exitCode := RunWithOptions(opts)
assert.Equal(t, 0, exitCode)
stdout := opts.Stdout.(*bytes.Buffer).String()
assert.Contains(t, stdout, "1.0.0")
assert.Contains(t, stdout, "abc123")
}
func TestHelpCommand(t *testing.T) {
fs := afero.NewMemMapFs()
opts := testOpts([]string{"mfer", "--help"}, fs)
exitCode := RunWithOptions(opts)
assert.Equal(t, 0, exitCode)
stdout := opts.Stdout.(*bytes.Buffer).String()
assert.Contains(t, stdout, "generate")
assert.Contains(t, stdout, "check")
assert.Contains(t, stdout, "fetch")
}
func TestGenerateCommand(t *testing.T) {
fs := afero.NewMemMapFs()
// Create test files in memory filesystem
require.NoError(t, fs.MkdirAll("/testdir", 0755))
require.NoError(t, afero.WriteFile(fs, "/testdir/file1.txt", []byte("hello world"), 0644))
require.NoError(t, afero.WriteFile(fs, "/testdir/file2.txt", []byte("test content"), 0644))
opts := testOpts([]string{"mfer", "-q", "generate", "-o", "/testdir/test.mf", "/testdir"}, fs)
exitCode := RunWithOptions(opts)
assert.Equal(t, 0, exitCode, "stderr: %s", opts.Stderr.(*bytes.Buffer).String())
// Verify manifest was created
exists, err := afero.Exists(fs, "/testdir/test.mf")
require.NoError(t, err)
assert.True(t, exists)
}
func TestGenerateAndCheckCommand(t *testing.T) {
fs := afero.NewMemMapFs()
// Create test files with subdirectory
require.NoError(t, fs.MkdirAll("/testdir/subdir", 0755))
require.NoError(t, afero.WriteFile(fs, "/testdir/file1.txt", []byte("hello world"), 0644))
require.NoError(t, afero.WriteFile(fs, "/testdir/subdir/file2.txt", []byte("test content"), 0644))
// Generate manifest
opts := testOpts([]string{"mfer", "-q", "generate", "-o", "/testdir/test.mf", "/testdir"}, fs)
exitCode := RunWithOptions(opts)
require.Equal(t, 0, exitCode, "generate failed: %s", opts.Stderr.(*bytes.Buffer).String())
// Check manifest
opts = testOpts([]string{"mfer", "-q", "check", "--base", "/testdir", "/testdir/test.mf"}, fs)
exitCode = RunWithOptions(opts)
assert.Equal(t, 0, exitCode, "check failed: %s", opts.Stderr.(*bytes.Buffer).String())
}
func TestCheckCommandWithMissingFile(t *testing.T) {
fs := afero.NewMemMapFs()
// Create test file
require.NoError(t, fs.MkdirAll("/testdir", 0755))
require.NoError(t, afero.WriteFile(fs, "/testdir/file1.txt", []byte("hello world"), 0644))
// Generate manifest
opts := testOpts([]string{"mfer", "-q", "generate", "-o", "/testdir/test.mf", "/testdir"}, fs)
exitCode := RunWithOptions(opts)
require.Equal(t, 0, exitCode, "generate failed: %s", opts.Stderr.(*bytes.Buffer).String())
// Delete the file
require.NoError(t, fs.Remove("/testdir/file1.txt"))
// Check manifest - should fail
opts = testOpts([]string{"mfer", "-q", "check", "--base", "/testdir", "/testdir/test.mf"}, fs)
exitCode = RunWithOptions(opts)
assert.Equal(t, 1, exitCode, "check should have failed for missing file")
}
func TestCheckCommandWithCorruptedFile(t *testing.T) {
fs := afero.NewMemMapFs()
// Create test file
require.NoError(t, fs.MkdirAll("/testdir", 0755))
require.NoError(t, afero.WriteFile(fs, "/testdir/file1.txt", []byte("hello world"), 0644))
// Generate manifest
opts := testOpts([]string{"mfer", "-q", "generate", "-o", "/testdir/test.mf", "/testdir"}, fs)
exitCode := RunWithOptions(opts)
require.Equal(t, 0, exitCode, "generate failed: %s", opts.Stderr.(*bytes.Buffer).String())
// Corrupt the file (change content but keep same size)
require.NoError(t, afero.WriteFile(fs, "/testdir/file1.txt", []byte("HELLO WORLD"), 0644))
// Check manifest - should fail with hash mismatch
opts = testOpts([]string{"mfer", "-q", "check", "--base", "/testdir", "/testdir/test.mf"}, fs)
exitCode = RunWithOptions(opts)
assert.Equal(t, 1, exitCode, "check should have failed for corrupted file")
}
func TestCheckCommandWithSizeMismatch(t *testing.T) {
fs := afero.NewMemMapFs()
// Create test file
require.NoError(t, fs.MkdirAll("/testdir", 0755))
require.NoError(t, afero.WriteFile(fs, "/testdir/file1.txt", []byte("hello world"), 0644))
// Generate manifest
opts := testOpts([]string{"mfer", "-q", "generate", "-o", "/testdir/test.mf", "/testdir"}, fs)
exitCode := RunWithOptions(opts)
require.Equal(t, 0, exitCode, "generate failed: %s", opts.Stderr.(*bytes.Buffer).String())
// Change file size
require.NoError(t, afero.WriteFile(fs, "/testdir/file1.txt", []byte("different size content here"), 0644))
// Check manifest - should fail with size mismatch
opts = testOpts([]string{"mfer", "-q", "check", "--base", "/testdir", "/testdir/test.mf"}, fs)
exitCode = RunWithOptions(opts)
assert.Equal(t, 1, exitCode, "check should have failed for size mismatch")
}
func TestBannerOutput(t *testing.T) {
fs := afero.NewMemMapFs()
// Create test file
require.NoError(t, fs.MkdirAll("/testdir", 0755))
require.NoError(t, afero.WriteFile(fs, "/testdir/file1.txt", []byte("hello"), 0644))
// Run without -q to see banner
opts := testOpts([]string{"mfer", "generate", "-o", "/testdir/test.mf", "/testdir"}, fs)
exitCode := RunWithOptions(opts)
assert.Equal(t, 0, exitCode)
// Banner ASCII art should be in stdout
stdout := opts.Stdout.(*bytes.Buffer).String()
assert.Contains(t, stdout, "___")
assert.Contains(t, stdout, "\\")
}
func TestUnknownCommand(t *testing.T) {
fs := afero.NewMemMapFs()
opts := testOpts([]string{"mfer", "unknown"}, fs)
exitCode := RunWithOptions(opts)
assert.Equal(t, 1, exitCode)
}
func TestGenerateWithIgnoreDotfiles(t *testing.T) {
fs := afero.NewMemMapFs()
// Create test files including dotfiles
require.NoError(t, fs.MkdirAll("/testdir", 0755))
require.NoError(t, afero.WriteFile(fs, "/testdir/file1.txt", []byte("hello"), 0644))
require.NoError(t, afero.WriteFile(fs, "/testdir/.hidden", []byte("secret"), 0644))
// Generate manifest with --ignore-dotfiles
opts := testOpts([]string{"mfer", "-q", "generate", "--ignore-dotfiles", "-o", "/testdir/test.mf", "/testdir"}, fs)
exitCode := RunWithOptions(opts)
require.Equal(t, 0, exitCode)
// Check that manifest exists and we can verify (hidden file won't cause failure even if missing)
exists, _ := afero.Exists(fs, "/testdir/test.mf")
assert.True(t, exists)
}
func TestMultipleInputPaths(t *testing.T) {
fs := afero.NewMemMapFs()
// Create test files in multiple directories
require.NoError(t, fs.MkdirAll("/dir1", 0755))
require.NoError(t, fs.MkdirAll("/dir2", 0755))
require.NoError(t, afero.WriteFile(fs, "/dir1/file1.txt", []byte("content1"), 0644))
require.NoError(t, afero.WriteFile(fs, "/dir2/file2.txt", []byte("content2"), 0644))
// Generate manifest from multiple paths
opts := testOpts([]string{"mfer", "-q", "generate", "-o", "/output.mf", "/dir1", "/dir2"}, fs)
exitCode := RunWithOptions(opts)
assert.Equal(t, 0, exitCode, "stderr: %s", opts.Stderr.(*bytes.Buffer).String())
exists, _ := afero.Exists(fs, "/output.mf")
assert.True(t, exists)
}
func TestNoExtraFilesPass(t *testing.T) {
fs := afero.NewMemMapFs()
// Create test files
require.NoError(t, fs.MkdirAll("/testdir", 0755))
require.NoError(t, afero.WriteFile(fs, "/testdir/file1.txt", []byte("hello"), 0644))
require.NoError(t, afero.WriteFile(fs, "/testdir/file2.txt", []byte("world"), 0644))
// Generate manifest
opts := testOpts([]string{"mfer", "-q", "generate", "-o", "/manifest.mf", "/testdir"}, fs)
exitCode := RunWithOptions(opts)
require.Equal(t, 0, exitCode)
// Check with --no-extra-files (should pass - no extra files)
opts = testOpts([]string{"mfer", "-q", "check", "--no-extra-files", "--base", "/testdir", "/manifest.mf"}, fs)
exitCode = RunWithOptions(opts)
assert.Equal(t, 0, exitCode)
}
func TestNoExtraFilesFail(t *testing.T) {
fs := afero.NewMemMapFs()
// Create test files
require.NoError(t, fs.MkdirAll("/testdir", 0755))
require.NoError(t, afero.WriteFile(fs, "/testdir/file1.txt", []byte("hello"), 0644))
// Generate manifest
opts := testOpts([]string{"mfer", "-q", "generate", "-o", "/manifest.mf", "/testdir"}, fs)
exitCode := RunWithOptions(opts)
require.Equal(t, 0, exitCode)
// Add an extra file after manifest generation
require.NoError(t, afero.WriteFile(fs, "/testdir/extra.txt", []byte("extra"), 0644))
// Check with --no-extra-files (should fail - extra file exists)
opts = testOpts([]string{"mfer", "-q", "check", "--no-extra-files", "--base", "/testdir", "/manifest.mf"}, fs)
exitCode = RunWithOptions(opts)
assert.Equal(t, 1, exitCode, "check should fail when extra files exist")
}
func TestNoExtraFilesWithSubdirectory(t *testing.T) {
fs := afero.NewMemMapFs()
// Create test files with subdirectory
require.NoError(t, fs.MkdirAll("/testdir/subdir", 0755))
require.NoError(t, afero.WriteFile(fs, "/testdir/file1.txt", []byte("hello"), 0644))
require.NoError(t, afero.WriteFile(fs, "/testdir/subdir/file2.txt", []byte("world"), 0644))
// Generate manifest
opts := testOpts([]string{"mfer", "-q", "generate", "-o", "/manifest.mf", "/testdir"}, fs)
exitCode := RunWithOptions(opts)
require.Equal(t, 0, exitCode)
// Add extra file in subdirectory
require.NoError(t, afero.WriteFile(fs, "/testdir/subdir/extra.txt", []byte("extra"), 0644))
// Check with --no-extra-files (should fail)
opts = testOpts([]string{"mfer", "-q", "check", "--no-extra-files", "--base", "/testdir", "/manifest.mf"}, fs)
exitCode = RunWithOptions(opts)
assert.Equal(t, 1, exitCode, "check should fail when extra files exist in subdirectory")
}
func TestCheckWithoutNoExtraFilesIgnoresExtra(t *testing.T) {
fs := afero.NewMemMapFs()
// Create test file
require.NoError(t, fs.MkdirAll("/testdir", 0755))
require.NoError(t, afero.WriteFile(fs, "/testdir/file1.txt", []byte("hello"), 0644))
// Generate manifest
opts := testOpts([]string{"mfer", "-q", "generate", "-o", "/manifest.mf", "/testdir"}, fs)
exitCode := RunWithOptions(opts)
require.Equal(t, 0, exitCode)
// Add extra file
require.NoError(t, afero.WriteFile(fs, "/testdir/extra.txt", []byte("extra"), 0644))
// Check WITHOUT --no-extra-files (should pass - extra files ignored)
opts = testOpts([]string{"mfer", "-q", "check", "--base", "/testdir", "/manifest.mf"}, fs)
exitCode = RunWithOptions(opts)
assert.Equal(t, 0, exitCode, "check without --no-extra-files should ignore extra files")
}

View File

@@ -1,54 +1,100 @@
package cli
import (
"bytes"
"fmt"
"path/filepath"
"time"
"github.com/urfave/cli/v2"
"sneak.berlin/go/mfer/internal/log"
"sneak.berlin/go/mfer/mfer"
"sneak.berlin/go/mfer/internal/scanner"
)
func (mfa *CLIApp) generateManifestOperation(ctx *cli.Context) error {
log.Debug("generateManifestOperation()")
myArgs := ctx.Args()
log.Dump(myArgs)
opts := &mfer.ManifestScanOptions{
opts := &scanner.Options{
IgnoreDotfiles: ctx.Bool("IgnoreDotfiles"),
FollowSymLinks: ctx.Bool("FollowSymLinks"),
Fs: mfa.Fs,
}
paths := make([]string, ctx.Args().Len()-1)
for i := 0; i < ctx.Args().Len(); i++ {
ap, err := filepath.Abs(ctx.Args().Get(i))
if err != nil {
s := scanner.NewWithOptions(opts)
// Phase 1: Enumeration - collect paths and stat files
args := ctx.Args()
showProgress := ctx.Bool("progress")
// Set up enumeration progress reporting
var enumProgress chan scanner.EnumerateStatus
if showProgress {
enumProgress = make(chan scanner.EnumerateStatus, 1)
go func() {
for status := range enumProgress {
log.Progressf("Enumerating: %d files, %.1f MB",
status.FilesFound,
float64(status.BytesFound)/1e6)
}
log.ProgressDone()
}()
}
if args.Len() == 0 {
// Default to current directory
if err := s.EnumeratePath(".", enumProgress); err != nil {
return err
}
} else {
// Collect all paths first
paths := make([]string, 0, args.Len())
for i := 0; i < args.Len(); i++ {
ap, err := filepath.Abs(args.Get(i))
if err != nil {
return err
}
log.Debugf("enumerating path: %s", ap)
paths = append(paths, ap)
}
if err := s.EnumeratePaths(enumProgress, paths...); err != nil {
return err
}
log.Dump(ap)
paths = append(paths, ap)
}
mf, err := mfer.NewFromPaths(opts, paths...)
log.Debugf("enumerated %d files, %d bytes total", s.FileCount(), s.TotalBytes())
// Open output file
outputPath := ctx.String("output")
outFile, err := mfa.Fs.Create(outputPath)
if err != nil {
panic(err)
return fmt.Errorf("failed to create output file: %w", err)
}
mf.WithContext(ctx.Context)
defer outFile.Close()
log.Dump(mf)
// Phase 2: Scan - read file contents and generate manifest
var scanProgress chan scanner.ScanStatus
if showProgress {
scanProgress = make(chan scanner.ScanStatus, 1)
go func() {
for status := range scanProgress {
log.Progressf("Scanning: %d/%d files, %.1f MB/s",
status.ScannedFiles,
status.TotalFiles,
status.BytesPerSec/1e6)
}
log.ProgressDone()
}()
}
err = mf.Scan()
err = s.ToManifest(ctx.Context, outFile, scanProgress)
if err != nil {
return err
return fmt.Errorf("failed to generate manifest: %w", err)
}
buf := new(bytes.Buffer)
err = mf.WriteTo(buf)
if err != nil {
return err
if !ctx.Bool("quiet") {
elapsed := time.Since(mfa.startupTime).Seconds()
rate := float64(s.TotalBytes()) / elapsed / 1e6
log.Infof("wrote %d files (%.1f MB) to %s in %.1fs (%.1f MB/s)", s.FileCount(), float64(s.TotalBytes())/1e6, outputPath, elapsed, rate)
}
dat := buf.Bytes()
log.Dump(dat)
return nil
}

View File

@@ -2,9 +2,11 @@ package cli
import (
"fmt"
"io"
"os"
"time"
"github.com/spf13/afero"
"github.com/urfave/cli/v2"
"sneak.berlin/go/mfer/internal/log"
)
@@ -16,22 +18,31 @@ type CLIApp struct {
startupTime time.Time
exitCode int
app *cli.App
// I/O streams - all program input/output should go through these
Stdin io.Reader
Stdout io.Writer
Stderr io.Writer
// Fs is the filesystem abstraction - defaults to OsFs for real filesystem
Fs afero.Fs
}
const banner = ` ___ ___ ___ ___
/__/\ / /\ / /\ / /\
| |::\ / /:/_ / /:/_ / /::\
| |:|:\ / /:/ /\ / /:/ /\ / /:/\:\
__|__|:|\:\ / /:/ /:/ / /:/ /:/_ / /:/~/:/
/__/::::| \:\ /__/:/ /:/ /__/:/ /:/ /\ /__/:/ /:/___
\ \:\~~\__\/ \ \:\/:/ \ \:\/:/ /:/ \ \:\/:::::/
\ \:\ \ \::/ \ \::/ /:/ \ \::/~~~~
\ \:\ \ \:\ \ \:\/:/ \ \:\
\ \:\ \ \:\ \ \::/ \ \:\
\__\/ \__\/ \__\/ \__\/`
const banner = `
___ ___ ___ ___
/__/\ / /\ / /\ / /\
| |::\ / /:/_ / /:/_ / /::\
| |:|:\ / /:/ /\ / /:/ /\ / /:/\:\
__|__|:|\:\ / /:/ /:/ / /:/ /:/_ / /:/~/:/
/__/::::| \:\ /__/:/ /:/ /__/:/ /:/ /\ /__/:/ /:/___
\ \:\~~\__\/ \ \:\/:/ \ \:\/:/ /:/ \ \:\/:::::/
\ \:\ \ \::/ \ \::/ /:/ \ \::/~~~~
\ \:\ \ \:\ \ \:\/:/ \ \:\
\ \:\ \ \:\ \ \::/ \ \:\
\__\/ \__\/ \__\/ \__\/`
func (mfa *CLIApp) printBanner() {
fmt.Println(banner)
fmt.Fprintln(mfa.Stdout, banner)
}
func (mfa *CLIApp) VersionString() string {
@@ -47,7 +58,7 @@ func (mfa *CLIApp) setVerbosity(v int) {
}
}
func (mfa *CLIApp) run() {
func (mfa *CLIApp) run(args []string) {
mfa.startupTime = time.Now()
if NO_COLOR {
@@ -55,6 +66,8 @@ func (mfa *CLIApp) run() {
log.DisableStyling()
}
// Configure log package to use our I/O streams
log.SetOutput(mfa.Stdout, mfa.Stderr)
log.Init()
var verbosity int
@@ -64,6 +77,8 @@ func (mfa *CLIApp) run() {
Usage: "Manifest generator",
Version: mfa.VersionString(),
EnableBashCompletion: true,
Writer: mfa.Stdout,
ErrWriter: mfa.Stderr,
Flags: []cli.Flag{
&cli.BoolFlag{
Name: "verbose",
@@ -106,11 +121,17 @@ func (mfa *CLIApp) run() {
Aliases: []string{"o"},
Usage: "Specify output filename",
},
&cli.BoolFlag{
Name: "progress",
Aliases: []string{"P"},
Usage: "Show progress during enumeration and scanning",
},
},
},
{
Name: "check",
Usage: "Validate files using manifest file",
Name: "check",
Usage: "Validate files using manifest file",
ArgsUsage: "[manifest file]",
Action: func(c *cli.Context) error {
if !c.Bool("quiet") {
mfa.printBanner()
@@ -118,12 +139,29 @@ func (mfa *CLIApp) run() {
mfa.setVerbosity(verbosity)
return mfa.checkManifestOperation(c)
},
Flags: []cli.Flag{
&cli.StringFlag{
Name: "base",
Aliases: []string{"b"},
Value: ".",
Usage: "Base directory for resolving relative paths from manifest",
},
&cli.BoolFlag{
Name: "progress",
Aliases: []string{"P"},
Usage: "Show progress during checking",
},
&cli.BoolFlag{
Name: "no-extra-files",
Usage: "Fail if files exist in base directory that are not in manifest",
},
},
},
{
Name: "version",
Usage: "Show version",
Action: func(c *cli.Context) error {
fmt.Printf("%s\n", mfa.VersionString())
fmt.Fprintln(mfa.Stdout, mfa.VersionString())
return nil
},
},
@@ -142,7 +180,7 @@ func (mfa *CLIApp) run() {
}
mfa.app.HideVersion = true
err := mfa.app.Run(os.Args)
err := mfa.app.Run(args)
if err != nil {
mfa.exitCode = 1
log.WithError(err).Debugf("exiting")

View File

@@ -2,7 +2,10 @@ package log
import (
"fmt"
"io"
"os"
"runtime"
"sync"
"github.com/apex/log"
acli "github.com/apex/log/handlers/cli"
@@ -12,6 +15,39 @@ import (
type Level = log.Level
var (
// mu protects the output writers
mu sync.RWMutex
// stdout is the writer for progress output
stdout io.Writer = os.Stdout
// stderr is the writer for log output
stderr io.Writer = os.Stderr
)
// SetOutput configures the output writers for the log package.
// stdout is used for progress output, stderr is used for log messages.
func SetOutput(out, err io.Writer) {
mu.Lock()
defer mu.Unlock()
stdout = out
stderr = err
pterm.SetDefaultOutput(out)
}
// GetStdout returns the configured stdout writer.
func GetStdout() io.Writer {
mu.RLock()
defer mu.RUnlock()
return stdout
}
// GetStderr returns the configured stderr writer.
func GetStderr() io.Writer {
mu.RLock()
defer mu.RUnlock()
return stderr
}
func DisableStyling() {
pterm.DisableColor()
pterm.DisableStyling()
@@ -24,10 +60,21 @@ func DisableStyling() {
}
func Init() {
log.SetHandler(acli.Default)
mu.RLock()
w := stderr
mu.RUnlock()
log.SetHandler(acli.New(w))
log.SetLevel(log.InfoLevel)
}
func Infof(format string, args ...interface{}) {
log.Infof(format, args...)
}
func Info(arg string) {
log.Info(arg)
}
func Debugf(format string, args ...interface{}) {
DebugReal(fmt.Sprintf(format, args...), 2)
}
@@ -55,14 +102,13 @@ func EnableDebugLogging() {
func VerbosityStepsToLogLevel(l int) log.Level {
switch l {
case 1:
return log.WarnLevel
case 2:
case 0:
return log.InfoLevel
case 3:
case 1:
return log.DebugLevel
}
return log.ErrorLevel
// -vv or more
return log.DebugLevel
}
func SetLevelFromVerbosity(l int) {
@@ -87,3 +133,14 @@ func GetLevel() log.Level {
func WithError(e error) *log.Entry {
return GetLogger().WithError(e)
}
// Progressf prints a progress message that overwrites the current line.
// Use ProgressDone() when progress is complete to move to the next line.
func Progressf(format string, args ...interface{}) {
pterm.Printf("\r"+format, args...)
}
// ProgressDone completes a progress line by printing a newline.
func ProgressDone() {
pterm.Println()
}

373
internal/scanner/scanner.go Normal file
View File

@@ -0,0 +1,373 @@
package scanner
import (
"context"
"io"
"io/fs"
"path"
"path/filepath"
"strings"
"sync"
"time"
"github.com/spf13/afero"
"sneak.berlin/go/mfer/mfer"
)
// Phase 1: Enumeration
// ---------------------
// Walking directories and calling stat() on files to collect metadata.
// Builds the list of files to be scanned. Relatively fast (metadata only).
// EnumerateStatus contains progress information for the enumeration phase.
type EnumerateStatus struct {
FilesFound int64
BytesFound int64
}
// Phase 2: Scan (ToManifest)
// --------------------------
// Reading file contents and computing hashes for manifest generation.
// This is the expensive phase that reads all file data.
// ScanStatus contains progress information for the scan phase.
type ScanStatus struct {
TotalFiles int64
ScannedFiles int64
TotalBytes int64
ScannedBytes int64
BytesPerSec float64
}
// Options configures scanner behavior.
type Options struct {
IgnoreDotfiles bool
FollowSymLinks bool
Fs afero.Fs // Filesystem to use, defaults to OsFs
}
// FileEntry represents a file that has been enumerated.
type FileEntry struct {
Path string // Relative path (used in manifest)
AbsPath string // Absolute path (used for reading file content)
Size int64
Mtime time.Time
Ctime time.Time
}
// Scanner accumulates files and generates manifests from them.
type Scanner struct {
mu sync.RWMutex
files []*FileEntry
options *Options
fs afero.Fs
}
// New creates a new Scanner with default options.
func New() *Scanner {
return NewWithOptions(nil)
}
// NewWithOptions creates a new Scanner with the given options.
func NewWithOptions(opts *Options) *Scanner {
if opts == nil {
opts = &Options{}
}
fs := opts.Fs
if fs == nil {
fs = afero.NewOsFs()
}
return &Scanner{
files: make([]*FileEntry, 0),
options: opts,
fs: fs,
}
}
// EnumerateFile adds a single file to the scanner, calling stat() to get metadata.
func (s *Scanner) EnumerateFile(filePath string) error {
abs, err := filepath.Abs(filePath)
if err != nil {
return err
}
info, err := s.fs.Stat(abs)
if err != nil {
return err
}
// For single files, use the filename as the relative path
basePath := filepath.Dir(abs)
return s.enumerateFileWithInfo(filepath.Base(abs), basePath, info, nil)
}
// EnumeratePath walks a directory path and adds all files to the scanner.
// If progress is non-nil, status updates are sent as files are discovered.
// The progress channel is closed when the method returns.
func (s *Scanner) EnumeratePath(inputPath string, progress chan<- EnumerateStatus) error {
if progress != nil {
defer close(progress)
}
abs, err := filepath.Abs(inputPath)
if err != nil {
return err
}
afs := afero.NewReadOnlyFs(afero.NewBasePathFs(s.fs, abs))
return s.enumerateFS(afs, abs, progress)
}
// EnumeratePaths walks multiple directory paths and adds all files to the scanner.
// If progress is non-nil, status updates are sent as files are discovered.
// The progress channel is closed when the method returns.
func (s *Scanner) EnumeratePaths(progress chan<- EnumerateStatus, inputPaths ...string) error {
if progress != nil {
defer close(progress)
}
for _, p := range inputPaths {
abs, err := filepath.Abs(p)
if err != nil {
return err
}
afs := afero.NewReadOnlyFs(afero.NewBasePathFs(s.fs, abs))
if err := s.enumerateFS(afs, abs, progress); err != nil {
return err
}
}
return nil
}
// EnumerateFS walks an afero filesystem and adds all files to the scanner.
// If progress is non-nil, status updates are sent as files are discovered.
// The progress channel is closed when the method returns.
// basePath is used to compute absolute paths for file reading.
func (s *Scanner) EnumerateFS(afs afero.Fs, basePath string, progress chan<- EnumerateStatus) error {
if progress != nil {
defer close(progress)
}
return s.enumerateFS(afs, basePath, progress)
}
// enumerateFS is the internal implementation that doesn't close the progress channel.
func (s *Scanner) enumerateFS(afs afero.Fs, basePath string, progress chan<- EnumerateStatus) error {
return afero.Walk(afs, "/", func(p string, info fs.FileInfo, err error) error {
if err != nil {
return err
}
if s.options.IgnoreDotfiles && pathIsHidden(p) {
if info.IsDir() {
return filepath.SkipDir
}
return nil
}
return s.enumerateFileWithInfo(p, basePath, info, progress)
})
}
// enumerateFileWithInfo adds a file with pre-existing fs.FileInfo.
func (s *Scanner) enumerateFileWithInfo(filePath string, basePath string, info fs.FileInfo, progress chan<- EnumerateStatus) error {
if info.IsDir() {
// Manifests contain only files, directories are implied
return nil
}
// Clean the path - remove leading slash if present
cleanPath := filePath
if len(cleanPath) > 0 && cleanPath[0] == '/' {
cleanPath = cleanPath[1:]
}
// Compute absolute path for file reading
absPath := filepath.Join(basePath, cleanPath)
entry := &FileEntry{
Path: cleanPath,
AbsPath: absPath,
Size: info.Size(),
Mtime: info.ModTime(),
// Note: Ctime not available from fs.FileInfo on all platforms
// Will need platform-specific code to extract it
}
s.mu.Lock()
s.files = append(s.files, entry)
filesFound := int64(len(s.files))
var bytesFound int64
for _, f := range s.files {
bytesFound += f.Size
}
s.mu.Unlock()
sendEnumerateStatus(progress, EnumerateStatus{
FilesFound: filesFound,
BytesFound: bytesFound,
})
return nil
}
// Files returns a copy of all files added to the scanner.
func (s *Scanner) Files() []*FileEntry {
s.mu.RLock()
defer s.mu.RUnlock()
out := make([]*FileEntry, len(s.files))
copy(out, s.files)
return out
}
// FileCount returns the number of files in the scanner.
func (s *Scanner) FileCount() int64 {
s.mu.RLock()
defer s.mu.RUnlock()
return int64(len(s.files))
}
// TotalBytes returns the total size of all files in the scanner.
func (s *Scanner) TotalBytes() int64 {
s.mu.RLock()
defer s.mu.RUnlock()
var total int64
for _, f := range s.files {
total += f.Size
}
return total
}
// ToManifest reads all file contents, computes hashes, and generates a manifest.
// If progress is non-nil, status updates are sent approximately once per second.
// The progress channel is closed when the method returns.
// The manifest is written to the provided io.Writer.
func (s *Scanner) ToManifest(ctx context.Context, w io.Writer, progress chan<- ScanStatus) error {
if progress != nil {
defer close(progress)
}
s.mu.RLock()
files := make([]*FileEntry, len(s.files))
copy(files, s.files)
totalFiles := int64(len(files))
var totalBytes int64
for _, f := range files {
totalBytes += f.Size
}
s.mu.RUnlock()
builder := mfer.NewBuilder()
var scannedFiles int64
var scannedBytes int64
lastProgressTime := time.Now()
startTime := time.Now()
for _, entry := range files {
// Check for cancellation
select {
case <-ctx.Done():
return ctx.Err()
default:
}
// Open file
f, err := s.fs.Open(entry.AbsPath)
if err != nil {
return err
}
// Add to manifest with progress callback
bytesRead, err := builder.AddFile(
entry.Path,
entry.Size,
entry.Mtime,
f,
func(fileBytes int64) {
// Send progress at most once per second
now := time.Now()
if progress != nil && now.Sub(lastProgressTime) >= time.Second {
elapsed := now.Sub(startTime).Seconds()
currentBytes := scannedBytes + fileBytes
var rate float64
if elapsed > 0 {
rate = float64(currentBytes) / elapsed
}
sendScanStatus(progress, ScanStatus{
TotalFiles: totalFiles,
ScannedFiles: scannedFiles,
TotalBytes: totalBytes,
ScannedBytes: currentBytes,
BytesPerSec: rate,
})
lastProgressTime = now
}
},
)
f.Close()
if err != nil {
return err
}
scannedFiles++
scannedBytes += bytesRead
}
// Send final progress
if progress != nil {
elapsed := time.Since(startTime).Seconds()
var rate float64
if elapsed > 0 {
rate = float64(scannedBytes) / elapsed
}
sendScanStatus(progress, ScanStatus{
TotalFiles: totalFiles,
ScannedFiles: scannedFiles,
TotalBytes: totalBytes,
ScannedBytes: scannedBytes,
BytesPerSec: rate,
})
}
// Build and write manifest
return builder.Build(w)
}
// pathIsHidden returns true if the path or any of its parent directories
// start with a dot (hidden files/directories).
func pathIsHidden(p string) bool {
tp := path.Clean(p)
if strings.HasPrefix(tp, ".") {
return true
}
for {
d, f := path.Split(tp)
if strings.HasPrefix(f, ".") {
return true
}
if d == "" {
return false
}
tp = d[0 : len(d)-1] // trim trailing slash from dir
}
}
// sendEnumerateStatus sends a status update without blocking.
// If the channel is full, the update is dropped.
func sendEnumerateStatus(ch chan<- EnumerateStatus, status EnumerateStatus) {
if ch == nil {
return
}
select {
case ch <- status:
default:
// Channel full, drop this update
}
}
// sendScanStatus sends a status update without blocking.
// If the channel is full, the update is dropped.
func sendScanStatus(ch chan<- ScanStatus, status ScanStatus) {
if ch == nil {
return
}
select {
case ch <- status:
default:
// Channel full, drop this update
}
}