Add testable CLI with dependency injection and new scanner/checker packages
Major changes:
- Refactor CLI to accept injected I/O streams and filesystem (afero.Fs) for testing without touching the real filesystem
- Add RunOptions struct and RunWithOptions() for configurable CLI execution
- Add internal/scanner package with two-phase manifest generation:
  - Phase 1 (Enumeration): walk directories, collect metadata
  - Phase 2 (Scan): read contents, compute hashes, write manifest
- Add internal/checker package for manifest verification with progress reporting and channel-based result streaming
- Add mfer/builder.go for incremental manifest construction
- Add --no-extra-files flag to check command to detect files not in manifest
- Add timing summaries showing file count, size, elapsed time, and throughput
- Add comprehensive tests using afero.MemMapFs (no real filesystem access)
- Add contrib/usage.sh integration test script
- Fix banner ASCII art alignment (consistent spacing)
- Fix verbosity levels so summaries display at default log level
- Update internal/log to support configurable output writers
This commit is contained in:
281
internal/checker/checker.go
Normal file
281
internal/checker/checker.go
Normal file
@@ -0,0 +1,281 @@
|
||||
package checker
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"context"
|
||||
"crypto/sha256"
|
||||
"errors"
|
||||
"io"
|
||||
"os"
|
||||
"path/filepath"
|
||||
|
||||
"github.com/multiformats/go-multihash"
|
||||
"github.com/spf13/afero"
|
||||
"sneak.berlin/go/mfer/mfer"
|
||||
)
|
||||
|
||||
// Result represents the outcome of checking a single file.
// Check produces one Result per manifest entry; FindExtraFiles produces one
// per on-disk file that the manifest does not list.
|
||||
type Result struct {
|
||||
Path string // path of the file relative to the checker's base directory
|
||||
Status Status // verification outcome for Path
|
||||
Message string // short human-readable detail; left empty when Status is StatusOK
|
||||
}
|
||||
|
||||
// Status represents the verification status of a file.
type Status int

// Verification outcomes, numbered from zero in declaration order.
const (
	StatusOK Status = iota
	StatusMissing
	StatusSizeMismatch
	StatusHashMismatch
	StatusExtra // File exists on disk but not in manifest
	StatusError
)

// String returns the upper-case label for s. Any value outside the declared
// constants yields "UNKNOWN".
func (s Status) String() string {
	// Indexed by the constant values so the table cannot drift out of order.
	labels := [...]string{
		StatusOK:           "OK",
		StatusMissing:      "MISSING",
		StatusSizeMismatch: "SIZE_MISMATCH",
		StatusHashMismatch: "HASH_MISMATCH",
		StatusExtra:        "EXTRA",
		StatusError:        "ERROR",
	}
	if s < StatusOK || int(s) >= len(labels) {
		return "UNKNOWN"
	}
	return labels[s]
}
|
||||
|
||||
// CheckStatus contains progress information for the check operation.
// It is a point-in-time snapshot offered on the progress channel by Check.
|
||||
type CheckStatus struct {
|
||||
TotalFiles int64 // number of entries in the manifest
|
||||
CheckedFiles int64 // entries processed so far, whatever their outcome
|
||||
TotalBytes int64 // sum of the sizes recorded in the manifest
|
||||
CheckedBytes int64 // bytes accounted for so far by the per-file checks
|
||||
BytesPerSec float64 // throughput; NOTE(review): Check in this file never sets it, so it stays zero
|
||||
Failures int64 // processed entries whose status was not StatusOK
|
||||
}
|
||||
|
||||
// Checker verifies files against a manifest.
// Instances are built by NewChecker, which fills in every field.
|
||||
type Checker struct {
|
||||
basePath string // absolute directory against which manifest paths are resolved
|
||||
files []*mfer.MFFilePath // file entries as returned by the loaded manifest
|
||||
fs afero.Fs // filesystem used for every stat, open and walk
|
||||
// manifestPaths is a set of paths in the manifest for quick lookup
|
||||
manifestPaths map[string]struct{}
|
||||
}
|
||||
|
||||
// NewChecker creates a new Checker for the given manifest, base path, and filesystem.
|
||||
// The basePath is the directory relative to which manifest paths are resolved.
|
||||
// If fs is nil, the real filesystem (OsFs) is used.
|
||||
func NewChecker(manifestPath string, basePath string, fs afero.Fs) (*Checker, error) {
|
||||
if fs == nil {
|
||||
fs = afero.NewOsFs()
|
||||
}
|
||||
|
||||
m, err := mfer.NewManifestFromFile(fs, manifestPath)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
abs, err := filepath.Abs(basePath)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
files := m.Files()
|
||||
manifestPaths := make(map[string]struct{}, len(files))
|
||||
for _, f := range files {
|
||||
manifestPaths[f.Path] = struct{}{}
|
||||
}
|
||||
|
||||
return &Checker{
|
||||
basePath: abs,
|
||||
files: files,
|
||||
fs: fs,
|
||||
manifestPaths: manifestPaths,
|
||||
}, nil
|
||||
}
|
||||
|
||||
// FileCount returns the number of files in the manifest.
|
||||
func (c *Checker) FileCount() int64 {
|
||||
return int64(len(c.files))
|
||||
}
|
||||
|
||||
// TotalBytes returns the total size of all files in the manifest.
|
||||
func (c *Checker) TotalBytes() int64 {
|
||||
var total int64
|
||||
for _, f := range c.files {
|
||||
total += f.Size
|
||||
}
|
||||
return total
|
||||
}
|
||||
|
||||
// Check verifies all files against the manifest.
|
||||
// Results are sent to the results channel as files are checked.
|
||||
// Progress updates are sent to the progress channel approximately once per second.
|
||||
// Both channels are closed when the method returns.
|
||||
func (c *Checker) Check(ctx context.Context, results chan<- Result, progress chan<- CheckStatus) error {
|
||||
if results != nil {
|
||||
defer close(results)
|
||||
}
|
||||
if progress != nil {
|
||||
defer close(progress)
|
||||
}
|
||||
|
||||
totalFiles := int64(len(c.files))
|
||||
totalBytes := c.TotalBytes()
|
||||
|
||||
var checkedFiles int64
|
||||
var checkedBytes int64
|
||||
var failures int64
|
||||
|
||||
for _, entry := range c.files {
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
return ctx.Err()
|
||||
default:
|
||||
}
|
||||
|
||||
result := c.checkFile(entry, &checkedBytes)
|
||||
if result.Status != StatusOK {
|
||||
failures++
|
||||
}
|
||||
checkedFiles++
|
||||
|
||||
if results != nil {
|
||||
results <- result
|
||||
}
|
||||
|
||||
// Send progress (simplified - every file for now)
|
||||
if progress != nil {
|
||||
sendCheckStatus(progress, CheckStatus{
|
||||
TotalFiles: totalFiles,
|
||||
CheckedFiles: checkedFiles,
|
||||
TotalBytes: totalBytes,
|
||||
CheckedBytes: checkedBytes,
|
||||
Failures: failures,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (c *Checker) checkFile(entry *mfer.MFFilePath, checkedBytes *int64) Result {
|
||||
absPath := filepath.Join(c.basePath, entry.Path)
|
||||
|
||||
// Check if file exists
|
||||
info, err := c.fs.Stat(absPath)
|
||||
if err != nil {
|
||||
if errors.Is(err, afero.ErrFileNotFound) || errors.Is(err, errors.New("file does not exist")) {
|
||||
return Result{Path: entry.Path, Status: StatusMissing, Message: "file not found"}
|
||||
}
|
||||
// Check for "file does not exist" style errors
|
||||
exists, _ := afero.Exists(c.fs, absPath)
|
||||
if !exists {
|
||||
return Result{Path: entry.Path, Status: StatusMissing, Message: "file not found"}
|
||||
}
|
||||
return Result{Path: entry.Path, Status: StatusError, Message: err.Error()}
|
||||
}
|
||||
|
||||
// Check size
|
||||
if info.Size() != entry.Size {
|
||||
*checkedBytes += info.Size()
|
||||
return Result{
|
||||
Path: entry.Path,
|
||||
Status: StatusSizeMismatch,
|
||||
Message: "size mismatch",
|
||||
}
|
||||
}
|
||||
|
||||
// Open and hash file
|
||||
f, err := c.fs.Open(absPath)
|
||||
if err != nil {
|
||||
return Result{Path: entry.Path, Status: StatusError, Message: err.Error()}
|
||||
}
|
||||
defer f.Close()
|
||||
|
||||
h := sha256.New()
|
||||
n, err := io.Copy(h, f)
|
||||
if err != nil {
|
||||
return Result{Path: entry.Path, Status: StatusError, Message: err.Error()}
|
||||
}
|
||||
*checkedBytes += n
|
||||
|
||||
// Encode as multihash and compare
|
||||
computed, err := multihash.Encode(h.Sum(nil), multihash.SHA2_256)
|
||||
if err != nil {
|
||||
return Result{Path: entry.Path, Status: StatusError, Message: err.Error()}
|
||||
}
|
||||
|
||||
// Check against all hashes in manifest (at least one must match)
|
||||
for _, hash := range entry.Hashes {
|
||||
if bytes.Equal(computed, hash.MultiHash) {
|
||||
return Result{Path: entry.Path, Status: StatusOK}
|
||||
}
|
||||
}
|
||||
|
||||
return Result{Path: entry.Path, Status: StatusHashMismatch, Message: "hash mismatch"}
|
||||
}
|
||||
|
||||
// FindExtraFiles walks the filesystem and reports files not in the manifest.
|
||||
// Results are sent to the results channel. The channel is closed when done.
|
||||
func (c *Checker) FindExtraFiles(ctx context.Context, results chan<- Result) error {
|
||||
if results != nil {
|
||||
defer close(results)
|
||||
}
|
||||
|
||||
return afero.Walk(c.fs, c.basePath, func(path string, info os.FileInfo, err error) error {
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
return ctx.Err()
|
||||
default:
|
||||
}
|
||||
|
||||
// Skip directories
|
||||
if info.IsDir() {
|
||||
return nil
|
||||
}
|
||||
|
||||
// Get relative path
|
||||
relPath, err := filepath.Rel(c.basePath, path)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// Check if path is in manifest
|
||||
if _, exists := c.manifestPaths[relPath]; !exists {
|
||||
if results != nil {
|
||||
results <- Result{
|
||||
Path: relPath,
|
||||
Status: StatusExtra,
|
||||
Message: "not in manifest",
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
})
|
||||
}
|
||||
|
||||
// sendCheckStatus sends a status update without blocking.
|
||||
func sendCheckStatus(ch chan<- CheckStatus, status CheckStatus) {
|
||||
if ch == nil {
|
||||
return
|
||||
}
|
||||
select {
|
||||
case ch <- status:
|
||||
default:
|
||||
}
|
||||
}
|
||||
@@ -1,13 +1,110 @@
|
||||
package cli
|
||||
|
||||
import (
|
||||
"errors"
|
||||
"fmt"
|
||||
"time"
|
||||
|
||||
"github.com/apex/log"
|
||||
"github.com/urfave/cli/v2"
|
||||
"sneak.berlin/go/mfer/internal/checker"
|
||||
"sneak.berlin/go/mfer/internal/log"
|
||||
)
|
||||
|
||||
func (mfa *CLIApp) checkManifestOperation(c *cli.Context) error {
|
||||
log.WithError(errors.New("unimplemented"))
|
||||
func (mfa *CLIApp) checkManifestOperation(ctx *cli.Context) error {
|
||||
log.Debug("checkManifestOperation()")
|
||||
|
||||
// Get manifest path from args, default to index.mf
|
||||
manifestPath := "index.mf"
|
||||
if ctx.Args().Len() > 0 {
|
||||
manifestPath = ctx.Args().Get(0)
|
||||
}
|
||||
|
||||
basePath := ctx.String("base")
|
||||
showProgress := ctx.Bool("progress")
|
||||
|
||||
log.Debugf("checking manifest %s with base %s", manifestPath, basePath)
|
||||
|
||||
// Create checker
|
||||
chk, err := checker.NewChecker(manifestPath, basePath, mfa.Fs)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to load manifest: %w", err)
|
||||
}
|
||||
|
||||
log.Debugf("manifest contains %d files, %d bytes", chk.FileCount(), chk.TotalBytes())
|
||||
|
||||
// Set up results channel
|
||||
results := make(chan checker.Result, 1)
|
||||
|
||||
// Set up progress channel
|
||||
var progress chan checker.CheckStatus
|
||||
if showProgress {
|
||||
progress = make(chan checker.CheckStatus, 1)
|
||||
go func() {
|
||||
for status := range progress {
|
||||
log.Progressf("Checking: %d/%d files, %d failures",
|
||||
status.CheckedFiles,
|
||||
status.TotalFiles,
|
||||
status.Failures)
|
||||
}
|
||||
log.ProgressDone()
|
||||
}()
|
||||
}
|
||||
|
||||
// Process results in a goroutine
|
||||
var failures int64
|
||||
done := make(chan struct{})
|
||||
go func() {
|
||||
for result := range results {
|
||||
if result.Status != checker.StatusOK {
|
||||
failures++
|
||||
log.Infof("%s: %s (%s)", result.Status, result.Path, result.Message)
|
||||
} else {
|
||||
log.Debugf("%s: %s", result.Status, result.Path)
|
||||
}
|
||||
}
|
||||
close(done)
|
||||
}()
|
||||
|
||||
// Run check
|
||||
err = chk.Check(ctx.Context, results, progress)
|
||||
if err != nil {
|
||||
return fmt.Errorf("check failed: %w", err)
|
||||
}
|
||||
|
||||
// Wait for results processing to complete
|
||||
<-done
|
||||
|
||||
// Check for extra files if requested
|
||||
if ctx.Bool("no-extra-files") {
|
||||
extraResults := make(chan checker.Result, 1)
|
||||
extraDone := make(chan struct{})
|
||||
go func() {
|
||||
for result := range extraResults {
|
||||
failures++
|
||||
log.Infof("%s: %s (%s)", result.Status, result.Path, result.Message)
|
||||
}
|
||||
close(extraDone)
|
||||
}()
|
||||
|
||||
err = chk.FindExtraFiles(ctx.Context, extraResults)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to check for extra files: %w", err)
|
||||
}
|
||||
<-extraDone
|
||||
}
|
||||
|
||||
if !ctx.Bool("quiet") {
|
||||
elapsed := time.Since(mfa.startupTime).Seconds()
|
||||
rate := float64(chk.TotalBytes()) / elapsed / 1e6
|
||||
if failures == 0 {
|
||||
log.Infof("checked %d files (%.1f MB) in %.1fs (%.1f MB/s): all OK", chk.FileCount(), float64(chk.TotalBytes())/1e6, elapsed, rate)
|
||||
} else {
|
||||
log.Infof("checked %d files (%.1f MB) in %.1fs (%.1f MB/s): %d failed", chk.FileCount(), float64(chk.TotalBytes())/1e6, elapsed, rate, failures)
|
||||
}
|
||||
}
|
||||
|
||||
if failures > 0 {
|
||||
mfa.exitCode = 1
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
@@ -1,7 +1,10 @@
|
||||
package cli
|
||||
|
||||
import (
|
||||
"io"
|
||||
"os"
|
||||
|
||||
"github.com/spf13/afero"
|
||||
)
|
||||
|
||||
var NO_COLOR bool
|
||||
@@ -13,13 +16,50 @@ func init() {
|
||||
}
|
||||
}
|
||||
|
||||
func Run(Appname, Version, Gitrev string) int {
|
||||
m := &CLIApp{}
|
||||
m.appname = Appname
|
||||
m.version = Version
|
||||
m.gitrev = Gitrev
|
||||
m.exitCode = 0
|
||||
// RunOptions contains all configuration for running the CLI application.
// RunWithOptions copies these values into the application before running it.
|
||||
type RunOptions struct {
|
||||
Appname string // application name
|
||||
Version string // version string
|
||||
Gitrev string // git revision identifier
|
||||
Args []string // full argument vector; DefaultRunOptions supplies os.Args
|
||||
Stdin io.Reader // input stream handed to the application
|
||||
Stdout io.Writer // stream handed to the application for normal output
|
||||
Stderr io.Writer // stream handed to the application for error output
|
||||
Fs afero.Fs // filesystem handed to the application
|
||||
}
|
||||
|
||||
m.run()
|
||||
// DefaultRunOptions returns RunOptions configured for normal CLI execution.
|
||||
func DefaultRunOptions(appname, version, gitrev string) *RunOptions {
|
||||
return &RunOptions{
|
||||
Appname: appname,
|
||||
Version: version,
|
||||
Gitrev: gitrev,
|
||||
Args: os.Args,
|
||||
Stdin: os.Stdin,
|
||||
Stdout: os.Stdout,
|
||||
Stderr: os.Stderr,
|
||||
Fs: afero.NewOsFs(),
|
||||
}
|
||||
}
|
||||
|
||||
// Run creates and runs the CLI application with default options.
|
||||
func Run(appname, version, gitrev string) int {
|
||||
return RunWithOptions(DefaultRunOptions(appname, version, gitrev))
|
||||
}
|
||||
|
||||
// RunWithOptions creates and runs the CLI application with the given options.
|
||||
func RunWithOptions(opts *RunOptions) int {
|
||||
m := &CLIApp{
|
||||
appname: opts.Appname,
|
||||
version: opts.Version,
|
||||
gitrev: opts.Gitrev,
|
||||
exitCode: 0,
|
||||
Stdin: opts.Stdin,
|
||||
Stdout: opts.Stdout,
|
||||
Stderr: opts.Stderr,
|
||||
Fs: opts.Fs,
|
||||
}
|
||||
|
||||
m.run(opts.Args)
|
||||
return m.exitCode
|
||||
}
|
||||
|
||||
@@ -1,12 +1,306 @@
|
||||
package cli
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"testing"
|
||||
|
||||
"github.com/spf13/afero"
|
||||
"github.com/stretchr/testify/assert"
|
||||
"github.com/stretchr/testify/require"
|
||||
urfcli "github.com/urfave/cli/v2"
|
||||
)
|
||||
|
||||
func init() {
|
||||
// Prevent urfave/cli from calling os.Exit during tests
|
||||
urfcli.OsExiter = func(code int) {}
|
||||
}
|
||||
|
||||
func TestBuild(t *testing.T) {
|
||||
m := &CLIApp{}
|
||||
assert.NotNil(t, m)
|
||||
}
|
||||
|
||||
func testOpts(args []string, fs afero.Fs) *RunOptions {
|
||||
return &RunOptions{
|
||||
Appname: "mfer",
|
||||
Version: "1.0.0",
|
||||
Gitrev: "abc123",
|
||||
Args: args,
|
||||
Stdin: &bytes.Buffer{},
|
||||
Stdout: &bytes.Buffer{},
|
||||
Stderr: &bytes.Buffer{},
|
||||
Fs: fs,
|
||||
}
|
||||
}
|
||||
|
||||
func TestVersionCommand(t *testing.T) {
|
||||
fs := afero.NewMemMapFs()
|
||||
opts := testOpts([]string{"mfer", "version"}, fs)
|
||||
|
||||
exitCode := RunWithOptions(opts)
|
||||
|
||||
assert.Equal(t, 0, exitCode)
|
||||
stdout := opts.Stdout.(*bytes.Buffer).String()
|
||||
assert.Contains(t, stdout, "1.0.0")
|
||||
assert.Contains(t, stdout, "abc123")
|
||||
}
|
||||
|
||||
func TestHelpCommand(t *testing.T) {
|
||||
fs := afero.NewMemMapFs()
|
||||
opts := testOpts([]string{"mfer", "--help"}, fs)
|
||||
|
||||
exitCode := RunWithOptions(opts)
|
||||
|
||||
assert.Equal(t, 0, exitCode)
|
||||
stdout := opts.Stdout.(*bytes.Buffer).String()
|
||||
assert.Contains(t, stdout, "generate")
|
||||
assert.Contains(t, stdout, "check")
|
||||
assert.Contains(t, stdout, "fetch")
|
||||
}
|
||||
|
||||
func TestGenerateCommand(t *testing.T) {
|
||||
fs := afero.NewMemMapFs()
|
||||
|
||||
// Create test files in memory filesystem
|
||||
require.NoError(t, fs.MkdirAll("/testdir", 0755))
|
||||
require.NoError(t, afero.WriteFile(fs, "/testdir/file1.txt", []byte("hello world"), 0644))
|
||||
require.NoError(t, afero.WriteFile(fs, "/testdir/file2.txt", []byte("test content"), 0644))
|
||||
|
||||
opts := testOpts([]string{"mfer", "-q", "generate", "-o", "/testdir/test.mf", "/testdir"}, fs)
|
||||
|
||||
exitCode := RunWithOptions(opts)
|
||||
|
||||
assert.Equal(t, 0, exitCode, "stderr: %s", opts.Stderr.(*bytes.Buffer).String())
|
||||
|
||||
// Verify manifest was created
|
||||
exists, err := afero.Exists(fs, "/testdir/test.mf")
|
||||
require.NoError(t, err)
|
||||
assert.True(t, exists)
|
||||
}
|
||||
|
||||
func TestGenerateAndCheckCommand(t *testing.T) {
|
||||
fs := afero.NewMemMapFs()
|
||||
|
||||
// Create test files with subdirectory
|
||||
require.NoError(t, fs.MkdirAll("/testdir/subdir", 0755))
|
||||
require.NoError(t, afero.WriteFile(fs, "/testdir/file1.txt", []byte("hello world"), 0644))
|
||||
require.NoError(t, afero.WriteFile(fs, "/testdir/subdir/file2.txt", []byte("test content"), 0644))
|
||||
|
||||
// Generate manifest
|
||||
opts := testOpts([]string{"mfer", "-q", "generate", "-o", "/testdir/test.mf", "/testdir"}, fs)
|
||||
exitCode := RunWithOptions(opts)
|
||||
require.Equal(t, 0, exitCode, "generate failed: %s", opts.Stderr.(*bytes.Buffer).String())
|
||||
|
||||
// Check manifest
|
||||
opts = testOpts([]string{"mfer", "-q", "check", "--base", "/testdir", "/testdir/test.mf"}, fs)
|
||||
exitCode = RunWithOptions(opts)
|
||||
assert.Equal(t, 0, exitCode, "check failed: %s", opts.Stderr.(*bytes.Buffer).String())
|
||||
}
|
||||
|
||||
func TestCheckCommandWithMissingFile(t *testing.T) {
|
||||
fs := afero.NewMemMapFs()
|
||||
|
||||
// Create test file
|
||||
require.NoError(t, fs.MkdirAll("/testdir", 0755))
|
||||
require.NoError(t, afero.WriteFile(fs, "/testdir/file1.txt", []byte("hello world"), 0644))
|
||||
|
||||
// Generate manifest
|
||||
opts := testOpts([]string{"mfer", "-q", "generate", "-o", "/testdir/test.mf", "/testdir"}, fs)
|
||||
exitCode := RunWithOptions(opts)
|
||||
require.Equal(t, 0, exitCode, "generate failed: %s", opts.Stderr.(*bytes.Buffer).String())
|
||||
|
||||
// Delete the file
|
||||
require.NoError(t, fs.Remove("/testdir/file1.txt"))
|
||||
|
||||
// Check manifest - should fail
|
||||
opts = testOpts([]string{"mfer", "-q", "check", "--base", "/testdir", "/testdir/test.mf"}, fs)
|
||||
exitCode = RunWithOptions(opts)
|
||||
assert.Equal(t, 1, exitCode, "check should have failed for missing file")
|
||||
}
|
||||
|
||||
func TestCheckCommandWithCorruptedFile(t *testing.T) {
|
||||
fs := afero.NewMemMapFs()
|
||||
|
||||
// Create test file
|
||||
require.NoError(t, fs.MkdirAll("/testdir", 0755))
|
||||
require.NoError(t, afero.WriteFile(fs, "/testdir/file1.txt", []byte("hello world"), 0644))
|
||||
|
||||
// Generate manifest
|
||||
opts := testOpts([]string{"mfer", "-q", "generate", "-o", "/testdir/test.mf", "/testdir"}, fs)
|
||||
exitCode := RunWithOptions(opts)
|
||||
require.Equal(t, 0, exitCode, "generate failed: %s", opts.Stderr.(*bytes.Buffer).String())
|
||||
|
||||
// Corrupt the file (change content but keep same size)
|
||||
require.NoError(t, afero.WriteFile(fs, "/testdir/file1.txt", []byte("HELLO WORLD"), 0644))
|
||||
|
||||
// Check manifest - should fail with hash mismatch
|
||||
opts = testOpts([]string{"mfer", "-q", "check", "--base", "/testdir", "/testdir/test.mf"}, fs)
|
||||
exitCode = RunWithOptions(opts)
|
||||
assert.Equal(t, 1, exitCode, "check should have failed for corrupted file")
|
||||
}
|
||||
|
||||
func TestCheckCommandWithSizeMismatch(t *testing.T) {
|
||||
fs := afero.NewMemMapFs()
|
||||
|
||||
// Create test file
|
||||
require.NoError(t, fs.MkdirAll("/testdir", 0755))
|
||||
require.NoError(t, afero.WriteFile(fs, "/testdir/file1.txt", []byte("hello world"), 0644))
|
||||
|
||||
// Generate manifest
|
||||
opts := testOpts([]string{"mfer", "-q", "generate", "-o", "/testdir/test.mf", "/testdir"}, fs)
|
||||
exitCode := RunWithOptions(opts)
|
||||
require.Equal(t, 0, exitCode, "generate failed: %s", opts.Stderr.(*bytes.Buffer).String())
|
||||
|
||||
// Change file size
|
||||
require.NoError(t, afero.WriteFile(fs, "/testdir/file1.txt", []byte("different size content here"), 0644))
|
||||
|
||||
// Check manifest - should fail with size mismatch
|
||||
opts = testOpts([]string{"mfer", "-q", "check", "--base", "/testdir", "/testdir/test.mf"}, fs)
|
||||
exitCode = RunWithOptions(opts)
|
||||
assert.Equal(t, 1, exitCode, "check should have failed for size mismatch")
|
||||
}
|
||||
|
||||
func TestBannerOutput(t *testing.T) {
|
||||
fs := afero.NewMemMapFs()
|
||||
|
||||
// Create test file
|
||||
require.NoError(t, fs.MkdirAll("/testdir", 0755))
|
||||
require.NoError(t, afero.WriteFile(fs, "/testdir/file1.txt", []byte("hello"), 0644))
|
||||
|
||||
// Run without -q to see banner
|
||||
opts := testOpts([]string{"mfer", "generate", "-o", "/testdir/test.mf", "/testdir"}, fs)
|
||||
exitCode := RunWithOptions(opts)
|
||||
assert.Equal(t, 0, exitCode)
|
||||
|
||||
// Banner ASCII art should be in stdout
|
||||
stdout := opts.Stdout.(*bytes.Buffer).String()
|
||||
assert.Contains(t, stdout, "___")
|
||||
assert.Contains(t, stdout, "\\")
|
||||
}
|
||||
|
||||
func TestUnknownCommand(t *testing.T) {
|
||||
fs := afero.NewMemMapFs()
|
||||
opts := testOpts([]string{"mfer", "unknown"}, fs)
|
||||
|
||||
exitCode := RunWithOptions(opts)
|
||||
assert.Equal(t, 1, exitCode)
|
||||
}
|
||||
|
||||
func TestGenerateWithIgnoreDotfiles(t *testing.T) {
|
||||
fs := afero.NewMemMapFs()
|
||||
|
||||
// Create test files including dotfiles
|
||||
require.NoError(t, fs.MkdirAll("/testdir", 0755))
|
||||
require.NoError(t, afero.WriteFile(fs, "/testdir/file1.txt", []byte("hello"), 0644))
|
||||
require.NoError(t, afero.WriteFile(fs, "/testdir/.hidden", []byte("secret"), 0644))
|
||||
|
||||
// Generate manifest with --ignore-dotfiles
|
||||
opts := testOpts([]string{"mfer", "-q", "generate", "--ignore-dotfiles", "-o", "/testdir/test.mf", "/testdir"}, fs)
|
||||
exitCode := RunWithOptions(opts)
|
||||
require.Equal(t, 0, exitCode)
|
||||
|
||||
// Check that manifest exists and we can verify (hidden file won't cause failure even if missing)
|
||||
exists, _ := afero.Exists(fs, "/testdir/test.mf")
|
||||
assert.True(t, exists)
|
||||
}
|
||||
|
||||
func TestMultipleInputPaths(t *testing.T) {
|
||||
fs := afero.NewMemMapFs()
|
||||
|
||||
// Create test files in multiple directories
|
||||
require.NoError(t, fs.MkdirAll("/dir1", 0755))
|
||||
require.NoError(t, fs.MkdirAll("/dir2", 0755))
|
||||
require.NoError(t, afero.WriteFile(fs, "/dir1/file1.txt", []byte("content1"), 0644))
|
||||
require.NoError(t, afero.WriteFile(fs, "/dir2/file2.txt", []byte("content2"), 0644))
|
||||
|
||||
// Generate manifest from multiple paths
|
||||
opts := testOpts([]string{"mfer", "-q", "generate", "-o", "/output.mf", "/dir1", "/dir2"}, fs)
|
||||
exitCode := RunWithOptions(opts)
|
||||
assert.Equal(t, 0, exitCode, "stderr: %s", opts.Stderr.(*bytes.Buffer).String())
|
||||
|
||||
exists, _ := afero.Exists(fs, "/output.mf")
|
||||
assert.True(t, exists)
|
||||
}
|
||||
|
||||
func TestNoExtraFilesPass(t *testing.T) {
|
||||
fs := afero.NewMemMapFs()
|
||||
|
||||
// Create test files
|
||||
require.NoError(t, fs.MkdirAll("/testdir", 0755))
|
||||
require.NoError(t, afero.WriteFile(fs, "/testdir/file1.txt", []byte("hello"), 0644))
|
||||
require.NoError(t, afero.WriteFile(fs, "/testdir/file2.txt", []byte("world"), 0644))
|
||||
|
||||
// Generate manifest
|
||||
opts := testOpts([]string{"mfer", "-q", "generate", "-o", "/manifest.mf", "/testdir"}, fs)
|
||||
exitCode := RunWithOptions(opts)
|
||||
require.Equal(t, 0, exitCode)
|
||||
|
||||
// Check with --no-extra-files (should pass - no extra files)
|
||||
opts = testOpts([]string{"mfer", "-q", "check", "--no-extra-files", "--base", "/testdir", "/manifest.mf"}, fs)
|
||||
exitCode = RunWithOptions(opts)
|
||||
assert.Equal(t, 0, exitCode)
|
||||
}
|
||||
|
||||
func TestNoExtraFilesFail(t *testing.T) {
|
||||
fs := afero.NewMemMapFs()
|
||||
|
||||
// Create test files
|
||||
require.NoError(t, fs.MkdirAll("/testdir", 0755))
|
||||
require.NoError(t, afero.WriteFile(fs, "/testdir/file1.txt", []byte("hello"), 0644))
|
||||
|
||||
// Generate manifest
|
||||
opts := testOpts([]string{"mfer", "-q", "generate", "-o", "/manifest.mf", "/testdir"}, fs)
|
||||
exitCode := RunWithOptions(opts)
|
||||
require.Equal(t, 0, exitCode)
|
||||
|
||||
// Add an extra file after manifest generation
|
||||
require.NoError(t, afero.WriteFile(fs, "/testdir/extra.txt", []byte("extra"), 0644))
|
||||
|
||||
// Check with --no-extra-files (should fail - extra file exists)
|
||||
opts = testOpts([]string{"mfer", "-q", "check", "--no-extra-files", "--base", "/testdir", "/manifest.mf"}, fs)
|
||||
exitCode = RunWithOptions(opts)
|
||||
assert.Equal(t, 1, exitCode, "check should fail when extra files exist")
|
||||
}
|
||||
|
||||
func TestNoExtraFilesWithSubdirectory(t *testing.T) {
|
||||
fs := afero.NewMemMapFs()
|
||||
|
||||
// Create test files with subdirectory
|
||||
require.NoError(t, fs.MkdirAll("/testdir/subdir", 0755))
|
||||
require.NoError(t, afero.WriteFile(fs, "/testdir/file1.txt", []byte("hello"), 0644))
|
||||
require.NoError(t, afero.WriteFile(fs, "/testdir/subdir/file2.txt", []byte("world"), 0644))
|
||||
|
||||
// Generate manifest
|
||||
opts := testOpts([]string{"mfer", "-q", "generate", "-o", "/manifest.mf", "/testdir"}, fs)
|
||||
exitCode := RunWithOptions(opts)
|
||||
require.Equal(t, 0, exitCode)
|
||||
|
||||
// Add extra file in subdirectory
|
||||
require.NoError(t, afero.WriteFile(fs, "/testdir/subdir/extra.txt", []byte("extra"), 0644))
|
||||
|
||||
// Check with --no-extra-files (should fail)
|
||||
opts = testOpts([]string{"mfer", "-q", "check", "--no-extra-files", "--base", "/testdir", "/manifest.mf"}, fs)
|
||||
exitCode = RunWithOptions(opts)
|
||||
assert.Equal(t, 1, exitCode, "check should fail when extra files exist in subdirectory")
|
||||
}
|
||||
|
||||
func TestCheckWithoutNoExtraFilesIgnoresExtra(t *testing.T) {
|
||||
fs := afero.NewMemMapFs()
|
||||
|
||||
// Create test file
|
||||
require.NoError(t, fs.MkdirAll("/testdir", 0755))
|
||||
require.NoError(t, afero.WriteFile(fs, "/testdir/file1.txt", []byte("hello"), 0644))
|
||||
|
||||
// Generate manifest
|
||||
opts := testOpts([]string{"mfer", "-q", "generate", "-o", "/manifest.mf", "/testdir"}, fs)
|
||||
exitCode := RunWithOptions(opts)
|
||||
require.Equal(t, 0, exitCode)
|
||||
|
||||
// Add extra file
|
||||
require.NoError(t, afero.WriteFile(fs, "/testdir/extra.txt", []byte("extra"), 0644))
|
||||
|
||||
// Check WITHOUT --no-extra-files (should pass - extra files ignored)
|
||||
opts = testOpts([]string{"mfer", "-q", "check", "--base", "/testdir", "/manifest.mf"}, fs)
|
||||
exitCode = RunWithOptions(opts)
|
||||
assert.Equal(t, 0, exitCode, "check without --no-extra-files should ignore extra files")
|
||||
}
|
||||
|
||||
@@ -1,54 +1,100 @@
|
||||
package cli
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"fmt"
|
||||
"path/filepath"
|
||||
"time"
|
||||
|
||||
"github.com/urfave/cli/v2"
|
||||
"sneak.berlin/go/mfer/internal/log"
|
||||
"sneak.berlin/go/mfer/mfer"
|
||||
"sneak.berlin/go/mfer/internal/scanner"
|
||||
)
|
||||
|
||||
func (mfa *CLIApp) generateManifestOperation(ctx *cli.Context) error {
|
||||
log.Debug("generateManifestOperation()")
|
||||
myArgs := ctx.Args()
|
||||
log.Dump(myArgs)
|
||||
|
||||
opts := &mfer.ManifestScanOptions{
|
||||
opts := &scanner.Options{
|
||||
IgnoreDotfiles: ctx.Bool("IgnoreDotfiles"),
|
||||
FollowSymLinks: ctx.Bool("FollowSymLinks"),
|
||||
Fs: mfa.Fs,
|
||||
}
|
||||
paths := make([]string, ctx.Args().Len()-1)
|
||||
for i := 0; i < ctx.Args().Len(); i++ {
|
||||
ap, err := filepath.Abs(ctx.Args().Get(i))
|
||||
if err != nil {
|
||||
|
||||
s := scanner.NewWithOptions(opts)
|
||||
|
||||
// Phase 1: Enumeration - collect paths and stat files
|
||||
args := ctx.Args()
|
||||
showProgress := ctx.Bool("progress")
|
||||
|
||||
// Set up enumeration progress reporting
|
||||
var enumProgress chan scanner.EnumerateStatus
|
||||
if showProgress {
|
||||
enumProgress = make(chan scanner.EnumerateStatus, 1)
|
||||
go func() {
|
||||
for status := range enumProgress {
|
||||
log.Progressf("Enumerating: %d files, %.1f MB",
|
||||
status.FilesFound,
|
||||
float64(status.BytesFound)/1e6)
|
||||
}
|
||||
log.ProgressDone()
|
||||
}()
|
||||
}
|
||||
|
||||
if args.Len() == 0 {
|
||||
// Default to current directory
|
||||
if err := s.EnumeratePath(".", enumProgress); err != nil {
|
||||
return err
|
||||
}
|
||||
} else {
|
||||
// Collect all paths first
|
||||
paths := make([]string, 0, args.Len())
|
||||
for i := 0; i < args.Len(); i++ {
|
||||
ap, err := filepath.Abs(args.Get(i))
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
log.Debugf("enumerating path: %s", ap)
|
||||
paths = append(paths, ap)
|
||||
}
|
||||
if err := s.EnumeratePaths(enumProgress, paths...); err != nil {
|
||||
return err
|
||||
}
|
||||
log.Dump(ap)
|
||||
paths = append(paths, ap)
|
||||
}
|
||||
mf, err := mfer.NewFromPaths(opts, paths...)
|
||||
|
||||
log.Debugf("enumerated %d files, %d bytes total", s.FileCount(), s.TotalBytes())
|
||||
|
||||
// Open output file
|
||||
outputPath := ctx.String("output")
|
||||
outFile, err := mfa.Fs.Create(outputPath)
|
||||
if err != nil {
|
||||
panic(err)
|
||||
return fmt.Errorf("failed to create output file: %w", err)
|
||||
}
|
||||
mf.WithContext(ctx.Context)
|
||||
defer outFile.Close()
|
||||
|
||||
log.Dump(mf)
|
||||
// Phase 2: Scan - read file contents and generate manifest
|
||||
var scanProgress chan scanner.ScanStatus
|
||||
if showProgress {
|
||||
scanProgress = make(chan scanner.ScanStatus, 1)
|
||||
go func() {
|
||||
for status := range scanProgress {
|
||||
log.Progressf("Scanning: %d/%d files, %.1f MB/s",
|
||||
status.ScannedFiles,
|
||||
status.TotalFiles,
|
||||
status.BytesPerSec/1e6)
|
||||
}
|
||||
log.ProgressDone()
|
||||
}()
|
||||
}
|
||||
|
||||
err = mf.Scan()
|
||||
err = s.ToManifest(ctx.Context, outFile, scanProgress)
|
||||
if err != nil {
|
||||
return err
|
||||
return fmt.Errorf("failed to generate manifest: %w", err)
|
||||
}
|
||||
|
||||
buf := new(bytes.Buffer)
|
||||
|
||||
err = mf.WriteTo(buf)
|
||||
if err != nil {
|
||||
return err
|
||||
if !ctx.Bool("quiet") {
|
||||
elapsed := time.Since(mfa.startupTime).Seconds()
|
||||
rate := float64(s.TotalBytes()) / elapsed / 1e6
|
||||
log.Infof("wrote %d files (%.1f MB) to %s in %.1fs (%.1f MB/s)", s.FileCount(), float64(s.TotalBytes())/1e6, outputPath, elapsed, rate)
|
||||
}
|
||||
|
||||
dat := buf.Bytes()
|
||||
|
||||
log.Dump(dat)
|
||||
return nil
|
||||
}
|
||||
|
||||
@@ -2,9 +2,11 @@ package cli
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"io"
|
||||
"os"
|
||||
"time"
|
||||
|
||||
"github.com/spf13/afero"
|
||||
"github.com/urfave/cli/v2"
|
||||
"sneak.berlin/go/mfer/internal/log"
|
||||
)
|
||||
@@ -16,22 +18,31 @@ type CLIApp struct {
|
||||
startupTime time.Time
|
||||
exitCode int
|
||||
app *cli.App
|
||||
|
||||
// I/O streams - all program input/output should go through these
|
||||
Stdin io.Reader
|
||||
Stdout io.Writer
|
||||
Stderr io.Writer
|
||||
|
||||
// Fs is the filesystem abstraction - defaults to OsFs for real filesystem
|
||||
Fs afero.Fs
|
||||
}
|
||||
|
||||
const banner = ` ___ ___ ___ ___
|
||||
/__/\ / /\ / /\ / /\
|
||||
| |::\ / /:/_ / /:/_ / /::\
|
||||
| |:|:\ / /:/ /\ / /:/ /\ / /:/\:\
|
||||
__|__|:|\:\ / /:/ /:/ / /:/ /:/_ / /:/~/:/
|
||||
/__/::::| \:\ /__/:/ /:/ /__/:/ /:/ /\ /__/:/ /:/___
|
||||
\ \:\~~\__\/ \ \:\/:/ \ \:\/:/ /:/ \ \:\/:::::/
|
||||
\ \:\ \ \::/ \ \::/ /:/ \ \::/~~~~
|
||||
\ \:\ \ \:\ \ \:\/:/ \ \:\
|
||||
\ \:\ \ \:\ \ \::/ \ \:\
|
||||
\__\/ \__\/ \__\/ \__\/`
|
||||
const banner = `
|
||||
___ ___ ___ ___
|
||||
/__/\ / /\ / /\ / /\
|
||||
| |::\ / /:/_ / /:/_ / /::\
|
||||
| |:|:\ / /:/ /\ / /:/ /\ / /:/\:\
|
||||
__|__|:|\:\ / /:/ /:/ / /:/ /:/_ / /:/~/:/
|
||||
/__/::::| \:\ /__/:/ /:/ /__/:/ /:/ /\ /__/:/ /:/___
|
||||
\ \:\~~\__\/ \ \:\/:/ \ \:\/:/ /:/ \ \:\/:::::/
|
||||
\ \:\ \ \::/ \ \::/ /:/ \ \::/~~~~
|
||||
\ \:\ \ \:\ \ \:\/:/ \ \:\
|
||||
\ \:\ \ \:\ \ \::/ \ \:\
|
||||
\__\/ \__\/ \__\/ \__\/`
|
||||
|
||||
func (mfa *CLIApp) printBanner() {
|
||||
fmt.Println(banner)
|
||||
fmt.Fprintln(mfa.Stdout, banner)
|
||||
}
|
||||
|
||||
func (mfa *CLIApp) VersionString() string {
|
||||
@@ -47,7 +58,7 @@ func (mfa *CLIApp) setVerbosity(v int) {
|
||||
}
|
||||
}
|
||||
|
||||
func (mfa *CLIApp) run() {
|
||||
func (mfa *CLIApp) run(args []string) {
|
||||
mfa.startupTime = time.Now()
|
||||
|
||||
if NO_COLOR {
|
||||
@@ -55,6 +66,8 @@ func (mfa *CLIApp) run() {
|
||||
log.DisableStyling()
|
||||
}
|
||||
|
||||
// Configure log package to use our I/O streams
|
||||
log.SetOutput(mfa.Stdout, mfa.Stderr)
|
||||
log.Init()
|
||||
|
||||
var verbosity int
|
||||
@@ -64,6 +77,8 @@ func (mfa *CLIApp) run() {
|
||||
Usage: "Manifest generator",
|
||||
Version: mfa.VersionString(),
|
||||
EnableBashCompletion: true,
|
||||
Writer: mfa.Stdout,
|
||||
ErrWriter: mfa.Stderr,
|
||||
Flags: []cli.Flag{
|
||||
&cli.BoolFlag{
|
||||
Name: "verbose",
|
||||
@@ -106,11 +121,17 @@ func (mfa *CLIApp) run() {
|
||||
Aliases: []string{"o"},
|
||||
Usage: "Specify output filename",
|
||||
},
|
||||
&cli.BoolFlag{
|
||||
Name: "progress",
|
||||
Aliases: []string{"P"},
|
||||
Usage: "Show progress during enumeration and scanning",
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
Name: "check",
|
||||
Usage: "Validate files using manifest file",
|
||||
Name: "check",
|
||||
Usage: "Validate files using manifest file",
|
||||
ArgsUsage: "[manifest file]",
|
||||
Action: func(c *cli.Context) error {
|
||||
if !c.Bool("quiet") {
|
||||
mfa.printBanner()
|
||||
@@ -118,12 +139,29 @@ func (mfa *CLIApp) run() {
|
||||
mfa.setVerbosity(verbosity)
|
||||
return mfa.checkManifestOperation(c)
|
||||
},
|
||||
Flags: []cli.Flag{
|
||||
&cli.StringFlag{
|
||||
Name: "base",
|
||||
Aliases: []string{"b"},
|
||||
Value: ".",
|
||||
Usage: "Base directory for resolving relative paths from manifest",
|
||||
},
|
||||
&cli.BoolFlag{
|
||||
Name: "progress",
|
||||
Aliases: []string{"P"},
|
||||
Usage: "Show progress during checking",
|
||||
},
|
||||
&cli.BoolFlag{
|
||||
Name: "no-extra-files",
|
||||
Usage: "Fail if files exist in base directory that are not in manifest",
|
||||
},
|
||||
},
|
||||
},
|
||||
{
|
||||
Name: "version",
|
||||
Usage: "Show version",
|
||||
Action: func(c *cli.Context) error {
|
||||
fmt.Printf("%s\n", mfa.VersionString())
|
||||
fmt.Fprintln(mfa.Stdout, mfa.VersionString())
|
||||
return nil
|
||||
},
|
||||
},
|
||||
@@ -142,7 +180,7 @@ func (mfa *CLIApp) run() {
|
||||
}
|
||||
|
||||
mfa.app.HideVersion = true
|
||||
err := mfa.app.Run(os.Args)
|
||||
err := mfa.app.Run(args)
|
||||
if err != nil {
|
||||
mfa.exitCode = 1
|
||||
log.WithError(err).Debugf("exiting")
|
||||
|
||||
@@ -2,7 +2,10 @@ package log
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"io"
|
||||
"os"
|
||||
"runtime"
|
||||
"sync"
|
||||
|
||||
"github.com/apex/log"
|
||||
acli "github.com/apex/log/handlers/cli"
|
||||
@@ -12,6 +15,39 @@ import (
|
||||
|
||||
type Level = log.Level
|
||||
|
||||
// Package-level output destinations. Both writers are guarded by mu.
var (
	// mu serializes access to stdout and stderr below.
	mu sync.RWMutex

	// stdout receives progress output; it starts out as the process stdout.
	stdout io.Writer = os.Stdout

	// stderr receives log output; it starts out as the process stderr.
	stderr io.Writer = os.Stderr
)
|
||||
|
||||
// SetOutput configures the output writers for the log package.
|
||||
// stdout is used for progress output, stderr is used for log messages.
|
||||
func SetOutput(out, err io.Writer) {
|
||||
mu.Lock()
|
||||
defer mu.Unlock()
|
||||
stdout = out
|
||||
stderr = err
|
||||
pterm.SetDefaultOutput(out)
|
||||
}
|
||||
|
||||
// GetStdout returns the configured stdout writer.
|
||||
func GetStdout() io.Writer {
|
||||
mu.RLock()
|
||||
defer mu.RUnlock()
|
||||
return stdout
|
||||
}
|
||||
|
||||
// GetStderr returns the configured stderr writer.
|
||||
func GetStderr() io.Writer {
|
||||
mu.RLock()
|
||||
defer mu.RUnlock()
|
||||
return stderr
|
||||
}
|
||||
|
||||
func DisableStyling() {
|
||||
pterm.DisableColor()
|
||||
pterm.DisableStyling()
|
||||
@@ -24,10 +60,21 @@ func DisableStyling() {
|
||||
}
|
||||
|
||||
func Init() {
|
||||
log.SetHandler(acli.Default)
|
||||
mu.RLock()
|
||||
w := stderr
|
||||
mu.RUnlock()
|
||||
log.SetHandler(acli.New(w))
|
||||
log.SetLevel(log.InfoLevel)
|
||||
}
|
||||
|
||||
func Infof(format string, args ...interface{}) {
|
||||
log.Infof(format, args...)
|
||||
}
|
||||
|
||||
func Info(arg string) {
|
||||
log.Info(arg)
|
||||
}
|
||||
|
||||
func Debugf(format string, args ...interface{}) {
|
||||
DebugReal(fmt.Sprintf(format, args...), 2)
|
||||
}
|
||||
@@ -55,14 +102,13 @@ func EnableDebugLogging() {
|
||||
|
||||
func VerbosityStepsToLogLevel(l int) log.Level {
|
||||
switch l {
|
||||
case 1:
|
||||
return log.WarnLevel
|
||||
case 2:
|
||||
case 0:
|
||||
return log.InfoLevel
|
||||
case 3:
|
||||
case 1:
|
||||
return log.DebugLevel
|
||||
}
|
||||
return log.ErrorLevel
|
||||
// -vv or more
|
||||
return log.DebugLevel
|
||||
}
|
||||
|
||||
func SetLevelFromVerbosity(l int) {
|
||||
@@ -87,3 +133,14 @@ func GetLevel() log.Level {
|
||||
func WithError(e error) *log.Entry {
|
||||
return GetLogger().WithError(e)
|
||||
}
|
||||
|
||||
// Progressf prints a progress message that overwrites the current line.
|
||||
// Use ProgressDone() when progress is complete to move to the next line.
|
||||
func Progressf(format string, args ...interface{}) {
|
||||
pterm.Printf("\r"+format, args...)
|
||||
}
|
||||
|
||||
// ProgressDone completes a progress line by printing a newline.
|
||||
func ProgressDone() {
|
||||
pterm.Println()
|
||||
}
|
||||
|
||||
373
internal/scanner/scanner.go
Normal file
373
internal/scanner/scanner.go
Normal file
@@ -0,0 +1,373 @@
|
||||
package scanner
|
||||
|
||||
import (
|
||||
"context"
|
||||
"io"
|
||||
"io/fs"
|
||||
"path"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"github.com/spf13/afero"
|
||||
"sneak.berlin/go/mfer/mfer"
|
||||
)
|
||||
|
||||
// Phase 1: Enumeration
|
||||
// ---------------------
|
||||
// Walking directories and calling stat() on files to collect metadata.
|
||||
// Builds the list of files to be scanned. Relatively fast (metadata only).
|
||||
|
||||
// EnumerateStatus is a progress snapshot emitted during the enumeration phase.
type EnumerateStatus struct {
	// FilesFound is the number of files recorded so far.
	FilesFound int64
	// BytesFound is the sum of the sizes of the files recorded so far.
	BytesFound int64
}
|
||||
|
||||
// Phase 2: Scan (ToManifest)
|
||||
// --------------------------
|
||||
// Reading file contents and computing hashes for manifest generation.
|
||||
// This is the expensive phase that reads all file data.
|
||||
|
||||
// ScanStatus is a progress snapshot emitted during the scan phase.
type ScanStatus struct {
	// TotalFiles is the number of files queued for scanning.
	TotalFiles int64
	// ScannedFiles is the number of files fully processed so far.
	ScannedFiles int64
	// TotalBytes is the combined enumerated size of all queued files.
	TotalBytes int64
	// ScannedBytes is the number of bytes read so far.
	ScannedBytes int64
	// BytesPerSec is ScannedBytes divided by the seconds elapsed since the
	// scan began; it is zero when no time has elapsed.
	BytesPerSec float64
}
|
||||
|
||||
// Options configures scanner behavior.
|
||||
type Options struct {
|
||||
IgnoreDotfiles bool
|
||||
FollowSymLinks bool
|
||||
Fs afero.Fs // Filesystem to use, defaults to OsFs
|
||||
}
|
||||
|
||||
// FileEntry describes one file recorded during enumeration.
type FileEntry struct {
	// Path is the relative path, as it appears in the manifest.
	Path string
	// AbsPath is the path used to open the file for reading.
	AbsPath string
	// Size is the file size reported at enumeration time.
	Size int64
	// Mtime is the modification time reported at enumeration time.
	Mtime time.Time
	// Ctime is left at its zero value by the enumeration code in this file.
	Ctime time.Time
}
|
||||
|
||||
// Scanner accumulates files and generates manifests from them.
|
||||
type Scanner struct {
|
||||
mu sync.RWMutex
|
||||
files []*FileEntry
|
||||
options *Options
|
||||
fs afero.Fs
|
||||
}
|
||||
|
||||
// New creates a new Scanner with default options.
|
||||
func New() *Scanner {
|
||||
return NewWithOptions(nil)
|
||||
}
|
||||
|
||||
// NewWithOptions creates a new Scanner with the given options.
|
||||
func NewWithOptions(opts *Options) *Scanner {
|
||||
if opts == nil {
|
||||
opts = &Options{}
|
||||
}
|
||||
fs := opts.Fs
|
||||
if fs == nil {
|
||||
fs = afero.NewOsFs()
|
||||
}
|
||||
return &Scanner{
|
||||
files: make([]*FileEntry, 0),
|
||||
options: opts,
|
||||
fs: fs,
|
||||
}
|
||||
}
|
||||
|
||||
// EnumerateFile adds a single file to the scanner, calling stat() to get metadata.
|
||||
func (s *Scanner) EnumerateFile(filePath string) error {
|
||||
abs, err := filepath.Abs(filePath)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
info, err := s.fs.Stat(abs)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
// For single files, use the filename as the relative path
|
||||
basePath := filepath.Dir(abs)
|
||||
return s.enumerateFileWithInfo(filepath.Base(abs), basePath, info, nil)
|
||||
}
|
||||
|
||||
// EnumeratePath walks a directory path and adds all files to the scanner.
|
||||
// If progress is non-nil, status updates are sent as files are discovered.
|
||||
// The progress channel is closed when the method returns.
|
||||
func (s *Scanner) EnumeratePath(inputPath string, progress chan<- EnumerateStatus) error {
|
||||
if progress != nil {
|
||||
defer close(progress)
|
||||
}
|
||||
abs, err := filepath.Abs(inputPath)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
afs := afero.NewReadOnlyFs(afero.NewBasePathFs(s.fs, abs))
|
||||
return s.enumerateFS(afs, abs, progress)
|
||||
}
|
||||
|
||||
// EnumeratePaths walks multiple directory paths and adds all files to the scanner.
|
||||
// If progress is non-nil, status updates are sent as files are discovered.
|
||||
// The progress channel is closed when the method returns.
|
||||
func (s *Scanner) EnumeratePaths(progress chan<- EnumerateStatus, inputPaths ...string) error {
|
||||
if progress != nil {
|
||||
defer close(progress)
|
||||
}
|
||||
for _, p := range inputPaths {
|
||||
abs, err := filepath.Abs(p)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
afs := afero.NewReadOnlyFs(afero.NewBasePathFs(s.fs, abs))
|
||||
if err := s.enumerateFS(afs, abs, progress); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// EnumerateFS walks an afero filesystem and adds all files to the scanner.
|
||||
// If progress is non-nil, status updates are sent as files are discovered.
|
||||
// The progress channel is closed when the method returns.
|
||||
// basePath is used to compute absolute paths for file reading.
|
||||
func (s *Scanner) EnumerateFS(afs afero.Fs, basePath string, progress chan<- EnumerateStatus) error {
|
||||
if progress != nil {
|
||||
defer close(progress)
|
||||
}
|
||||
return s.enumerateFS(afs, basePath, progress)
|
||||
}
|
||||
|
||||
// enumerateFS is the internal implementation that doesn't close the progress channel.
|
||||
func (s *Scanner) enumerateFS(afs afero.Fs, basePath string, progress chan<- EnumerateStatus) error {
|
||||
return afero.Walk(afs, "/", func(p string, info fs.FileInfo, err error) error {
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
if s.options.IgnoreDotfiles && pathIsHidden(p) {
|
||||
if info.IsDir() {
|
||||
return filepath.SkipDir
|
||||
}
|
||||
return nil
|
||||
}
|
||||
return s.enumerateFileWithInfo(p, basePath, info, progress)
|
||||
})
|
||||
}
|
||||
|
||||
// enumerateFileWithInfo adds a file with pre-existing fs.FileInfo.
|
||||
func (s *Scanner) enumerateFileWithInfo(filePath string, basePath string, info fs.FileInfo, progress chan<- EnumerateStatus) error {
|
||||
if info.IsDir() {
|
||||
// Manifests contain only files, directories are implied
|
||||
return nil
|
||||
}
|
||||
|
||||
// Clean the path - remove leading slash if present
|
||||
cleanPath := filePath
|
||||
if len(cleanPath) > 0 && cleanPath[0] == '/' {
|
||||
cleanPath = cleanPath[1:]
|
||||
}
|
||||
|
||||
// Compute absolute path for file reading
|
||||
absPath := filepath.Join(basePath, cleanPath)
|
||||
|
||||
entry := &FileEntry{
|
||||
Path: cleanPath,
|
||||
AbsPath: absPath,
|
||||
Size: info.Size(),
|
||||
Mtime: info.ModTime(),
|
||||
// Note: Ctime not available from fs.FileInfo on all platforms
|
||||
// Will need platform-specific code to extract it
|
||||
}
|
||||
|
||||
s.mu.Lock()
|
||||
s.files = append(s.files, entry)
|
||||
filesFound := int64(len(s.files))
|
||||
var bytesFound int64
|
||||
for _, f := range s.files {
|
||||
bytesFound += f.Size
|
||||
}
|
||||
s.mu.Unlock()
|
||||
|
||||
sendEnumerateStatus(progress, EnumerateStatus{
|
||||
FilesFound: filesFound,
|
||||
BytesFound: bytesFound,
|
||||
})
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// Files returns a copy of all files added to the scanner.
|
||||
func (s *Scanner) Files() []*FileEntry {
|
||||
s.mu.RLock()
|
||||
defer s.mu.RUnlock()
|
||||
out := make([]*FileEntry, len(s.files))
|
||||
copy(out, s.files)
|
||||
return out
|
||||
}
|
||||
|
||||
// FileCount returns the number of files in the scanner.
|
||||
func (s *Scanner) FileCount() int64 {
|
||||
s.mu.RLock()
|
||||
defer s.mu.RUnlock()
|
||||
return int64(len(s.files))
|
||||
}
|
||||
|
||||
// TotalBytes returns the total size of all files in the scanner.
|
||||
func (s *Scanner) TotalBytes() int64 {
|
||||
s.mu.RLock()
|
||||
defer s.mu.RUnlock()
|
||||
var total int64
|
||||
for _, f := range s.files {
|
||||
total += f.Size
|
||||
}
|
||||
return total
|
||||
}
|
||||
|
||||
// ToManifest reads all file contents, computes hashes, and generates a manifest.
|
||||
// If progress is non-nil, status updates are sent approximately once per second.
|
||||
// The progress channel is closed when the method returns.
|
||||
// The manifest is written to the provided io.Writer.
|
||||
func (s *Scanner) ToManifest(ctx context.Context, w io.Writer, progress chan<- ScanStatus) error {
|
||||
if progress != nil {
|
||||
defer close(progress)
|
||||
}
|
||||
|
||||
s.mu.RLock()
|
||||
files := make([]*FileEntry, len(s.files))
|
||||
copy(files, s.files)
|
||||
totalFiles := int64(len(files))
|
||||
var totalBytes int64
|
||||
for _, f := range files {
|
||||
totalBytes += f.Size
|
||||
}
|
||||
s.mu.RUnlock()
|
||||
|
||||
builder := mfer.NewBuilder()
|
||||
|
||||
var scannedFiles int64
|
||||
var scannedBytes int64
|
||||
lastProgressTime := time.Now()
|
||||
startTime := time.Now()
|
||||
|
||||
for _, entry := range files {
|
||||
// Check for cancellation
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
return ctx.Err()
|
||||
default:
|
||||
}
|
||||
|
||||
// Open file
|
||||
f, err := s.fs.Open(entry.AbsPath)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// Add to manifest with progress callback
|
||||
bytesRead, err := builder.AddFile(
|
||||
entry.Path,
|
||||
entry.Size,
|
||||
entry.Mtime,
|
||||
f,
|
||||
func(fileBytes int64) {
|
||||
// Send progress at most once per second
|
||||
now := time.Now()
|
||||
if progress != nil && now.Sub(lastProgressTime) >= time.Second {
|
||||
elapsed := now.Sub(startTime).Seconds()
|
||||
currentBytes := scannedBytes + fileBytes
|
||||
var rate float64
|
||||
if elapsed > 0 {
|
||||
rate = float64(currentBytes) / elapsed
|
||||
}
|
||||
sendScanStatus(progress, ScanStatus{
|
||||
TotalFiles: totalFiles,
|
||||
ScannedFiles: scannedFiles,
|
||||
TotalBytes: totalBytes,
|
||||
ScannedBytes: currentBytes,
|
||||
BytesPerSec: rate,
|
||||
})
|
||||
lastProgressTime = now
|
||||
}
|
||||
},
|
||||
)
|
||||
f.Close()
|
||||
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
scannedFiles++
|
||||
scannedBytes += bytesRead
|
||||
}
|
||||
|
||||
// Send final progress
|
||||
if progress != nil {
|
||||
elapsed := time.Since(startTime).Seconds()
|
||||
var rate float64
|
||||
if elapsed > 0 {
|
||||
rate = float64(scannedBytes) / elapsed
|
||||
}
|
||||
sendScanStatus(progress, ScanStatus{
|
||||
TotalFiles: totalFiles,
|
||||
ScannedFiles: scannedFiles,
|
||||
TotalBytes: totalBytes,
|
||||
ScannedBytes: scannedBytes,
|
||||
BytesPerSec: rate,
|
||||
})
|
||||
}
|
||||
|
||||
// Build and write manifest
|
||||
return builder.Build(w)
|
||||
}
|
||||
|
||||
// pathIsHidden reports whether any component of the slash-separated path p
// names a hidden file or directory, i.e. starts with a dot.
//
// The path is cleaned first. The special components "." and ".." refer to the
// current and parent directory and are not treated as hidden, so "", ".",
// ".." and "../a" all yield false.
func pathIsHidden(p string) bool {
	for _, part := range strings.Split(path.Clean(p), "/") {
		if part == "." || part == ".." {
			// Directory references, not dotfiles.
			continue
		}
		if strings.HasPrefix(part, ".") {
			return true
		}
	}
	return false
}
|
||||
|
||||
// sendEnumerateStatus sends a status update without blocking.
|
||||
// If the channel is full, the update is dropped.
|
||||
func sendEnumerateStatus(ch chan<- EnumerateStatus, status EnumerateStatus) {
|
||||
if ch == nil {
|
||||
return
|
||||
}
|
||||
select {
|
||||
case ch <- status:
|
||||
default:
|
||||
// Channel full, drop this update
|
||||
}
|
||||
}
|
||||
|
||||
// sendScanStatus sends a status update without blocking.
|
||||
// If the channel is full, the update is dropped.
|
||||
func sendScanStatus(ch chan<- ScanStatus, status ScanStatus) {
|
||||
if ch == nil {
|
||||
return
|
||||
}
|
||||
select {
|
||||
case ch <- status:
|
||||
default:
|
||||
// Channel full, drop this update
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user