Add testable CLI with dependency injection and new scanner/checker packages
Major changes: - Refactor CLI to accept injected I/O streams and filesystem (afero.Fs) for testing without touching the real filesystem - Add RunOptions struct and RunWithOptions() for configurable CLI execution - Add internal/scanner package with two-phase manifest generation: - Phase 1 (Enumeration): walk directories, collect metadata - Phase 2 (Scan): read contents, compute hashes, write manifest - Add internal/checker package for manifest verification with progress reporting and channel-based result streaming - Add mfer/builder.go for incremental manifest construction - Add --no-extra-files flag to check command to detect files not in manifest - Add timing summaries showing file count, size, elapsed time, and throughput - Add comprehensive tests using afero.MemMapFs (no real filesystem access) - Add contrib/usage.sh integration test script - Fix banner ASCII art alignment (consistent spacing) - Fix verbosity levels so summaries display at default log level - Update internal/log to support configurable output writers
This commit is contained in:
281
internal/checker/checker.go
Normal file
281
internal/checker/checker.go
Normal file
@@ -0,0 +1,281 @@
|
||||
package checker
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"context"
|
||||
"crypto/sha256"
|
||||
"errors"
|
||||
"io"
|
||||
"os"
|
||||
"path/filepath"
|
||||
|
||||
"github.com/multiformats/go-multihash"
|
||||
"github.com/spf13/afero"
|
||||
"sneak.berlin/go/mfer/mfer"
|
||||
)
|
||||
|
||||
// Result represents the outcome of checking a single file.
|
||||
type Result struct {
|
||||
Path string
|
||||
Status Status
|
||||
Message string
|
||||
}
|
||||
|
||||
// Status represents the verification status of a file.
|
||||
type Status int
|
||||
|
||||
const (
|
||||
StatusOK Status = iota
|
||||
StatusMissing
|
||||
StatusSizeMismatch
|
||||
StatusHashMismatch
|
||||
StatusExtra // File exists on disk but not in manifest
|
||||
StatusError
|
||||
)
|
||||
|
||||
func (s Status) String() string {
|
||||
switch s {
|
||||
case StatusOK:
|
||||
return "OK"
|
||||
case StatusMissing:
|
||||
return "MISSING"
|
||||
case StatusSizeMismatch:
|
||||
return "SIZE_MISMATCH"
|
||||
case StatusHashMismatch:
|
||||
return "HASH_MISMATCH"
|
||||
case StatusExtra:
|
||||
return "EXTRA"
|
||||
case StatusError:
|
||||
return "ERROR"
|
||||
default:
|
||||
return "UNKNOWN"
|
||||
}
|
||||
}
|
||||
|
||||
// CheckStatus contains progress information for the check operation.
|
||||
type CheckStatus struct {
|
||||
TotalFiles int64
|
||||
CheckedFiles int64
|
||||
TotalBytes int64
|
||||
CheckedBytes int64
|
||||
BytesPerSec float64
|
||||
Failures int64
|
||||
}
|
||||
|
||||
// Checker verifies files against a manifest.
|
||||
type Checker struct {
|
||||
basePath string
|
||||
files []*mfer.MFFilePath
|
||||
fs afero.Fs
|
||||
// manifestPaths is a set of paths in the manifest for quick lookup
|
||||
manifestPaths map[string]struct{}
|
||||
}
|
||||
|
||||
// NewChecker creates a new Checker for the given manifest, base path, and filesystem.
|
||||
// The basePath is the directory relative to which manifest paths are resolved.
|
||||
// If fs is nil, the real filesystem (OsFs) is used.
|
||||
func NewChecker(manifestPath string, basePath string, fs afero.Fs) (*Checker, error) {
|
||||
if fs == nil {
|
||||
fs = afero.NewOsFs()
|
||||
}
|
||||
|
||||
m, err := mfer.NewManifestFromFile(fs, manifestPath)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
abs, err := filepath.Abs(basePath)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
files := m.Files()
|
||||
manifestPaths := make(map[string]struct{}, len(files))
|
||||
for _, f := range files {
|
||||
manifestPaths[f.Path] = struct{}{}
|
||||
}
|
||||
|
||||
return &Checker{
|
||||
basePath: abs,
|
||||
files: files,
|
||||
fs: fs,
|
||||
manifestPaths: manifestPaths,
|
||||
}, nil
|
||||
}
|
||||
|
||||
// FileCount returns the number of files in the manifest.
|
||||
func (c *Checker) FileCount() int64 {
|
||||
return int64(len(c.files))
|
||||
}
|
||||
|
||||
// TotalBytes returns the total size of all files in the manifest.
|
||||
func (c *Checker) TotalBytes() int64 {
|
||||
var total int64
|
||||
for _, f := range c.files {
|
||||
total += f.Size
|
||||
}
|
||||
return total
|
||||
}
|
||||
|
||||
// Check verifies all files against the manifest.
|
||||
// Results are sent to the results channel as files are checked.
|
||||
// Progress updates are sent to the progress channel approximately once per second.
|
||||
// Both channels are closed when the method returns.
|
||||
func (c *Checker) Check(ctx context.Context, results chan<- Result, progress chan<- CheckStatus) error {
|
||||
if results != nil {
|
||||
defer close(results)
|
||||
}
|
||||
if progress != nil {
|
||||
defer close(progress)
|
||||
}
|
||||
|
||||
totalFiles := int64(len(c.files))
|
||||
totalBytes := c.TotalBytes()
|
||||
|
||||
var checkedFiles int64
|
||||
var checkedBytes int64
|
||||
var failures int64
|
||||
|
||||
for _, entry := range c.files {
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
return ctx.Err()
|
||||
default:
|
||||
}
|
||||
|
||||
result := c.checkFile(entry, &checkedBytes)
|
||||
if result.Status != StatusOK {
|
||||
failures++
|
||||
}
|
||||
checkedFiles++
|
||||
|
||||
if results != nil {
|
||||
results <- result
|
||||
}
|
||||
|
||||
// Send progress (simplified - every file for now)
|
||||
if progress != nil {
|
||||
sendCheckStatus(progress, CheckStatus{
|
||||
TotalFiles: totalFiles,
|
||||
CheckedFiles: checkedFiles,
|
||||
TotalBytes: totalBytes,
|
||||
CheckedBytes: checkedBytes,
|
||||
Failures: failures,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (c *Checker) checkFile(entry *mfer.MFFilePath, checkedBytes *int64) Result {
|
||||
absPath := filepath.Join(c.basePath, entry.Path)
|
||||
|
||||
// Check if file exists
|
||||
info, err := c.fs.Stat(absPath)
|
||||
if err != nil {
|
||||
if errors.Is(err, afero.ErrFileNotFound) || errors.Is(err, errors.New("file does not exist")) {
|
||||
return Result{Path: entry.Path, Status: StatusMissing, Message: "file not found"}
|
||||
}
|
||||
// Check for "file does not exist" style errors
|
||||
exists, _ := afero.Exists(c.fs, absPath)
|
||||
if !exists {
|
||||
return Result{Path: entry.Path, Status: StatusMissing, Message: "file not found"}
|
||||
}
|
||||
return Result{Path: entry.Path, Status: StatusError, Message: err.Error()}
|
||||
}
|
||||
|
||||
// Check size
|
||||
if info.Size() != entry.Size {
|
||||
*checkedBytes += info.Size()
|
||||
return Result{
|
||||
Path: entry.Path,
|
||||
Status: StatusSizeMismatch,
|
||||
Message: "size mismatch",
|
||||
}
|
||||
}
|
||||
|
||||
// Open and hash file
|
||||
f, err := c.fs.Open(absPath)
|
||||
if err != nil {
|
||||
return Result{Path: entry.Path, Status: StatusError, Message: err.Error()}
|
||||
}
|
||||
defer f.Close()
|
||||
|
||||
h := sha256.New()
|
||||
n, err := io.Copy(h, f)
|
||||
if err != nil {
|
||||
return Result{Path: entry.Path, Status: StatusError, Message: err.Error()}
|
||||
}
|
||||
*checkedBytes += n
|
||||
|
||||
// Encode as multihash and compare
|
||||
computed, err := multihash.Encode(h.Sum(nil), multihash.SHA2_256)
|
||||
if err != nil {
|
||||
return Result{Path: entry.Path, Status: StatusError, Message: err.Error()}
|
||||
}
|
||||
|
||||
// Check against all hashes in manifest (at least one must match)
|
||||
for _, hash := range entry.Hashes {
|
||||
if bytes.Equal(computed, hash.MultiHash) {
|
||||
return Result{Path: entry.Path, Status: StatusOK}
|
||||
}
|
||||
}
|
||||
|
||||
return Result{Path: entry.Path, Status: StatusHashMismatch, Message: "hash mismatch"}
|
||||
}
|
||||
|
||||
// FindExtraFiles walks the filesystem and reports files not in the manifest.
|
||||
// Results are sent to the results channel. The channel is closed when done.
|
||||
func (c *Checker) FindExtraFiles(ctx context.Context, results chan<- Result) error {
|
||||
if results != nil {
|
||||
defer close(results)
|
||||
}
|
||||
|
||||
return afero.Walk(c.fs, c.basePath, func(path string, info os.FileInfo, err error) error {
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
return ctx.Err()
|
||||
default:
|
||||
}
|
||||
|
||||
// Skip directories
|
||||
if info.IsDir() {
|
||||
return nil
|
||||
}
|
||||
|
||||
// Get relative path
|
||||
relPath, err := filepath.Rel(c.basePath, path)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// Check if path is in manifest
|
||||
if _, exists := c.manifestPaths[relPath]; !exists {
|
||||
if results != nil {
|
||||
results <- Result{
|
||||
Path: relPath,
|
||||
Status: StatusExtra,
|
||||
Message: "not in manifest",
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
})
|
||||
}
|
||||
|
||||
// sendCheckStatus sends a status update without blocking.
|
||||
func sendCheckStatus(ch chan<- CheckStatus, status CheckStatus) {
|
||||
if ch == nil {
|
||||
return
|
||||
}
|
||||
select {
|
||||
case ch <- status:
|
||||
default:
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user