Major changes: - Refactor CLI to accept injected I/O streams and filesystem (afero.Fs) for testing without touching the real filesystem - Add RunOptions struct and RunWithOptions() for configurable CLI execution - Add internal/scanner package with two-phase manifest generation: - Phase 1 (Enumeration): walk directories, collect metadata - Phase 2 (Scan): read contents, compute hashes, write manifest - Add internal/checker package for manifest verification with progress reporting and channel-based result streaming - Add mfer/builder.go for incremental manifest construction - Add --no-extra-files flag to check command to detect files not in manifest - Add timing summaries showing file count, size, elapsed time, and throughput - Add comprehensive tests using afero.MemMapFs (no real filesystem access) - Add contrib/usage.sh integration test script - Fix banner ASCII art alignment (consistent spacing) - Fix verbosity levels so summaries display at default log level - Update internal/log to support configurable output writers
282 lines
6.3 KiB
Go
282 lines
6.3 KiB
Go
package checker
|
|
|
|
import (
|
|
"bytes"
|
|
"context"
|
|
"crypto/sha256"
|
|
"errors"
|
|
"io"
|
|
"os"
|
|
"path/filepath"
|
|
|
|
"github.com/multiformats/go-multihash"
|
|
"github.com/spf13/afero"
|
|
"sneak.berlin/go/mfer/mfer"
|
|
)
|
|
|
|
// Result represents the outcome of checking a single file.
|
|
type Result struct {
|
|
Path string
|
|
Status Status
|
|
Message string
|
|
}
|
|
|
|
// Status is the verification outcome recorded for a single file.
type Status int

// Possible verification outcomes. The zero value is StatusOK.
const (
	StatusOK           Status = iota // File matched the manifest
	StatusMissing                    // File listed in manifest was not found
	StatusSizeMismatch               // File size differs from the manifest
	StatusHashMismatch               // File content hash matched no manifest hash
	StatusExtra                      // File exists on disk but not in manifest
	StatusError                      // File could not be checked
)

// String returns the upper-case label for the status, or "UNKNOWN" for any
// value that is not one of the declared constants.
func (s Status) String() string {
	// Table indexed by the constants themselves, so the labels stay attached
	// to the right value even if the declaration order changes.
	labels := [...]string{
		StatusOK:           "OK",
		StatusMissing:      "MISSING",
		StatusSizeMismatch: "SIZE_MISMATCH",
		StatusHashMismatch: "HASH_MISMATCH",
		StatusExtra:        "EXTRA",
		StatusError:        "ERROR",
	}
	if s < 0 || int(s) >= len(labels) {
		return "UNKNOWN"
	}
	return labels[s]
}
|
|
|
|
// CheckStatus is a snapshot of how far a check operation has progressed.
type CheckStatus struct {
	// TotalFiles is the number of files that will be checked.
	TotalFiles int64
	// CheckedFiles is the number of files processed so far.
	CheckedFiles int64
	// TotalBytes is the combined size of all files that will be checked.
	TotalBytes int64
	// CheckedBytes is the number of bytes accounted for so far.
	CheckedBytes int64
	// BytesPerSec is presumably the current throughput in bytes per second.
	// NOTE(review): confirm who populates this field.
	BytesPerSec float64
	// Failures is the number of files processed so far whose status was not OK.
	Failures int64
}
|
|
|
|
// Checker verifies files against a manifest.
|
|
type Checker struct {
|
|
basePath string
|
|
files []*mfer.MFFilePath
|
|
fs afero.Fs
|
|
// manifestPaths is a set of paths in the manifest for quick lookup
|
|
manifestPaths map[string]struct{}
|
|
}
|
|
|
|
// NewChecker creates a new Checker for the given manifest, base path, and filesystem.
|
|
// The basePath is the directory relative to which manifest paths are resolved.
|
|
// If fs is nil, the real filesystem (OsFs) is used.
|
|
func NewChecker(manifestPath string, basePath string, fs afero.Fs) (*Checker, error) {
|
|
if fs == nil {
|
|
fs = afero.NewOsFs()
|
|
}
|
|
|
|
m, err := mfer.NewManifestFromFile(fs, manifestPath)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
abs, err := filepath.Abs(basePath)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
files := m.Files()
|
|
manifestPaths := make(map[string]struct{}, len(files))
|
|
for _, f := range files {
|
|
manifestPaths[f.Path] = struct{}{}
|
|
}
|
|
|
|
return &Checker{
|
|
basePath: abs,
|
|
files: files,
|
|
fs: fs,
|
|
manifestPaths: manifestPaths,
|
|
}, nil
|
|
}
|
|
|
|
// FileCount returns the number of files in the manifest.
|
|
func (c *Checker) FileCount() int64 {
|
|
return int64(len(c.files))
|
|
}
|
|
|
|
// TotalBytes returns the total size of all files in the manifest.
|
|
func (c *Checker) TotalBytes() int64 {
|
|
var total int64
|
|
for _, f := range c.files {
|
|
total += f.Size
|
|
}
|
|
return total
|
|
}
|
|
|
|
// Check verifies all files against the manifest.
|
|
// Results are sent to the results channel as files are checked.
|
|
// Progress updates are sent to the progress channel approximately once per second.
|
|
// Both channels are closed when the method returns.
|
|
func (c *Checker) Check(ctx context.Context, results chan<- Result, progress chan<- CheckStatus) error {
|
|
if results != nil {
|
|
defer close(results)
|
|
}
|
|
if progress != nil {
|
|
defer close(progress)
|
|
}
|
|
|
|
totalFiles := int64(len(c.files))
|
|
totalBytes := c.TotalBytes()
|
|
|
|
var checkedFiles int64
|
|
var checkedBytes int64
|
|
var failures int64
|
|
|
|
for _, entry := range c.files {
|
|
select {
|
|
case <-ctx.Done():
|
|
return ctx.Err()
|
|
default:
|
|
}
|
|
|
|
result := c.checkFile(entry, &checkedBytes)
|
|
if result.Status != StatusOK {
|
|
failures++
|
|
}
|
|
checkedFiles++
|
|
|
|
if results != nil {
|
|
results <- result
|
|
}
|
|
|
|
// Send progress (simplified - every file for now)
|
|
if progress != nil {
|
|
sendCheckStatus(progress, CheckStatus{
|
|
TotalFiles: totalFiles,
|
|
CheckedFiles: checkedFiles,
|
|
TotalBytes: totalBytes,
|
|
CheckedBytes: checkedBytes,
|
|
Failures: failures,
|
|
})
|
|
}
|
|
}
|
|
|
|
return nil
|
|
}
|
|
|
|
func (c *Checker) checkFile(entry *mfer.MFFilePath, checkedBytes *int64) Result {
|
|
absPath := filepath.Join(c.basePath, entry.Path)
|
|
|
|
// Check if file exists
|
|
info, err := c.fs.Stat(absPath)
|
|
if err != nil {
|
|
if errors.Is(err, afero.ErrFileNotFound) || errors.Is(err, errors.New("file does not exist")) {
|
|
return Result{Path: entry.Path, Status: StatusMissing, Message: "file not found"}
|
|
}
|
|
// Check for "file does not exist" style errors
|
|
exists, _ := afero.Exists(c.fs, absPath)
|
|
if !exists {
|
|
return Result{Path: entry.Path, Status: StatusMissing, Message: "file not found"}
|
|
}
|
|
return Result{Path: entry.Path, Status: StatusError, Message: err.Error()}
|
|
}
|
|
|
|
// Check size
|
|
if info.Size() != entry.Size {
|
|
*checkedBytes += info.Size()
|
|
return Result{
|
|
Path: entry.Path,
|
|
Status: StatusSizeMismatch,
|
|
Message: "size mismatch",
|
|
}
|
|
}
|
|
|
|
// Open and hash file
|
|
f, err := c.fs.Open(absPath)
|
|
if err != nil {
|
|
return Result{Path: entry.Path, Status: StatusError, Message: err.Error()}
|
|
}
|
|
defer f.Close()
|
|
|
|
h := sha256.New()
|
|
n, err := io.Copy(h, f)
|
|
if err != nil {
|
|
return Result{Path: entry.Path, Status: StatusError, Message: err.Error()}
|
|
}
|
|
*checkedBytes += n
|
|
|
|
// Encode as multihash and compare
|
|
computed, err := multihash.Encode(h.Sum(nil), multihash.SHA2_256)
|
|
if err != nil {
|
|
return Result{Path: entry.Path, Status: StatusError, Message: err.Error()}
|
|
}
|
|
|
|
// Check against all hashes in manifest (at least one must match)
|
|
for _, hash := range entry.Hashes {
|
|
if bytes.Equal(computed, hash.MultiHash) {
|
|
return Result{Path: entry.Path, Status: StatusOK}
|
|
}
|
|
}
|
|
|
|
return Result{Path: entry.Path, Status: StatusHashMismatch, Message: "hash mismatch"}
|
|
}
|
|
|
|
// FindExtraFiles walks the filesystem and reports files not in the manifest.
|
|
// Results are sent to the results channel. The channel is closed when done.
|
|
func (c *Checker) FindExtraFiles(ctx context.Context, results chan<- Result) error {
|
|
if results != nil {
|
|
defer close(results)
|
|
}
|
|
|
|
return afero.Walk(c.fs, c.basePath, func(path string, info os.FileInfo, err error) error {
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
select {
|
|
case <-ctx.Done():
|
|
return ctx.Err()
|
|
default:
|
|
}
|
|
|
|
// Skip directories
|
|
if info.IsDir() {
|
|
return nil
|
|
}
|
|
|
|
// Get relative path
|
|
relPath, err := filepath.Rel(c.basePath, path)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
// Check if path is in manifest
|
|
if _, exists := c.manifestPaths[relPath]; !exists {
|
|
if results != nil {
|
|
results <- Result{
|
|
Path: relPath,
|
|
Status: StatusExtra,
|
|
Message: "not in manifest",
|
|
}
|
|
}
|
|
}
|
|
|
|
return nil
|
|
})
|
|
}
|
|
|
|
// sendCheckStatus sends a status update without blocking.
|
|
func sendCheckStatus(ch chan<- CheckStatus, status CheckStatus) {
|
|
if ch == nil {
|
|
return
|
|
}
|
|
select {
|
|
case ch <- status:
|
|
default:
|
|
}
|
|
}
|