Add testable CLI with dependency injection and new scanner/checker packages

Major changes:
- Refactor CLI to accept injected I/O streams and filesystem (afero.Fs)
  for testing without touching the real filesystem
- Add RunOptions struct and RunWithOptions() for configurable CLI execution
- Add internal/scanner package with two-phase manifest generation:
  - Phase 1 (Enumeration): walk directories, collect metadata
  - Phase 2 (Scan): read contents, compute hashes, write manifest
- Add internal/checker package for manifest verification with progress
  reporting and channel-based result streaming
- Add mfer/builder.go for incremental manifest construction
- Add --no-extra-files flag to check command to detect files not in manifest
- Add timing summaries showing file count, size, elapsed time, and throughput
- Add comprehensive tests using afero.MemMapFs (no real filesystem access)
- Add contrib/usage.sh integration test script
- Fix banner ASCII art alignment (consistent spacing)
- Fix verbosity levels so summaries display at default log level
- Update internal/log to support configurable output writers
This commit is contained in:
2025-12-17 11:00:55 -08:00
parent 13f39d598f
commit dc2ea47f6a
15 changed files with 1744 additions and 81 deletions

124
mfer/builder.go Normal file
View File

@@ -0,0 +1,124 @@
package mfer
import (
"crypto/sha256"
"io"
"sync"
"time"
"github.com/multiformats/go-multihash"
)
// FileProgress is called during file processing to report bytes read.
type FileProgress func(bytesRead int64)
// ManifestBuilder constructs a manifest by adding files one at a time.
type ManifestBuilder struct {
mu sync.Mutex
files []*MFFilePath
createdAt time.Time
}
// NewBuilder creates a new ManifestBuilder.
func NewBuilder() *ManifestBuilder {
return &ManifestBuilder{
files: make([]*MFFilePath, 0),
createdAt: time.Now(),
}
}
// AddFile reads file content from reader, computes hashes, and adds to manifest.
// The progress callback is called periodically with total bytes read so far.
// Returns the number of bytes read.
func (b *ManifestBuilder) AddFile(
path string,
size int64,
mtime time.Time,
reader io.Reader,
progress FileProgress,
) (int64, error) {
// Create hash writer
h := sha256.New()
// Read file in chunks, updating hash and progress
var totalRead int64
buf := make([]byte, 64*1024) // 64KB chunks
for {
n, err := reader.Read(buf)
if n > 0 {
h.Write(buf[:n])
totalRead += int64(n)
if progress != nil {
progress(totalRead)
}
}
if err == io.EOF {
break
}
if err != nil {
return totalRead, err
}
}
// Encode hash as multihash (SHA2-256)
mh, err := multihash.Encode(h.Sum(nil), multihash.SHA2_256)
if err != nil {
return totalRead, err
}
// Create file entry
entry := &MFFilePath{
Path: path,
Size: size,
Hashes: []*MFFileChecksum{
{MultiHash: mh},
},
Mtime: newTimestampFromTime(mtime),
}
b.mu.Lock()
b.files = append(b.files, entry)
b.mu.Unlock()
return totalRead, nil
}
// FileCount returns the number of files added to the builder.
func (b *ManifestBuilder) FileCount() int {
b.mu.Lock()
defer b.mu.Unlock()
return len(b.files)
}
// Build finalizes the manifest and writes it to the writer.
func (b *ManifestBuilder) Build(w io.Writer) error {
b.mu.Lock()
defer b.mu.Unlock()
// Create inner manifest
inner := &MFFile{
Version: MFFile_VERSION_ONE,
CreatedAt: newTimestampFromTime(b.createdAt),
Files: b.files,
}
// Create a temporary manifest to use existing serialization
m := &manifest{
pbInner: inner,
}
// Generate outer wrapper
if err := m.generateOuter(); err != nil {
return err
}
// Generate final output
if err := m.generate(); err != nil {
return err
}
// Write to output
_, err := w.Write(m.output.Bytes())
return err
}

View File

@@ -6,12 +6,13 @@ import (
"errors"
"io"
"github.com/spf13/afero"
"google.golang.org/protobuf/proto"
"sneak.berlin/go/mfer/internal/bork"
"sneak.berlin/go/mfer/internal/log"
)
func (m *manifest) validateProtoOuter() error {
func (m *manifest) deserializeInner() error {
if m.pbOuter.Version != MFFileOuter_VERSION_ONE {
return errors.New("unknown version")
}
@@ -25,10 +26,9 @@ func (m *manifest) validateProtoOuter() error {
if err != nil {
return err
}
dat, err := io.ReadAll(gzr)
defer gzr.Close()
dat, err := io.ReadAll(gzr)
if err != nil {
return err
}
@@ -38,9 +38,14 @@ func (m *manifest) validateProtoOuter() error {
log.Debugf("truncated data, got %d expected %d", isize, m.pbOuter.Size)
return bork.ErrFileTruncated
}
log.Debugf("inner data size is %d", isize)
log.Dump(dat)
log.Dump(m.pbOuter.Sha256)
// Deserialize inner message
m.pbInner = new(MFFile)
if err := proto.Unmarshal(dat, m.pbInner); err != nil {
return err
}
log.Debugf("loaded manifest with %d files", len(m.pbInner.Files))
return nil
}
@@ -54,7 +59,8 @@ func validateMagic(dat []byte) bool {
return bytes.Equal(got, expected)
}
func NewFromProto(input io.Reader) (*manifest, error) {
// NewManifestFromReader reads a manifest from an io.Reader.
func NewManifestFromReader(input io.Reader) (*manifest, error) {
m := New()
dat, err := io.ReadAll(input)
if err != nil {
@@ -69,21 +75,35 @@ func NewFromProto(input io.Reader) (*manifest, error) {
bb := bytes.NewBuffer(dat[ml:])
dat = bb.Bytes()
log.Dump(dat)
// deserialize:
// deserialize outer:
m.pbOuter = new(MFFileOuter)
err = proto.Unmarshal(dat, m.pbOuter)
if err != nil {
if err := proto.Unmarshal(dat, m.pbOuter); err != nil {
return nil, err
}
ve := m.validateProtoOuter()
if ve != nil {
return nil, ve
// deserialize inner:
if err := m.deserializeInner(); err != nil {
return nil, err
}
// FIXME TODO deserialize inner
return m, nil
}
// NewManifestFromFile reads a manifest from a file path using the given filesystem.
// If fs is nil, the real filesystem (OsFs) is used.
func NewManifestFromFile(fs afero.Fs, path string) (*manifest, error) {
if fs == nil {
fs = afero.NewOsFs()
}
f, err := fs.Open(path)
if err != nil {
return nil, err
}
defer f.Close()
return NewManifestFromReader(f)
}
// NewFromProto is deprecated, use NewManifestFromReader instead.
func NewFromProto(input io.Reader) (*manifest, error) {
return NewManifestFromReader(input)
}

View File

@@ -106,13 +106,31 @@ func NewFromFS(options *ManifestScanOptions, fs afero.Fs) (*manifest, error) {
}
func (m *manifest) GetFileCount() int64 {
if m.pbInner != nil {
return int64(len(m.pbInner.Files))
}
return int64(len(m.files))
}
func (m *manifest) GetTotalFileSize() int64 {
if m.pbInner != nil {
var total int64
for _, f := range m.pbInner.Files {
total += f.Size
}
return total
}
return m.totalFileSize
}
// Files returns all file entries from a loaded manifest.
func (m *manifest) Files() []*MFFilePath {
if m.pbInner == nil {
return nil
}
return m.pbInner.Files
}
func pathIsHidden(p string) bool {
tp := path.Clean(p)
if strings.HasPrefix(tp, ".") {