Add testable CLI with dependency injection and new scanner/checker packages
Major changes:
- Refactor CLI to accept injected I/O streams and filesystem (afero.Fs) for testing without touching the real filesystem
- Add RunOptions struct and RunWithOptions() for configurable CLI execution
- Add internal/scanner package with two-phase manifest generation:
  - Phase 1 (Enumeration): walk directories, collect metadata
  - Phase 2 (Scan): read contents, compute hashes, write manifest
- Add internal/checker package for manifest verification with progress reporting and channel-based result streaming
- Add mfer/builder.go for incremental manifest construction
- Add --no-extra-files flag to check command to detect files not in manifest
- Add timing summaries showing file count, size, elapsed time, and throughput
- Add comprehensive tests using afero.MemMapFs (no real filesystem access)
- Add contrib/usage.sh integration test script
- Fix banner ASCII art alignment (consistent spacing)
- Fix verbosity levels so summaries display at default log level
- Update internal/log to support configurable output writers
This commit is contained in:
124
mfer/builder.go
Normal file
124
mfer/builder.go
Normal file
@@ -0,0 +1,124 @@
|
||||
package mfer
|
||||
|
||||
import (
|
||||
"crypto/sha256"
|
||||
"io"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"github.com/multiformats/go-multihash"
|
||||
)
|
||||
|
||||
// FileProgress is a callback invoked during file processing to report bytes
// read. As used by ManifestBuilder.AddFile, bytesRead is the running total
// for the file being processed, not the size of the most recent chunk.
|
||||
type FileProgress func(bytesRead int64)
|
||||
|
||||
// ManifestBuilder constructs a manifest by adding files one at a time.
// Entries are accumulated with AddFile and serialized with Build.
|
||||
type ManifestBuilder struct {
|
||||
// mu is held by AddFile, FileCount, and Build while they access files.
mu sync.Mutex
|
||||
// files holds one entry per successful AddFile call, in append order.
files []*MFFilePath
|
||||
// createdAt is set by NewBuilder and written as the manifest's CreatedAt by Build.
createdAt time.Time
|
||||
}
|
||||
|
||||
// NewBuilder creates a new ManifestBuilder.
|
||||
func NewBuilder() *ManifestBuilder {
|
||||
return &ManifestBuilder{
|
||||
files: make([]*MFFilePath, 0),
|
||||
createdAt: time.Now(),
|
||||
}
|
||||
}
|
||||
|
||||
// AddFile reads file content from reader, computes hashes, and adds to manifest.
|
||||
// The progress callback is called periodically with total bytes read so far.
|
||||
// Returns the number of bytes read.
|
||||
func (b *ManifestBuilder) AddFile(
|
||||
path string,
|
||||
size int64,
|
||||
mtime time.Time,
|
||||
reader io.Reader,
|
||||
progress FileProgress,
|
||||
) (int64, error) {
|
||||
// Create hash writer
|
||||
h := sha256.New()
|
||||
|
||||
// Read file in chunks, updating hash and progress
|
||||
var totalRead int64
|
||||
buf := make([]byte, 64*1024) // 64KB chunks
|
||||
|
||||
for {
|
||||
n, err := reader.Read(buf)
|
||||
if n > 0 {
|
||||
h.Write(buf[:n])
|
||||
totalRead += int64(n)
|
||||
if progress != nil {
|
||||
progress(totalRead)
|
||||
}
|
||||
}
|
||||
if err == io.EOF {
|
||||
break
|
||||
}
|
||||
if err != nil {
|
||||
return totalRead, err
|
||||
}
|
||||
}
|
||||
|
||||
// Encode hash as multihash (SHA2-256)
|
||||
mh, err := multihash.Encode(h.Sum(nil), multihash.SHA2_256)
|
||||
if err != nil {
|
||||
return totalRead, err
|
||||
}
|
||||
|
||||
// Create file entry
|
||||
entry := &MFFilePath{
|
||||
Path: path,
|
||||
Size: size,
|
||||
Hashes: []*MFFileChecksum{
|
||||
{MultiHash: mh},
|
||||
},
|
||||
Mtime: newTimestampFromTime(mtime),
|
||||
}
|
||||
|
||||
b.mu.Lock()
|
||||
b.files = append(b.files, entry)
|
||||
b.mu.Unlock()
|
||||
|
||||
return totalRead, nil
|
||||
}
|
||||
|
||||
// FileCount returns the number of files added to the builder.
|
||||
func (b *ManifestBuilder) FileCount() int {
|
||||
b.mu.Lock()
|
||||
defer b.mu.Unlock()
|
||||
return len(b.files)
|
||||
}
|
||||
|
||||
// Build finalizes the manifest and writes it to the writer.
|
||||
func (b *ManifestBuilder) Build(w io.Writer) error {
|
||||
b.mu.Lock()
|
||||
defer b.mu.Unlock()
|
||||
|
||||
// Create inner manifest
|
||||
inner := &MFFile{
|
||||
Version: MFFile_VERSION_ONE,
|
||||
CreatedAt: newTimestampFromTime(b.createdAt),
|
||||
Files: b.files,
|
||||
}
|
||||
|
||||
// Create a temporary manifest to use existing serialization
|
||||
m := &manifest{
|
||||
pbInner: inner,
|
||||
}
|
||||
|
||||
// Generate outer wrapper
|
||||
if err := m.generateOuter(); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// Generate final output
|
||||
if err := m.generate(); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// Write to output
|
||||
_, err := w.Write(m.output.Bytes())
|
||||
return err
|
||||
}
|
||||
@@ -6,12 +6,13 @@ import (
|
||||
"errors"
|
||||
"io"
|
||||
|
||||
"github.com/spf13/afero"
|
||||
"google.golang.org/protobuf/proto"
|
||||
"sneak.berlin/go/mfer/internal/bork"
|
||||
"sneak.berlin/go/mfer/internal/log"
|
||||
)
|
||||
|
||||
func (m *manifest) validateProtoOuter() error {
|
||||
func (m *manifest) deserializeInner() error {
|
||||
if m.pbOuter.Version != MFFileOuter_VERSION_ONE {
|
||||
return errors.New("unknown version")
|
||||
}
|
||||
@@ -25,10 +26,9 @@ func (m *manifest) validateProtoOuter() error {
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
dat, err := io.ReadAll(gzr)
|
||||
defer gzr.Close()
|
||||
|
||||
dat, err := io.ReadAll(gzr)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
@@ -38,9 +38,14 @@ func (m *manifest) validateProtoOuter() error {
|
||||
log.Debugf("truncated data, got %d expected %d", isize, m.pbOuter.Size)
|
||||
return bork.ErrFileTruncated
|
||||
}
|
||||
log.Debugf("inner data size is %d", isize)
|
||||
log.Dump(dat)
|
||||
log.Dump(m.pbOuter.Sha256)
|
||||
|
||||
// Deserialize inner message
|
||||
m.pbInner = new(MFFile)
|
||||
if err := proto.Unmarshal(dat, m.pbInner); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
log.Debugf("loaded manifest with %d files", len(m.pbInner.Files))
|
||||
return nil
|
||||
}
|
||||
|
||||
@@ -54,7 +59,8 @@ func validateMagic(dat []byte) bool {
|
||||
return bytes.Equal(got, expected)
|
||||
}
|
||||
|
||||
func NewFromProto(input io.Reader) (*manifest, error) {
|
||||
// NewManifestFromReader reads a manifest from an io.Reader.
|
||||
func NewManifestFromReader(input io.Reader) (*manifest, error) {
|
||||
m := New()
|
||||
dat, err := io.ReadAll(input)
|
||||
if err != nil {
|
||||
@@ -69,21 +75,35 @@ func NewFromProto(input io.Reader) (*manifest, error) {
|
||||
bb := bytes.NewBuffer(dat[ml:])
|
||||
dat = bb.Bytes()
|
||||
|
||||
log.Dump(dat)
|
||||
|
||||
// deserialize:
|
||||
// deserialize outer:
|
||||
m.pbOuter = new(MFFileOuter)
|
||||
err = proto.Unmarshal(dat, m.pbOuter)
|
||||
|
||||
if err != nil {
|
||||
if err := proto.Unmarshal(dat, m.pbOuter); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
ve := m.validateProtoOuter()
|
||||
if ve != nil {
|
||||
return nil, ve
|
||||
// deserialize inner:
|
||||
if err := m.deserializeInner(); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
// FIXME TODO deserialize inner
|
||||
return m, nil
|
||||
}
|
||||
|
||||
// NewManifestFromFile reads a manifest from a file path using the given filesystem.
|
||||
// If fs is nil, the real filesystem (OsFs) is used.
|
||||
func NewManifestFromFile(fs afero.Fs, path string) (*manifest, error) {
|
||||
if fs == nil {
|
||||
fs = afero.NewOsFs()
|
||||
}
|
||||
f, err := fs.Open(path)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
defer f.Close()
|
||||
return NewManifestFromReader(f)
|
||||
}
|
||||
|
||||
// NewFromProto reads a manifest from an io.Reader by delegating to
// NewManifestFromReader.
//
// Deprecated: use NewManifestFromReader instead.
|
||||
func NewFromProto(input io.Reader) (*manifest, error) {
|
||||
return NewManifestFromReader(input)
|
||||
}
|
||||
|
||||
@@ -106,13 +106,31 @@ func NewFromFS(options *ManifestScanOptions, fs afero.Fs) (*manifest, error) {
|
||||
}
|
||||
|
||||
func (m *manifest) GetFileCount() int64 {
|
||||
if m.pbInner != nil {
|
||||
return int64(len(m.pbInner.Files))
|
||||
}
|
||||
return int64(len(m.files))
|
||||
}
|
||||
|
||||
func (m *manifest) GetTotalFileSize() int64 {
|
||||
if m.pbInner != nil {
|
||||
var total int64
|
||||
for _, f := range m.pbInner.Files {
|
||||
total += f.Size
|
||||
}
|
||||
return total
|
||||
}
|
||||
return m.totalFileSize
|
||||
}
|
||||
|
||||
// Files returns all file entries from a loaded manifest.
|
||||
func (m *manifest) Files() []*MFFilePath {
|
||||
if m.pbInner == nil {
|
||||
return nil
|
||||
}
|
||||
return m.pbInner.Files
|
||||
}
|
||||
|
||||
func pathIsHidden(p string) bool {
|
||||
tp := path.Clean(p)
|
||||
if strings.HasPrefix(tp, ".") {
|
||||
|
||||
Reference in New Issue
Block a user