Compare commits

..

6 Commits

Author SHA1 Message Date
fded1a0393 Remove generateInner, now handled by Builder
- Remove generateInner() from serialize.go
- Update generate() to error if pbInner not set
- Remove legacy tests that depended on old code path
- Update TODO item to reflect removal
2025-12-17 11:27:41 -08:00
5d7c729efb remove golangci-lint config files 2025-12-17 11:27:41 -08:00
48c3c09d85 Rename ManifestBuilder to Builder 2025-12-17 11:27:41 -08:00
f3be3eba84 Add TODO: change FileProgress callback to channel-based 2025-12-17 11:27:41 -08:00
5e65b3a0fd Add TODO section to README with prioritized task list 2025-12-17 11:27:41 -08:00
79fc5cca6c Add godoc strings to all exported types, functions, and fields
Documents:
- cli: NO_COLOR, RunOptions fields, CLIApp, VersionString
- checker: Result fields, Status constants, CheckStatus fields
- scanner: EnumerateStatus, ScanStatus, Options, FileEntry fields
- log: Level alias, DisableStyling, Init, Info/Debug functions,
  verbosity helpers, GetLogger, GetLevel, WithError
- mfer: ManifestScanOptions, New, NewFromPaths, NewFromFS, MAGIC
2025-12-17 11:27:41 -08:00
13 changed files with 108 additions and 188 deletions

View File

@ -1,2 +0,0 @@
run:
tests: false

View File

@ -1,2 +0,0 @@
run:
tests: false

View File

@ -192,13 +192,13 @@ Reading file contents and computing cryptographic hashes for manifest generation
### builder.go ### builder.go
- **Types** - **Types**
- `FileProgress func(bytesRead int64)` - Callback for file processing progress - `FileProgress func(bytesRead int64)` - Callback for file processing progress
- `ManifestBuilder struct` - Constructs manifests by adding files one at a time - `Builder struct` - Constructs manifests by adding files one at a time
- **Functions** - **Functions**
- `NewBuilder() *ManifestBuilder` - Creates a new ManifestBuilder - `NewBuilder() *Builder` - Creates a new Builder
- **Methods** - **Methods**
- `(*ManifestBuilder) AddFile(path string, size int64, mtime time.Time, reader io.Reader, progress FileProgress) (int64, error)` - Reads file, computes hash, adds to manifest - `(*Builder) AddFile(path string, size int64, mtime time.Time, reader io.Reader, progress FileProgress) (int64, error)` - Reads file, computes hash, adds to manifest
- `(*ManifestBuilder) FileCount() int` - Returns number of files added - `(*Builder) FileCount() int` - Returns number of files added
- `(*ManifestBuilder) Build(w io.Writer) error` - Finalizes and writes manifest - `(*Builder) Build(w io.Writer) error` - Finalizes and writes manifest
### serialize.go ### serialize.go
- **Constants** - **Constants**
@ -350,6 +350,28 @@ The manifest file would do several important things:
- **Manifest size:** Manifests must fit entirely in system memory during reading and writing. - **Manifest size:** Manifests must fit entirely in system memory during reading and writing.
# TODO
## High Priority
- [ ] **Implement `fetch` command** - Currently panics with "not implemented". Should download a manifest and its referenced files from a URL.
- [ ] **Fix import in fetch.go** - Uses `github.com/apex/log` directly instead of `internal/log`, violating codebase conventions.
## Medium Priority
- [ ] **Add `--force` flag for overwrites** - Currently silently overwrites existing manifest files. Should require `--force` (`-f`) to overwrite.
- [ ] **Implement FollowSymLinks option** - The flag exists in CLI and Options structs but does nothing. Scanner should use `EvalSymlinks` or `Lstat`.
- [ ] **Change FileProgress callback to channel** - `mfer/builder.go` uses a callback for progress reporting; it should instead report progress over a channel of status values, as is done with `EnumerateStatus` and `ScanStatus`, for consistency.
- [ ] **Consolidate legacy manifest code** - `mfer/manifest.go` has old scanning code (`Scan()`, `addFile()`) that duplicates the new `internal/scanner` + `mfer/builder.go` pattern.
- [ ] **Add context cancellation to legacy code** - The old `manifest.Scan()` doesn't support context cancellation; the new scanner does.
## Lower Priority
- [ ] **Add unit tests for `internal/checker`** - Currently has no test files; only tested indirectly via CLI tests.
- [ ] **Add unit tests for `internal/scanner`** - Currently has no test files.
- [ ] **Clean up FIXMEs in manifest.go** - Validate input paths exist, validate filesystem, avoid redundant stat calls.
- [ ] **Validate input paths before scanning** - Should fail fast with a clear error if paths don't exist.
# Open Questions # Open Questions
- Should the manifest file include checksums of individual file chunks, or just for the whole assembled file? - Should the manifest file include checksums of individual file chunks, or just for the whole assembled file?

View File

@ -16,21 +16,21 @@ import (
// Result represents the outcome of checking a single file. // Result represents the outcome of checking a single file.
type Result struct { type Result struct {
Path string Path string // Relative path from manifest
Status Status Status Status // Verification result status
Message string Message string // Human-readable description of the result
} }
// Status represents the verification status of a file. // Status represents the verification status of a file.
type Status int type Status int
const ( const (
StatusOK Status = iota StatusOK Status = iota // File matches manifest (size and hash verified)
StatusMissing StatusMissing // File not found on disk
StatusSizeMismatch StatusSizeMismatch // File size differs from manifest
StatusHashMismatch StatusHashMismatch // File hash differs from manifest
StatusExtra // File exists on disk but not in manifest StatusExtra // File exists on disk but not in manifest
StatusError StatusError // Error occurred during verification
) )
func (s Status) String() string { func (s Status) String() string {
@ -54,12 +54,12 @@ func (s Status) String() string {
// CheckStatus contains progress information for the check operation. // CheckStatus contains progress information for the check operation.
type CheckStatus struct { type CheckStatus struct {
TotalFiles int64 TotalFiles int64 // Total number of files in manifest
CheckedFiles int64 CheckedFiles int64 // Number of files checked so far
TotalBytes int64 TotalBytes int64 // Total bytes to verify (sum of all file sizes)
CheckedBytes int64 CheckedBytes int64 // Bytes verified so far
BytesPerSec float64 BytesPerSec float64 // Current throughput rate
Failures int64 Failures int64 // Number of verification failures encountered
} }
// Checker verifies files against a manifest. // Checker verifies files against a manifest.

View File

@ -7,6 +7,8 @@ import (
"github.com/spf13/afero" "github.com/spf13/afero"
) )
// NO_COLOR disables colored output when set. Automatically true if the
// NO_COLOR environment variable is present (per https://no-color.org/).
var NO_COLOR bool var NO_COLOR bool
func init() { func init() {
@ -17,15 +19,16 @@ func init() {
} }
// RunOptions contains all configuration for running the CLI application. // RunOptions contains all configuration for running the CLI application.
// Use DefaultRunOptions for standard CLI execution, or construct manually for testing.
type RunOptions struct { type RunOptions struct {
Appname string Appname string // Application name displayed in help and version output
Version string Version string // Version string (typically set at build time)
Gitrev string Gitrev string // Git revision hash (typically set at build time)
Args []string Args []string // Command-line arguments (typically os.Args)
Stdin io.Reader Stdin io.Reader // Standard input stream
Stdout io.Writer Stdout io.Writer // Standard output stream
Stderr io.Writer Stderr io.Writer // Standard error stream
Fs afero.Fs Fs afero.Fs // Filesystem abstraction for file operations
} }
// DefaultRunOptions returns RunOptions configured for normal CLI execution. // DefaultRunOptions returns RunOptions configured for normal CLI execution.

View File

@ -11,6 +11,8 @@ import (
"sneak.berlin/go/mfer/internal/log" "sneak.berlin/go/mfer/internal/log"
) )
// CLIApp is the main CLI application container. It holds configuration,
// I/O streams, and filesystem abstraction to enable testing and flexibility.
type CLIApp struct { type CLIApp struct {
appname string appname string
version string version string
@ -19,13 +21,10 @@ type CLIApp struct {
exitCode int exitCode int
app *cli.App app *cli.App
// I/O streams - all program input/output should go through these Stdin io.Reader // Standard input stream
Stdin io.Reader Stdout io.Writer // Standard output stream for normal output
Stdout io.Writer Stderr io.Writer // Standard error stream for diagnostics
Stderr io.Writer Fs afero.Fs // Filesystem abstraction for all file operations
// Fs is the filesystem abstraction - defaults to OsFs for real filesystem
Fs afero.Fs
} }
const banner = ` const banner = `
@ -45,6 +44,7 @@ func (mfa *CLIApp) printBanner() {
fmt.Fprintln(mfa.Stdout, banner) fmt.Fprintln(mfa.Stdout, banner)
} }
// VersionString returns the version and git revision formatted for display.
func (mfa *CLIApp) VersionString() string { func (mfa *CLIApp) VersionString() string {
return fmt.Sprintf("%s (%s)", mfa.version, mfa.gitrev) return fmt.Sprintf("%s (%s)", mfa.version, mfa.gitrev)
} }

View File

@ -13,6 +13,7 @@ import (
"github.com/pterm/pterm" "github.com/pterm/pterm"
) )
// Level is an alias for apex/log.Level for use by callers without importing apex/log.
type Level = log.Level type Level = log.Level
var ( var (
@ -48,6 +49,7 @@ func GetStderr() io.Writer {
return stderr return stderr
} }
// DisableStyling turns off colors and styling for terminal output.
func DisableStyling() { func DisableStyling() {
pterm.DisableColor() pterm.DisableColor()
pterm.DisableStyling() pterm.DisableStyling()
@ -59,6 +61,7 @@ func DisableStyling() {
pterm.Fatal.Prefix.Text = "" pterm.Fatal.Prefix.Text = ""
} }
// Init initializes the logger with the CLI handler and default log level.
func Init() { func Init() {
mu.RLock() mu.RLock()
w := stderr w := stderr
@ -67,22 +70,27 @@ func Init() {
log.SetLevel(log.InfoLevel) log.SetLevel(log.InfoLevel)
} }
// Infof logs a formatted message at info level.
func Infof(format string, args ...interface{}) { func Infof(format string, args ...interface{}) {
log.Infof(format, args...) log.Infof(format, args...)
} }
// Info logs a message at info level.
func Info(arg string) { func Info(arg string) {
log.Info(arg) log.Info(arg)
} }
// Debugf logs a formatted message at debug level with caller location.
func Debugf(format string, args ...interface{}) { func Debugf(format string, args ...interface{}) {
DebugReal(fmt.Sprintf(format, args...), 2) DebugReal(fmt.Sprintf(format, args...), 2)
} }
// Debug logs a message at debug level with caller location.
func Debug(arg string) { func Debug(arg string) {
DebugReal(arg, 2) DebugReal(arg, 2)
} }
// DebugReal logs at debug level with caller info from the specified stack depth.
func DebugReal(arg string, cs int) { func DebugReal(arg string, cs int) {
_, callerFile, callerLine, ok := runtime.Caller(cs) _, callerFile, callerLine, ok := runtime.Caller(cs)
if !ok { if !ok {
@ -92,14 +100,18 @@ func DebugReal(arg string, cs int) {
log.Debug(tag + arg) log.Debug(tag + arg)
} }
// Dump logs a spew dump of the arguments at debug level.
func Dump(args ...interface{}) { func Dump(args ...interface{}) {
DebugReal(spew.Sdump(args...), 2) DebugReal(spew.Sdump(args...), 2)
} }
// EnableDebugLogging sets the log level to debug.
func EnableDebugLogging() { func EnableDebugLogging() {
SetLevel(log.DebugLevel) SetLevel(log.DebugLevel)
} }
// VerbosityStepsToLogLevel converts a -v count to a log level.
// 0 returns InfoLevel, 1+ returns DebugLevel.
func VerbosityStepsToLogLevel(l int) log.Level { func VerbosityStepsToLogLevel(l int) log.Level {
switch l { switch l {
case 0: case 0:
@ -111,14 +123,17 @@ func VerbosityStepsToLogLevel(l int) log.Level {
return log.DebugLevel return log.DebugLevel
} }
// SetLevelFromVerbosity sets the log level based on -v flag count.
func SetLevelFromVerbosity(l int) { func SetLevelFromVerbosity(l int) {
SetLevel(VerbosityStepsToLogLevel(l)) SetLevel(VerbosityStepsToLogLevel(l))
} }
// SetLevel sets the global log level.
func SetLevel(arg log.Level) { func SetLevel(arg log.Level) {
log.SetLevel(arg) log.SetLevel(arg)
} }
// GetLogger returns the underlying apex/log Logger.
func GetLogger() *log.Logger { func GetLogger() *log.Logger {
if logger, ok := log.Log.(*log.Logger); ok { if logger, ok := log.Log.(*log.Logger); ok {
return logger return logger
@ -126,10 +141,12 @@ func GetLogger() *log.Logger {
panic("unable to get logger") panic("unable to get logger")
} }
// GetLevel returns the current log level.
func GetLevel() log.Level { func GetLevel() log.Level {
return GetLogger().Level return GetLogger().Level
} }
// WithError returns a log entry with the error attached.
func WithError(e error) *log.Entry { func WithError(e error) *log.Entry {
return GetLogger().WithError(e) return GetLogger().WithError(e)
} }

View File

@ -21,8 +21,8 @@ import (
// EnumerateStatus contains progress information for the enumeration phase. // EnumerateStatus contains progress information for the enumeration phase.
type EnumerateStatus struct { type EnumerateStatus struct {
FilesFound int64 FilesFound int64 // Number of files discovered so far
BytesFound int64 BytesFound int64 // Total size of discovered files (from stat)
} }
// Phase 2: Scan (ToManifest) // Phase 2: Scan (ToManifest)
@ -32,27 +32,27 @@ type EnumerateStatus struct {
// ScanStatus contains progress information for the scan phase. // ScanStatus contains progress information for the scan phase.
type ScanStatus struct { type ScanStatus struct {
TotalFiles int64 TotalFiles int64 // Total number of files to scan
ScannedFiles int64 ScannedFiles int64 // Number of files scanned so far
TotalBytes int64 TotalBytes int64 // Total bytes to read (sum of all file sizes)
ScannedBytes int64 ScannedBytes int64 // Bytes read so far
BytesPerSec float64 BytesPerSec float64 // Current throughput rate
} }
// Options configures scanner behavior. // Options configures scanner behavior.
type Options struct { type Options struct {
IgnoreDotfiles bool IgnoreDotfiles bool // Skip files and directories starting with a dot
FollowSymLinks bool FollowSymLinks bool // Resolve symlinks instead of skipping them
Fs afero.Fs // Filesystem to use, defaults to OsFs Fs afero.Fs // Filesystem to use, defaults to OsFs if nil
} }
// FileEntry represents a file that has been enumerated. // FileEntry represents a file that has been enumerated.
type FileEntry struct { type FileEntry struct {
Path string // Relative path (used in manifest) Path string // Relative path (used in manifest)
AbsPath string // Absolute path (used for reading file content) AbsPath string // Absolute path (used for reading file content)
Size int64 Size int64 // File size in bytes
Mtime time.Time Mtime time.Time // Last modification time
Ctime time.Time Ctime time.Time // Creation time (platform-dependent)
} }
// Scanner accumulates files and generates manifests from them. // Scanner accumulates files and generates manifests from them.

View File

@ -12,16 +12,16 @@ import (
// FileProgress is called during file processing to report bytes read. // FileProgress is called during file processing to report bytes read.
type FileProgress func(bytesRead int64) type FileProgress func(bytesRead int64)
// ManifestBuilder constructs a manifest by adding files one at a time. // Builder constructs a manifest by adding files one at a time.
type ManifestBuilder struct { type Builder struct {
mu sync.Mutex mu sync.Mutex
files []*MFFilePath files []*MFFilePath
createdAt time.Time createdAt time.Time
} }
// NewBuilder creates a new ManifestBuilder. // NewBuilder creates a new Builder.
func NewBuilder() *ManifestBuilder { func NewBuilder() *Builder {
return &ManifestBuilder{ return &Builder{
files: make([]*MFFilePath, 0), files: make([]*MFFilePath, 0),
createdAt: time.Now(), createdAt: time.Now(),
} }
@ -30,7 +30,7 @@ func NewBuilder() *ManifestBuilder {
// AddFile reads file content from reader, computes hashes, and adds to manifest. // AddFile reads file content from reader, computes hashes, and adds to manifest.
// The progress callback is called periodically with total bytes read so far. // The progress callback is called periodically with total bytes read so far.
// Returns the number of bytes read. // Returns the number of bytes read.
func (b *ManifestBuilder) AddFile( func (b *Builder) AddFile(
path string, path string,
size int64, size int64,
mtime time.Time, mtime time.Time,
@ -85,14 +85,14 @@ func (b *ManifestBuilder) AddFile(
} }
// FileCount returns the number of files added to the builder. // FileCount returns the number of files added to the builder.
func (b *ManifestBuilder) FileCount() int { func (b *Builder) FileCount() int {
b.mu.Lock() b.mu.Lock()
defer b.mu.Unlock() defer b.mu.Unlock()
return len(b.files) return len(b.files)
} }
// Build finalizes the manifest and writes it to the writer. // Build finalizes the manifest and writes it to the writer.
func (b *ManifestBuilder) Build(w io.Writer) error { func (b *Builder) Build(w io.Writer) error {
b.mu.Lock() b.mu.Lock()
defer b.mu.Unlock() defer b.mu.Unlock()

View File

@ -1,42 +0,0 @@
package mfer
import (
"bytes"
"testing"
"github.com/stretchr/testify/assert"
"sneak.berlin/go/mfer/internal/log"
)
func TestAPIExample(t *testing.T) {
// read from filesystem
m, err := NewFromFS(&ManifestScanOptions{
IgnoreDotfiles: true,
}, big)
assert.Nil(t, err)
assert.NotNil(t, m)
// scan for files
m.Scan()
// serialize
var buf bytes.Buffer
m.WriteTo(&buf)
// show serialized
log.Dump(buf.Bytes())
// do it again
var buf2 bytes.Buffer
m.WriteTo(&buf2)
// should be same!
assert.True(t, bytes.Equal(buf.Bytes(), buf2.Bytes()))
// deserialize
m2, err := NewFromProto(&buf)
assert.Nil(t, err)
assert.NotNil(t, m2)
log.Dump(m2)
}

View File

@ -39,9 +39,10 @@ func (m *manifest) String() string {
return fmt.Sprintf("<Manifest count=%d totalSize=%d>", len(m.files), m.totalFileSize) return fmt.Sprintf("<Manifest count=%d totalSize=%d>", len(m.files), m.totalFileSize)
} }
// ManifestScanOptions configures behavior when scanning directories for manifest generation.
type ManifestScanOptions struct { type ManifestScanOptions struct {
IgnoreDotfiles bool IgnoreDotfiles bool // Skip files and directories starting with a dot
FollowSymLinks bool FollowSymLinks bool // Resolve symlinks instead of skipping them
} }
func (m *manifest) HasError() bool { func (m *manifest) HasError() bool {
@ -77,11 +78,13 @@ func (m *manifest) addInputFS(f afero.Fs) error {
return nil return nil
} }
// New creates an empty manifest.
func New() *manifest { func New() *manifest {
m := &manifest{} m := &manifest{}
return m return m
} }
// NewFromPaths creates a manifest configured to scan the given filesystem paths.
func NewFromPaths(options *ManifestScanOptions, inputPaths ...string) (*manifest, error) { func NewFromPaths(options *ManifestScanOptions, inputPaths ...string) (*manifest, error) {
log.Dump(inputPaths) log.Dump(inputPaths)
m := New() m := New()
@ -95,6 +98,7 @@ func NewFromPaths(options *ManifestScanOptions, inputPaths ...string) (*manifest
return m, nil return m, nil
} }
// NewFromFS creates a manifest configured to scan the given afero filesystem.
func NewFromFS(options *ManifestScanOptions, fs afero.Fs) (*manifest, error) { func NewFromFS(options *ManifestScanOptions, fs afero.Fs) (*manifest, error) {
m := New() m := New()
m.scanOptions = options m.scanOptions = options

View File

@ -1,42 +1,11 @@
package mfer package mfer
import ( import (
"bytes"
"fmt"
"testing" "testing"
"github.com/spf13/afero"
"github.com/stretchr/testify/assert" "github.com/stretchr/testify/assert"
"sneak.berlin/go/mfer/internal/log"
) )
// Add those variables as well
var (
existingFolder = "./testdata/a-folder-that-exists"
)
var (
af *afero.Afero = &afero.Afero{Fs: afero.NewMemMapFs()}
big *afero.Afero = &afero.Afero{Fs: afero.NewMemMapFs()}
)
func init() {
log.EnableDebugLogging()
// create test files and directories
af.MkdirAll("/a/b/c", 0o755)
af.MkdirAll("/.hidden", 0o755)
af.WriteFile("/a/b/c/hello.txt", []byte("hello world\n\n\n\n"), 0o755)
af.WriteFile("/a/b/c/hello2.txt", []byte("hello world\n\n\n\n"), 0o755)
af.WriteFile("/.hidden/hello.txt", []byte("hello world\n"), 0o755)
af.WriteFile("/.hidden/hello2.txt", []byte("hello world\n"), 0o755)
big.MkdirAll("/home/user/Library", 0o755)
for i := range [25]int{} {
big.WriteFile(fmt.Sprintf("/home/user/Library/hello%d.txt", i), []byte("hello world\n"), 0o755)
}
}
func TestPathHiddenFunc(t *testing.T) { func TestPathHiddenFunc(t *testing.T) {
assert.False(t, pathIsHidden("/a/b/c/hello.txt")) assert.False(t, pathIsHidden("/a/b/c/hello.txt"))
assert.True(t, pathIsHidden("/a/b/c/.hello.txt")) assert.True(t, pathIsHidden("/a/b/c/.hello.txt"))
@ -44,31 +13,3 @@ func TestPathHiddenFunc(t *testing.T) {
assert.True(t, pathIsHidden("/.a/b/c/hello.txt")) assert.True(t, pathIsHidden("/.a/b/c/hello.txt"))
assert.False(t, pathIsHidden("./a/b/c/hello.txt")) assert.False(t, pathIsHidden("./a/b/c/hello.txt"))
} }
func TestManifestGenerationOne(t *testing.T) {
m, err := NewFromFS(&ManifestScanOptions{
IgnoreDotfiles: true,
}, af)
assert.Nil(t, err)
assert.NotNil(t, m)
m.Scan()
assert.Equal(t, int64(2), m.GetFileCount())
assert.Equal(t, int64(30), m.GetTotalFileSize())
}
func TestManifestGenerationTwo(t *testing.T) {
m, err := NewFromFS(&ManifestScanOptions{
IgnoreDotfiles: false,
}, af)
assert.Nil(t, err)
assert.NotNil(t, m)
m.Scan()
assert.Equal(t, int64(4), m.GetFileCount())
assert.Equal(t, int64(54), m.GetTotalFileSize())
err = m.generate()
assert.Nil(t, err)
var buf bytes.Buffer
err = m.WriteTo(&buf)
assert.Nil(t, err)
log.Dump(buf.Bytes())
}

View File

@ -10,9 +10,7 @@ import (
"google.golang.org/protobuf/proto" "google.golang.org/protobuf/proto"
) )
// was go-generate protoc --go_out=. --go_opt=paths=source_relative mf.proto // MAGIC is the file format magic bytes prefix (rot13 of "MANIFEST").
// rot13("MANIFEST")
const MAGIC string = "ZNAVSRFG" const MAGIC string = "ZNAVSRFG"
func newTimestampFromTime(t time.Time) *Timestamp { func newTimestampFromTime(t time.Time) *Timestamp {
@ -25,10 +23,7 @@ func newTimestampFromTime(t time.Time) *Timestamp {
func (m *manifest) generate() error { func (m *manifest) generate() error {
if m.pbInner == nil { if m.pbInner == nil {
e := m.generateInner() return errors.New("internal error: pbInner not set")
if e != nil {
return e
}
} }
if m.pbOuter == nil { if m.pbOuter == nil {
e := m.generateOuter() e := m.generateOuter()
@ -82,19 +77,3 @@ func (m *manifest) generateOuter() error {
m.pbOuter = o m.pbOuter = o
return nil return nil
} }
func (m *manifest) generateInner() error {
m.pbInner = &MFFile{
Version: MFFile_VERSION_ONE,
CreatedAt: newTimestampFromTime(time.Now()),
Files: []*MFFilePath{},
}
for _, f := range m.files {
nf := &MFFilePath{
Path: f.path,
// FIXME add more stuff
}
m.pbInner.Files = append(m.pbInner.Files, nf)
}
return nil
}