diff --git a/README.md b/README.md index 2a0df0a..dfe4c1f 100644 --- a/README.md +++ b/README.md @@ -2,6 +2,232 @@ Manifest file generator and checker. +# Phases + +Manifest generation happens in two distinct phases: + +## Phase 1: Enumeration + +Walking directories and calling `stat()` on files to collect metadata (path, size, mtime, ctime). This builds the list of files to be scanned. Relatively fast as it only reads filesystem metadata, not file contents. + +**Progress:** `EnumerateStatus` with `FilesFound` and `BytesFound` + +## Phase 2: Scan (ToManifest) + +Reading file contents and computing cryptographic hashes for manifest generation. This is the expensive phase that reads all file data from disk. + +**Progress:** `ScanStatus` with `TotalFiles`, `ScannedFiles`, `TotalBytes`, `ScannedBytes`, `BytesPerSec` + +# Code Conventions + +- **Logging:** Never use `fmt.Printf` or write to stdout/stderr directly in normal code. Use the `internal/log` package for all output (`log.Info`, `log.Infof`, `log.Debug`, `log.Debugf`, `log.Progressf`, `log.ProgressDone`). +- **Filesystem abstraction:** Use `github.com/spf13/afero` for filesystem operations to enable testing and flexibility. +- **CLI framework:** Use `github.com/urfave/cli/v2` for command-line interface. +- **Serialization:** Use Protocol Buffers for manifest file format. +- **Internal packages:** Non-exported implementation details go in `internal/` subdirectories. +- **Concurrency:** Use `sync.RWMutex` for protecting shared state; prefer channels for progress reporting. +- **Progress channels:** Use buffered channels (size 1) with non-blocking sends to avoid blocking the main operation if the consumer is slow. +- **Context support:** Long-running operations should accept `context.Context` for cancellation. +- **NO_COLOR:** Respect the `NO_COLOR` environment variable for disabling colored output. +- **Options pattern:** Use `NewWithOptions(opts *Options)` constructor pattern for configurable types. 
+ +# Codebase Structure + +## cmd/mfer/ + +### main.go +- **Variables** + - `Appname string` - Application name + - `Version string` - Version string (set at build time) + - `Gitrev string` - Git revision (set at build time) + +## internal/cli/ + +### entry.go +- **Variables** + - `NO_COLOR bool` - Disables color output when NO_COLOR env var is set +- **Functions** + - `Run(Appname, Version, Gitrev string) int` - Main entry point for the CLI + +### mfer.go +- **Types** + - `CLIApp struct` - Main CLI application container +- **Methods** + - `(*CLIApp) VersionString() string` - Returns formatted version string + +## internal/log/ + +### log.go +- **Functions** + - `Init()` - Initializes the logger + - `Info(arg string)` - Logs at info level + - `Infof(format string, args ...interface{})` - Logs at info level with formatting + - `Debug(arg string)` - Logs at debug level with caller info + - `Debugf(format string, args ...interface{})` - Logs at debug level with formatting and caller info + - `Dump(args ...interface{})` - Logs spew dump at debug level + - `Progressf(format string, args ...interface{})` - Prints progress message (overwrites current line) + - `ProgressDone()` - Completes progress line with newline + - `EnableDebugLogging()` - Sets log level to debug + - `SetLevel(arg log.Level)` - Sets log level + - `SetLevelFromVerbosity(l int)` - Sets log level from verbosity count + - `GetLevel() log.Level` - Returns current log level + - `GetLogger() *log.Logger` - Returns underlying logger + - `WithError(e error) *log.Entry` - Returns log entry with error attached + - `DisableStyling()` - Disables colors and styling (for NO_COLOR) + +## internal/scanner/ + +### scanner.go +- **Types** + - `Options struct` - Options for scanner behavior + - `IgnoreDotfiles bool` + - `FollowSymLinks bool` + - `EnumerateStatus struct` - Progress information for enumeration phase + - `FilesFound int64` + - `BytesFound int64` + - `ScanStatus struct` - Progress information for scan phase 
+ - `TotalFiles int64` + - `ScannedFiles int64` + - `TotalBytes int64` + - `ScannedBytes int64` + - `BytesPerSec float64` + - `FileEntry struct` - Represents an enumerated file + - `Path string` - Relative path (used in manifest) + - `AbsPath string` - Absolute path (used for reading file content) + - `Size int64` + - `Mtime time.Time` + - `Ctime time.Time` + - `Scanner struct` - Accumulates files and generates manifests +- **Functions** + - `New() *Scanner` - Creates a new Scanner with default options + - `NewWithOptions(opts *Options) *Scanner` - Creates a new Scanner with given options +- **Methods (Enumeration Phase)** + - `(*Scanner) EnumerateFile(path string) error` - Enumerates a single file, calling stat() for metadata + - `(*Scanner) EnumeratePath(inputPath string, progress chan<- EnumerateStatus) error` - Walks a directory and enumerates all files + - `(*Scanner) EnumeratePaths(progress chan<- EnumerateStatus, inputPaths ...string) error` - Walks multiple directories + - `(*Scanner) EnumerateFS(afs afero.Fs, basePath string, progress chan<- EnumerateStatus) error` - Walks an afero filesystem +- **Methods (Accessors)** + - `(*Scanner) Files() []*FileEntry` - Returns copy of all enumerated files + - `(*Scanner) FileCount() int64` - Returns number of files + - `(*Scanner) TotalBytes() int64` - Returns total size of all files +- **Methods (Scan Phase)** + - `(*Scanner) ToManifest(ctx context.Context, w io.Writer, progress chan<- ScanStatus) error` - Reads file contents, computes hashes, generates manifest + +## internal/checker/ + +### checker.go +- **Types** + - `Result struct` - Outcome of checking a single file + - `Path string` - File path from manifest + - `Status Status` - Verification status + - `Message string` - Error or status message + - `Status int` - Verification status enumeration + - `StatusOK` - File matches manifest + - `StatusMissing` - File not found + - `StatusSizeMismatch` - File size differs from manifest + - `StatusHashMismatch` - File 
hash differs from manifest + - `StatusExtra` - File exists on disk but not in manifest + - `StatusError` - Error occurred during verification + - `CheckStatus struct` - Progress information for check operation + - `TotalFiles int64` + - `CheckedFiles int64` + - `TotalBytes int64` + - `CheckedBytes int64` + - `BytesPerSec float64` + - `Failures int64` + - `Checker struct` - Verifies files against a manifest +- **Functions** + - `NewChecker(manifestPath string, basePath string, fs afero.Fs) (*Checker, error)` - Creates a new Checker for the given manifest, base path, and filesystem (a nil `fs` selects the real filesystem) +- **Methods** + - `(s Status) String() string` - Returns string representation of status + - `(*Checker) FileCount() int64` - Returns number of files in the manifest + - `(*Checker) TotalBytes() int64` - Returns total size of all files in manifest + - `(*Checker) Check(ctx context.Context, results chan<- Result, progress chan<- CheckStatus) error` - Verifies all files against the manifest + - `(*Checker) FindExtraFiles(ctx context.Context, results chan<- Result) error` - Reports files under the base path that are not in the manifest + +## mfer/ + +### manifest.go +- **Types** + - `ManifestScanOptions struct` - Options for scanning directories + - `IgnoreDotfiles bool` + - `FollowSymLinks bool` +- **Functions** + - `New() *manifest` - Creates a new empty manifest + - `NewFromPaths(options *ManifestScanOptions, inputPaths ...string) (*manifest, error)` - Creates manifest from filesystem paths + - `NewFromFS(options *ManifestScanOptions, fs afero.Fs) (*manifest, error)` - Creates manifest from afero filesystem +- **Methods** + - `(*manifest) HasError() bool` - Returns true if manifest has errors + - `(*manifest) AddError(e error) *manifest` - Adds an error to the manifest + - `(*manifest) WithContext(c context.Context) *manifest` - Sets context for cancellation + - `(*manifest) GetFileCount() int64` - Returns number of files in manifest + - `(*manifest) GetTotalFileSize() int64` - Returns total size of all files + - `(*manifest) Files() []*MFFilePath` - Returns all file entries from a loaded manifest + - `(*manifest) Scan() error` - Scans source filesystems and populates file list + +### output.go +- **Methods** + - 
`(*manifest) WriteToFile(path string) error` - Writes manifest to file path + - `(*manifest) WriteTo(output io.Writer) error` - Writes manifest to io.Writer + +### builder.go +- **Types** + - `FileProgress func(bytesRead int64)` - Callback for file processing progress + - `ManifestBuilder struct` - Constructs manifests by adding files one at a time +- **Functions** + - `NewBuilder() *ManifestBuilder` - Creates a new ManifestBuilder +- **Methods** + - `(*ManifestBuilder) AddFile(path string, size int64, mtime time.Time, reader io.Reader, progress FileProgress) (int64, error)` - Reads file, computes hash, adds to manifest + - `(*ManifestBuilder) FileCount() int` - Returns number of files added + - `(*ManifestBuilder) Build(w io.Writer) error` - Finalizes and writes manifest + +### serialize.go +- **Constants** + - `MAGIC string` - Magic bytes prefix for manifest files ("ZNAVSRFG") + +### deserialize.go +- **Functions** + - `NewFromProto(input io.Reader) (*manifest, error)` - Deserializes manifest from protobuf + - `NewManifestFromReader(input io.Reader) (*manifest, error)` - Reads and parses manifest from io.Reader + - `NewManifestFromFile(path string) (*manifest, error)` - Reads and parses manifest from file path + +### mf.pb.go (generated from mf.proto) +- **Enum Types** + - `MFFileOuter_Version` - Outer file format version + - `MFFileOuter_VERSION_NONE` + - `MFFileOuter_VERSION_ONE` + - `MFFileOuter_CompressionType` - Compression type for inner message + - `MFFileOuter_COMPRESSION_NONE` + - `MFFileOuter_COMPRESSION_GZIP` + - `MFFile_Version` - Inner file format version + - `MFFile_VERSION_NONE` + - `MFFile_VERSION_ONE` +- **Message Types** + - `Timestamp struct` - Timestamp with seconds and nanoseconds + - `GetSeconds() int64` + - `GetNanos() int32` + - `MFFileOuter struct` - Outer wrapper containing compressed/signed inner message + - `GetVersion() MFFileOuter_Version` + - `GetCompressionType() MFFileOuter_CompressionType` + - `GetSize() int64` + - `GetSha256() 
[]byte` + - `GetInnerMessage() []byte` + - `GetSignature() []byte` + - `GetSigner() []byte` + - `GetSigningPubKey() []byte` + - `MFFilePath struct` - Individual file entry in manifest + - `GetPath() string` + - `GetSize() int64` + - `GetHashes() []*MFFileChecksum` + - `GetMimeType() string` + - `GetMtime() *Timestamp` + - `GetCtime() *Timestamp` + - `GetAtime() *Timestamp` + - `MFFileChecksum struct` - File checksum using multihash + - `GetMultiHash() []byte` + - `MFFile struct` - Inner manifest containing file list + - `GetVersion() MFFile_Version` + - `GetFiles() []*MFFilePath` + - `GetCreatedAt() *Timestamp` + # Build Status [![Build Status](https://drone.datavi.be/api/badges/sneak/mfer/status.svg)](https://drone.datavi.be/sneak/mfer) @@ -83,6 +309,10 @@ The manifest file would do several important things: - metadata size should not be used as an excuse to sacrifice utility (such as providing checksums over each chunk of a large file) +# Limitations + +- **Manifest size:** Manifests must fit entirely in system memory during reading and writing. + # Open Questions - Should the manifest file include checksums of individual file chunks, or just for the whole assembled file? diff --git a/contrib/usage.sh b/contrib/usage.sh new file mode 100755 index 0000000..38661f9 --- /dev/null +++ b/contrib/usage.sh @@ -0,0 +1,19 @@ +#!/bin/bash +set -euo pipefail + +# usage.sh - Generate and check a manifest from the repo +# Run from repo root: ./contrib/usage.sh + +TMPDIR=$(mktemp -d) +MANIFEST="$TMPDIR/index.mf" + +cleanup() { + rm -rf "$TMPDIR" +} +trap cleanup EXIT + +echo "Building mfer..." +go build -o "$TMPDIR/mfer" ./cmd/mfer + +"$TMPDIR/mfer" generate --ignore-dotfiles -o "$MANIFEST" . +"$TMPDIR/mfer" check --base . 
"$MANIFEST" diff --git a/go.mod b/go.mod index eb75322..e440182 100644 --- a/go.mod +++ b/go.mod @@ -5,12 +5,12 @@ go 1.17 require ( github.com/apex/log v1.9.0 github.com/davecgh/go-spew v1.1.1 + github.com/multiformats/go-multihash v0.2.3 github.com/pterm/pterm v0.12.35 github.com/spf13/afero v1.8.0 github.com/stretchr/testify v1.8.1 github.com/urfave/cli/v2 v2.23.6 google.golang.org/protobuf v1.28.1 - ) require ( @@ -18,17 +18,24 @@ require ( github.com/cpuguy83/go-md2man/v2 v2.0.2 // indirect github.com/fatih/color v1.7.0 // indirect github.com/gookit/color v1.4.2 // indirect + github.com/klauspost/cpuid/v2 v2.0.9 // indirect github.com/mattn/go-colorable v0.1.2 // indirect github.com/mattn/go-isatty v0.0.8 // indirect github.com/mattn/go-runewidth v0.0.13 // indirect + github.com/minio/sha256-simd v1.0.0 // indirect + github.com/mr-tron/base58 v1.2.0 // indirect + github.com/multiformats/go-varint v0.0.6 // indirect github.com/pkg/errors v0.9.1 // indirect github.com/pmezard/go-difflib v1.0.0 // indirect github.com/rivo/uniseg v0.2.0 // indirect github.com/russross/blackfriday/v2 v2.1.0 // indirect + github.com/spaolacci/murmur3 v1.1.0 // indirect github.com/xo/terminfo v0.0.0-20210125001918-ca9a967f8778 // indirect github.com/xrash/smetrics v0.0.0-20201216005158-039620a65673 // indirect - golang.org/x/sys v0.0.0-20211013075003-97ac67df715c // indirect + golang.org/x/crypto v0.0.0-20220525230936-793ad666bf5e // indirect + golang.org/x/sys v0.1.0 // indirect golang.org/x/term v0.0.0-20210927222741-03fcf44c2211 // indirect - golang.org/x/text v0.3.4 // indirect + golang.org/x/text v0.3.6 // indirect gopkg.in/yaml.v3 v3.0.1 // indirect + lukechampine.com/blake3 v1.1.6 // indirect ) diff --git a/go.sum b/go.sum index 0a2d55c..660d476 100644 --- a/go.sum +++ b/go.sum @@ -150,6 +150,7 @@ github.com/jpillora/backoff v0.0.0-20180909062703-3050d21c67d7/go.mod h1:2iMrUgb github.com/jstemmer/go-junit-report v0.0.0-20190106144839-af01ea7f8024/go.mod 
h1:6v2b51hI/fHJwM22ozAgKL4VKDeJcHhJFhtBdhmNjmU= github.com/jstemmer/go-junit-report v0.9.1/go.mod h1:Brl9GWCQeLvo8nXZwPNNblvFj/XSXhF0NWZEnDohbsk= github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck= +github.com/klauspost/cpuid/v2 v2.0.4/go.mod h1:FInQzS24/EEf25PyTYn52gqo7WaD8xa0213Md/qVLRg= github.com/klauspost/cpuid/v2 v2.0.9 h1:lgaqFMSdTdQYdZ04uHyN2d/eKdOMyi2YLSvlQIBFYa4= github.com/klauspost/cpuid/v2 v2.0.9/go.mod h1:FInQzS24/EEf25PyTYn52gqo7WaD8xa0213Md/qVLRg= github.com/kr/fs v0.1.0/go.mod h1:FFnZGqtBN9Gxj7eW1uZ42v5BccTP0vu6NEaFoC2HwRg= @@ -169,6 +170,14 @@ github.com/mattn/go-isatty v0.0.8/go.mod h1:Iq45c/XA43vh69/j3iqttzPXn0bhXyGjM0Hd github.com/mattn/go-runewidth v0.0.13 h1:lTGmDsbAYt5DmK6OnoV7EuIF1wEIFAcxld6ypU4OSgU= github.com/mattn/go-runewidth v0.0.13/go.mod h1:Jdepj2loyihRzMpdS35Xk/zdY8IAYHsh153qUoGf23w= github.com/mgutz/ansi v0.0.0-20170206155736-9520e82c474b/go.mod h1:01TrycV0kFyexm33Z7vhZRXopbI8J3TDReVlkTgMUxE= +github.com/minio/sha256-simd v1.0.0 h1:v1ta+49hkWZyvaKwrQB8elexRqm6Y0aMLjCNsrYxo6g= +github.com/minio/sha256-simd v1.0.0/go.mod h1:OuYzVNI5vcoYIAmbIvHPl3N3jUzVedXbKy5RFepssQM= +github.com/mr-tron/base58 v1.2.0 h1:T/HDJBh4ZCPbU39/+c3rRvE0uKBQlU27+QI8LJ4t64o= +github.com/mr-tron/base58 v1.2.0/go.mod h1:BinMc/sQntlIE1frQmRFPUoPA1Zkr8VRgBdjWI2mNwc= +github.com/multiformats/go-multihash v0.2.3 h1:7Lyc8XfX/IY2jWb/gI7JP+o7JEq9hOa7BFvVU9RSh+U= +github.com/multiformats/go-multihash v0.2.3/go.mod h1:dXgKXCXjBzdscBLk9JkjINiEsCKRVch90MdaGiKsvSM= +github.com/multiformats/go-varint v0.0.6 h1:gk85QWKxh3TazbLxED/NlDVv8+q+ReFJk7Y2W/KhfNY= +github.com/multiformats/go-varint v0.0.6/go.mod h1:3Ls8CIEsrijN6+B7PbrXRPxHRPuXSrVKRY101jdMZYE= github.com/onsi/ginkgo v1.6.0/go.mod h1:lLunBs/Ym6LB5Z9jYTR76FiuTmxDTDusOGeTQH+WWjE= github.com/onsi/gomega v1.5.0/go.mod h1:ex+gbHU/CVuBBDIJjb2X0qEXbFg53c61hWP/1CpauHY= github.com/pkg/errors v0.8.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= @@ -195,6 +204,8 @@ 
github.com/sergi/go-diff v1.0.0/go.mod h1:0CfEIISq7TuYL3j771MWULgwwjU+GofnZX9QAm github.com/smartystreets/assertions v1.0.0/go.mod h1:kHHU4qYBaI3q23Pp3VPrmWhuIUrLW/7eUrw0BU5VaoM= github.com/smartystreets/go-aws-auth v0.0.0-20180515143844-0c1422d1fdb9/go.mod h1:SnhjPscd9TpLiy1LpzGSKh3bXCfxxXuqd9xmQJy3slM= github.com/smartystreets/gunit v1.0.0/go.mod h1:qwPWnhz6pn0NnRBP++URONOVyNkPyr4SauJk4cUOwJs= +github.com/spaolacci/murmur3 v1.1.0 h1:7c1g84S4BPRrfL5Xrdp6fOJ206sU9y293DDHaoy0bLI= +github.com/spaolacci/murmur3 v1.1.0/go.mod h1:JwIasOWyU6f++ZhiEuf87xNszmSA2myDM2Kzu9HwQUA= github.com/spf13/afero v1.8.0 h1:5MmtuhAgYeU6qpa7w7bP0dv6MBYuup0vekhSpSkoq60= github.com/spf13/afero v1.8.0/go.mod h1:CtAatgMJh6bJEIs48Ay/FOnkljP3WeGUG0MC1RfAqwo= github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= @@ -240,6 +251,8 @@ golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8U golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto= golang.org/x/crypto v0.0.0-20210421170649-83a5a9bb288b/go.mod h1:T9bdIzuCu7OtxOm1hfPfRQxPLYneinmdGuTeoZ9dtd4= golang.org/x/crypto v0.0.0-20211108221036-ceb1ce70b4fa/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc= +golang.org/x/crypto v0.0.0-20220525230936-793ad666bf5e h1:T8NU3HyQ8ClP4SEE+KbFlg6n0NhuTsN4MyznaarGsZM= +golang.org/x/crypto v0.0.0-20220525230936-793ad666bf5e/go.mod h1:IxCIyHEi3zRg3s0A5j5BB6A9Jmi73HwBIUl50j+osU4= golang.org/x/exp v0.0.0-20190121172915-509febef88a4/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA= golang.org/x/exp v0.0.0-20190306152737-a1d7652674e8/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA= golang.org/x/exp v0.0.0-20190510132918-efd6b22b2522/go.mod h1:ZjyILWgesfNpC6sMxTJOJm9Kp84zZh5NQWvqDGG3Qr8= @@ -305,6 +318,7 @@ golang.org/x/net v0.0.0-20201031054903-ff519b6c9102/go.mod h1:sp8m0HH+o8qH0wwXwY golang.org/x/net v0.0.0-20201209123823-ac852fbbde11/go.mod 
h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg= golang.org/x/net v0.0.0-20201224014010-6772e930b67b/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg= golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg= +golang.org/x/net v0.0.0-20211112202133-69e39bad7dc2/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y= golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U= golang.org/x/oauth2 v0.0.0-20190226205417-e64efc72b421/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw= golang.org/x/oauth2 v0.0.0-20190604053449-0f29369cfe45/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw= @@ -359,10 +373,12 @@ golang.org/x/sys v0.0.0-20210104204734-6f8348627aad/go.mod h1:h1NjWce9XRLGQEsW7w golang.org/x/sys v0.0.0-20210119212857-b64e53b001e4/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20210225134936-a50acf3fe073/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20210330210617-4fbd30eecc44/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20210423082822-04245dca01da/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20210423185535-09eb48e85fd7/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.0.0-20211013075003-97ac67df715c h1:taxlMj0D/1sOAuv/CbSD+MMDof2vbyPTqz5FNYKpXt8= golang.org/x/sys v0.0.0-20211013075003-97ac67df715c/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.1.0 h1:kunALQeHf1/185U1i0GOB/fy1IPRDDpuoOOqRReG57U= +golang.org/x/sys v0.1.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= golang.org/x/term v0.0.0-20210220032956-6a3ed077a48d/go.mod 
h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= golang.org/x/term v0.0.0-20210615171337-6886f2dfbf5b/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8= @@ -373,8 +389,9 @@ golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= golang.org/x/text v0.3.1-0.20180807135948-17ff2d5776d2/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= golang.org/x/text v0.3.2/go.mod h1:bEr9sfX3Q8Zfm5fL9x+3itogRgK3+ptLWKqgva+5dAk= golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= -golang.org/x/text v0.3.4 h1:0YWbFKbhXG/wIiuHDSKpS0Iy7FSA+u45VtBMfQcFTTc= golang.org/x/text v0.3.4/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= +golang.org/x/text v0.3.6 h1:aRYxNxv6iGQlyVaZmk6ZgYEDa+Jg18DxebPSrd6bg1M= +golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= golang.org/x/time v0.0.0-20181108054448-85acf8d2951c/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= golang.org/x/time v0.0.0-20190308202827-9d24e82272b4/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= golang.org/x/time v0.0.0-20191024005414-555d28b269f0/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ= @@ -542,6 +559,8 @@ honnef.co/go/tools v0.0.0-20190523083050-ea95bdfd59fc/go.mod h1:rf3lG4BRIbNafJWh honnef.co/go/tools v0.0.1-2019.2.3/go.mod h1:a3bituU0lyd329TUQxRnasdCoJDkEUEAqEt0JzvZhAg= honnef.co/go/tools v0.0.1-2020.1.3/go.mod h1:X/FiERA/W4tHapMX5mGpAtMSVEeEUOyHaw9vFzvIQ3k= honnef.co/go/tools v0.0.1-2020.1.4/go.mod h1:X/FiERA/W4tHapMX5mGpAtMSVEeEUOyHaw9vFzvIQ3k= +lukechampine.com/blake3 v1.1.6 h1:H3cROdztr7RCfoaTpGZFQsrqvweFLrqS73j7L7cmR5c= +lukechampine.com/blake3 v1.1.6/go.mod h1:tkKEOtDkNtklkXtLNEOGNq5tcV90tJiA1vAA12R78LA= rsc.io/binaryregexp v0.2.0/go.mod h1:qTv7/COck+e2FymRvadv62gMdZztPaShugOCi3I+8D8= rsc.io/quote/v3 v3.1.0/go.mod h1:yEA65RcK8LyAZtP9Kv3t0HmxON59tX3rD+tICJqUlj0= rsc.io/sampler v1.3.0/go.mod h1:T1hPZKmBbMNahiBKFy5HrXp6adAjACjK9JXDnKaTXpA= diff --git a/internal/checker/checker.go 
b/internal/checker/checker.go
new file mode 100644
index 0000000..7f7223a
--- /dev/null
+++ b/internal/checker/checker.go
@@ -0,0 +1,281 @@
+package checker
+
+import (
+	"bytes"
+	"context"
+	"crypto/sha256"
+	"errors"
+	"io"
+	"os"
+	"path/filepath"
+
+	"github.com/multiformats/go-multihash"
+	"github.com/spf13/afero"
+	"sneak.berlin/go/mfer/mfer"
+)
+
+// Result represents the outcome of checking a single file.
+type Result struct {
+	// Path is the file path as recorded in the manifest (or, for extra
+	// files, the path relative to the checker's base path).
+	Path    string
+	// Status is the verification outcome for this file.
+	Status  Status
+	// Message is a short human-readable explanation; it is left empty when
+	// Status is StatusOK.
+	Message string
+}
+
+// Status represents the verification status of a file.
+type Status int
+
+// Verification outcomes. The numeric values follow declaration order (iota),
+// so new values should be appended rather than inserted.
+const (
+	// StatusOK means the file matches the manifest.
+	StatusOK Status = iota
+	// StatusMissing means the file was not found.
+	StatusMissing
+	// StatusSizeMismatch means the file size differs from the manifest.
+	StatusSizeMismatch
+	// StatusHashMismatch means the file hash differs from the manifest.
+	StatusHashMismatch
+	StatusExtra // File exists on disk but not in manifest
+	// StatusError means an error occurred during verification.
+	StatusError
+)
+
+// String returns the upper-case label for s ("OK", "MISSING",
+// "SIZE_MISMATCH", "HASH_MISMATCH", "EXTRA" or "ERROR"). Any value outside
+// the declared constants yields "UNKNOWN".
+func (s Status) String() string {
+	switch s {
+	case StatusOK:
+		return "OK"
+	case StatusMissing:
+		return "MISSING"
+	case StatusSizeMismatch:
+		return "SIZE_MISMATCH"
+	case StatusHashMismatch:
+		return "HASH_MISMATCH"
+	case StatusExtra:
+		return "EXTRA"
+	case StatusError:
+		return "ERROR"
+	default:
+		return "UNKNOWN"
+	}
+}
+
+// CheckStatus contains progress information for the check operation.
+type CheckStatus struct {
+	// TotalFiles is the number of entries in the manifest.
+	TotalFiles   int64
+	// CheckedFiles is the number of entries processed so far.
+	CheckedFiles int64
+	// TotalBytes is the sum of the sizes recorded in the manifest.
+	TotalBytes   int64
+	// CheckedBytes is the number of on-disk bytes accounted for so far.
+	CheckedBytes int64
+	// BytesPerSec is a throughput figure.
+	// NOTE(review): Check does not populate this field in this revision,
+	// so consumers will see 0 - confirm whether that is intended.
+	BytesPerSec  float64
+	// Failures is the number of entries whose status was not StatusOK.
+	Failures     int64
+}
+
+// Checker verifies files against a manifest.
+type Checker struct {
+	// basePath is the absolute directory that manifest paths are joined to.
+	basePath string
+	// files holds the entries loaded from the manifest.
+	files    []*mfer.MFFilePath
+	// fs is the filesystem every stat/open/walk goes through.
+	fs       afero.Fs
+	// manifestPaths is a set of paths in the manifest for quick lookup
+	manifestPaths map[string]struct{}
+}
+
+// NewChecker creates a new Checker for the given manifest, base path, and filesystem.
+// The basePath is the directory relative to which manifest paths are resolved.
+// If fs is nil, the real filesystem (OsFs) is used.
+func NewChecker(manifestPath string, basePath string, fs afero.Fs) (*Checker, error) { + if fs == nil { + fs = afero.NewOsFs() + } + + m, err := mfer.NewManifestFromFile(fs, manifestPath) + if err != nil { + return nil, err + } + + abs, err := filepath.Abs(basePath) + if err != nil { + return nil, err + } + + files := m.Files() + manifestPaths := make(map[string]struct{}, len(files)) + for _, f := range files { + manifestPaths[f.Path] = struct{}{} + } + + return &Checker{ + basePath: abs, + files: files, + fs: fs, + manifestPaths: manifestPaths, + }, nil +} + +// FileCount returns the number of files in the manifest. +func (c *Checker) FileCount() int64 { + return int64(len(c.files)) +} + +// TotalBytes returns the total size of all files in the manifest. +func (c *Checker) TotalBytes() int64 { + var total int64 + for _, f := range c.files { + total += f.Size + } + return total +} + +// Check verifies all files against the manifest. +// Results are sent to the results channel as files are checked. +// Progress updates are sent to the progress channel approximately once per second. +// Both channels are closed when the method returns. 
+func (c *Checker) Check(ctx context.Context, results chan<- Result, progress chan<- CheckStatus) error { + if results != nil { + defer close(results) + } + if progress != nil { + defer close(progress) + } + + totalFiles := int64(len(c.files)) + totalBytes := c.TotalBytes() + + var checkedFiles int64 + var checkedBytes int64 + var failures int64 + + for _, entry := range c.files { + select { + case <-ctx.Done(): + return ctx.Err() + default: + } + + result := c.checkFile(entry, &checkedBytes) + if result.Status != StatusOK { + failures++ + } + checkedFiles++ + + if results != nil { + results <- result + } + + // Send progress (simplified - every file for now) + if progress != nil { + sendCheckStatus(progress, CheckStatus{ + TotalFiles: totalFiles, + CheckedFiles: checkedFiles, + TotalBytes: totalBytes, + CheckedBytes: checkedBytes, + Failures: failures, + }) + } + } + + return nil +} + +func (c *Checker) checkFile(entry *mfer.MFFilePath, checkedBytes *int64) Result { + absPath := filepath.Join(c.basePath, entry.Path) + + // Check if file exists + info, err := c.fs.Stat(absPath) + if err != nil { + if errors.Is(err, afero.ErrFileNotFound) || errors.Is(err, errors.New("file does not exist")) { + return Result{Path: entry.Path, Status: StatusMissing, Message: "file not found"} + } + // Check for "file does not exist" style errors + exists, _ := afero.Exists(c.fs, absPath) + if !exists { + return Result{Path: entry.Path, Status: StatusMissing, Message: "file not found"} + } + return Result{Path: entry.Path, Status: StatusError, Message: err.Error()} + } + + // Check size + if info.Size() != entry.Size { + *checkedBytes += info.Size() + return Result{ + Path: entry.Path, + Status: StatusSizeMismatch, + Message: "size mismatch", + } + } + + // Open and hash file + f, err := c.fs.Open(absPath) + if err != nil { + return Result{Path: entry.Path, Status: StatusError, Message: err.Error()} + } + defer f.Close() + + h := sha256.New() + n, err := io.Copy(h, f) + if err != nil 
{ + return Result{Path: entry.Path, Status: StatusError, Message: err.Error()} + } + *checkedBytes += n + + // Encode as multihash and compare + computed, err := multihash.Encode(h.Sum(nil), multihash.SHA2_256) + if err != nil { + return Result{Path: entry.Path, Status: StatusError, Message: err.Error()} + } + + // Check against all hashes in manifest (at least one must match) + for _, hash := range entry.Hashes { + if bytes.Equal(computed, hash.MultiHash) { + return Result{Path: entry.Path, Status: StatusOK} + } + } + + return Result{Path: entry.Path, Status: StatusHashMismatch, Message: "hash mismatch"} +} + +// FindExtraFiles walks the filesystem and reports files not in the manifest. +// Results are sent to the results channel. The channel is closed when done. +func (c *Checker) FindExtraFiles(ctx context.Context, results chan<- Result) error { + if results != nil { + defer close(results) + } + + return afero.Walk(c.fs, c.basePath, func(path string, info os.FileInfo, err error) error { + if err != nil { + return err + } + + select { + case <-ctx.Done(): + return ctx.Err() + default: + } + + // Skip directories + if info.IsDir() { + return nil + } + + // Get relative path + relPath, err := filepath.Rel(c.basePath, path) + if err != nil { + return err + } + + // Check if path is in manifest + if _, exists := c.manifestPaths[relPath]; !exists { + if results != nil { + results <- Result{ + Path: relPath, + Status: StatusExtra, + Message: "not in manifest", + } + } + } + + return nil + }) +} + +// sendCheckStatus sends a status update without blocking. 
+func sendCheckStatus(ch chan<- CheckStatus, status CheckStatus) {
+	// A nil channel means the caller did not ask for progress updates.
+	if ch == nil {
+		return
+	}
+	// Non-blocking send: if the channel cannot accept the value right now,
+	// this update is dropped rather than stalling the check.
+	select {
+	case ch <- status:
+	default:
+	}
+}
diff --git a/internal/cli/check.go b/internal/cli/check.go
index f38a5f2..8dd191f 100644
--- a/internal/cli/check.go
+++ b/internal/cli/check.go
@@ -1,13 +1,110 @@
 package cli
 
 import (
-	"errors"
+	"fmt"
+	"time"
 
-	"github.com/apex/log"
 	"github.com/urfave/cli/v2"
+	"sneak.berlin/go/mfer/internal/checker"
+	"sneak.berlin/go/mfer/internal/log"
 )
 
-func (mfa *CLIApp) checkManifestOperation(c *cli.Context) error {
-	log.WithError(errors.New("unimplemented"))
+// checkManifestOperation verifies files on disk against a manifest.
+//
+// The manifest path is the first positional argument (default "index.mf") and
+// files are resolved relative to the "base" flag. With the "progress" flag a
+// progress line is printed while checking; with "no-extra-files" any file
+// under the base path that is absent from the manifest also counts as a
+// failure. Unless "quiet" is set, a one-line summary is logged at the end.
+//
+// Verification failures do not produce an error: they set mfa.exitCode to 1
+// and the function returns nil. A non-nil error is returned only when the
+// manifest cannot be loaded or a check pass itself fails.
+func (mfa *CLIApp) checkManifestOperation(ctx *cli.Context) error {
+	log.Debug("checkManifestOperation()")
+
+	// Get manifest path from args, default to index.mf
+	manifestPath := "index.mf"
+	if ctx.Args().Len() > 0 {
+		manifestPath = ctx.Args().Get(0)
+	}
+
+	basePath := ctx.String("base")
+	showProgress := ctx.Bool("progress")
+
+	log.Debugf("checking manifest %s with base %s", manifestPath, basePath)
+
+	// Create checker
+	chk, err := checker.NewChecker(manifestPath, basePath, mfa.Fs)
+	if err != nil {
+		return fmt.Errorf("failed to load manifest: %w", err)
+	}
+
+	log.Debugf("manifest contains %d files, %d bytes", chk.FileCount(), chk.TotalBytes())
+
+	// Set up results channel
+	results := make(chan checker.Result, 1)
+
+	// Set up progress channel
+	// progress stays nil unless the flag is set; a nil channel is passed
+	// straight through to Check.
+	var progress chan checker.CheckStatus
+	if showProgress {
+		progress = make(chan checker.CheckStatus, 1)
+		// NOTE(review): nothing waits for this goroutine, so ProgressDone
+		// may run after the summary below has been logged - confirm
+		// whether the output ordering matters.
+		go func() {
+			for status := range progress {
+				log.Progressf("Checking: %d/%d files, %d failures",
+					status.CheckedFiles,
+					status.TotalFiles,
+					status.Failures)
+			}
+			log.ProgressDone()
+		}()
+	}
+
+	// Process results in a goroutine
+	// failures is written only by the consumer goroutines and read by this
+	// function only after the matching done channel has been closed.
+	var failures int64
+	done := make(chan struct{})
+	go func() {
+		for result := range results {
+			if result.Status != checker.StatusOK {
+				failures++
+				log.Infof("%s: %s (%s)", result.Status, result.Path, result.Message)
+			} else {
+				log.Debugf("%s: %s", result.Status, result.Path)
+			}
+		}
+		close(done)
+	}()
+
+	// Run check
+	err = chk.Check(ctx.Context, results, progress)
+	if err != nil {
+		return fmt.Errorf("check failed: %w", err)
+	}
+
+	// Wait for results processing to complete
+	<-done
+
+	// Check for extra files if requested
+	if ctx.Bool("no-extra-files") {
+		extraResults := make(chan checker.Result, 1)
+		extraDone := make(chan struct{})
+		go func() {
+			// Every result on this channel is an extra file, so each one
+			// counts as a failure.
+			for result := range extraResults {
+				failures++
+				log.Infof("%s: %s (%s)", result.Status, result.Path, result.Message)
+			}
+			close(extraDone)
+		}()
+
+		err = chk.FindExtraFiles(ctx.Context, extraResults)
+		if err != nil {
+			return fmt.Errorf("failed to check for extra files: %w", err)
+		}
+		<-extraDone
+	}
+
+	if !ctx.Bool("quiet") {
+		// elapsed is measured from mfa.startupTime, and the rate is based on
+		// the manifest's total size rather than the bytes actually read.
+		elapsed := time.Since(mfa.startupTime).Seconds()
+		rate := float64(chk.TotalBytes()) / elapsed / 1e6
+		if failures == 0 {
+			log.Infof("checked %d files (%.1f MB) in %.1fs (%.1f MB/s): all OK", chk.FileCount(), float64(chk.TotalBytes())/1e6, elapsed, rate)
+		} else {
+			log.Infof("checked %d files (%.1f MB) in %.1fs (%.1f MB/s): %d failed", chk.FileCount(), float64(chk.TotalBytes())/1e6, elapsed, rate, failures)
+		}
+	}
+
+	if failures > 0 {
+		mfa.exitCode = 1
+	}
+
 	return nil
 }
diff --git a/internal/cli/entry.go b/internal/cli/entry.go
index 21b3cef..d468e50 100644
--- a/internal/cli/entry.go
+++ b/internal/cli/entry.go
@@ -1,7 +1,10 @@
 package cli
 
 import (
+	"io"
 	"os"
+
+	"github.com/spf13/afero"
 )
 
 var NO_COLOR bool
@@ -13,13 +16,50 @@ func init() {
 	}
 }
 
-func Run(Appname, Version, Gitrev string) int {
-	m := &CLIApp{}
-	m.appname = Appname
-	m.version = Version
-	m.gitrev = Gitrev
-	m.exitCode = 0
+// RunOptions contains all configuration for running the CLI application.
+type RunOptions struct {
+	// Appname, Version and Gitrev identify the build.
+	Appname string
+	Version string
+	Gitrev  string
+	// Args is the argument vector handed to the application (os.Args in
+	// DefaultRunOptions).
+	Args    []string
+	// Stdin, Stdout and Stderr are the streams the application is given
+	// (the process streams in DefaultRunOptions).
+	Stdin   io.Reader
+	Stdout  io.Writer
+	Stderr  io.Writer
+	// Fs is the filesystem the application is given (the real OS
+	// filesystem in DefaultRunOptions).
+	Fs      afero.Fs
+}
 
-	m.run()
+// DefaultRunOptions returns RunOptions configured for normal CLI execution.
+func DefaultRunOptions(appname, version, gitrev string) *RunOptions { + return &RunOptions{ + Appname: appname, + Version: version, + Gitrev: gitrev, + Args: os.Args, + Stdin: os.Stdin, + Stdout: os.Stdout, + Stderr: os.Stderr, + Fs: afero.NewOsFs(), + } +} + +// Run creates and runs the CLI application with default options. +func Run(appname, version, gitrev string) int { + return RunWithOptions(DefaultRunOptions(appname, version, gitrev)) +} + +// RunWithOptions creates and runs the CLI application with the given options. +func RunWithOptions(opts *RunOptions) int { + m := &CLIApp{ + appname: opts.Appname, + version: opts.Version, + gitrev: opts.Gitrev, + exitCode: 0, + Stdin: opts.Stdin, + Stdout: opts.Stdout, + Stderr: opts.Stderr, + Fs: opts.Fs, + } + + m.run(opts.Args) return m.exitCode } diff --git a/internal/cli/entry_test.go b/internal/cli/entry_test.go index 71d4a78..dce560b 100644 --- a/internal/cli/entry_test.go +++ b/internal/cli/entry_test.go @@ -1,12 +1,306 @@ package cli import ( + "bytes" "testing" + "github.com/spf13/afero" "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + urfcli "github.com/urfave/cli/v2" ) +func init() { + // Prevent urfave/cli from calling os.Exit during tests + urfcli.OsExiter = func(code int) {} +} + func TestBuild(t *testing.T) { m := &CLIApp{} assert.NotNil(t, m) } + +func testOpts(args []string, fs afero.Fs) *RunOptions { + return &RunOptions{ + Appname: "mfer", + Version: "1.0.0", + Gitrev: "abc123", + Args: args, + Stdin: &bytes.Buffer{}, + Stdout: &bytes.Buffer{}, + Stderr: &bytes.Buffer{}, + Fs: fs, + } +} + +func TestVersionCommand(t *testing.T) { + fs := afero.NewMemMapFs() + opts := testOpts([]string{"mfer", "version"}, fs) + + exitCode := RunWithOptions(opts) + + assert.Equal(t, 0, exitCode) + stdout := opts.Stdout.(*bytes.Buffer).String() + assert.Contains(t, stdout, "1.0.0") + assert.Contains(t, stdout, "abc123") +} + +func TestHelpCommand(t *testing.T) { + fs := afero.NewMemMapFs() 
+ opts := testOpts([]string{"mfer", "--help"}, fs) + + exitCode := RunWithOptions(opts) + + assert.Equal(t, 0, exitCode) + stdout := opts.Stdout.(*bytes.Buffer).String() + assert.Contains(t, stdout, "generate") + assert.Contains(t, stdout, "check") + assert.Contains(t, stdout, "fetch") +} + +func TestGenerateCommand(t *testing.T) { + fs := afero.NewMemMapFs() + + // Create test files in memory filesystem + require.NoError(t, fs.MkdirAll("/testdir", 0755)) + require.NoError(t, afero.WriteFile(fs, "/testdir/file1.txt", []byte("hello world"), 0644)) + require.NoError(t, afero.WriteFile(fs, "/testdir/file2.txt", []byte("test content"), 0644)) + + opts := testOpts([]string{"mfer", "-q", "generate", "-o", "/testdir/test.mf", "/testdir"}, fs) + + exitCode := RunWithOptions(opts) + + assert.Equal(t, 0, exitCode, "stderr: %s", opts.Stderr.(*bytes.Buffer).String()) + + // Verify manifest was created + exists, err := afero.Exists(fs, "/testdir/test.mf") + require.NoError(t, err) + assert.True(t, exists) +} + +func TestGenerateAndCheckCommand(t *testing.T) { + fs := afero.NewMemMapFs() + + // Create test files with subdirectory + require.NoError(t, fs.MkdirAll("/testdir/subdir", 0755)) + require.NoError(t, afero.WriteFile(fs, "/testdir/file1.txt", []byte("hello world"), 0644)) + require.NoError(t, afero.WriteFile(fs, "/testdir/subdir/file2.txt", []byte("test content"), 0644)) + + // Generate manifest + opts := testOpts([]string{"mfer", "-q", "generate", "-o", "/testdir/test.mf", "/testdir"}, fs) + exitCode := RunWithOptions(opts) + require.Equal(t, 0, exitCode, "generate failed: %s", opts.Stderr.(*bytes.Buffer).String()) + + // Check manifest + opts = testOpts([]string{"mfer", "-q", "check", "--base", "/testdir", "/testdir/test.mf"}, fs) + exitCode = RunWithOptions(opts) + assert.Equal(t, 0, exitCode, "check failed: %s", opts.Stderr.(*bytes.Buffer).String()) +} + +func TestCheckCommandWithMissingFile(t *testing.T) { + fs := afero.NewMemMapFs() + + // Create test file + 
require.NoError(t, fs.MkdirAll("/testdir", 0755)) + require.NoError(t, afero.WriteFile(fs, "/testdir/file1.txt", []byte("hello world"), 0644)) + + // Generate manifest + opts := testOpts([]string{"mfer", "-q", "generate", "-o", "/testdir/test.mf", "/testdir"}, fs) + exitCode := RunWithOptions(opts) + require.Equal(t, 0, exitCode, "generate failed: %s", opts.Stderr.(*bytes.Buffer).String()) + + // Delete the file + require.NoError(t, fs.Remove("/testdir/file1.txt")) + + // Check manifest - should fail + opts = testOpts([]string{"mfer", "-q", "check", "--base", "/testdir", "/testdir/test.mf"}, fs) + exitCode = RunWithOptions(opts) + assert.Equal(t, 1, exitCode, "check should have failed for missing file") +} + +func TestCheckCommandWithCorruptedFile(t *testing.T) { + fs := afero.NewMemMapFs() + + // Create test file + require.NoError(t, fs.MkdirAll("/testdir", 0755)) + require.NoError(t, afero.WriteFile(fs, "/testdir/file1.txt", []byte("hello world"), 0644)) + + // Generate manifest + opts := testOpts([]string{"mfer", "-q", "generate", "-o", "/testdir/test.mf", "/testdir"}, fs) + exitCode := RunWithOptions(opts) + require.Equal(t, 0, exitCode, "generate failed: %s", opts.Stderr.(*bytes.Buffer).String()) + + // Corrupt the file (change content but keep same size) + require.NoError(t, afero.WriteFile(fs, "/testdir/file1.txt", []byte("HELLO WORLD"), 0644)) + + // Check manifest - should fail with hash mismatch + opts = testOpts([]string{"mfer", "-q", "check", "--base", "/testdir", "/testdir/test.mf"}, fs) + exitCode = RunWithOptions(opts) + assert.Equal(t, 1, exitCode, "check should have failed for corrupted file") +} + +func TestCheckCommandWithSizeMismatch(t *testing.T) { + fs := afero.NewMemMapFs() + + // Create test file + require.NoError(t, fs.MkdirAll("/testdir", 0755)) + require.NoError(t, afero.WriteFile(fs, "/testdir/file1.txt", []byte("hello world"), 0644)) + + // Generate manifest + opts := testOpts([]string{"mfer", "-q", "generate", "-o", "/testdir/test.mf", 
"/testdir"}, fs) + exitCode := RunWithOptions(opts) + require.Equal(t, 0, exitCode, "generate failed: %s", opts.Stderr.(*bytes.Buffer).String()) + + // Change file size + require.NoError(t, afero.WriteFile(fs, "/testdir/file1.txt", []byte("different size content here"), 0644)) + + // Check manifest - should fail with size mismatch + opts = testOpts([]string{"mfer", "-q", "check", "--base", "/testdir", "/testdir/test.mf"}, fs) + exitCode = RunWithOptions(opts) + assert.Equal(t, 1, exitCode, "check should have failed for size mismatch") +} + +func TestBannerOutput(t *testing.T) { + fs := afero.NewMemMapFs() + + // Create test file + require.NoError(t, fs.MkdirAll("/testdir", 0755)) + require.NoError(t, afero.WriteFile(fs, "/testdir/file1.txt", []byte("hello"), 0644)) + + // Run without -q to see banner + opts := testOpts([]string{"mfer", "generate", "-o", "/testdir/test.mf", "/testdir"}, fs) + exitCode := RunWithOptions(opts) + assert.Equal(t, 0, exitCode) + + // Banner ASCII art should be in stdout + stdout := opts.Stdout.(*bytes.Buffer).String() + assert.Contains(t, stdout, "___") + assert.Contains(t, stdout, "\\") +} + +func TestUnknownCommand(t *testing.T) { + fs := afero.NewMemMapFs() + opts := testOpts([]string{"mfer", "unknown"}, fs) + + exitCode := RunWithOptions(opts) + assert.Equal(t, 1, exitCode) +} + +func TestGenerateWithIgnoreDotfiles(t *testing.T) { + fs := afero.NewMemMapFs() + + // Create test files including dotfiles + require.NoError(t, fs.MkdirAll("/testdir", 0755)) + require.NoError(t, afero.WriteFile(fs, "/testdir/file1.txt", []byte("hello"), 0644)) + require.NoError(t, afero.WriteFile(fs, "/testdir/.hidden", []byte("secret"), 0644)) + + // Generate manifest with --ignore-dotfiles + opts := testOpts([]string{"mfer", "-q", "generate", "--ignore-dotfiles", "-o", "/testdir/test.mf", "/testdir"}, fs) + exitCode := RunWithOptions(opts) + require.Equal(t, 0, exitCode) + + // Check that manifest exists and we can verify (hidden file won't cause failure 
even if missing) + exists, _ := afero.Exists(fs, "/testdir/test.mf") + assert.True(t, exists) +} + +func TestMultipleInputPaths(t *testing.T) { + fs := afero.NewMemMapFs() + + // Create test files in multiple directories + require.NoError(t, fs.MkdirAll("/dir1", 0755)) + require.NoError(t, fs.MkdirAll("/dir2", 0755)) + require.NoError(t, afero.WriteFile(fs, "/dir1/file1.txt", []byte("content1"), 0644)) + require.NoError(t, afero.WriteFile(fs, "/dir2/file2.txt", []byte("content2"), 0644)) + + // Generate manifest from multiple paths + opts := testOpts([]string{"mfer", "-q", "generate", "-o", "/output.mf", "/dir1", "/dir2"}, fs) + exitCode := RunWithOptions(opts) + assert.Equal(t, 0, exitCode, "stderr: %s", opts.Stderr.(*bytes.Buffer).String()) + + exists, _ := afero.Exists(fs, "/output.mf") + assert.True(t, exists) +} + +func TestNoExtraFilesPass(t *testing.T) { + fs := afero.NewMemMapFs() + + // Create test files + require.NoError(t, fs.MkdirAll("/testdir", 0755)) + require.NoError(t, afero.WriteFile(fs, "/testdir/file1.txt", []byte("hello"), 0644)) + require.NoError(t, afero.WriteFile(fs, "/testdir/file2.txt", []byte("world"), 0644)) + + // Generate manifest + opts := testOpts([]string{"mfer", "-q", "generate", "-o", "/manifest.mf", "/testdir"}, fs) + exitCode := RunWithOptions(opts) + require.Equal(t, 0, exitCode) + + // Check with --no-extra-files (should pass - no extra files) + opts = testOpts([]string{"mfer", "-q", "check", "--no-extra-files", "--base", "/testdir", "/manifest.mf"}, fs) + exitCode = RunWithOptions(opts) + assert.Equal(t, 0, exitCode) +} + +func TestNoExtraFilesFail(t *testing.T) { + fs := afero.NewMemMapFs() + + // Create test files + require.NoError(t, fs.MkdirAll("/testdir", 0755)) + require.NoError(t, afero.WriteFile(fs, "/testdir/file1.txt", []byte("hello"), 0644)) + + // Generate manifest + opts := testOpts([]string{"mfer", "-q", "generate", "-o", "/manifest.mf", "/testdir"}, fs) + exitCode := RunWithOptions(opts) + require.Equal(t, 0, 
exitCode) + + // Add an extra file after manifest generation + require.NoError(t, afero.WriteFile(fs, "/testdir/extra.txt", []byte("extra"), 0644)) + + // Check with --no-extra-files (should fail - extra file exists) + opts = testOpts([]string{"mfer", "-q", "check", "--no-extra-files", "--base", "/testdir", "/manifest.mf"}, fs) + exitCode = RunWithOptions(opts) + assert.Equal(t, 1, exitCode, "check should fail when extra files exist") +} + +func TestNoExtraFilesWithSubdirectory(t *testing.T) { + fs := afero.NewMemMapFs() + + // Create test files with subdirectory + require.NoError(t, fs.MkdirAll("/testdir/subdir", 0755)) + require.NoError(t, afero.WriteFile(fs, "/testdir/file1.txt", []byte("hello"), 0644)) + require.NoError(t, afero.WriteFile(fs, "/testdir/subdir/file2.txt", []byte("world"), 0644)) + + // Generate manifest + opts := testOpts([]string{"mfer", "-q", "generate", "-o", "/manifest.mf", "/testdir"}, fs) + exitCode := RunWithOptions(opts) + require.Equal(t, 0, exitCode) + + // Add extra file in subdirectory + require.NoError(t, afero.WriteFile(fs, "/testdir/subdir/extra.txt", []byte("extra"), 0644)) + + // Check with --no-extra-files (should fail) + opts = testOpts([]string{"mfer", "-q", "check", "--no-extra-files", "--base", "/testdir", "/manifest.mf"}, fs) + exitCode = RunWithOptions(opts) + assert.Equal(t, 1, exitCode, "check should fail when extra files exist in subdirectory") +} + +func TestCheckWithoutNoExtraFilesIgnoresExtra(t *testing.T) { + fs := afero.NewMemMapFs() + + // Create test file + require.NoError(t, fs.MkdirAll("/testdir", 0755)) + require.NoError(t, afero.WriteFile(fs, "/testdir/file1.txt", []byte("hello"), 0644)) + + // Generate manifest + opts := testOpts([]string{"mfer", "-q", "generate", "-o", "/manifest.mf", "/testdir"}, fs) + exitCode := RunWithOptions(opts) + require.Equal(t, 0, exitCode) + + // Add extra file + require.NoError(t, afero.WriteFile(fs, "/testdir/extra.txt", []byte("extra"), 0644)) + + // Check WITHOUT 
--no-extra-files (should pass - extra files ignored) + opts = testOpts([]string{"mfer", "-q", "check", "--base", "/testdir", "/manifest.mf"}, fs) + exitCode = RunWithOptions(opts) + assert.Equal(t, 0, exitCode, "check without --no-extra-files should ignore extra files") +} diff --git a/internal/cli/gen.go b/internal/cli/gen.go index bcff557..a0b0053 100644 --- a/internal/cli/gen.go +++ b/internal/cli/gen.go @@ -1,54 +1,100 @@ package cli import ( - "bytes" + "fmt" "path/filepath" + "time" "github.com/urfave/cli/v2" "sneak.berlin/go/mfer/internal/log" - "sneak.berlin/go/mfer/mfer" + "sneak.berlin/go/mfer/internal/scanner" ) func (mfa *CLIApp) generateManifestOperation(ctx *cli.Context) error { log.Debug("generateManifestOperation()") - myArgs := ctx.Args() - log.Dump(myArgs) - opts := &mfer.ManifestScanOptions{ + opts := &scanner.Options{ IgnoreDotfiles: ctx.Bool("IgnoreDotfiles"), FollowSymLinks: ctx.Bool("FollowSymLinks"), + Fs: mfa.Fs, } - paths := make([]string, ctx.Args().Len()-1) - for i := 0; i < ctx.Args().Len(); i++ { - ap, err := filepath.Abs(ctx.Args().Get(i)) - if err != nil { + + s := scanner.NewWithOptions(opts) + + // Phase 1: Enumeration - collect paths and stat files + args := ctx.Args() + showProgress := ctx.Bool("progress") + + // Set up enumeration progress reporting + var enumProgress chan scanner.EnumerateStatus + if showProgress { + enumProgress = make(chan scanner.EnumerateStatus, 1) + go func() { + for status := range enumProgress { + log.Progressf("Enumerating: %d files, %.1f MB", + status.FilesFound, + float64(status.BytesFound)/1e6) + } + log.ProgressDone() + }() + } + + if args.Len() == 0 { + // Default to current directory + if err := s.EnumeratePath(".", enumProgress); err != nil { + return err + } + } else { + // Collect all paths first + paths := make([]string, 0, args.Len()) + for i := 0; i < args.Len(); i++ { + ap, err := filepath.Abs(args.Get(i)) + if err != nil { + return err + } + log.Debugf("enumerating path: %s", ap) + paths = 
append(paths, ap) + } + if err := s.EnumeratePaths(enumProgress, paths...); err != nil { return err } - log.Dump(ap) - paths = append(paths, ap) } - mf, err := mfer.NewFromPaths(opts, paths...) + + log.Debugf("enumerated %d files, %d bytes total", s.FileCount(), s.TotalBytes()) + + // Open output file + outputPath := ctx.String("output") + outFile, err := mfa.Fs.Create(outputPath) if err != nil { - panic(err) + return fmt.Errorf("failed to create output file: %w", err) } - mf.WithContext(ctx.Context) + defer outFile.Close() - log.Dump(mf) + // Phase 2: Scan - read file contents and generate manifest + var scanProgress chan scanner.ScanStatus + if showProgress { + scanProgress = make(chan scanner.ScanStatus, 1) + go func() { + for status := range scanProgress { + log.Progressf("Scanning: %d/%d files, %.1f MB/s", + status.ScannedFiles, + status.TotalFiles, + status.BytesPerSec/1e6) + } + log.ProgressDone() + }() + } - err = mf.Scan() + err = s.ToManifest(ctx.Context, outFile, scanProgress) if err != nil { - return err + return fmt.Errorf("failed to generate manifest: %w", err) } - buf := new(bytes.Buffer) - - err = mf.WriteTo(buf) - if err != nil { - return err + if !ctx.Bool("quiet") { + elapsed := time.Since(mfa.startupTime).Seconds() + rate := float64(s.TotalBytes()) / elapsed / 1e6 + log.Infof("wrote %d files (%.1f MB) to %s in %.1fs (%.1f MB/s)", s.FileCount(), float64(s.TotalBytes())/1e6, outputPath, elapsed, rate) } - dat := buf.Bytes() - - log.Dump(dat) return nil } diff --git a/internal/cli/mfer.go b/internal/cli/mfer.go index 51dce77..bc6f74a 100644 --- a/internal/cli/mfer.go +++ b/internal/cli/mfer.go @@ -2,9 +2,11 @@ package cli import ( "fmt" + "io" "os" "time" + "github.com/spf13/afero" "github.com/urfave/cli/v2" "sneak.berlin/go/mfer/internal/log" ) @@ -16,22 +18,31 @@ type CLIApp struct { startupTime time.Time exitCode int app *cli.App + + // I/O streams - all program input/output should go through these + Stdin io.Reader + Stdout io.Writer + Stderr 
io.Writer + + // Fs is the filesystem abstraction - defaults to OsFs for real filesystem + Fs afero.Fs } -const banner = ` ___ ___ ___ ___ - /__/\ / /\ / /\ / /\ - | |::\ / /:/_ / /:/_ / /::\ - | |:|:\ / /:/ /\ / /:/ /\ / /:/\:\ - __|__|:|\:\ / /:/ /:/ / /:/ /:/_ / /:/~/:/ - /__/::::| \:\ /__/:/ /:/ /__/:/ /:/ /\ /__/:/ /:/___ - \ \:\~~\__\/ \ \:\/:/ \ \:\/:/ /:/ \ \:\/:::::/ - \ \:\ \ \::/ \ \::/ /:/ \ \::/~~~~ - \ \:\ \ \:\ \ \:\/:/ \ \:\ - \ \:\ \ \:\ \ \::/ \ \:\ - \__\/ \__\/ \__\/ \__\/` +const banner = ` + ___ ___ ___ ___ + /__/\ / /\ / /\ / /\ + | |::\ / /:/_ / /:/_ / /::\ + | |:|:\ / /:/ /\ / /:/ /\ / /:/\:\ + __|__|:|\:\ / /:/ /:/ / /:/ /:/_ / /:/~/:/ +/__/::::| \:\ /__/:/ /:/ /__/:/ /:/ /\ /__/:/ /:/___ +\ \:\~~\__\/ \ \:\/:/ \ \:\/:/ /:/ \ \:\/:::::/ + \ \:\ \ \::/ \ \::/ /:/ \ \::/~~~~ + \ \:\ \ \:\ \ \:\/:/ \ \:\ + \ \:\ \ \:\ \ \::/ \ \:\ + \__\/ \__\/ \__\/ \__\/` func (mfa *CLIApp) printBanner() { - fmt.Println(banner) + fmt.Fprintln(mfa.Stdout, banner) } func (mfa *CLIApp) VersionString() string { @@ -47,7 +58,7 @@ func (mfa *CLIApp) setVerbosity(v int) { } } -func (mfa *CLIApp) run() { +func (mfa *CLIApp) run(args []string) { mfa.startupTime = time.Now() if NO_COLOR { @@ -55,6 +66,8 @@ func (mfa *CLIApp) run() { log.DisableStyling() } + // Configure log package to use our I/O streams + log.SetOutput(mfa.Stdout, mfa.Stderr) log.Init() var verbosity int @@ -64,6 +77,8 @@ func (mfa *CLIApp) run() { Usage: "Manifest generator", Version: mfa.VersionString(), EnableBashCompletion: true, + Writer: mfa.Stdout, + ErrWriter: mfa.Stderr, Flags: []cli.Flag{ &cli.BoolFlag{ Name: "verbose", @@ -106,11 +121,17 @@ func (mfa *CLIApp) run() { Aliases: []string{"o"}, Usage: "Specify output filename", }, + &cli.BoolFlag{ + Name: "progress", + Aliases: []string{"P"}, + Usage: "Show progress during enumeration and scanning", + }, }, }, { - Name: "check", - Usage: "Validate files using manifest file", + Name: "check", + Usage: "Validate files using manifest file", + 
ArgsUsage: "[manifest file]", Action: func(c *cli.Context) error { if !c.Bool("quiet") { mfa.printBanner() @@ -118,12 +139,29 @@ func (mfa *CLIApp) run() { mfa.setVerbosity(verbosity) return mfa.checkManifestOperation(c) }, + Flags: []cli.Flag{ + &cli.StringFlag{ + Name: "base", + Aliases: []string{"b"}, + Value: ".", + Usage: "Base directory for resolving relative paths from manifest", + }, + &cli.BoolFlag{ + Name: "progress", + Aliases: []string{"P"}, + Usage: "Show progress during checking", + }, + &cli.BoolFlag{ + Name: "no-extra-files", + Usage: "Fail if files exist in base directory that are not in manifest", + }, + }, }, { Name: "version", Usage: "Show version", Action: func(c *cli.Context) error { - fmt.Printf("%s\n", mfa.VersionString()) + fmt.Fprintln(mfa.Stdout, mfa.VersionString()) return nil }, }, @@ -142,7 +180,7 @@ func (mfa *CLIApp) run() { } mfa.app.HideVersion = true - err := mfa.app.Run(os.Args) + err := mfa.app.Run(args) if err != nil { mfa.exitCode = 1 log.WithError(err).Debugf("exiting") diff --git a/internal/log/log.go b/internal/log/log.go index b52409b..cf69e31 100644 --- a/internal/log/log.go +++ b/internal/log/log.go @@ -2,7 +2,10 @@ package log import ( "fmt" + "io" + "os" "runtime" + "sync" "github.com/apex/log" acli "github.com/apex/log/handlers/cli" @@ -12,6 +15,39 @@ import ( type Level = log.Level +var ( + // mu protects the output writers + mu sync.RWMutex + // stdout is the writer for progress output + stdout io.Writer = os.Stdout + // stderr is the writer for log output + stderr io.Writer = os.Stderr +) + +// SetOutput configures the output writers for the log package. +// stdout is used for progress output, stderr is used for log messages. +func SetOutput(out, err io.Writer) { + mu.Lock() + defer mu.Unlock() + stdout = out + stderr = err + pterm.SetDefaultOutput(out) +} + +// GetStdout returns the configured stdout writer. 
+func GetStdout() io.Writer { + mu.RLock() + defer mu.RUnlock() + return stdout +} + +// GetStderr returns the configured stderr writer. +func GetStderr() io.Writer { + mu.RLock() + defer mu.RUnlock() + return stderr +} + func DisableStyling() { pterm.DisableColor() pterm.DisableStyling() @@ -24,10 +60,21 @@ func DisableStyling() { } func Init() { - log.SetHandler(acli.Default) + mu.RLock() + w := stderr + mu.RUnlock() + log.SetHandler(acli.New(w)) log.SetLevel(log.InfoLevel) } +func Infof(format string, args ...interface{}) { + log.Infof(format, args...) +} + +func Info(arg string) { + log.Info(arg) +} + func Debugf(format string, args ...interface{}) { DebugReal(fmt.Sprintf(format, args...), 2) } @@ -55,14 +102,13 @@ func EnableDebugLogging() { func VerbosityStepsToLogLevel(l int) log.Level { switch l { - case 1: - return log.WarnLevel - case 2: + case 0: return log.InfoLevel - case 3: + case 1: return log.DebugLevel } - return log.ErrorLevel + // -vv or more + return log.DebugLevel } func SetLevelFromVerbosity(l int) { @@ -87,3 +133,14 @@ func GetLevel() log.Level { func WithError(e error) *log.Entry { return GetLogger().WithError(e) } + +// Progressf prints a progress message that overwrites the current line. +// Use ProgressDone() when progress is complete to move to the next line. +func Progressf(format string, args ...interface{}) { + pterm.Printf("\r"+format, args...) +} + +// ProgressDone completes a progress line by printing a newline. +func ProgressDone() { + pterm.Println() +} diff --git a/internal/scanner/scanner.go b/internal/scanner/scanner.go new file mode 100644 index 0000000..95ba6e7 --- /dev/null +++ b/internal/scanner/scanner.go @@ -0,0 +1,373 @@ +package scanner + +import ( + "context" + "io" + "io/fs" + "path" + "path/filepath" + "strings" + "sync" + "time" + + "github.com/spf13/afero" + "sneak.berlin/go/mfer/mfer" +) + +// Phase 1: Enumeration +// --------------------- +// Walking directories and calling stat() on files to collect metadata. 
+// Builds the list of files to be scanned. Relatively fast (metadata only). + +// EnumerateStatus contains progress information for the enumeration phase. +type EnumerateStatus struct { + FilesFound int64 + BytesFound int64 +} + +// Phase 2: Scan (ToManifest) +// -------------------------- +// Reading file contents and computing hashes for manifest generation. +// This is the expensive phase that reads all file data. + +// ScanStatus contains progress information for the scan phase. +type ScanStatus struct { + TotalFiles int64 + ScannedFiles int64 + TotalBytes int64 + ScannedBytes int64 + BytesPerSec float64 +} + +// Options configures scanner behavior. +type Options struct { + IgnoreDotfiles bool + FollowSymLinks bool + Fs afero.Fs // Filesystem to use, defaults to OsFs +} + +// FileEntry represents a file that has been enumerated. +type FileEntry struct { + Path string // Relative path (used in manifest) + AbsPath string // Absolute path (used for reading file content) + Size int64 + Mtime time.Time + Ctime time.Time +} + +// Scanner accumulates files and generates manifests from them. +type Scanner struct { + mu sync.RWMutex + files []*FileEntry + options *Options + fs afero.Fs +} + +// New creates a new Scanner with default options. +func New() *Scanner { + return NewWithOptions(nil) +} + +// NewWithOptions creates a new Scanner with the given options. +func NewWithOptions(opts *Options) *Scanner { + if opts == nil { + opts = &Options{} + } + fs := opts.Fs + if fs == nil { + fs = afero.NewOsFs() + } + return &Scanner{ + files: make([]*FileEntry, 0), + options: opts, + fs: fs, + } +} + +// EnumerateFile adds a single file to the scanner, calling stat() to get metadata. 
+func (s *Scanner) EnumerateFile(filePath string) error { + abs, err := filepath.Abs(filePath) + if err != nil { + return err + } + info, err := s.fs.Stat(abs) + if err != nil { + return err + } + // For single files, use the filename as the relative path + basePath := filepath.Dir(abs) + return s.enumerateFileWithInfo(filepath.Base(abs), basePath, info, nil) +} + +// EnumeratePath walks a directory path and adds all files to the scanner. +// If progress is non-nil, status updates are sent as files are discovered. +// The progress channel is closed when the method returns. +func (s *Scanner) EnumeratePath(inputPath string, progress chan<- EnumerateStatus) error { + if progress != nil { + defer close(progress) + } + abs, err := filepath.Abs(inputPath) + if err != nil { + return err + } + afs := afero.NewReadOnlyFs(afero.NewBasePathFs(s.fs, abs)) + return s.enumerateFS(afs, abs, progress) +} + +// EnumeratePaths walks multiple directory paths and adds all files to the scanner. +// If progress is non-nil, status updates are sent as files are discovered. +// The progress channel is closed when the method returns. +func (s *Scanner) EnumeratePaths(progress chan<- EnumerateStatus, inputPaths ...string) error { + if progress != nil { + defer close(progress) + } + for _, p := range inputPaths { + abs, err := filepath.Abs(p) + if err != nil { + return err + } + afs := afero.NewReadOnlyFs(afero.NewBasePathFs(s.fs, abs)) + if err := s.enumerateFS(afs, abs, progress); err != nil { + return err + } + } + return nil +} + +// EnumerateFS walks an afero filesystem and adds all files to the scanner. +// If progress is non-nil, status updates are sent as files are discovered. +// The progress channel is closed when the method returns. +// basePath is used to compute absolute paths for file reading. 
+func (s *Scanner) EnumerateFS(afs afero.Fs, basePath string, progress chan<- EnumerateStatus) error { + if progress != nil { + defer close(progress) + } + return s.enumerateFS(afs, basePath, progress) +} + +// enumerateFS is the internal implementation that doesn't close the progress channel. +func (s *Scanner) enumerateFS(afs afero.Fs, basePath string, progress chan<- EnumerateStatus) error { + return afero.Walk(afs, "/", func(p string, info fs.FileInfo, err error) error { + if err != nil { + return err + } + if s.options.IgnoreDotfiles && pathIsHidden(p) { + if info.IsDir() { + return filepath.SkipDir + } + return nil + } + return s.enumerateFileWithInfo(p, basePath, info, progress) + }) +} + +// enumerateFileWithInfo adds a file with pre-existing fs.FileInfo. +func (s *Scanner) enumerateFileWithInfo(filePath string, basePath string, info fs.FileInfo, progress chan<- EnumerateStatus) error { + if info.IsDir() { + // Manifests contain only files, directories are implied + return nil + } + + // Clean the path - remove leading slash if present + cleanPath := filePath + if len(cleanPath) > 0 && cleanPath[0] == '/' { + cleanPath = cleanPath[1:] + } + + // Compute absolute path for file reading + absPath := filepath.Join(basePath, cleanPath) + + entry := &FileEntry{ + Path: cleanPath, + AbsPath: absPath, + Size: info.Size(), + Mtime: info.ModTime(), + // Note: Ctime not available from fs.FileInfo on all platforms + // Will need platform-specific code to extract it + } + + s.mu.Lock() + s.files = append(s.files, entry) + filesFound := int64(len(s.files)) + var bytesFound int64 + for _, f := range s.files { + bytesFound += f.Size + } + s.mu.Unlock() + + sendEnumerateStatus(progress, EnumerateStatus{ + FilesFound: filesFound, + BytesFound: bytesFound, + }) + + return nil +} + +// Files returns a copy of all files added to the scanner. 
+func (s *Scanner) Files() []*FileEntry { + s.mu.RLock() + defer s.mu.RUnlock() + out := make([]*FileEntry, len(s.files)) + copy(out, s.files) + return out +} + +// FileCount returns the number of files in the scanner. +func (s *Scanner) FileCount() int64 { + s.mu.RLock() + defer s.mu.RUnlock() + return int64(len(s.files)) +} + +// TotalBytes returns the total size of all files in the scanner. +func (s *Scanner) TotalBytes() int64 { + s.mu.RLock() + defer s.mu.RUnlock() + var total int64 + for _, f := range s.files { + total += f.Size + } + return total +} + +// ToManifest reads all file contents, computes hashes, and generates a manifest. +// If progress is non-nil, status updates are sent approximately once per second. +// The progress channel is closed when the method returns. +// The manifest is written to the provided io.Writer. +func (s *Scanner) ToManifest(ctx context.Context, w io.Writer, progress chan<- ScanStatus) error { + if progress != nil { + defer close(progress) + } + + s.mu.RLock() + files := make([]*FileEntry, len(s.files)) + copy(files, s.files) + totalFiles := int64(len(files)) + var totalBytes int64 + for _, f := range files { + totalBytes += f.Size + } + s.mu.RUnlock() + + builder := mfer.NewBuilder() + + var scannedFiles int64 + var scannedBytes int64 + lastProgressTime := time.Now() + startTime := time.Now() + + for _, entry := range files { + // Check for cancellation + select { + case <-ctx.Done(): + return ctx.Err() + default: + } + + // Open file + f, err := s.fs.Open(entry.AbsPath) + if err != nil { + return err + } + + // Add to manifest with progress callback + bytesRead, err := builder.AddFile( + entry.Path, + entry.Size, + entry.Mtime, + f, + func(fileBytes int64) { + // Send progress at most once per second + now := time.Now() + if progress != nil && now.Sub(lastProgressTime) >= time.Second { + elapsed := now.Sub(startTime).Seconds() + currentBytes := scannedBytes + fileBytes + var rate float64 + if elapsed > 0 { + rate = 
float64(currentBytes) / elapsed + } + sendScanStatus(progress, ScanStatus{ + TotalFiles: totalFiles, + ScannedFiles: scannedFiles, + TotalBytes: totalBytes, + ScannedBytes: currentBytes, + BytesPerSec: rate, + }) + lastProgressTime = now + } + }, + ) + f.Close() + + if err != nil { + return err + } + + scannedFiles++ + scannedBytes += bytesRead + } + + // Send final progress + if progress != nil { + elapsed := time.Since(startTime).Seconds() + var rate float64 + if elapsed > 0 { + rate = float64(scannedBytes) / elapsed + } + sendScanStatus(progress, ScanStatus{ + TotalFiles: totalFiles, + ScannedFiles: scannedFiles, + TotalBytes: totalBytes, + ScannedBytes: scannedBytes, + BytesPerSec: rate, + }) + } + + // Build and write manifest + return builder.Build(w) +} + +// pathIsHidden returns true if the path or any of its parent directories +// start with a dot (hidden files/directories). +func pathIsHidden(p string) bool { + tp := path.Clean(p) + if strings.HasPrefix(tp, ".") { + return true + } + for { + d, f := path.Split(tp) + if strings.HasPrefix(f, ".") { + return true + } + if d == "" { + return false + } + tp = d[0 : len(d)-1] // trim trailing slash from dir + } +} + +// sendEnumerateStatus sends a status update without blocking. +// If the channel is full, the update is dropped. +func sendEnumerateStatus(ch chan<- EnumerateStatus, status EnumerateStatus) { + if ch == nil { + return + } + select { + case ch <- status: + default: + // Channel full, drop this update + } +} + +// sendScanStatus sends a status update without blocking. +// If the channel is full, the update is dropped. 
+func sendScanStatus(ch chan<- ScanStatus, status ScanStatus) { + if ch == nil { + return + } + select { + case ch <- status: + default: + // Channel full, drop this update + } +} diff --git a/mfer/builder.go b/mfer/builder.go new file mode 100644 index 0000000..e3d0636 --- /dev/null +++ b/mfer/builder.go @@ -0,0 +1,124 @@ +package mfer + +import ( + "crypto/sha256" + "io" + "sync" + "time" + + "github.com/multiformats/go-multihash" +) + +// FileProgress is called during file processing to report bytes read. +type FileProgress func(bytesRead int64) + +// ManifestBuilder constructs a manifest by adding files one at a time. +type ManifestBuilder struct { + mu sync.Mutex + files []*MFFilePath + createdAt time.Time +} + +// NewBuilder creates a new ManifestBuilder. +func NewBuilder() *ManifestBuilder { + return &ManifestBuilder{ + files: make([]*MFFilePath, 0), + createdAt: time.Now(), + } +} + +// AddFile reads file content from reader, computes hashes, and adds to manifest. +// The progress callback is called periodically with total bytes read so far. +// Returns the number of bytes read. 
+func (b *ManifestBuilder) AddFile( + path string, + size int64, + mtime time.Time, + reader io.Reader, + progress FileProgress, +) (int64, error) { + // Create hash writer + h := sha256.New() + + // Read file in chunks, updating hash and progress + var totalRead int64 + buf := make([]byte, 64*1024) // 64KB chunks + + for { + n, err := reader.Read(buf) + if n > 0 { + h.Write(buf[:n]) + totalRead += int64(n) + if progress != nil { + progress(totalRead) + } + } + if err == io.EOF { + break + } + if err != nil { + return totalRead, err + } + } + + // Encode hash as multihash (SHA2-256) + mh, err := multihash.Encode(h.Sum(nil), multihash.SHA2_256) + if err != nil { + return totalRead, err + } + + // Create file entry + entry := &MFFilePath{ + Path: path, + Size: size, + Hashes: []*MFFileChecksum{ + {MultiHash: mh}, + }, + Mtime: newTimestampFromTime(mtime), + } + + b.mu.Lock() + b.files = append(b.files, entry) + b.mu.Unlock() + + return totalRead, nil +} + +// FileCount returns the number of files added to the builder. +func (b *ManifestBuilder) FileCount() int { + b.mu.Lock() + defer b.mu.Unlock() + return len(b.files) +} + +// Build finalizes the manifest and writes it to the writer. 
+func (b *ManifestBuilder) Build(w io.Writer) error { + b.mu.Lock() + defer b.mu.Unlock() + + // Create inner manifest + inner := &MFFile{ + Version: MFFile_VERSION_ONE, + CreatedAt: newTimestampFromTime(b.createdAt), + Files: b.files, + } + + // Create a temporary manifest to use existing serialization + m := &manifest{ + pbInner: inner, + } + + // Generate outer wrapper + if err := m.generateOuter(); err != nil { + return err + } + + // Generate final output + if err := m.generate(); err != nil { + return err + } + + // Write to output + _, err := w.Write(m.output.Bytes()) + return err +} diff --git a/mfer/deserialize.go b/mfer/deserialize.go index 4bc1fcb..4bdcdbb 100644 --- a/mfer/deserialize.go +++ b/mfer/deserialize.go @@ -6,12 +6,13 @@ import ( "errors" "io" + "github.com/spf13/afero" "google.golang.org/protobuf/proto" "sneak.berlin/go/mfer/internal/bork" "sneak.berlin/go/mfer/internal/log" ) -func (m *manifest) validateProtoOuter() error { +func (m *manifest) deserializeInner() error { if m.pbOuter.Version != MFFileOuter_VERSION_ONE { return errors.New("unknown version") } @@ -25,10 +26,9 @@ func (m *manifest) validateProtoOuter() error { if err != nil { return err } - - dat, err := io.ReadAll(gzr) defer gzr.Close() + dat, err := io.ReadAll(gzr) if err != nil { return err } @@ -38,9 +38,14 @@ func (m *manifest) validateProtoOuter() error { log.Debugf("truncated data, got %d expected %d", isize, m.pbOuter.Size) return bork.ErrFileTruncated } - log.Debugf("inner data size is %d", isize) - log.Dump(dat) - log.Dump(m.pbOuter.Sha256) + + // Deserialize inner message + m.pbInner = new(MFFile) + if err := proto.Unmarshal(dat, m.pbInner); err != nil { + return err + } + + log.Debugf("loaded manifest with %d files", len(m.pbInner.Files)) return nil } @@ -54,7 +59,8 @@ func validateMagic(dat []byte) bool { return bytes.Equal(got, expected) } -func NewFromProto(input io.Reader) (*manifest, error) { +// NewManifestFromReader reads a manifest from an io.Reader. 
+func NewManifestFromReader(input io.Reader) (*manifest, error) { m := New() dat, err := io.ReadAll(input) if err != nil { @@ -69,21 +75,35 @@ func NewFromProto(input io.Reader) (*manifest, error) { bb := bytes.NewBuffer(dat[ml:]) dat = bb.Bytes() - log.Dump(dat) - - // deserialize: + // deserialize outer: m.pbOuter = new(MFFileOuter) - err = proto.Unmarshal(dat, m.pbOuter) - - if err != nil { + if err := proto.Unmarshal(dat, m.pbOuter); err != nil { return nil, err } - ve := m.validateProtoOuter() - if ve != nil { - return nil, ve + // deserialize inner: + if err := m.deserializeInner(); err != nil { + return nil, err } - // FIXME TODO deserialize inner return m, nil } + +// NewManifestFromFile reads a manifest from a file path using the given filesystem. +// If fs is nil, the real filesystem (OsFs) is used. +func NewManifestFromFile(fs afero.Fs, path string) (*manifest, error) { + if fs == nil { + fs = afero.NewOsFs() + } + f, err := fs.Open(path) + if err != nil { + return nil, err + } + defer f.Close() + return NewManifestFromReader(f) +} + +// NewFromProto is deprecated, use NewManifestFromReader instead. +func NewFromProto(input io.Reader) (*manifest, error) { + return NewManifestFromReader(input) +} diff --git a/mfer/manifest.go b/mfer/manifest.go index 691715c..0e98e48 100644 --- a/mfer/manifest.go +++ b/mfer/manifest.go @@ -106,13 +106,31 @@ func NewFromFS(options *ManifestScanOptions, fs afero.Fs) (*manifest, error) { } func (m *manifest) GetFileCount() int64 { + if m.pbInner != nil { + return int64(len(m.pbInner.Files)) + } return int64(len(m.files)) } func (m *manifest) GetTotalFileSize() int64 { + if m.pbInner != nil { + var total int64 + for _, f := range m.pbInner.Files { + total += f.Size + } + return total + } return m.totalFileSize } +// Files returns all file entries from a loaded manifest. 
+func (m *manifest) Files() []*MFFilePath { + if m.pbInner == nil { + return nil + } + return m.pbInner.Files +} + func pathIsHidden(p string) bool { tp := path.Clean(p) if strings.HasPrefix(tp, ".") {