5 Commits

Author SHA1 Message Date
clawbot
03b4b8e5c4 feat: split Dockerfile into dedicated lint stage
Add pre-built golangci/golangci-lint:v1.64.8 image as a separate lint
stage for fast lint feedback without rebuilding the entire builder.

- Lint stage: installs protobuf-compiler, generates code, runs make lint
- Build stage: pins sneak/builder by sha256, removes inline golangci-lint
- COPY --from=lint forces BuildKit to execute the lint stage
- All images pinned by sha256 digest
- Final stage (FROM scratch) unchanged
2026-03-02 00:03:04 -08:00
1f7ee256ec fix: update go directive from 1.17 to 1.22 (fixes #33) (#34)
Reviewed-on: #34
2026-02-20 11:35:44 +01:00
clawbot
28c6fbd220 fix: update go directive from 1.17 to 1.22 (fixes #33)
go.mod specified go 1.17 but protobuf generated code (mf.pb.go) uses
go1.18+ features (any) and go1.20+ features (unsafe.StringData).
Updated to go 1.22 and ran go mod tidy.
2026-02-20 02:25:21 -08:00
5aae442156 add links to metalink format (#7)
Reviewed-on: #7
2026-02-09 02:15:58 +01:00
2717685619 update readme to conform with my new readme howto standards (#8)
All checks were successful
continuous-integration/drone/push Build is passing
Reviewed-on: #8

manually approving because CI is offline rn for some reason
2024-12-09 02:50:57 +00:00
23 changed files with 292 additions and 1765 deletions

View File

@@ -1,9 +1,27 @@
################################################################################
#2345678911234567892123456789312345678941234567895123456789612345678971234567898
################################################################################
FROM sneak/builder:2022-12-08 AS builder
# Lint stage — fast feedback on formatting and lint issues
# golangci/golangci-lint:v1.64.8
FROM golangci/golangci-lint@sha256:2987913e27f4eca9c8a39129d2c7bc1e74fbcf77f181e01cea607be437aa5cb8 AS lint
WORKDIR /src
RUN apt-get update && apt-get install -y --no-install-recommends protobuf-compiler && rm -rf /var/lib/apt/lists/*
COPY go.mod go.sum ./
RUN go mod download
RUN go install google.golang.org/protobuf/cmd/protoc-gen-go@v1.28.1
COPY . .
RUN cd mfer && go generate .
RUN make lint
################################################################################
#2345678911234567892123456789312345678941234567895123456789612345678971234567898
################################################################################
# Build stage
# sneak/builder:2022-12-08
FROM sneak/builder@sha256:d61175320b02b04eb2cada3005833494ce8a4032647e4202dcb3551cf0b56f5a AS builder
ENV DEBIAN_FRONTEND noninteractive
WORKDIR /build
# Force BuildKit to run the lint stage by creating a stage dependency
COPY --from=lint /src/go.sum /dev/null
COPY ./Makefile ./.golangci.yml ./go.mod ./go.sum /build/
COPY ./vendor.tzst /build/vendor.tzst
COPY ./modcache.tzst /build/modcache.tzst
@@ -19,7 +37,6 @@ RUN mkdir -p "$(go env GOMODCACHE)" && cd "$(go env GOMODCACHE)" && \
rm /build/modcache.tzst && cd /build
RUN \
cd mfer && go generate . && cd .. && \
GOPACKAGESDEBUG=true golangci-lint run ./... && \
mkdir vendor && cd vendor && \
zstdmt -d --stdout /build/vendor.tzst | tar xf - && rm /build/vendor.tzst && \
cd .. && \

View File

@@ -33,7 +33,8 @@ fixme:
@grep -nir fixme . | grep -v Makefile
devprereqs:
which golangci-lint || go install -v github.com/golangci/golangci-lint/cmd/golangci-lint@latest
which gofumpt || go install -v mvdan.cc/gofumpt@latest
which golangci-lint || go install -v github.com/golangci/golangci-lint/cmd/golangci-lint@v1.50.1
mfer/mf.pb.go: mfer/mf.proto
cd mfer && go generate .

289
README.md
View File

@@ -1,237 +1,48 @@
# mfer
Manifest file generator and checker.
[mfer](https://git.eeqj.de/sneak/mfer) is a reference implementation library
and thin wrapper command-line utility written in [Go](https://golang.org)
and first published in 2022 under the [WTFPL](https://wtfpl.net) (public
domain) license. It specifies and generates `.mf` manifest files over a
directory tree of files to encapsulate metadata about them (such as
cryptographic checksums or signatures over same) to aid in archiving,
downloading, and streaming, or mirroring. The manifest files' data is
serialized with Google's [protobuf serialization
format](https://developers.google.com/protocol-buffers). The structure of
these files can be found [in the format
specification](https://git.eeqj.de/sneak/mfer/src/branch/main/mfer/mf.proto)
which is included in the [project
repository](https://git.eeqj.de/sneak/mfer).
# Phases
The current version is pre-1.0 and while the repo was published in 2022,
there has not yet been any versioned release. [SemVer](https://semver.org)
will be used for releases.
Manifest generation happens in two distinct phases:
## Phase 1: Enumeration
Walking directories and calling `stat()` on files to collect metadata (path, size, mtime, ctime). This builds the list of files to be scanned. Relatively fast as it only reads filesystem metadata, not file contents.
**Progress:** `EnumerateStatus` with `FilesFound` and `BytesFound`
## Phase 2: Scan (ToManifest)
Reading file contents and computing cryptographic hashes for manifest generation. This is the expensive phase that reads all file data from disk.
**Progress:** `ScanStatus` with `TotalFiles`, `ScannedFiles`, `TotalBytes`, `ScannedBytes`, `BytesPerSec`
# Code Conventions
- **Logging:** Never use `fmt.Printf` or write to stdout/stderr directly in normal code. Use the `internal/log` package for all output (`log.Info`, `log.Infof`, `log.Debug`, `log.Debugf`, `log.Progressf`, `log.ProgressDone`).
- **Filesystem abstraction:** Use `github.com/spf13/afero` for filesystem operations to enable testing and flexibility.
- **CLI framework:** Use `github.com/urfave/cli/v2` for command-line interface.
- **Serialization:** Use Protocol Buffers for manifest file format.
- **Internal packages:** Non-exported implementation details go in `internal/` subdirectories.
- **Concurrency:** Use `sync.RWMutex` for protecting shared state; prefer channels for progress reporting.
- **Progress channels:** Use buffered channels (size 1) with non-blocking sends to avoid blocking the main operation if the consumer is slow.
- **Context support:** Long-running operations should accept `context.Context` for cancellation.
- **NO_COLOR:** Respect the `NO_COLOR` environment variable for disabling colored output.
- **Options pattern:** Use `NewWithOptions(opts *Options)` constructor pattern for configurable types.
# Codebase Structure
## cmd/mfer/
### main.go
- **Variables**
- `Appname string` - Application name
- `Version string` - Version string (set at build time)
- `Gitrev string` - Git revision (set at build time)
## internal/cli/
### entry.go
- **Variables**
- `NO_COLOR bool` - Disables color output when NO_COLOR env var is set
- **Functions**
- `Run(Appname, Version, Gitrev string) int` - Main entry point for the CLI
### mfer.go
- **Types**
- `CLIApp struct` - Main CLI application container
- **Methods**
- `(*CLIApp) VersionString() string` - Returns formatted version string
## internal/log/
### log.go
- **Functions**
- `Init()` - Initializes the logger
- `Info(arg string)` - Logs at info level
- `Infof(format string, args ...interface{})` - Logs at info level with formatting
- `Debug(arg string)` - Logs at debug level with caller info
- `Debugf(format string, args ...interface{})` - Logs at debug level with formatting and caller info
- `Dump(args ...interface{})` - Logs spew dump at debug level
- `Progressf(format string, args ...interface{})` - Prints progress message (overwrites current line)
- `ProgressDone()` - Completes progress line with newline
- `EnableDebugLogging()` - Sets log level to debug
- `SetLevel(arg log.Level)` - Sets log level
- `SetLevelFromVerbosity(l int)` - Sets log level from verbosity count
- `GetLevel() log.Level` - Returns current log level
- `GetLogger() *log.Logger` - Returns underlying logger
- `WithError(e error) *log.Entry` - Returns log entry with error attached
- `DisableStyling()` - Disables colors and styling (for NO_COLOR)
## internal/scanner/
### scanner.go
- **Types**
- `Options struct` - Options for scanner behavior
- `IgnoreDotfiles bool`
- `FollowSymLinks bool`
- `EnumerateStatus struct` - Progress information for enumeration phase
- `FilesFound int64`
- `BytesFound int64`
- `ScanStatus struct` - Progress information for scan phase
- `TotalFiles int64`
- `ScannedFiles int64`
- `TotalBytes int64`
- `ScannedBytes int64`
- `BytesPerSec float64`
- `FileEntry struct` - Represents an enumerated file
- `Path string` - Relative path (used in manifest)
- `AbsPath string` - Absolute path (used for reading file content)
- `Size int64`
- `Mtime time.Time`
- `Ctime time.Time`
- `Scanner struct` - Accumulates files and generates manifests
- **Functions**
- `New() *Scanner` - Creates a new Scanner with default options
- `NewWithOptions(opts *Options) *Scanner` - Creates a new Scanner with given options
- **Methods (Enumeration Phase)**
- `(*Scanner) EnumerateFile(path string) error` - Enumerates a single file, calling stat() for metadata
- `(*Scanner) EnumeratePath(inputPath string, progress chan<- EnumerateStatus) error` - Walks a directory and enumerates all files
- `(*Scanner) EnumeratePaths(progress chan<- EnumerateStatus, inputPaths ...string) error` - Walks multiple directories
- `(*Scanner) EnumerateFS(afs afero.Fs, basePath string, progress chan<- EnumerateStatus) error` - Walks an afero filesystem
- **Methods (Accessors)**
- `(*Scanner) Files() []*FileEntry` - Returns copy of all enumerated files
- `(*Scanner) FileCount() int64` - Returns number of files
- `(*Scanner) TotalBytes() int64` - Returns total size of all files
- **Methods (Scan Phase)**
- `(*Scanner) ToManifest(ctx context.Context, w io.Writer, progress chan<- ScanStatus) error` - Reads file contents, computes hashes, generates manifest
## internal/checker/
### checker.go
- **Types**
- `Result struct` - Outcome of checking a single file
- `Path string` - File path from manifest
- `Status Status` - Verification status
- `Message string` - Error or status message
- `Status int` - Verification status enumeration
- `StatusOK` - File matches manifest
- `StatusMissing` - File not found
- `StatusSizeMismatch` - File size differs from manifest
- `StatusHashMismatch` - File hash differs from manifest
- `StatusError` - Error occurred during verification
- `CheckStatus struct` - Progress information for check operation
- `TotalFiles int64`
- `CheckedFiles int64`
- `TotalBytes int64`
- `CheckedBytes int64`
- `BytesPerSec float64`
- `Failures int64`
- `Checker struct` - Verifies files against a manifest
- **Functions**
- `NewChecker(manifestPath string, basePath string) (*Checker, error)` - Creates a new Checker for the given manifest and base path
- **Methods**
- `(s Status) String() string` - Returns string representation of status
- `(*Checker) FileCount() int64` - Returns number of files in the manifest
- `(*Checker) TotalBytes() int64` - Returns total size of all files in manifest
- `(*Checker) Check(ctx context.Context, results chan<- Result, progress chan<- CheckStatus) error` - Verifies all files against the manifest
## mfer/
### manifest.go
- **Types**
- `ManifestScanOptions struct` - Options for scanning directories
- `IgnoreDotfiles bool`
- `FollowSymLinks bool`
- **Functions**
- `New() *manifest` - Creates a new empty manifest
- `NewFromPaths(options *ManifestScanOptions, inputPaths ...string) (*manifest, error)` - Creates manifest from filesystem paths
- `NewFromFS(options *ManifestScanOptions, fs afero.Fs) (*manifest, error)` - Creates manifest from afero filesystem
- **Methods**
- `(*manifest) HasError() bool` - Returns true if manifest has errors
- `(*manifest) AddError(e error) *manifest` - Adds an error to the manifest
- `(*manifest) WithContext(c context.Context) *manifest` - Sets context for cancellation
- `(*manifest) GetFileCount() int64` - Returns number of files in manifest
- `(*manifest) GetTotalFileSize() int64` - Returns total size of all files
- `(*manifest) Files() []*MFFilePath` - Returns all file entries from a loaded manifest
- `(*manifest) Scan() error` - Scans source filesystems and populates file list
### output.go
- **Methods**
- `(*manifest) WriteToFile(path string) error` - Writes manifest to file path
- `(*manifest) WriteTo(output io.Writer) error` - Writes manifest to io.Writer
### builder.go
- **Types**
- `FileProgress func(bytesRead int64)` - Callback for file processing progress
- `ManifestBuilder struct` - Constructs manifests by adding files one at a time
- **Functions**
- `NewBuilder() *ManifestBuilder` - Creates a new ManifestBuilder
- **Methods**
- `(*ManifestBuilder) AddFile(path string, size int64, mtime time.Time, reader io.Reader, progress FileProgress) (int64, error)` - Reads file, computes hash, adds to manifest
- `(*ManifestBuilder) FileCount() int` - Returns number of files added
- `(*ManifestBuilder) Build(w io.Writer) error` - Finalizes and writes manifest
### serialize.go
- **Constants**
- `MAGIC string` - Magic bytes prefix for manifest files ("ZNAVSRFG")
### deserialize.go
- **Functions**
- `NewFromProto(input io.Reader) (*manifest, error)` - Deserializes manifest from protobuf
- `NewManifestFromReader(input io.Reader) (*manifest, error)` - Reads and parses manifest from io.Reader
- `NewManifestFromFile(path string) (*manifest, error)` - Reads and parses manifest from file path
### mf.pb.go (generated from mf.proto)
- **Enum Types**
- `MFFileOuter_Version` - Outer file format version
- `MFFileOuter_VERSION_NONE`
- `MFFileOuter_VERSION_ONE`
- `MFFileOuter_CompressionType` - Compression type for inner message
- `MFFileOuter_COMPRESSION_NONE`
- `MFFileOuter_COMPRESSION_GZIP`
- `MFFile_Version` - Inner file format version
- `MFFile_VERSION_NONE`
- `MFFile_VERSION_ONE`
- **Message Types**
- `Timestamp struct` - Timestamp with seconds and nanoseconds
- `GetSeconds() int64`
- `GetNanos() int32`
- `MFFileOuter struct` - Outer wrapper containing compressed/signed inner message
- `GetVersion() MFFileOuter_Version`
- `GetCompressionType() MFFileOuter_CompressionType`
- `GetSize() int64`
- `GetSha256() []byte`
- `GetInnerMessage() []byte`
- `GetSignature() []byte`
- `GetSigner() []byte`
- `GetSigningPubKey() []byte`
- `MFFilePath struct` - Individual file entry in manifest
- `GetPath() string`
- `GetSize() int64`
- `GetHashes() []*MFFileChecksum`
- `GetMimeType() string`
- `GetMtime() *Timestamp`
- `GetCtime() *Timestamp`
- `GetAtime() *Timestamp`
- `MFFileChecksum struct` - File checksum using multihash
- `GetMultiHash() []byte`
- `MFFile struct` - Inner manifest containing file list
- `GetVersion() MFFile_Version`
- `GetFiles() []*MFFilePath`
- `GetCreatedAt() *Timestamp`
This project was started by [@sneak](https://sneak.berlin) to scratch an
itch in 2022 and is currently a one-person effort, though the goal is for
this to emerge as a de-facto standard and be incorporated into other
software. A compatible javascript library is planned.
# Build Status
[![Build Status](https://drone.datavi.be/api/badges/sneak/mfer/status.svg)](https://drone.datavi.be/sneak/mfer)
# Participation
The community is as yet nonexistent so there are no defined policies or
norms yet. Primary development happens on a privately-run Gitea instance at
[https://git.eeqj.de/sneak/mfer](https://git.eeqj.de/sneak/mfer) and issues
are [tracked there](https://git.eeqj.de/sneak/mfer/issues).
Changes must always be formatted with a standard `go fmt`, syntactically
valid, and must pass the linting defined in the repository (presently only
the `golangci-lint` defaults), which can be run with a `make lint`. The
`main` branch is protected and all changes must be made via [pull
requests](https://git.eeqj.de/sneak/mfer/pulls) and pass CI to be merged.
Any changes submitted to this project must also be
[WTFPL-licensed](https://wtfpl.net) to be considered.
# Problem Statement
Given a plain URL, there is no standard way to safely and programmatically
@@ -309,10 +120,6 @@ The manifest file would do several important things:
- metadata size should not be used as an excuse to sacrifice utility (such
as providing checksums over each chunk of a large file)
# Limitations
- **Manifest size:** Manifests must fit entirely in system memory during reading and writing.
# Open Questions
- Should the manifest file include checksums of individual file chunks, or just for the whole assembled file?
@@ -400,6 +207,24 @@ regardless of filesystem format.
Please email [`sneak@sneak.berlin`](mailto:sneak@sneak.berlin) with your
desired username for an account on this Gitea instance.
I am currently interested in hiring a contractor skilled with the Go
standard library interfaces to specify this tool in full and develop a
prototype implementation.
# See Also
## Prior Art: Metalink
* [Metalink - Mozilla Wiki](https://wiki.mozilla.org/Metalink)
* [Metalink - Wikipedia](https://en.wikipedia.org/wiki/Metalink)
* [RFC 5854 - The Metalink Download Description Format](https://datatracker.ietf.org/doc/html/rfc5854)
* [RFC 6249 - Metalink/HTTP: Mirrors and Hashes](https://www.rfc-editor.org/rfc/rfc6249.html)
## Links
* Repo: [https://git.eeqj.de/sneak/mfer](https://git.eeqj.de/sneak/mfer)
* Issues: [https://git.eeqj.de/sneak/mfer/issues](https://git.eeqj.de/sneak/mfer/issues)
# Authors
* [@sneak &lt;sneak@sneak.berlin&gt;](mailto:sneak@sneak.berlin)
# License
* [WTFPL](https://wtfpl.net)

122
TODO.md Normal file
View File

@@ -0,0 +1,122 @@
# TODO: mfer 1.0
## Design Questions
*sneak: please answer inline below each question. These are preserved for posterity.*
### Format Design
**1. Should `MFFileChecksum` be simplified?**
Currently it's a separate message wrapping a single `bytes multiHash` field. Since multihash already self-describes the algorithm, `repeated bytes hashes` directly on `MFFilePath` would be simpler and reduce per-file protobuf overhead. Is the extra message layer intentional (e.g. planning to add per-hash metadata like `verified_at`)?
> *answer:*
**2. Should file permissions/mode be stored?**
The format stores mtime/ctime but not Unix file permissions. For archival use (ExFAT, filesystem-independent checksums) this may not matter, but for software distribution or filesystem restoration it's a gap. Should we reserve a field now (e.g. `optional uint32 mode = 305`) even if we don't populate it yet?
> *answer:*
**3. Should `atime` be removed from the schema?**
Access time is volatile, non-deterministic, and often disabled (`noatime`). Including it means two manifests of the same directory at different times will differ, which conflicts with the determinism goal. Remove it, or document it as "never set by default"?
> *answer:*
**4. What are the path normalization rules?**
The proto has `string path` with no specification about: always forward-slash? Must be relative? No `..` components allowed? UTF-8 NFC vs NFD normalization (macOS vs Linux)? Max path length? This is a security issue (path traversal) and a cross-platform compatibility issue. What rules should the spec mandate?
> *answer:*
**5. Should we add a version byte after the magic?**
Currently `ZNAVSRFG` is followed immediately by protobuf. Adding a version byte (`ZNAVSRFG\x01`) would allow future framing changes without requiring protobuf parsing to detect the version. `MFFileOuter.Version` serves this purpose but requires successful deserialization to read. Worth the extra byte?
> *answer:*
**6. Should we add a length-prefix after the magic?**
Protobuf is not self-delimiting. If we ever want to concatenate manifests or append data after the protobuf, the current framing is insufficient. Add a varint or fixed-width length-prefix?
> *answer:*
### Signature Design
**7. What does the outer SHA-256 hash cover — compressed or uncompressed data?**
The review notes it currently hashes compressed data (good for verifying before decompression), but this should be explicitly documented. Which is the intended behavior?
> *answer:*
**8. Should `signatureString()` sign raw bytes instead of a hex-encoded string?**
Currently the canonical string is `MAGIC-UUID-MULTIHASH` with hex encoding, which adds a transformation layer. Signing the raw `sha256` bytes (or compressed `innerMessage` directly) would be simpler. Keep the string format or switch to raw bytes?
> *answer:*
**9. Should we support detached signature files (`.mf.sig`)?**
Embedded signatures are better for single-file distribution. Detached `.mf.sig` files follow the familiar `SHASUMS`/`SHASUMS.asc` pattern and are simpler for HTTP serving. Support both modes?
> *answer:*
**10. GPG vs pure-Go crypto for signatures?**
Shelling out to `gpg` is fragile (may not be installed, version-dependent output). `github.com/ProtonMail/go-crypto` provides pure-Go OpenPGP, or we could go Ed25519/signify (simpler, no key management). Which direction?
> *answer:*
### Implementation Design
**11. Should manifests be deterministic by default?**
This means: sort file entries by path, omit `createdAt` timestamp (or make it opt-in), no `atime`. Should determinism be the default, with a `--include-timestamps` flag to opt in?
> *answer:*
**12. Should we consolidate or keep both scanner/checker implementations?**
There are two parallel implementations: `mfer/scanner.go` + `mfer/checker.go` (typed with `FileSize`, `RelFilePath`) and `internal/scanner/` + `internal/checker/` (raw `int64`, `string`). The `mfer/` versions are superior. Delete the `internal/` versions?
> *answer:*
**13. Should the `manifest` type be exported?**
Currently unexported with exported constructors (`New`, `NewFromPaths`, etc.). Consumers can't declare `var m *mfer.manifest`. Export the type, or define an interface?
> *answer:*
**14. What should the Go module path be for 1.0?**
Currently mixed between `sneak.berlin/go/mfer` and `git.eeqj.de/sneak/mfer`. Which is canonical?
> *answer:*
---
## Implementation Plan
### Phase 1: Foundation (format correctness)
- [ ] Delete `internal/scanner/` and `internal/checker/` — consolidate on `mfer/` package versions; update CLI code
- [ ] Add deterministic file ordering — sort entries by path (lexicographic, byte-order) in `Builder.Build()`; add test asserting byte-identical output from two runs
- [ ] Add decompression size limit — `io.LimitReader` in `deserializeInner()` with `m.pbOuter.Size` as bound
- [ ] Fix `errors.Is` dead code in checker — replace with `os.IsNotExist(err)` or `errors.Is(err, fs.ErrNotExist)`
- [ ] Fix `AddFile` to verify size — check `totalRead == size` after reading, return error on mismatch
- [ ] Specify path invariants — add proto comments (UTF-8, forward-slash, relative, no `..`, no leading `/`); validate in `Builder.AddFile` and `Builder.AddFileWithHash`
### Phase 2: CLI polish
- [ ] Fix flag naming — all CLI flags use kebab-case as primary (`--include-dotfiles`, `--follow-symlinks`)
- [ ] Fix URL construction in fetch — use `BaseURL.JoinPath()` or `url.JoinPath()` instead of string concatenation
- [ ] Add progress rate-limiting to Checker — throttle to once per second, matching Scanner
- [ ] Add `--deterministic` flag (or make it default) — omit `createdAt`, sort files
### Phase 3: Robustness
- [ ] Replace GPG subprocess with pure-Go crypto — `github.com/ProtonMail/go-crypto` or Ed25519/signify
- [ ] Add timeout to any remaining subprocess calls
- [ ] Add fuzzing tests for `NewManifestFromReader`
- [ ] Add retry logic to fetch — exponential backoff for transient HTTP errors
### Phase 4: Format finalization
- [ ] Remove or deprecate `atime` from proto (pending design question answer)
- [ ] Reserve `optional uint32 mode = 305` in `MFFilePath` for future file permissions
- [ ] Add version byte after magic — `ZNAVSRFG\x01` for format version 1
- [ ] Write format specification document — separate from README: magic, outer structure, compression, inner structure, path invariants, signature scheme, canonical serialization
### Phase 5: Release prep
- [ ] Finalize Go module path
- [ ] Audit all error messages for consistency and helpfulness
- [ ] Add `--version` output matching SemVer
- [ ] Tag v1.0.0

View File

@@ -3,7 +3,7 @@ package main
import (
"os"
"sneak.berlin/go/mfer/internal/cli"
"git.eeqj.de/sneak/mfer/internal/cli"
)
var (

View File

@@ -1,19 +0,0 @@
#!/bin/bash
set -euo pipefail
# usage.sh - Generate and check a manifest from the repo
# Run from repo root: ./contrib/usage.sh
TMPDIR=$(mktemp -d)
MANIFEST="$TMPDIR/index.mf"
cleanup() {
rm -rf "$TMPDIR"
}
trap cleanup EXIT
echo "Building mfer..."
go build -o "$TMPDIR/mfer" ./cmd/mfer
"$TMPDIR/mfer" generate --ignore-dotfiles -o "$MANIFEST" .
"$TMPDIR/mfer" check --base . "$MANIFEST"

16
go.mod
View File

@@ -1,11 +1,10 @@
module sneak.berlin/go/mfer
module git.eeqj.de/sneak/mfer
go 1.17
go 1.22
require (
github.com/apex/log v1.9.0
github.com/davecgh/go-spew v1.1.1
github.com/multiformats/go-multihash v0.2.3
github.com/pterm/pterm v0.12.35
github.com/spf13/afero v1.8.0
github.com/stretchr/testify v1.8.1
@@ -18,24 +17,17 @@ require (
github.com/cpuguy83/go-md2man/v2 v2.0.2 // indirect
github.com/fatih/color v1.7.0 // indirect
github.com/gookit/color v1.4.2 // indirect
github.com/klauspost/cpuid/v2 v2.0.9 // indirect
github.com/mattn/go-colorable v0.1.2 // indirect
github.com/mattn/go-isatty v0.0.8 // indirect
github.com/mattn/go-runewidth v0.0.13 // indirect
github.com/minio/sha256-simd v1.0.0 // indirect
github.com/mr-tron/base58 v1.2.0 // indirect
github.com/multiformats/go-varint v0.0.6 // indirect
github.com/pkg/errors v0.9.1 // indirect
github.com/pmezard/go-difflib v1.0.0 // indirect
github.com/rivo/uniseg v0.2.0 // indirect
github.com/russross/blackfriday/v2 v2.1.0 // indirect
github.com/spaolacci/murmur3 v1.1.0 // indirect
github.com/xo/terminfo v0.0.0-20210125001918-ca9a967f8778 // indirect
github.com/xrash/smetrics v0.0.0-20201216005158-039620a65673 // indirect
golang.org/x/crypto v0.0.0-20220525230936-793ad666bf5e // indirect
golang.org/x/sys v0.1.0 // indirect
golang.org/x/sys v0.0.0-20211013075003-97ac67df715c // indirect
golang.org/x/term v0.0.0-20210927222741-03fcf44c2211 // indirect
golang.org/x/text v0.3.6 // indirect
golang.org/x/text v0.3.4 // indirect
gopkg.in/yaml.v3 v3.0.1 // indirect
lukechampine.com/blake3 v1.1.6 // indirect
)

24
go.sum
View File

@@ -37,7 +37,6 @@ cloud.google.com/go/storage v1.10.0/go.mod h1:FLPqc6j+Ki4BU591ie1oL6qBQGu2Bl/tZ9
cloud.google.com/go/storage v1.14.0/go.mod h1:GrKmX003DSIwi9o29oFT7YDnHYwZoctc3fOKtUw0Xmo=
dmitri.shuralyov.com/gpu/mtl v0.0.0-20190408044501-666a987793e9/go.mod h1:H6x//7gZCb22OMCxBHrMx7a5I7Hp++hsVxbQ4BYO7hU=
github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU=
github.com/BurntSushi/toml v1.2.1/go.mod h1:CxXYINrC8qIiEnFrOxCa7Jy5BFHlXnUU2pbicEuybxQ=
github.com/BurntSushi/xgb v0.0.0-20160522181843-27f122750802/go.mod h1:IVnqGOEym/WlBOVXweHU+Q+/VP0lqqI8lqeDx9IjBqo=
github.com/MarvinJWendt/testza v0.1.0/go.mod h1:7AxNvlfeHP7Z/hDQ5JtE3OKYT3XFUeLCDE2DQninSqs=
github.com/MarvinJWendt/testza v0.2.1/go.mod h1:God7bhG8n6uQxwdScay+gjm9/LnO4D3kkcZX4hv9Rp8=
@@ -150,7 +149,6 @@ github.com/jpillora/backoff v0.0.0-20180909062703-3050d21c67d7/go.mod h1:2iMrUgb
github.com/jstemmer/go-junit-report v0.0.0-20190106144839-af01ea7f8024/go.mod h1:6v2b51hI/fHJwM22ozAgKL4VKDeJcHhJFhtBdhmNjmU=
github.com/jstemmer/go-junit-report v0.9.1/go.mod h1:Brl9GWCQeLvo8nXZwPNNblvFj/XSXhF0NWZEnDohbsk=
github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck=
github.com/klauspost/cpuid/v2 v2.0.4/go.mod h1:FInQzS24/EEf25PyTYn52gqo7WaD8xa0213Md/qVLRg=
github.com/klauspost/cpuid/v2 v2.0.9 h1:lgaqFMSdTdQYdZ04uHyN2d/eKdOMyi2YLSvlQIBFYa4=
github.com/klauspost/cpuid/v2 v2.0.9/go.mod h1:FInQzS24/EEf25PyTYn52gqo7WaD8xa0213Md/qVLRg=
github.com/kr/fs v0.1.0/go.mod h1:FFnZGqtBN9Gxj7eW1uZ42v5BccTP0vu6NEaFoC2HwRg=
@@ -170,14 +168,6 @@ github.com/mattn/go-isatty v0.0.8/go.mod h1:Iq45c/XA43vh69/j3iqttzPXn0bhXyGjM0Hd
github.com/mattn/go-runewidth v0.0.13 h1:lTGmDsbAYt5DmK6OnoV7EuIF1wEIFAcxld6ypU4OSgU=
github.com/mattn/go-runewidth v0.0.13/go.mod h1:Jdepj2loyihRzMpdS35Xk/zdY8IAYHsh153qUoGf23w=
github.com/mgutz/ansi v0.0.0-20170206155736-9520e82c474b/go.mod h1:01TrycV0kFyexm33Z7vhZRXopbI8J3TDReVlkTgMUxE=
github.com/minio/sha256-simd v1.0.0 h1:v1ta+49hkWZyvaKwrQB8elexRqm6Y0aMLjCNsrYxo6g=
github.com/minio/sha256-simd v1.0.0/go.mod h1:OuYzVNI5vcoYIAmbIvHPl3N3jUzVedXbKy5RFepssQM=
github.com/mr-tron/base58 v1.2.0 h1:T/HDJBh4ZCPbU39/+c3rRvE0uKBQlU27+QI8LJ4t64o=
github.com/mr-tron/base58 v1.2.0/go.mod h1:BinMc/sQntlIE1frQmRFPUoPA1Zkr8VRgBdjWI2mNwc=
github.com/multiformats/go-multihash v0.2.3 h1:7Lyc8XfX/IY2jWb/gI7JP+o7JEq9hOa7BFvVU9RSh+U=
github.com/multiformats/go-multihash v0.2.3/go.mod h1:dXgKXCXjBzdscBLk9JkjINiEsCKRVch90MdaGiKsvSM=
github.com/multiformats/go-varint v0.0.6 h1:gk85QWKxh3TazbLxED/NlDVv8+q+ReFJk7Y2W/KhfNY=
github.com/multiformats/go-varint v0.0.6/go.mod h1:3Ls8CIEsrijN6+B7PbrXRPxHRPuXSrVKRY101jdMZYE=
github.com/onsi/ginkgo v1.6.0/go.mod h1:lLunBs/Ym6LB5Z9jYTR76FiuTmxDTDusOGeTQH+WWjE=
github.com/onsi/gomega v1.5.0/go.mod h1:ex+gbHU/CVuBBDIJjb2X0qEXbFg53c61hWP/1CpauHY=
github.com/pkg/errors v0.8.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
@@ -204,8 +194,6 @@ github.com/sergi/go-diff v1.0.0/go.mod h1:0CfEIISq7TuYL3j771MWULgwwjU+GofnZX9QAm
github.com/smartystreets/assertions v1.0.0/go.mod h1:kHHU4qYBaI3q23Pp3VPrmWhuIUrLW/7eUrw0BU5VaoM=
github.com/smartystreets/go-aws-auth v0.0.0-20180515143844-0c1422d1fdb9/go.mod h1:SnhjPscd9TpLiy1LpzGSKh3bXCfxxXuqd9xmQJy3slM=
github.com/smartystreets/gunit v1.0.0/go.mod h1:qwPWnhz6pn0NnRBP++URONOVyNkPyr4SauJk4cUOwJs=
github.com/spaolacci/murmur3 v1.1.0 h1:7c1g84S4BPRrfL5Xrdp6fOJ206sU9y293DDHaoy0bLI=
github.com/spaolacci/murmur3 v1.1.0/go.mod h1:JwIasOWyU6f++ZhiEuf87xNszmSA2myDM2Kzu9HwQUA=
github.com/spf13/afero v1.8.0 h1:5MmtuhAgYeU6qpa7w7bP0dv6MBYuup0vekhSpSkoq60=
github.com/spf13/afero v1.8.0/go.mod h1:CtAatgMJh6bJEIs48Ay/FOnkljP3WeGUG0MC1RfAqwo=
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
@@ -251,8 +239,6 @@ golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8U
golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto=
golang.org/x/crypto v0.0.0-20210421170649-83a5a9bb288b/go.mod h1:T9bdIzuCu7OtxOm1hfPfRQxPLYneinmdGuTeoZ9dtd4=
golang.org/x/crypto v0.0.0-20211108221036-ceb1ce70b4fa/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc=
golang.org/x/crypto v0.0.0-20220525230936-793ad666bf5e h1:T8NU3HyQ8ClP4SEE+KbFlg6n0NhuTsN4MyznaarGsZM=
golang.org/x/crypto v0.0.0-20220525230936-793ad666bf5e/go.mod h1:IxCIyHEi3zRg3s0A5j5BB6A9Jmi73HwBIUl50j+osU4=
golang.org/x/exp v0.0.0-20190121172915-509febef88a4/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA=
golang.org/x/exp v0.0.0-20190306152737-a1d7652674e8/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA=
golang.org/x/exp v0.0.0-20190510132918-efd6b22b2522/go.mod h1:ZjyILWgesfNpC6sMxTJOJm9Kp84zZh5NQWvqDGG3Qr8=
@@ -318,7 +304,6 @@ golang.org/x/net v0.0.0-20201031054903-ff519b6c9102/go.mod h1:sp8m0HH+o8qH0wwXwY
golang.org/x/net v0.0.0-20201209123823-ac852fbbde11/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg=
golang.org/x/net v0.0.0-20201224014010-6772e930b67b/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg=
golang.org/x/net v0.0.0-20210226172049-e18ecbb05110/go.mod h1:m0MpNAwzfU5UDzcl9v0D8zg8gWTRqZa9RBIspLL5mdg=
golang.org/x/net v0.0.0-20211112202133-69e39bad7dc2/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y=
golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U=
golang.org/x/oauth2 v0.0.0-20190226205417-e64efc72b421/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw=
golang.org/x/oauth2 v0.0.0-20190604053449-0f29369cfe45/go.mod h1:gOpvHmFTYa4IltrdGE7lF6nIHvwfUNPOp7c8zoXwtLw=
@@ -373,12 +358,10 @@ golang.org/x/sys v0.0.0-20210104204734-6f8348627aad/go.mod h1:h1NjWce9XRLGQEsW7w
golang.org/x/sys v0.0.0-20210119212857-b64e53b001e4/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20210225134936-a50acf3fe073/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20210330210617-4fbd30eecc44/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20210423082822-04245dca01da/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20210423185535-09eb48e85fd7/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.0.0-20211013075003-97ac67df715c h1:taxlMj0D/1sOAuv/CbSD+MMDof2vbyPTqz5FNYKpXt8=
golang.org/x/sys v0.0.0-20211013075003-97ac67df715c/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.1.0 h1:kunALQeHf1/185U1i0GOB/fy1IPRDDpuoOOqRReG57U=
golang.org/x/sys v0.1.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo=
golang.org/x/term v0.0.0-20210220032956-6a3ed077a48d/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo=
golang.org/x/term v0.0.0-20210615171337-6886f2dfbf5b/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8=
@@ -389,9 +372,8 @@ golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
golang.org/x/text v0.3.1-0.20180807135948-17ff2d5776d2/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
golang.org/x/text v0.3.2/go.mod h1:bEr9sfX3Q8Zfm5fL9x+3itogRgK3+ptLWKqgva+5dAk=
golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
golang.org/x/text v0.3.4 h1:0YWbFKbhXG/wIiuHDSKpS0Iy7FSA+u45VtBMfQcFTTc=
golang.org/x/text v0.3.4/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
golang.org/x/text v0.3.6 h1:aRYxNxv6iGQlyVaZmk6ZgYEDa+Jg18DxebPSrd6bg1M=
golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
golang.org/x/time v0.0.0-20181108054448-85acf8d2951c/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ=
golang.org/x/time v0.0.0-20190308202827-9d24e82272b4/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ=
golang.org/x/time v0.0.0-20191024005414-555d28b269f0/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ=
@@ -559,8 +541,6 @@ honnef.co/go/tools v0.0.0-20190523083050-ea95bdfd59fc/go.mod h1:rf3lG4BRIbNafJWh
honnef.co/go/tools v0.0.1-2019.2.3/go.mod h1:a3bituU0lyd329TUQxRnasdCoJDkEUEAqEt0JzvZhAg=
honnef.co/go/tools v0.0.1-2020.1.3/go.mod h1:X/FiERA/W4tHapMX5mGpAtMSVEeEUOyHaw9vFzvIQ3k=
honnef.co/go/tools v0.0.1-2020.1.4/go.mod h1:X/FiERA/W4tHapMX5mGpAtMSVEeEUOyHaw9vFzvIQ3k=
lukechampine.com/blake3 v1.1.6 h1:H3cROdztr7RCfoaTpGZFQsrqvweFLrqS73j7L7cmR5c=
lukechampine.com/blake3 v1.1.6/go.mod h1:tkKEOtDkNtklkXtLNEOGNq5tcV90tJiA1vAA12R78LA=
rsc.io/binaryregexp v0.2.0/go.mod h1:qTv7/COck+e2FymRvadv62gMdZztPaShugOCi3I+8D8=
rsc.io/quote/v3 v3.1.0/go.mod h1:yEA65RcK8LyAZtP9Kv3t0HmxON59tX3rD+tICJqUlj0=
rsc.io/sampler v1.3.0/go.mod h1:T1hPZKmBbMNahiBKFy5HrXp6adAjACjK9JXDnKaTXpA=

View File

@@ -1,281 +0,0 @@
package checker
import (
"bytes"
"context"
"crypto/sha256"
"errors"
"io"
"os"
"path/filepath"
"github.com/multiformats/go-multihash"
"github.com/spf13/afero"
"sneak.berlin/go/mfer/mfer"
)
// Result represents the outcome of checking a single file.
type Result struct {
Path string
Status Status
Message string
}
// Status represents the verification status of a file.
type Status int
const (
StatusOK Status = iota
StatusMissing
StatusSizeMismatch
StatusHashMismatch
StatusExtra // File exists on disk but not in manifest
StatusError
)
func (s Status) String() string {
switch s {
case StatusOK:
return "OK"
case StatusMissing:
return "MISSING"
case StatusSizeMismatch:
return "SIZE_MISMATCH"
case StatusHashMismatch:
return "HASH_MISMATCH"
case StatusExtra:
return "EXTRA"
case StatusError:
return "ERROR"
default:
return "UNKNOWN"
}
}
// CheckStatus contains progress information for the check operation.
type CheckStatus struct {
TotalFiles int64
CheckedFiles int64
TotalBytes int64
CheckedBytes int64
BytesPerSec float64
Failures int64
}
// Checker verifies files against a manifest.
type Checker struct {
basePath string
files []*mfer.MFFilePath
fs afero.Fs
// manifestPaths is a set of paths in the manifest for quick lookup
manifestPaths map[string]struct{}
}
// NewChecker creates a new Checker for the given manifest, base path, and filesystem.
// The basePath is the directory relative to which manifest paths are resolved.
// If fs is nil, the real filesystem (OsFs) is used.
func NewChecker(manifestPath string, basePath string, fs afero.Fs) (*Checker, error) {
if fs == nil {
fs = afero.NewOsFs()
}
m, err := mfer.NewManifestFromFile(fs, manifestPath)
if err != nil {
return nil, err
}
abs, err := filepath.Abs(basePath)
if err != nil {
return nil, err
}
files := m.Files()
manifestPaths := make(map[string]struct{}, len(files))
for _, f := range files {
manifestPaths[f.Path] = struct{}{}
}
return &Checker{
basePath: abs,
files: files,
fs: fs,
manifestPaths: manifestPaths,
}, nil
}
// FileCount returns the number of files in the manifest.
func (c *Checker) FileCount() int64 {
return int64(len(c.files))
}
// TotalBytes returns the total size of all files in the manifest.
func (c *Checker) TotalBytes() int64 {
var total int64
for _, f := range c.files {
total += f.Size
}
return total
}
// Check verifies all files against the manifest.
// Results are sent to the results channel as files are checked.
// Progress updates are sent to the progress channel approximately once per second.
// Both channels are closed when the method returns.
func (c *Checker) Check(ctx context.Context, results chan<- Result, progress chan<- CheckStatus) error {
if results != nil {
defer close(results)
}
if progress != nil {
defer close(progress)
}
totalFiles := int64(len(c.files))
totalBytes := c.TotalBytes()
var checkedFiles int64
var checkedBytes int64
var failures int64
for _, entry := range c.files {
select {
case <-ctx.Done():
return ctx.Err()
default:
}
result := c.checkFile(entry, &checkedBytes)
if result.Status != StatusOK {
failures++
}
checkedFiles++
if results != nil {
results <- result
}
// Send progress (simplified - every file for now)
if progress != nil {
sendCheckStatus(progress, CheckStatus{
TotalFiles: totalFiles,
CheckedFiles: checkedFiles,
TotalBytes: totalBytes,
CheckedBytes: checkedBytes,
Failures: failures,
})
}
}
return nil
}
func (c *Checker) checkFile(entry *mfer.MFFilePath, checkedBytes *int64) Result {
absPath := filepath.Join(c.basePath, entry.Path)
// Check if file exists
info, err := c.fs.Stat(absPath)
if err != nil {
if errors.Is(err, afero.ErrFileNotFound) || errors.Is(err, errors.New("file does not exist")) {
return Result{Path: entry.Path, Status: StatusMissing, Message: "file not found"}
}
// Check for "file does not exist" style errors
exists, _ := afero.Exists(c.fs, absPath)
if !exists {
return Result{Path: entry.Path, Status: StatusMissing, Message: "file not found"}
}
return Result{Path: entry.Path, Status: StatusError, Message: err.Error()}
}
// Check size
if info.Size() != entry.Size {
*checkedBytes += info.Size()
return Result{
Path: entry.Path,
Status: StatusSizeMismatch,
Message: "size mismatch",
}
}
// Open and hash file
f, err := c.fs.Open(absPath)
if err != nil {
return Result{Path: entry.Path, Status: StatusError, Message: err.Error()}
}
defer f.Close()
h := sha256.New()
n, err := io.Copy(h, f)
if err != nil {
return Result{Path: entry.Path, Status: StatusError, Message: err.Error()}
}
*checkedBytes += n
// Encode as multihash and compare
computed, err := multihash.Encode(h.Sum(nil), multihash.SHA2_256)
if err != nil {
return Result{Path: entry.Path, Status: StatusError, Message: err.Error()}
}
// Check against all hashes in manifest (at least one must match)
for _, hash := range entry.Hashes {
if bytes.Equal(computed, hash.MultiHash) {
return Result{Path: entry.Path, Status: StatusOK}
}
}
return Result{Path: entry.Path, Status: StatusHashMismatch, Message: "hash mismatch"}
}
// FindExtraFiles walks the filesystem and reports files not in the manifest.
// Results are sent to the results channel. The channel is closed when done.
func (c *Checker) FindExtraFiles(ctx context.Context, results chan<- Result) error {
if results != nil {
defer close(results)
}
return afero.Walk(c.fs, c.basePath, func(path string, info os.FileInfo, err error) error {
if err != nil {
return err
}
select {
case <-ctx.Done():
return ctx.Err()
default:
}
// Skip directories
if info.IsDir() {
return nil
}
// Get relative path
relPath, err := filepath.Rel(c.basePath, path)
if err != nil {
return err
}
// Check if path is in manifest
if _, exists := c.manifestPaths[relPath]; !exists {
if results != nil {
results <- Result{
Path: relPath,
Status: StatusExtra,
Message: "not in manifest",
}
}
}
return nil
})
}
// sendCheckStatus sends a status update without blocking.
func sendCheckStatus(ch chan<- CheckStatus, status CheckStatus) {
if ch == nil {
return
}
select {
case ch <- status:
default:
}
}

View File

@@ -1,110 +1,13 @@
package cli
import (
"fmt"
"time"
"errors"
"github.com/apex/log"
"github.com/urfave/cli/v2"
"sneak.berlin/go/mfer/internal/checker"
"sneak.berlin/go/mfer/internal/log"
)
func (mfa *CLIApp) checkManifestOperation(ctx *cli.Context) error {
log.Debug("checkManifestOperation()")
// Get manifest path from args, default to index.mf
manifestPath := "index.mf"
if ctx.Args().Len() > 0 {
manifestPath = ctx.Args().Get(0)
}
basePath := ctx.String("base")
showProgress := ctx.Bool("progress")
log.Debugf("checking manifest %s with base %s", manifestPath, basePath)
// Create checker
chk, err := checker.NewChecker(manifestPath, basePath, mfa.Fs)
if err != nil {
return fmt.Errorf("failed to load manifest: %w", err)
}
log.Debugf("manifest contains %d files, %d bytes", chk.FileCount(), chk.TotalBytes())
// Set up results channel
results := make(chan checker.Result, 1)
// Set up progress channel
var progress chan checker.CheckStatus
if showProgress {
progress = make(chan checker.CheckStatus, 1)
go func() {
for status := range progress {
log.Progressf("Checking: %d/%d files, %d failures",
status.CheckedFiles,
status.TotalFiles,
status.Failures)
}
log.ProgressDone()
}()
}
// Process results in a goroutine
var failures int64
done := make(chan struct{})
go func() {
for result := range results {
if result.Status != checker.StatusOK {
failures++
log.Infof("%s: %s (%s)", result.Status, result.Path, result.Message)
} else {
log.Debugf("%s: %s", result.Status, result.Path)
}
}
close(done)
}()
// Run check
err = chk.Check(ctx.Context, results, progress)
if err != nil {
return fmt.Errorf("check failed: %w", err)
}
// Wait for results processing to complete
<-done
// Check for extra files if requested
if ctx.Bool("no-extra-files") {
extraResults := make(chan checker.Result, 1)
extraDone := make(chan struct{})
go func() {
for result := range extraResults {
failures++
log.Infof("%s: %s (%s)", result.Status, result.Path, result.Message)
}
close(extraDone)
}()
err = chk.FindExtraFiles(ctx.Context, extraResults)
if err != nil {
return fmt.Errorf("failed to check for extra files: %w", err)
}
<-extraDone
}
if !ctx.Bool("quiet") {
elapsed := time.Since(mfa.startupTime).Seconds()
rate := float64(chk.TotalBytes()) / elapsed / 1e6
if failures == 0 {
log.Infof("checked %d files (%.1f MB) in %.1fs (%.1f MB/s): all OK", chk.FileCount(), float64(chk.TotalBytes())/1e6, elapsed, rate)
} else {
log.Infof("checked %d files (%.1f MB) in %.1fs (%.1f MB/s): %d failed", chk.FileCount(), float64(chk.TotalBytes())/1e6, elapsed, rate, failures)
}
}
if failures > 0 {
mfa.exitCode = 1
}
func (mfa *CLIApp) checkManifestOperation(c *cli.Context) error {
log.WithError(errors.New("unimplemented"))
return nil
}

View File

@@ -1,10 +1,7 @@
package cli
import (
"io"
"os"
"github.com/spf13/afero"
)
var NO_COLOR bool
@@ -16,50 +13,13 @@ func init() {
}
}
// RunOptions contains all configuration for running the CLI application.
type RunOptions struct {
Appname string
Version string
Gitrev string
Args []string
Stdin io.Reader
Stdout io.Writer
Stderr io.Writer
Fs afero.Fs
}
func Run(Appname, Version, Gitrev string) int {
m := &CLIApp{}
m.appname = Appname
m.version = Version
m.gitrev = Gitrev
m.exitCode = 0
// DefaultRunOptions returns RunOptions configured for normal CLI execution.
func DefaultRunOptions(appname, version, gitrev string) *RunOptions {
return &RunOptions{
Appname: appname,
Version: version,
Gitrev: gitrev,
Args: os.Args,
Stdin: os.Stdin,
Stdout: os.Stdout,
Stderr: os.Stderr,
Fs: afero.NewOsFs(),
}
}
// Run creates and runs the CLI application with default options.
func Run(appname, version, gitrev string) int {
return RunWithOptions(DefaultRunOptions(appname, version, gitrev))
}
// RunWithOptions creates and runs the CLI application with the given options.
func RunWithOptions(opts *RunOptions) int {
m := &CLIApp{
appname: opts.Appname,
version: opts.Version,
gitrev: opts.Gitrev,
exitCode: 0,
Stdin: opts.Stdin,
Stdout: opts.Stdout,
Stderr: opts.Stderr,
Fs: opts.Fs,
}
m.run(opts.Args)
m.run()
return m.exitCode
}

View File

@@ -1,306 +1,12 @@
package cli
import (
"bytes"
"testing"
"github.com/spf13/afero"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
urfcli "github.com/urfave/cli/v2"
)
func init() {
// Prevent urfave/cli from calling os.Exit during tests
urfcli.OsExiter = func(code int) {}
}
func TestBuild(t *testing.T) {
m := &CLIApp{}
assert.NotNil(t, m)
}
func testOpts(args []string, fs afero.Fs) *RunOptions {
return &RunOptions{
Appname: "mfer",
Version: "1.0.0",
Gitrev: "abc123",
Args: args,
Stdin: &bytes.Buffer{},
Stdout: &bytes.Buffer{},
Stderr: &bytes.Buffer{},
Fs: fs,
}
}
func TestVersionCommand(t *testing.T) {
fs := afero.NewMemMapFs()
opts := testOpts([]string{"mfer", "version"}, fs)
exitCode := RunWithOptions(opts)
assert.Equal(t, 0, exitCode)
stdout := opts.Stdout.(*bytes.Buffer).String()
assert.Contains(t, stdout, "1.0.0")
assert.Contains(t, stdout, "abc123")
}
func TestHelpCommand(t *testing.T) {
fs := afero.NewMemMapFs()
opts := testOpts([]string{"mfer", "--help"}, fs)
exitCode := RunWithOptions(opts)
assert.Equal(t, 0, exitCode)
stdout := opts.Stdout.(*bytes.Buffer).String()
assert.Contains(t, stdout, "generate")
assert.Contains(t, stdout, "check")
assert.Contains(t, stdout, "fetch")
}
func TestGenerateCommand(t *testing.T) {
fs := afero.NewMemMapFs()
// Create test files in memory filesystem
require.NoError(t, fs.MkdirAll("/testdir", 0755))
require.NoError(t, afero.WriteFile(fs, "/testdir/file1.txt", []byte("hello world"), 0644))
require.NoError(t, afero.WriteFile(fs, "/testdir/file2.txt", []byte("test content"), 0644))
opts := testOpts([]string{"mfer", "-q", "generate", "-o", "/testdir/test.mf", "/testdir"}, fs)
exitCode := RunWithOptions(opts)
assert.Equal(t, 0, exitCode, "stderr: %s", opts.Stderr.(*bytes.Buffer).String())
// Verify manifest was created
exists, err := afero.Exists(fs, "/testdir/test.mf")
require.NoError(t, err)
assert.True(t, exists)
}
func TestGenerateAndCheckCommand(t *testing.T) {
fs := afero.NewMemMapFs()
// Create test files with subdirectory
require.NoError(t, fs.MkdirAll("/testdir/subdir", 0755))
require.NoError(t, afero.WriteFile(fs, "/testdir/file1.txt", []byte("hello world"), 0644))
require.NoError(t, afero.WriteFile(fs, "/testdir/subdir/file2.txt", []byte("test content"), 0644))
// Generate manifest
opts := testOpts([]string{"mfer", "-q", "generate", "-o", "/testdir/test.mf", "/testdir"}, fs)
exitCode := RunWithOptions(opts)
require.Equal(t, 0, exitCode, "generate failed: %s", opts.Stderr.(*bytes.Buffer).String())
// Check manifest
opts = testOpts([]string{"mfer", "-q", "check", "--base", "/testdir", "/testdir/test.mf"}, fs)
exitCode = RunWithOptions(opts)
assert.Equal(t, 0, exitCode, "check failed: %s", opts.Stderr.(*bytes.Buffer).String())
}
func TestCheckCommandWithMissingFile(t *testing.T) {
fs := afero.NewMemMapFs()
// Create test file
require.NoError(t, fs.MkdirAll("/testdir", 0755))
require.NoError(t, afero.WriteFile(fs, "/testdir/file1.txt", []byte("hello world"), 0644))
// Generate manifest
opts := testOpts([]string{"mfer", "-q", "generate", "-o", "/testdir/test.mf", "/testdir"}, fs)
exitCode := RunWithOptions(opts)
require.Equal(t, 0, exitCode, "generate failed: %s", opts.Stderr.(*bytes.Buffer).String())
// Delete the file
require.NoError(t, fs.Remove("/testdir/file1.txt"))
// Check manifest - should fail
opts = testOpts([]string{"mfer", "-q", "check", "--base", "/testdir", "/testdir/test.mf"}, fs)
exitCode = RunWithOptions(opts)
assert.Equal(t, 1, exitCode, "check should have failed for missing file")
}
func TestCheckCommandWithCorruptedFile(t *testing.T) {
fs := afero.NewMemMapFs()
// Create test file
require.NoError(t, fs.MkdirAll("/testdir", 0755))
require.NoError(t, afero.WriteFile(fs, "/testdir/file1.txt", []byte("hello world"), 0644))
// Generate manifest
opts := testOpts([]string{"mfer", "-q", "generate", "-o", "/testdir/test.mf", "/testdir"}, fs)
exitCode := RunWithOptions(opts)
require.Equal(t, 0, exitCode, "generate failed: %s", opts.Stderr.(*bytes.Buffer).String())
// Corrupt the file (change content but keep same size)
require.NoError(t, afero.WriteFile(fs, "/testdir/file1.txt", []byte("HELLO WORLD"), 0644))
// Check manifest - should fail with hash mismatch
opts = testOpts([]string{"mfer", "-q", "check", "--base", "/testdir", "/testdir/test.mf"}, fs)
exitCode = RunWithOptions(opts)
assert.Equal(t, 1, exitCode, "check should have failed for corrupted file")
}
func TestCheckCommandWithSizeMismatch(t *testing.T) {
fs := afero.NewMemMapFs()
// Create test file
require.NoError(t, fs.MkdirAll("/testdir", 0755))
require.NoError(t, afero.WriteFile(fs, "/testdir/file1.txt", []byte("hello world"), 0644))
// Generate manifest
opts := testOpts([]string{"mfer", "-q", "generate", "-o", "/testdir/test.mf", "/testdir"}, fs)
exitCode := RunWithOptions(opts)
require.Equal(t, 0, exitCode, "generate failed: %s", opts.Stderr.(*bytes.Buffer).String())
// Change file size
require.NoError(t, afero.WriteFile(fs, "/testdir/file1.txt", []byte("different size content here"), 0644))
// Check manifest - should fail with size mismatch
opts = testOpts([]string{"mfer", "-q", "check", "--base", "/testdir", "/testdir/test.mf"}, fs)
exitCode = RunWithOptions(opts)
assert.Equal(t, 1, exitCode, "check should have failed for size mismatch")
}
func TestBannerOutput(t *testing.T) {
fs := afero.NewMemMapFs()
// Create test file
require.NoError(t, fs.MkdirAll("/testdir", 0755))
require.NoError(t, afero.WriteFile(fs, "/testdir/file1.txt", []byte("hello"), 0644))
// Run without -q to see banner
opts := testOpts([]string{"mfer", "generate", "-o", "/testdir/test.mf", "/testdir"}, fs)
exitCode := RunWithOptions(opts)
assert.Equal(t, 0, exitCode)
// Banner ASCII art should be in stdout
stdout := opts.Stdout.(*bytes.Buffer).String()
assert.Contains(t, stdout, "___")
assert.Contains(t, stdout, "\\")
}
func TestUnknownCommand(t *testing.T) {
fs := afero.NewMemMapFs()
opts := testOpts([]string{"mfer", "unknown"}, fs)
exitCode := RunWithOptions(opts)
assert.Equal(t, 1, exitCode)
}
func TestGenerateWithIgnoreDotfiles(t *testing.T) {
fs := afero.NewMemMapFs()
// Create test files including dotfiles
require.NoError(t, fs.MkdirAll("/testdir", 0755))
require.NoError(t, afero.WriteFile(fs, "/testdir/file1.txt", []byte("hello"), 0644))
require.NoError(t, afero.WriteFile(fs, "/testdir/.hidden", []byte("secret"), 0644))
// Generate manifest with --ignore-dotfiles
opts := testOpts([]string{"mfer", "-q", "generate", "--ignore-dotfiles", "-o", "/testdir/test.mf", "/testdir"}, fs)
exitCode := RunWithOptions(opts)
require.Equal(t, 0, exitCode)
// Check that manifest exists and we can verify (hidden file won't cause failure even if missing)
exists, _ := afero.Exists(fs, "/testdir/test.mf")
assert.True(t, exists)
}
func TestMultipleInputPaths(t *testing.T) {
fs := afero.NewMemMapFs()
// Create test files in multiple directories
require.NoError(t, fs.MkdirAll("/dir1", 0755))
require.NoError(t, fs.MkdirAll("/dir2", 0755))
require.NoError(t, afero.WriteFile(fs, "/dir1/file1.txt", []byte("content1"), 0644))
require.NoError(t, afero.WriteFile(fs, "/dir2/file2.txt", []byte("content2"), 0644))
// Generate manifest from multiple paths
opts := testOpts([]string{"mfer", "-q", "generate", "-o", "/output.mf", "/dir1", "/dir2"}, fs)
exitCode := RunWithOptions(opts)
assert.Equal(t, 0, exitCode, "stderr: %s", opts.Stderr.(*bytes.Buffer).String())
exists, _ := afero.Exists(fs, "/output.mf")
assert.True(t, exists)
}
func TestNoExtraFilesPass(t *testing.T) {
fs := afero.NewMemMapFs()
// Create test files
require.NoError(t, fs.MkdirAll("/testdir", 0755))
require.NoError(t, afero.WriteFile(fs, "/testdir/file1.txt", []byte("hello"), 0644))
require.NoError(t, afero.WriteFile(fs, "/testdir/file2.txt", []byte("world"), 0644))
// Generate manifest
opts := testOpts([]string{"mfer", "-q", "generate", "-o", "/manifest.mf", "/testdir"}, fs)
exitCode := RunWithOptions(opts)
require.Equal(t, 0, exitCode)
// Check with --no-extra-files (should pass - no extra files)
opts = testOpts([]string{"mfer", "-q", "check", "--no-extra-files", "--base", "/testdir", "/manifest.mf"}, fs)
exitCode = RunWithOptions(opts)
assert.Equal(t, 0, exitCode)
}
func TestNoExtraFilesFail(t *testing.T) {
fs := afero.NewMemMapFs()
// Create test files
require.NoError(t, fs.MkdirAll("/testdir", 0755))
require.NoError(t, afero.WriteFile(fs, "/testdir/file1.txt", []byte("hello"), 0644))
// Generate manifest
opts := testOpts([]string{"mfer", "-q", "generate", "-o", "/manifest.mf", "/testdir"}, fs)
exitCode := RunWithOptions(opts)
require.Equal(t, 0, exitCode)
// Add an extra file after manifest generation
require.NoError(t, afero.WriteFile(fs, "/testdir/extra.txt", []byte("extra"), 0644))
// Check with --no-extra-files (should fail - extra file exists)
opts = testOpts([]string{"mfer", "-q", "check", "--no-extra-files", "--base", "/testdir", "/manifest.mf"}, fs)
exitCode = RunWithOptions(opts)
assert.Equal(t, 1, exitCode, "check should fail when extra files exist")
}
func TestNoExtraFilesWithSubdirectory(t *testing.T) {
fs := afero.NewMemMapFs()
// Create test files with subdirectory
require.NoError(t, fs.MkdirAll("/testdir/subdir", 0755))
require.NoError(t, afero.WriteFile(fs, "/testdir/file1.txt", []byte("hello"), 0644))
require.NoError(t, afero.WriteFile(fs, "/testdir/subdir/file2.txt", []byte("world"), 0644))
// Generate manifest
opts := testOpts([]string{"mfer", "-q", "generate", "-o", "/manifest.mf", "/testdir"}, fs)
exitCode := RunWithOptions(opts)
require.Equal(t, 0, exitCode)
// Add extra file in subdirectory
require.NoError(t, afero.WriteFile(fs, "/testdir/subdir/extra.txt", []byte("extra"), 0644))
// Check with --no-extra-files (should fail)
opts = testOpts([]string{"mfer", "-q", "check", "--no-extra-files", "--base", "/testdir", "/manifest.mf"}, fs)
exitCode = RunWithOptions(opts)
assert.Equal(t, 1, exitCode, "check should fail when extra files exist in subdirectory")
}
func TestCheckWithoutNoExtraFilesIgnoresExtra(t *testing.T) {
fs := afero.NewMemMapFs()
// Create test file
require.NoError(t, fs.MkdirAll("/testdir", 0755))
require.NoError(t, afero.WriteFile(fs, "/testdir/file1.txt", []byte("hello"), 0644))
// Generate manifest
opts := testOpts([]string{"mfer", "-q", "generate", "-o", "/manifest.mf", "/testdir"}, fs)
exitCode := RunWithOptions(opts)
require.Equal(t, 0, exitCode)
// Add extra file
require.NoError(t, afero.WriteFile(fs, "/testdir/extra.txt", []byte("extra"), 0644))
// Check WITHOUT --no-extra-files (should pass - extra files ignored)
opts = testOpts([]string{"mfer", "-q", "check", "--base", "/testdir", "/manifest.mf"}, fs)
exitCode = RunWithOptions(opts)
assert.Equal(t, 0, exitCode, "check without --no-extra-files should ignore extra files")
}

View File

@@ -1,100 +1,54 @@
package cli
import (
"fmt"
"bytes"
"path/filepath"
"time"
"git.eeqj.de/sneak/mfer/internal/log"
"git.eeqj.de/sneak/mfer/mfer"
"github.com/urfave/cli/v2"
"sneak.berlin/go/mfer/internal/log"
"sneak.berlin/go/mfer/internal/scanner"
)
func (mfa *CLIApp) generateManifestOperation(ctx *cli.Context) error {
log.Debug("generateManifestOperation()")
myArgs := ctx.Args()
log.Dump(myArgs)
opts := &scanner.Options{
opts := &mfer.ManifestScanOptions{
IgnoreDotfiles: ctx.Bool("IgnoreDotfiles"),
FollowSymLinks: ctx.Bool("FollowSymLinks"),
Fs: mfa.Fs,
}
s := scanner.NewWithOptions(opts)
// Phase 1: Enumeration - collect paths and stat files
args := ctx.Args()
showProgress := ctx.Bool("progress")
// Set up enumeration progress reporting
var enumProgress chan scanner.EnumerateStatus
if showProgress {
enumProgress = make(chan scanner.EnumerateStatus, 1)
go func() {
for status := range enumProgress {
log.Progressf("Enumerating: %d files, %.1f MB",
status.FilesFound,
float64(status.BytesFound)/1e6)
}
log.ProgressDone()
}()
}
if args.Len() == 0 {
// Default to current directory
if err := s.EnumeratePath(".", enumProgress); err != nil {
return err
}
} else {
// Collect all paths first
paths := make([]string, 0, args.Len())
for i := 0; i < args.Len(); i++ {
ap, err := filepath.Abs(args.Get(i))
if err != nil {
return err
}
log.Debugf("enumerating path: %s", ap)
paths = append(paths, ap)
}
if err := s.EnumeratePaths(enumProgress, paths...); err != nil {
paths := make([]string, ctx.Args().Len()-1)
for i := 0; i < ctx.Args().Len(); i++ {
ap, err := filepath.Abs(ctx.Args().Get(i))
if err != nil {
return err
}
log.Dump(ap)
paths = append(paths, ap)
}
log.Debugf("enumerated %d files, %d bytes total", s.FileCount(), s.TotalBytes())
// Open output file
outputPath := ctx.String("output")
outFile, err := mfa.Fs.Create(outputPath)
mf, err := mfer.NewFromPaths(opts, paths...)
if err != nil {
return fmt.Errorf("failed to create output file: %w", err)
panic(err)
}
defer outFile.Close()
mf.WithContext(ctx.Context)
// Phase 2: Scan - read file contents and generate manifest
var scanProgress chan scanner.ScanStatus
if showProgress {
scanProgress = make(chan scanner.ScanStatus, 1)
go func() {
for status := range scanProgress {
log.Progressf("Scanning: %d/%d files, %.1f MB/s",
status.ScannedFiles,
status.TotalFiles,
status.BytesPerSec/1e6)
}
log.ProgressDone()
}()
}
log.Dump(mf)
err = s.ToManifest(ctx.Context, outFile, scanProgress)
err = mf.Scan()
if err != nil {
return fmt.Errorf("failed to generate manifest: %w", err)
return err
}
if !ctx.Bool("quiet") {
elapsed := time.Since(mfa.startupTime).Seconds()
rate := float64(s.TotalBytes()) / elapsed / 1e6
log.Infof("wrote %d files (%.1f MB) to %s in %.1fs (%.1f MB/s)", s.FileCount(), float64(s.TotalBytes())/1e6, outputPath, elapsed, rate)
buf := new(bytes.Buffer)
err = mf.WriteTo(buf)
if err != nil {
return err
}
dat := buf.Bytes()
log.Dump(dat)
return nil
}

View File

@@ -2,13 +2,11 @@ package cli
import (
"fmt"
"io"
"os"
"time"
"github.com/spf13/afero"
"git.eeqj.de/sneak/mfer/internal/log"
"github.com/urfave/cli/v2"
"sneak.berlin/go/mfer/internal/log"
)
type CLIApp struct {
@@ -18,31 +16,22 @@ type CLIApp struct {
startupTime time.Time
exitCode int
app *cli.App
// I/O streams - all program input/output should go through these
Stdin io.Reader
Stdout io.Writer
Stderr io.Writer
// Fs is the filesystem abstraction - defaults to OsFs for real filesystem
Fs afero.Fs
}
const banner = `
___ ___ ___ ___
/__/\ / /\ / /\ / /\
| |::\ / /:/_ / /:/_ / /::\
| |:|:\ / /:/ /\ / /:/ /\ / /:/\:\
__|__|:|\:\ / /:/ /:/ / /:/ /:/_ / /:/~/:/
/__/::::| \:\ /__/:/ /:/ /__/:/ /:/ /\ /__/:/ /:/___
\ \:\~~\__\/ \ \:\/:/ \ \:\/:/ /:/ \ \:\/:::::/
\ \:\ \ \::/ \ \::/ /:/ \ \::/~~~~
\ \:\ \ \:\ \ \:\/:/ \ \:\
\ \:\ \ \:\ \ \::/ \ \:\
\__\/ \__\/ \__\/ \__\/`
const banner = ` ___ ___ ___ ___
/__/\ / /\ / /\ / /\
| |::\ / /:/_ / /:/_ / /::\
| |:|:\ / /:/ /\ / /:/ /\ / /:/\:\
__|__|:|\:\ / /:/ /:/ / /:/ /:/_ / /:/~/:/
/__/::::| \:\ /__/:/ /:/ /__/:/ /:/ /\ /__/:/ /:/___
\ \:\~~\__\/ \ \:\/:/ \ \:\/:/ /:/ \ \:\/:::::/
\ \:\ \ \::/ \ \::/ /:/ \ \::/~~~~
\ \:\ \ \:\ \ \:\/:/ \ \:\
\ \:\ \ \:\ \ \::/ \ \:\
\__\/ \__\/ \__\/ \__\/`
func (mfa *CLIApp) printBanner() {
fmt.Fprintln(mfa.Stdout, banner)
fmt.Println(banner)
}
func (mfa *CLIApp) VersionString() string {
@@ -58,7 +47,7 @@ func (mfa *CLIApp) setVerbosity(v int) {
}
}
func (mfa *CLIApp) run(args []string) {
func (mfa *CLIApp) run() {
mfa.startupTime = time.Now()
if NO_COLOR {
@@ -66,8 +55,6 @@ func (mfa *CLIApp) run(args []string) {
log.DisableStyling()
}
// Configure log package to use our I/O streams
log.SetOutput(mfa.Stdout, mfa.Stderr)
log.Init()
var verbosity int
@@ -77,8 +64,6 @@ func (mfa *CLIApp) run(args []string) {
Usage: "Manifest generator",
Version: mfa.VersionString(),
EnableBashCompletion: true,
Writer: mfa.Stdout,
ErrWriter: mfa.Stderr,
Flags: []cli.Flag{
&cli.BoolFlag{
Name: "verbose",
@@ -121,17 +106,11 @@ func (mfa *CLIApp) run(args []string) {
Aliases: []string{"o"},
Usage: "Specify output filename",
},
&cli.BoolFlag{
Name: "progress",
Aliases: []string{"P"},
Usage: "Show progress during enumeration and scanning",
},
},
},
{
Name: "check",
Usage: "Validate files using manifest file",
ArgsUsage: "[manifest file]",
Name: "check",
Usage: "Validate files using manifest file",
Action: func(c *cli.Context) error {
if !c.Bool("quiet") {
mfa.printBanner()
@@ -139,29 +118,12 @@ func (mfa *CLIApp) run(args []string) {
mfa.setVerbosity(verbosity)
return mfa.checkManifestOperation(c)
},
Flags: []cli.Flag{
&cli.StringFlag{
Name: "base",
Aliases: []string{"b"},
Value: ".",
Usage: "Base directory for resolving relative paths from manifest",
},
&cli.BoolFlag{
Name: "progress",
Aliases: []string{"P"},
Usage: "Show progress during checking",
},
&cli.BoolFlag{
Name: "no-extra-files",
Usage: "Fail if files exist in base directory that are not in manifest",
},
},
},
{
Name: "version",
Usage: "Show version",
Action: func(c *cli.Context) error {
fmt.Fprintln(mfa.Stdout, mfa.VersionString())
fmt.Printf("%s\n", mfa.VersionString())
return nil
},
},
@@ -180,7 +142,7 @@ func (mfa *CLIApp) run(args []string) {
}
mfa.app.HideVersion = true
err := mfa.app.Run(args)
err := mfa.app.Run(os.Args)
if err != nil {
mfa.exitCode = 1
log.WithError(err).Debugf("exiting")

View File

@@ -2,10 +2,7 @@ package log
import (
"fmt"
"io"
"os"
"runtime"
"sync"
"github.com/apex/log"
acli "github.com/apex/log/handlers/cli"
@@ -15,39 +12,6 @@ import (
type Level = log.Level
var (
// mu protects the output writers
mu sync.RWMutex
// stdout is the writer for progress output
stdout io.Writer = os.Stdout
// stderr is the writer for log output
stderr io.Writer = os.Stderr
)
// SetOutput configures the output writers for the log package.
// stdout is used for progress output, stderr is used for log messages.
func SetOutput(out, err io.Writer) {
mu.Lock()
defer mu.Unlock()
stdout = out
stderr = err
pterm.SetDefaultOutput(out)
}
// GetStdout returns the configured stdout writer.
func GetStdout() io.Writer {
mu.RLock()
defer mu.RUnlock()
return stdout
}
// GetStderr returns the configured stderr writer.
func GetStderr() io.Writer {
mu.RLock()
defer mu.RUnlock()
return stderr
}
func DisableStyling() {
pterm.DisableColor()
pterm.DisableStyling()
@@ -60,21 +24,10 @@ func DisableStyling() {
}
func Init() {
mu.RLock()
w := stderr
mu.RUnlock()
log.SetHandler(acli.New(w))
log.SetHandler(acli.Default)
log.SetLevel(log.InfoLevel)
}
func Infof(format string, args ...interface{}) {
log.Infof(format, args...)
}
func Info(arg string) {
log.Info(arg)
}
func Debugf(format string, args ...interface{}) {
DebugReal(fmt.Sprintf(format, args...), 2)
}
@@ -102,13 +55,14 @@ func EnableDebugLogging() {
func VerbosityStepsToLogLevel(l int) log.Level {
switch l {
case 0:
return log.InfoLevel
case 1:
return log.WarnLevel
case 2:
return log.InfoLevel
case 3:
return log.DebugLevel
}
// -vv or more
return log.DebugLevel
return log.ErrorLevel
}
func SetLevelFromVerbosity(l int) {
@@ -133,14 +87,3 @@ func GetLevel() log.Level {
func WithError(e error) *log.Entry {
return GetLogger().WithError(e)
}
// Progressf prints a progress message that overwrites the current line.
// Use ProgressDone() when progress is complete to move to the next line.
func Progressf(format string, args ...interface{}) {
pterm.Printf("\r"+format, args...)
}
// ProgressDone completes a progress line by printing a newline.
func ProgressDone() {
pterm.Println()
}

View File

@@ -1,373 +0,0 @@
package scanner
import (
"context"
"io"
"io/fs"
"path"
"path/filepath"
"strings"
"sync"
"time"
"github.com/spf13/afero"
"sneak.berlin/go/mfer/mfer"
)
// Phase 1: Enumeration
// ---------------------
// Walking directories and calling stat() on files to collect metadata.
// Builds the list of files to be scanned. Relatively fast (metadata only).
// EnumerateStatus contains progress information for the enumeration phase.
type EnumerateStatus struct {
FilesFound int64
BytesFound int64
}
// Phase 2: Scan (ToManifest)
// --------------------------
// Reading file contents and computing hashes for manifest generation.
// This is the expensive phase that reads all file data.
// ScanStatus contains progress information for the scan phase.
type ScanStatus struct {
TotalFiles int64
ScannedFiles int64
TotalBytes int64
ScannedBytes int64
BytesPerSec float64
}
// Options configures scanner behavior.
type Options struct {
IgnoreDotfiles bool
FollowSymLinks bool
Fs afero.Fs // Filesystem to use, defaults to OsFs
}
// FileEntry represents a file that has been enumerated.
type FileEntry struct {
Path string // Relative path (used in manifest)
AbsPath string // Absolute path (used for reading file content)
Size int64
Mtime time.Time
Ctime time.Time
}
// Scanner accumulates files and generates manifests from them.
type Scanner struct {
mu sync.RWMutex
files []*FileEntry
options *Options
fs afero.Fs
}
// New creates a new Scanner with default options.
func New() *Scanner {
return NewWithOptions(nil)
}
// NewWithOptions creates a new Scanner with the given options.
func NewWithOptions(opts *Options) *Scanner {
if opts == nil {
opts = &Options{}
}
fs := opts.Fs
if fs == nil {
fs = afero.NewOsFs()
}
return &Scanner{
files: make([]*FileEntry, 0),
options: opts,
fs: fs,
}
}
// EnumerateFile adds a single file to the scanner, calling stat() to get metadata.
func (s *Scanner) EnumerateFile(filePath string) error {
abs, err := filepath.Abs(filePath)
if err != nil {
return err
}
info, err := s.fs.Stat(abs)
if err != nil {
return err
}
// For single files, use the filename as the relative path
basePath := filepath.Dir(abs)
return s.enumerateFileWithInfo(filepath.Base(abs), basePath, info, nil)
}
// EnumeratePath walks a directory path and adds all files to the scanner.
// If progress is non-nil, status updates are sent as files are discovered.
// The progress channel is closed when the method returns.
func (s *Scanner) EnumeratePath(inputPath string, progress chan<- EnumerateStatus) error {
if progress != nil {
defer close(progress)
}
abs, err := filepath.Abs(inputPath)
if err != nil {
return err
}
afs := afero.NewReadOnlyFs(afero.NewBasePathFs(s.fs, abs))
return s.enumerateFS(afs, abs, progress)
}
// EnumeratePaths walks multiple directory paths and adds all files to the scanner.
// If progress is non-nil, status updates are sent as files are discovered.
// The progress channel is closed when the method returns.
func (s *Scanner) EnumeratePaths(progress chan<- EnumerateStatus, inputPaths ...string) error {
if progress != nil {
defer close(progress)
}
for _, p := range inputPaths {
abs, err := filepath.Abs(p)
if err != nil {
return err
}
afs := afero.NewReadOnlyFs(afero.NewBasePathFs(s.fs, abs))
if err := s.enumerateFS(afs, abs, progress); err != nil {
return err
}
}
return nil
}
// EnumerateFS walks an afero filesystem and adds all files to the scanner.
// If progress is non-nil, status updates are sent as files are discovered.
// The progress channel is closed when the method returns.
// basePath is used to compute absolute paths for file reading.
func (s *Scanner) EnumerateFS(afs afero.Fs, basePath string, progress chan<- EnumerateStatus) error {
if progress != nil {
defer close(progress)
}
return s.enumerateFS(afs, basePath, progress)
}
// enumerateFS is the internal implementation that doesn't close the progress channel.
func (s *Scanner) enumerateFS(afs afero.Fs, basePath string, progress chan<- EnumerateStatus) error {
return afero.Walk(afs, "/", func(p string, info fs.FileInfo, err error) error {
if err != nil {
return err
}
if s.options.IgnoreDotfiles && pathIsHidden(p) {
if info.IsDir() {
return filepath.SkipDir
}
return nil
}
return s.enumerateFileWithInfo(p, basePath, info, progress)
})
}
// enumerateFileWithInfo adds a file with pre-existing fs.FileInfo.
func (s *Scanner) enumerateFileWithInfo(filePath string, basePath string, info fs.FileInfo, progress chan<- EnumerateStatus) error {
if info.IsDir() {
// Manifests contain only files, directories are implied
return nil
}
// Clean the path - remove leading slash if present
cleanPath := filePath
if len(cleanPath) > 0 && cleanPath[0] == '/' {
cleanPath = cleanPath[1:]
}
// Compute absolute path for file reading
absPath := filepath.Join(basePath, cleanPath)
entry := &FileEntry{
Path: cleanPath,
AbsPath: absPath,
Size: info.Size(),
Mtime: info.ModTime(),
// Note: Ctime not available from fs.FileInfo on all platforms
// Will need platform-specific code to extract it
}
s.mu.Lock()
s.files = append(s.files, entry)
filesFound := int64(len(s.files))
var bytesFound int64
for _, f := range s.files {
bytesFound += f.Size
}
s.mu.Unlock()
sendEnumerateStatus(progress, EnumerateStatus{
FilesFound: filesFound,
BytesFound: bytesFound,
})
return nil
}
// Files returns a copy of all files added to the scanner.
func (s *Scanner) Files() []*FileEntry {
s.mu.RLock()
defer s.mu.RUnlock()
out := make([]*FileEntry, len(s.files))
copy(out, s.files)
return out
}
// FileCount returns the number of files in the scanner.
func (s *Scanner) FileCount() int64 {
s.mu.RLock()
defer s.mu.RUnlock()
return int64(len(s.files))
}
// TotalBytes returns the total size of all files in the scanner.
func (s *Scanner) TotalBytes() int64 {
s.mu.RLock()
defer s.mu.RUnlock()
var total int64
for _, f := range s.files {
total += f.Size
}
return total
}
// ToManifest reads all file contents, computes hashes, and generates a manifest.
// If progress is non-nil, status updates are sent approximately once per second.
// The progress channel is closed when the method returns.
// The manifest is written to the provided io.Writer.
func (s *Scanner) ToManifest(ctx context.Context, w io.Writer, progress chan<- ScanStatus) error {
if progress != nil {
defer close(progress)
}
s.mu.RLock()
files := make([]*FileEntry, len(s.files))
copy(files, s.files)
totalFiles := int64(len(files))
var totalBytes int64
for _, f := range files {
totalBytes += f.Size
}
s.mu.RUnlock()
builder := mfer.NewBuilder()
var scannedFiles int64
var scannedBytes int64
lastProgressTime := time.Now()
startTime := time.Now()
for _, entry := range files {
// Check for cancellation
select {
case <-ctx.Done():
return ctx.Err()
default:
}
// Open file
f, err := s.fs.Open(entry.AbsPath)
if err != nil {
return err
}
// Add to manifest with progress callback
bytesRead, err := builder.AddFile(
entry.Path,
entry.Size,
entry.Mtime,
f,
func(fileBytes int64) {
// Send progress at most once per second
now := time.Now()
if progress != nil && now.Sub(lastProgressTime) >= time.Second {
elapsed := now.Sub(startTime).Seconds()
currentBytes := scannedBytes + fileBytes
var rate float64
if elapsed > 0 {
rate = float64(currentBytes) / elapsed
}
sendScanStatus(progress, ScanStatus{
TotalFiles: totalFiles,
ScannedFiles: scannedFiles,
TotalBytes: totalBytes,
ScannedBytes: currentBytes,
BytesPerSec: rate,
})
lastProgressTime = now
}
},
)
f.Close()
if err != nil {
return err
}
scannedFiles++
scannedBytes += bytesRead
}
// Send final progress
if progress != nil {
elapsed := time.Since(startTime).Seconds()
var rate float64
if elapsed > 0 {
rate = float64(scannedBytes) / elapsed
}
sendScanStatus(progress, ScanStatus{
TotalFiles: totalFiles,
ScannedFiles: scannedFiles,
TotalBytes: totalBytes,
ScannedBytes: scannedBytes,
BytesPerSec: rate,
})
}
// Build and write manifest
return builder.Build(w)
}
// pathIsHidden returns true if the path or any of its parent directories
// start with a dot (hidden files/directories).
func pathIsHidden(p string) bool {
tp := path.Clean(p)
if strings.HasPrefix(tp, ".") {
return true
}
for {
d, f := path.Split(tp)
if strings.HasPrefix(f, ".") {
return true
}
if d == "" {
return false
}
tp = d[0 : len(d)-1] // trim trailing slash from dir
}
}
// sendEnumerateStatus sends a status update without blocking.
// If the channel is full, the update is dropped.
func sendEnumerateStatus(ch chan<- EnumerateStatus, status EnumerateStatus) {
if ch == nil {
return
}
select {
case ch <- status:
default:
// Channel full, drop this update
}
}
// sendScanStatus sends a status update without blocking.
// If the channel is full, the update is dropped.
func sendScanStatus(ch chan<- ScanStatus, status ScanStatus) {
if ch == nil {
return
}
select {
case ch <- status:
default:
// Channel full, drop this update
}
}

View File

@@ -1,124 +0,0 @@
package mfer
import (
"crypto/sha256"
"io"
"sync"
"time"
"github.com/multiformats/go-multihash"
)
// FileProgress is called during file processing to report bytes read.
type FileProgress func(bytesRead int64)
// ManifestBuilder constructs a manifest by adding files one at a time.
type ManifestBuilder struct {
mu sync.Mutex
files []*MFFilePath
createdAt time.Time
}
// NewBuilder creates a new ManifestBuilder.
func NewBuilder() *ManifestBuilder {
return &ManifestBuilder{
files: make([]*MFFilePath, 0),
createdAt: time.Now(),
}
}
// AddFile reads file content from reader, computes hashes, and adds to manifest.
// The progress callback is called periodically with total bytes read so far.
// Returns the number of bytes read.
func (b *ManifestBuilder) AddFile(
path string,
size int64,
mtime time.Time,
reader io.Reader,
progress FileProgress,
) (int64, error) {
// Create hash writer
h := sha256.New()
// Read file in chunks, updating hash and progress
var totalRead int64
buf := make([]byte, 64*1024) // 64KB chunks
for {
n, err := reader.Read(buf)
if n > 0 {
h.Write(buf[:n])
totalRead += int64(n)
if progress != nil {
progress(totalRead)
}
}
if err == io.EOF {
break
}
if err != nil {
return totalRead, err
}
}
// Encode hash as multihash (SHA2-256)
mh, err := multihash.Encode(h.Sum(nil), multihash.SHA2_256)
if err != nil {
return totalRead, err
}
// Create file entry
entry := &MFFilePath{
Path: path,
Size: size,
Hashes: []*MFFileChecksum{
{MultiHash: mh},
},
Mtime: newTimestampFromTime(mtime),
}
b.mu.Lock()
b.files = append(b.files, entry)
b.mu.Unlock()
return totalRead, nil
}
// FileCount returns the number of files added to the builder.
func (b *ManifestBuilder) FileCount() int {
b.mu.Lock()
defer b.mu.Unlock()
return len(b.files)
}
// Build finalizes the manifest and writes it to the writer.
func (b *ManifestBuilder) Build(w io.Writer) error {
b.mu.Lock()
defer b.mu.Unlock()
// Create inner manifest
inner := &MFFile{
Version: MFFile_VERSION_ONE,
CreatedAt: newTimestampFromTime(b.createdAt),
Files: b.files,
}
// Create a temporary manifest to use existing serialization
m := &manifest{
pbInner: inner,
}
// Generate outer wrapper
if err := m.generateOuter(); err != nil {
return err
}
// Generate final output
if err := m.generate(); err != nil {
return err
}
// Write to output
_, err := w.Write(m.output.Bytes())
return err
}

View File

@@ -6,13 +6,12 @@ import (
"errors"
"io"
"github.com/spf13/afero"
"git.eeqj.de/sneak/mfer/internal/bork"
"git.eeqj.de/sneak/mfer/internal/log"
"google.golang.org/protobuf/proto"
"sneak.berlin/go/mfer/internal/bork"
"sneak.berlin/go/mfer/internal/log"
)
func (m *manifest) deserializeInner() error {
func (m *manifest) validateProtoOuter() error {
if m.pbOuter.Version != MFFileOuter_VERSION_ONE {
return errors.New("unknown version")
}
@@ -26,9 +25,10 @@ func (m *manifest) deserializeInner() error {
if err != nil {
return err
}
defer gzr.Close()
dat, err := io.ReadAll(gzr)
defer gzr.Close()
if err != nil {
return err
}
@@ -38,14 +38,9 @@ func (m *manifest) deserializeInner() error {
log.Debugf("truncated data, got %d expected %d", isize, m.pbOuter.Size)
return bork.ErrFileTruncated
}
// Deserialize inner message
m.pbInner = new(MFFile)
if err := proto.Unmarshal(dat, m.pbInner); err != nil {
return err
}
log.Debugf("loaded manifest with %d files", len(m.pbInner.Files))
log.Debugf("inner data size is %d", isize)
log.Dump(dat)
log.Dump(m.pbOuter.Sha256)
return nil
}
@@ -59,8 +54,7 @@ func validateMagic(dat []byte) bool {
return bytes.Equal(got, expected)
}
// NewManifestFromReader reads a manifest from an io.Reader.
func NewManifestFromReader(input io.Reader) (*manifest, error) {
func NewFromProto(input io.Reader) (*manifest, error) {
m := New()
dat, err := io.ReadAll(input)
if err != nil {
@@ -75,35 +69,21 @@ func NewManifestFromReader(input io.Reader) (*manifest, error) {
bb := bytes.NewBuffer(dat[ml:])
dat = bb.Bytes()
// deserialize outer:
log.Dump(dat)
// deserialize:
m.pbOuter = new(MFFileOuter)
if err := proto.Unmarshal(dat, m.pbOuter); err != nil {
return nil, err
}
err = proto.Unmarshal(dat, m.pbOuter)
// deserialize inner:
if err := m.deserializeInner(); err != nil {
return nil, err
}
return m, nil
}
// NewManifestFromFile reads a manifest from a file path using the given filesystem.
// If fs is nil, the real filesystem (OsFs) is used.
func NewManifestFromFile(fs afero.Fs, path string) (*manifest, error) {
if fs == nil {
fs = afero.NewOsFs()
}
f, err := fs.Open(path)
if err != nil {
return nil, err
}
defer f.Close()
return NewManifestFromReader(f)
}
// NewFromProto is deprecated, use NewManifestFromReader instead.
func NewFromProto(input io.Reader) (*manifest, error) {
return NewManifestFromReader(input)
ve := m.validateProtoOuter()
if ve != nil {
return nil, ve
}
// FIXME TODO deserialize inner
return m, nil
}

View File

@@ -4,8 +4,8 @@ import (
"bytes"
"testing"
"git.eeqj.de/sneak/mfer/internal/log"
"github.com/stretchr/testify/assert"
"sneak.berlin/go/mfer/internal/log"
)
func TestAPIExample(t *testing.T) {

View File

@@ -10,8 +10,8 @@ import (
"path/filepath"
"strings"
"git.eeqj.de/sneak/mfer/internal/log"
"github.com/spf13/afero"
"sneak.berlin/go/mfer/internal/log"
)
type manifestFile struct {
@@ -106,31 +106,13 @@ func NewFromFS(options *ManifestScanOptions, fs afero.Fs) (*manifest, error) {
}
func (m *manifest) GetFileCount() int64 {
if m.pbInner != nil {
return int64(len(m.pbInner.Files))
}
return int64(len(m.files))
}
func (m *manifest) GetTotalFileSize() int64 {
if m.pbInner != nil {
var total int64
for _, f := range m.pbInner.Files {
total += f.Size
}
return total
}
return m.totalFileSize
}
// Files returns all file entries from a loaded manifest.
func (m *manifest) Files() []*MFFilePath {
if m.pbInner == nil {
return nil
}
return m.pbInner.Files
}
func pathIsHidden(p string) bool {
tp := path.Clean(p)
if strings.HasPrefix(tp, ".") {

View File

@@ -1,3 +0,0 @@
package mfer
//go:generate protoc ./mf.proto --go_out=paths=source_relative:.

View File

@@ -5,9 +5,9 @@ import (
"fmt"
"testing"
"git.eeqj.de/sneak/mfer/internal/log"
"github.com/spf13/afero"
"github.com/stretchr/testify/assert"
"sneak.berlin/go/mfer/internal/log"
)
// Add those variables as well

View File

@@ -10,7 +10,7 @@ import (
"google.golang.org/protobuf/proto"
)
// was go-generate protoc --go_out=. --go_opt=paths=source_relative mf.proto
//go:generate protoc --go_out=. --go_opt=paths=source_relative mf.proto
// rot13("MANIFEST")
const MAGIC string = "ZNAVSRFG"