4 Commits

Author SHA1 Message Date
user
7cdb1fe0a8 fix: bump go directive from 1.17 to 1.22 (closes #33)
The protobuf generated code uses unsafe.StringData (go1.20+) and
any (go1.18+), which are incompatible with go 1.17.

make test output: all tests pass.
2026-02-20 02:24:24 -08:00
5aae442156 add links to metalink format (#7)
Reviewed-on: #7
2026-02-09 02:15:58 +01:00
2717685619 update readme to conform with my new readme howto standards (#8)
All checks were successful
continuous-integration/drone/push Build is passing
Reviewed-on: #8

manually approving because CI is offline rn for some reason
2024-12-09 02:50:57 +00:00
7df558d8d0 next (#5)
All checks were successful
continuous-integration/drone/push Build is passing
Co-authored-by: sneak <sneak@sneak.berlin>
Reviewed-on: #5
2022-12-09 00:02:33 +00:00
20 changed files with 262 additions and 92 deletions

View File

@@ -1,7 +1,3 @@
*.tzst
*.tar
/buildimage
/dockerdeps
/tmp
*.docker.tzst
*.tmp
*.dockerimage
.git

View File

@@ -7,8 +7,17 @@ steps:
network_mode: bridge
settings:
repo: sneak/mfer
build_args_from_env: [ DRONE_COMMIT_SHA ]
dry_run: true
custom_dns: [ 116.202.204.30 ]
tags:
- ${DRONE_COMMIT_SHA}
- ${DRONE_COMMIT_SHA:0:7}
- ${DRONE_BRANCH}
- latest
- name: notify
image: plugins/slack
settings:
webhook:
from_secret: SLACK_WEBHOOK_URL
when:
event: pull_request

6
.gitignore vendored
View File

@@ -1,8 +1,6 @@
mfer/*.pb.go
/mfer.cmd
vendor
/tmp
*.tmp
*.docker.tzst
*.tzst
/builddeps/modcache.tar
*.dockerimage
/vendor

View File

@@ -1,29 +1,30 @@
################################################################################
#2345678911234567892123456789312345678941234567895123456789612345678971234567898
################################################################################
FROM sneak/builder:main AS builder
ENV GOPATH /go
FROM sneak/builder:2022-12-08 AS builder
ENV DEBIAN_FRONTEND noninteractive
WORKDIR /build
COPY ./go.mod ./go.sum .
RUN \
go mod download -x
################################################################################
#### caching phase done
################################################################################
WORKDIR /build
COPY ./Makefile ./.golangci.yml ./go.mod ./go.sum .
COPY ./Makefile ./.golangci.yml ./go.mod ./go.sum /build/
COPY ./vendor.tzst /build/vendor.tzst
COPY ./modcache.tzst /build/modcache.tzst
COPY ./internal ./internal
COPY ./bin/gitrev.sh ./bin/gitrev.sh
COPY ./mfer ./mfer
COPY ./cmd ./cmd
RUN find /build
ARG GITREV unknown
ARG DRONE_COMMIT_SHA unknown
RUN mkdir -p "$(go env GOMODCACHE)" && cd "$(go env GOMODCACHE)" && \
zstdmt -d --stdout /build/modcache.tzst | tar xf - && \
rm /build/modcache.tzst && cd /build
RUN \
cd mfer && go generate . && cd .. && \
GOPACKAGESDEBUG=true golangci-lint run ./... && \
mkdir vendor && cd vendor && \
zstdmt -d --stdout /build/vendor.tzst | tar xf - && rm /build/vendor.tzst && \
cd .. && \
make mfer.cmd
RUN go mod vendor && tar -c . | zstdmt -19 > /src.tzst
RUN rm -rf /build/vendor && go mod vendor && tar -c . | zstdmt -19 > /src.tzst
################################################################################
#2345678911234567892123456789312345678941234567895123456789612345678971234567898
################################################################################

View File

@@ -44,9 +44,9 @@ mfer.cmd: $(SOURCEFILES) mfer/mf.pb.go
cd cmd/mfer && go build -tags urfave_cli_no_docs -o ../../mfer.cmd $(GOFLAGS) .
clean:
rm -rfv mfer/*.pb.go mfer.cmd cmd/mfer/mfer
rm -rfv mfer/*.pb.go mfer.cmd cmd/mfer/mfer *.dockerimage
fmt:
fmt: mfer/mf.pb.go
gofumpt -l -w mfer internal cmd
golangci-lint run --fix
-prettier -w *.json
@@ -56,9 +56,9 @@ lint:
golangci-lint run
sh -c 'test -z "$$(gofmt -l .)"'
docker: sneak-mfer.$(ARCH).docker.tzst
docker: sneak-mfer.$(ARCH).tzst.dockerimage
sneak-mfer.$(ARCH).docker.tzst: $(SOURCEFILES)
sneak-mfer.$(ARCH).tzst.dockerimage: $(SOURCEFILES) vendor.tzst modcache.tzst
docker build --progress plain --build-arg GITREV=$(GITREV_BUILD) -t sneak/mfer .
docker save sneak/mfer | pv | zstdmt -19 > $@
du -sh $@
@@ -66,3 +66,17 @@ sneak-mfer.$(ARCH).docker.tzst: $(SOURCEFILES)
godoc:
open http://127.0.0.1:6060
godoc -http=:6060
vendor.tzst: go.mod go.sum
go mod tidy
go mod vendor
cd vendor && tar -c . | pv | zstdmt -19 > $(PWD)/$@.tmp
rm -rf vendor
mv $@.tmp $@
modcache.tzst: go.mod go.sum
go mod tidy
cd $(HOME)/go/pkg && chmod -R u+rw . && rm -rf mod sumdb
go mod download -x
cd $(shell go env GOMODCACHE) && tar -c . | pv | zstdmt -19 > $(PWD)/$@.tmp
mv $@.tmp $@

View File

@@ -1,11 +1,48 @@
# mfer
Manifest file generator and checker.
[mfer](https://git.eeqj.de/sneak/mfer) is a reference implementation library
and thin wrapper command-line utility written in [Go](https://golang.org)
and first published in 2022 under the [WTFPL](https://wtfpl.net) (public
domain) license. It specifies and generates `.mf` manifest files over a
directory tree of files to encapsulate metadata about them (such as
cryptographic checksums or signatures over same) to aid in archiving,
downloading, and streaming, or mirroring. The manifest files' data is
serialized with Google's [protobuf serialization
format](https://developers.google.com/protocol-buffers). The structure of
these files can be found [in the format
specification](https://git.eeqj.de/sneak/mfer/src/branch/main/mfer/mf.proto)
which is included in the [project
repository](https://git.eeqj.de/sneak/mfer).
The current version is pre-1.0 and while the repo was published in 2022,
there has not yet been any versioned release. [SemVer](https://semver.org)
will be used for releases.
This project was started by [@sneak](https://sneak.berlin) to scratch an
itch in 2022 and is currently a one-person effort, though the goal is for
this to emerge as a de-facto standard and be incorporated into other
software. A compatible javascript library is planned.
# Build Status
[![Build Status](https://drone.datavi.be/api/badges/sneak/mfer/status.svg)](https://drone.datavi.be/sneak/mfer)
# Participation
The community is as yet nonexistent so there are no defined policies or
norms yet. Primary development happens on a privately-run Gitea instance at
[https://git.eeqj.de/sneak/mfer](https://git.eeqj.de/sneak/mfer) and issues
are [tracked there](https://git.eeqj.de/sneak/mfer/issues).
Changes must always be formatted with a standard `go fmt`, syntactically
valid, and must pass the linting defined in the repository (presently only
the `golangci-lint` defaults), which can be run with a `make lint`. The
`main` branch is protected and all changes must be made via [pull
requests](https://git.eeqj.de/sneak/mfer/pulls) and pass CI to be merged.
Any changes submitted to this project must also be
[WTFPL-licensed](https://wtfpl.net) to be considered.
# Problem Statement
Given a plain URL, there is no standard way to safely and programmatically
@@ -170,6 +207,24 @@ regardless of filesystem format.
Please email [`sneak@sneak.berlin`](mailto:sneak@sneak.berlin) with your
desired username for an account on this Gitea instance.
I am currently interested in hiring a contractor skilled with the Go
standard library interfaces to specify this tool in full and develop a
prototype implementation.
# See Also
## Prior Art: Metalink
* [Metalink - Mozilla Wiki](https://wiki.mozilla.org/Metalink)
* [Metalink - Wikipedia](https://en.wikipedia.org/wiki/Metalink)
* [RFC 5854 - The Metalink Download Description Format](https://datatracker.ietf.org/doc/html/rfc5854)
* [RFC 6249 - Metalink/HTTP: Mirrors and Hashes](https://www.rfc-editor.org/rfc/rfc6249.html)
## Links
* Repo: [https://git.eeqj.de/sneak/mfer](https://git.eeqj.de/sneak/mfer)
* Issues: [https://git.eeqj.de/sneak/mfer/issues](https://git.eeqj.de/sneak/mfer/issues)
# Authors
* [@sneak &lt;sneak@sneak.berlin&gt;](mailto:sneak@sneak.berlin)
# License
* [WTFPL](https://wtfpl.net)

122
TODO.md Normal file
View File

@@ -0,0 +1,122 @@
# TODO: mfer 1.0
## Design Questions
*sneak: please answer inline below each question. These are preserved for posterity.*
### Format Design
**1. Should `MFFileChecksum` be simplified?**
Currently it's a separate message wrapping a single `bytes multiHash` field. Since multihash already self-describes the algorithm, `repeated bytes hashes` directly on `MFFilePath` would be simpler and reduce per-file protobuf overhead. Is the extra message layer intentional (e.g. planning to add per-hash metadata like `verified_at`)?
> *answer:*
**2. Should file permissions/mode be stored?**
The format stores mtime/ctime but not Unix file permissions. For archival use (ExFAT, filesystem-independent checksums) this may not matter, but for software distribution or filesystem restoration it's a gap. Should we reserve a field now (e.g. `optional uint32 mode = 305`) even if we don't populate it yet?
> *answer:*
**3. Should `atime` be removed from the schema?**
Access time is volatile, non-deterministic, and often disabled (`noatime`). Including it means two manifests of the same directory at different times will differ, which conflicts with the determinism goal. Remove it, or document it as "never set by default"?
> *answer:*
**4. What are the path normalization rules?**
The proto has `string path` with no specification about: always forward-slash? Must be relative? No `..` components allowed? UTF-8 NFC vs NFD normalization (macOS vs Linux)? Max path length? This is a security issue (path traversal) and a cross-platform compatibility issue. What rules should the spec mandate?
> *answer:*
**5. Should we add a version byte after the magic?**
Currently `ZNAVSRFG` is followed immediately by protobuf. Adding a version byte (`ZNAVSRFG\x01`) would allow future framing changes without requiring protobuf parsing to detect the version. `MFFileOuter.Version` serves this purpose but requires successful deserialization to read. Worth the extra byte?
> *answer:*
**6. Should we add a length-prefix after the magic?**
Protobuf is not self-delimiting. If we ever want to concatenate manifests or append data after the protobuf, the current framing is insufficient. Add a varint or fixed-width length-prefix?
> *answer:*
### Signature Design
**7. What does the outer SHA-256 hash cover — compressed or uncompressed data?**
The review notes it currently hashes compressed data (good for verifying before decompression), but this should be explicitly documented. Which is the intended behavior?
> *answer:*
**8. Should `signatureString()` sign raw bytes instead of a hex-encoded string?**
Currently the canonical string is `MAGIC-UUID-MULTIHASH` with hex encoding, which adds a transformation layer. Signing the raw `sha256` bytes (or compressed `innerMessage` directly) would be simpler. Keep the string format or switch to raw bytes?
> *answer:*
**9. Should we support detached signature files (`.mf.sig`)?**
Embedded signatures are better for single-file distribution. Detached `.mf.sig` files follow the familiar `SHASUMS`/`SHASUMS.asc` pattern and are simpler for HTTP serving. Support both modes?
> *answer:*
**10. GPG vs pure-Go crypto for signatures?**
Shelling out to `gpg` is fragile (may not be installed, version-dependent output). `github.com/ProtonMail/go-crypto` provides pure-Go OpenPGP, or we could go Ed25519/signify (simpler, no key management). Which direction?
> *answer:*
### Implementation Design
**11. Should manifests be deterministic by default?**
This means: sort file entries by path, omit `createdAt` timestamp (or make it opt-in), no `atime`. Should determinism be the default, with a `--include-timestamps` flag to opt in?
> *answer:*
**12. Should we consolidate or keep both scanner/checker implementations?**
There are two parallel implementations: `mfer/scanner.go` + `mfer/checker.go` (typed with `FileSize`, `RelFilePath`) and `internal/scanner/` + `internal/checker/` (raw `int64`, `string`). The `mfer/` versions are superior. Delete the `internal/` versions?
> *answer:*
**13. Should the `manifest` type be exported?**
Currently unexported with exported constructors (`New`, `NewFromPaths`, etc.). Consumers can't declare `var m *mfer.manifest`. Export the type, or define an interface?
> *answer:*
**14. What should the Go module path be for 1.0?**
Currently mixed between `sneak.berlin/go/mfer` and `git.eeqj.de/sneak/mfer`. Which is canonical?
> *answer:*
---
## Implementation Plan
### Phase 1: Foundation (format correctness)
- [ ] Delete `internal/scanner/` and `internal/checker/` — consolidate on `mfer/` package versions; update CLI code
- [ ] Add deterministic file ordering — sort entries by path (lexicographic, byte-order) in `Builder.Build()`; add test asserting byte-identical output from two runs
- [ ] Add decompression size limit — `io.LimitReader` in `deserializeInner()` with `m.pbOuter.Size` as bound
- [ ] Fix `errors.Is` dead code in checker — replace with `os.IsNotExist(err)` or `errors.Is(err, fs.ErrNotExist)`
- [ ] Fix `AddFile` to verify size — check `totalRead == size` after reading, return error on mismatch
- [ ] Specify path invariants — add proto comments (UTF-8, forward-slash, relative, no `..`, no leading `/`); validate in `Builder.AddFile` and `Builder.AddFileWithHash`
### Phase 2: CLI polish
- [ ] Fix flag naming — all CLI flags use kebab-case as primary (`--include-dotfiles`, `--follow-symlinks`)
- [ ] Fix URL construction in fetch — use `BaseURL.JoinPath()` or `url.JoinPath()` instead of string concatenation
- [ ] Add progress rate-limiting to Checker — throttle to once per second, matching Scanner
- [ ] Add `--deterministic` flag (or make it default) — omit `createdAt`, sort files
### Phase 3: Robustness
- [ ] Replace GPG subprocess with pure-Go crypto — `github.com/ProtonMail/go-crypto` or Ed25519/signify
- [ ] Add timeout to any remaining subprocess calls
- [ ] Add fuzzing tests for `NewManifestFromReader`
- [ ] Add retry logic to fetch — exponential backoff for transient HTTP errors
### Phase 4: Format finalization
- [ ] Remove or deprecate `atime` from proto (pending design question answer)
- [ ] Reserve `optional uint32 mode = 305` in `MFFilePath` for future file permissions
- [ ] Add version byte after magic — `ZNAVSRFG\x01` for format version 1
- [ ] Write format specification document — separate from README: magic, outer structure, compression, inner structure, path invariants, signature scheme, canonical serialization
### Phase 5: Release prep
- [ ] Finalize Go module path
- [ ] Audit all error messages for consistency and helpfulness
- [ ] Add `--version` output matching SemVer
- [ ] Tag v1.0.0

View File

@@ -1,4 +1,10 @@
#!/bin/bash
#
if [[ ! -z "$DRONE_COMMIT_SHA" ]]; then
echo "${DRONE_COMMIT_SHA:0:7}"
exit 0
fi
if [[ ! -z "$GITREV" ]]; then
echo $GITREV
else

View File

@@ -1,21 +0,0 @@
## build image:
FROM golang:1.19.3-bullseye AS builder
ENV DEBIAN_FRONTEND noninteractive
RUN apt update && apt install -y make bzip2 curl unzip
RUN mkdir -p /build
WORKDIR /build
# install newer protoc
RUN wget https://github.com/protocolbuffers/protobuf/releases/download/v21.10/protoc-21.10-linux-aarch_64.zip && \
unzip *.zip -d /usr/local && rm -v *.zip && protoc --version
RUN go install -v google.golang.org/protobuf/cmd/protoc-gen-go@v1.28.1
RUN go env
COPY ./go.mod .
COPY ./go.sum .
RUN --mount=type=cache,target=/go/pkg go mod download -x
RUN rm -rfv /var/cache/* /var/tmp/*

3
go.mod
View File

@@ -1,6 +1,6 @@
module git.eeqj.de/sneak/mfer
go 1.17
go 1.22
require (
github.com/apex/log v1.9.0
@@ -10,7 +10,6 @@ require (
github.com/stretchr/testify v1.8.1
github.com/urfave/cli/v2 v2.23.6
google.golang.org/protobuf v1.28.1
)
require (

1
go.sum
View File

@@ -37,7 +37,6 @@ cloud.google.com/go/storage v1.10.0/go.mod h1:FLPqc6j+Ki4BU591ie1oL6qBQGu2Bl/tZ9
cloud.google.com/go/storage v1.14.0/go.mod h1:GrKmX003DSIwi9o29oFT7YDnHYwZoctc3fOKtUw0Xmo=
dmitri.shuralyov.com/gpu/mtl v0.0.0-20190408044501-666a987793e9/go.mod h1:H6x//7gZCb22OMCxBHrMx7a5I7Hp++hsVxbQ4BYO7hU=
github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU=
github.com/BurntSushi/toml v1.2.1/go.mod h1:CxXYINrC8qIiEnFrOxCa7Jy5BFHlXnUU2pbicEuybxQ=
github.com/BurntSushi/xgb v0.0.0-20160522181843-27f122750802/go.mod h1:IVnqGOEym/WlBOVXweHU+Q+/VP0lqqI8lqeDx9IjBqo=
github.com/MarvinJWendt/testza v0.1.0/go.mod h1:7AxNvlfeHP7Z/hDQ5JtE3OKYT3XFUeLCDE2DQninSqs=
github.com/MarvinJWendt/testza v0.2.1/go.mod h1:God7bhG8n6uQxwdScay+gjm9/LnO4D3kkcZX4hv9Rp8=

View File

@@ -5,9 +5,11 @@ import (
"fmt"
)
var ErrMissingMagic = errors.New("missing magic bytes in file")
var ErrFileTruncated = errors.New("file/stream is truncated abnormally")
var (
ErrMissingMagic = errors.New("missing magic bytes in file")
ErrFileTruncated = errors.New("file/stream is truncated abnormally")
)
func Newf(format string, args ...interface{}) error {
return errors.New(fmt.Sprintf(format, args...))
return fmt.Errorf(format, args...)
}

View File

@@ -0,0 +1,11 @@
package bork
import (
"testing"
"github.com/stretchr/testify/assert"
)
func TestBuild(t *testing.T) {
assert.NotNil(t, ErrMissingMagic)
}

View File

@@ -1,32 +0,0 @@
package cli
import "fmt"
// FIXME make this write to a bytes.Buffer with fprintf
func DumpByteSlice(b []byte) {
var a [16]byte
n := (len(b) + 15) &^ 15
for i := 0; i < n; i++ {
if i%16 == 0 {
fmt.Printf("%4d", i)
}
if i%8 == 0 {
fmt.Print(" ")
}
if i < len(b) {
fmt.Printf(" %02X", b[i])
} else {
fmt.Print(" ")
}
if i >= len(b) {
a[i%16] = ' '
} else if b[i] < 32 || b[i] > 126 {
a[i%16] = '.'
} else {
a[i%16] = b[i]
}
if i%16 == 15 {
fmt.Printf(" %s\n", string(a[:]))
}
}
}

12
internal/log/log_test.go Normal file
View File

@@ -0,0 +1,12 @@
package log
import (
"testing"
"github.com/stretchr/testify/assert"
)
func TestBuild(t *testing.T) {
Init()
assert.True(t, true)
}

View File

@@ -9,7 +9,6 @@ import (
)
func TestAPIExample(t *testing.T) {
// read from filesystem
m, err := NewFromFS(&ManifestScanOptions{
IgnoreDotfiles: true,

View File

@@ -32,7 +32,7 @@ func init() {
af.WriteFile("/.hidden/hello2.txt", []byte("hello world\n"), 0o755)
big.MkdirAll("/home/user/Library", 0o755)
for i, _ := range [25]int{} {
for i := range [25]int{} {
big.WriteFile(fmt.Sprintf("/home/user/Library/hello%d.txt", i), []byte("hello world\n"), 0o755)
}
}

BIN
modcache.tzst Normal file

Binary file not shown.

Binary file not shown.

BIN
vendor.tzst Normal file

Binary file not shown.