10 Commits

Author SHA1 Message Date
d3a92e398b add deps in repo 2022-12-08 22:25:06 +04:00
6f74522513 still working 2022-12-06 21:09:06 +04:00
86d724ee35 getting warmer 2022-12-06 20:42:26 +04:00
b0c16462c4 latest. linted and building, not working yet 2022-12-06 18:29:19 +04:00
aa3c159521 latest - uses custom build image now 2022-12-06 17:43:07 +04:00
a2bf7ee607 latest - does not work
Some checks failed
continuous-integration/drone/push Build is failing
continuous-integration/drone/pr Build is failing
2022-12-06 06:29:01 +04:00
ec3e7c23eb latest
Some checks failed
continuous-integration/drone/pr Build is failing
continuous-integration/drone/push Build is failing
2022-12-06 02:59:08 +04:00
a9f23c79d2 latest
Some checks failed
continuous-integration/drone/push Build is failing
continuous-integration/drone/pr Build is failing
2022-12-05 14:40:57 +04:00
bd4b135e17 test me pls
Some checks failed
continuous-integration/drone/push Build is failing
continuous-integration/drone/pr Build is failing
2022-12-04 22:55:22 +04:00
bc5b2b039a latest 2022-12-04 13:19:21 +04:00
20 changed files with 92 additions and 262 deletions

View File

@@ -1,3 +1,7 @@
*.tzst
*.tar
/buildimage
/dockerdeps
/tmp
*.docker.tzst
*.tmp
*.dockerimage
.git

View File

@@ -7,17 +7,8 @@ steps:
network_mode: bridge
settings:
repo: sneak/mfer
build_args_from_env: [ DRONE_COMMIT_SHA ]
dry_run: true
custom_dns: [ 116.202.204.30 ]
tags:
- ${DRONE_COMMIT_SHA:0:7}
- ${DRONE_COMMIT_SHA}
- ${DRONE_BRANCH}
- latest
- name: notify
image: plugins/slack
settings:
webhook:
from_secret: SLACK_WEBHOOK_URL
when:
event: pull_request

6
.gitignore vendored
View File

@@ -1,6 +1,8 @@
mfer/*.pb.go
/mfer.cmd
vendor
/tmp
*.tmp
*.dockerimage
/vendor
*.docker.tzst
*.tzst
/builddeps/modcache.tar

View File

@@ -1,30 +1,29 @@
################################################################################
#2345678911234567892123456789312345678941234567895123456789612345678971234567898
################################################################################
FROM sneak/builder:2022-12-08 AS builder
FROM sneak/builder:main AS builder
ENV GOPATH /go
ENV DEBIAN_FRONTEND noninteractive
WORKDIR /build
COPY ./Makefile ./.golangci.yml ./go.mod ./go.sum /build/
COPY ./vendor.tzst /build/vendor.tzst
COPY ./modcache.tzst /build/modcache.tzst
COPY ./go.mod ./go.sum .
RUN \
go mod download -x
################################################################################
#### caching phase done
################################################################################
WORKDIR /build
COPY ./Makefile ./.golangci.yml ./go.mod ./go.sum .
COPY ./internal ./internal
COPY ./bin/gitrev.sh ./bin/gitrev.sh
COPY ./mfer ./mfer
COPY ./cmd ./cmd
RUN find /build
ARG GITREV unknown
ARG DRONE_COMMIT_SHA unknown
RUN mkdir -p "$(go env GOMODCACHE)" && cd "$(go env GOMODCACHE)" && \
zstdmt -d --stdout /build/modcache.tzst | tar xf - && \
rm /build/modcache.tzst && cd /build
RUN \
cd mfer && go generate . && cd .. && \
GOPACKAGESDEBUG=true golangci-lint run ./... && \
mkdir vendor && cd vendor && \
zstdmt -d --stdout /build/vendor.tzst | tar xf - && rm /build/vendor.tzst && \
cd .. && \
make mfer.cmd
RUN rm -rf /build/vendor && go mod vendor && tar -c . | zstdmt -19 > /src.tzst
RUN go mod vendor && tar -c . | zstdmt -19 > /src.tzst
################################################################################
#2345678911234567892123456789312345678941234567895123456789612345678971234567898
################################################################################

View File

@@ -44,9 +44,9 @@ mfer.cmd: $(SOURCEFILES) mfer/mf.pb.go
cd cmd/mfer && go build -tags urfave_cli_no_docs -o ../../mfer.cmd $(GOFLAGS) .
clean:
rm -rfv mfer/*.pb.go mfer.cmd cmd/mfer/mfer *.dockerimage
rm -rfv mfer/*.pb.go mfer.cmd cmd/mfer/mfer
fmt: mfer/mf.pb.go
fmt:
gofumpt -l -w mfer internal cmd
golangci-lint run --fix
-prettier -w *.json
@@ -56,9 +56,9 @@ lint:
golangci-lint run
sh -c 'test -z "$$(gofmt -l .)"'
docker: sneak-mfer.$(ARCH).tzst.dockerimage
docker: sneak-mfer.$(ARCH).docker.tzst
sneak-mfer.$(ARCH).tzst.dockerimage: $(SOURCEFILES) vendor.tzst modcache.tzst
sneak-mfer.$(ARCH).docker.tzst: $(SOURCEFILES)
docker build --progress plain --build-arg GITREV=$(GITREV_BUILD) -t sneak/mfer .
docker save sneak/mfer | pv | zstdmt -19 > $@
du -sh $@
@@ -66,17 +66,3 @@ sneak-mfer.$(ARCH).tzst.dockerimage: $(SOURCEFILES) vendor.tzst modcache.tzst
godoc:
open http://127.0.0.1:6060
godoc -http=:6060
vendor.tzst: go.mod go.sum
go mod tidy
go mod vendor
cd vendor && tar -c . | pv | zstdmt -19 > $(PWD)/$@.tmp
rm -rf vendor
mv $@.tmp $@
modcache.tzst: go.mod go.sum
go mod tidy
cd $(HOME)/go/pkg && chmod -R u+rw . && rm -rf mod sumdb
go mod download -x
cd $(shell go env GOMODCACHE) && tar -c . | pv | zstdmt -19 > $(PWD)/$@.tmp
mv $@.tmp $@

View File

@@ -1,48 +1,11 @@
# mfer
[mfer](https://git.eeqj.de/sneak/mfer) is a reference implementation library
and thin wrapper command-line utility written in [Go](https://golang.org)
and first published in 2022 under the [WTFPL](https://wtfpl.net) (public
domain) license. It specifies and generates `.mf` manifest files over a
directory tree of files to encapsulate metadata about them (such as
cryptographic checksums or signatures over same) to aid in archiving,
downloading, streaming, or mirroring. The manifest files' data is
serialized with Google's [protobuf serialization
format](https://developers.google.com/protocol-buffers). The structure of
these files can be found [in the format
specification](https://git.eeqj.de/sneak/mfer/src/branch/main/mfer/mf.proto)
which is included in the [project
repository](https://git.eeqj.de/sneak/mfer).
The current version is pre-1.0 and while the repo was published in 2022,
there has not yet been any versioned release. [SemVer](https://semver.org)
will be used for releases.
This project was started by [@sneak](https://sneak.berlin) to scratch an
itch in 2022 and is currently a one-person effort, though the goal is for
this to emerge as a de-facto standard and be incorporated into other
software. A compatible javascript library is planned.
Manifest file generator and checker.
# Build Status
[![Build Status](https://drone.datavi.be/api/badges/sneak/mfer/status.svg)](https://drone.datavi.be/sneak/mfer)
# Participation
The community is as yet nonexistent, so there are no defined policies or
norms. Primary development happens on a privately-run Gitea instance at
[https://git.eeqj.de/sneak/mfer](https://git.eeqj.de/sneak/mfer) and issues
are [tracked there](https://git.eeqj.de/sneak/mfer/issues).
Changes must always be formatted with a standard `go fmt`, syntactically
valid, and must pass the linting defined in the repository (presently only
the `golangci-lint` defaults), which can be run with a `make lint`. The
`main` branch is protected and all changes must be made via [pull
requests](https://git.eeqj.de/sneak/mfer/pulls) and pass CI to be merged.
Any changes submitted to this project must also be
[WTFPL-licensed](https://wtfpl.net) to be considered.
# Problem Statement
Given a plain URL, there is no standard way to safely and programmatically
@@ -207,24 +170,6 @@ regardless of filesystem format.
Please email [`sneak@sneak.berlin`](mailto:sneak@sneak.berlin) with your
desired username for an account on this Gitea instance.
# See Also
## Prior Art: Metalink
* [Metalink - Mozilla Wiki](https://wiki.mozilla.org/Metalink)
* [Metalink - Wikipedia](https://en.wikipedia.org/wiki/Metalink)
* [RFC 5854 - The Metalink Download Description Format](https://datatracker.ietf.org/doc/html/rfc5854)
* [RFC 6249 - Metalink/HTTP: Mirrors and Hashes](https://www.rfc-editor.org/rfc/rfc6249.html)
## Links
* Repo: [https://git.eeqj.de/sneak/mfer](https://git.eeqj.de/sneak/mfer)
* Issues: [https://git.eeqj.de/sneak/mfer/issues](https://git.eeqj.de/sneak/mfer/issues)
# Authors
* [@sneak <sneak@sneak.berlin>](mailto:sneak@sneak.berlin)
# License
* [WTFPL](https://wtfpl.net)
I am currently interested in hiring a contractor skilled with the Go
standard library interfaces to specify this tool in full and develop a
prototype implementation.

122
TODO.md
View File

@@ -1,122 +0,0 @@
# TODO: mfer 1.0
## Design Questions
*sneak: please answer inline below each question. These are preserved for posterity.*
### Format Design
**1. Should `MFFileChecksum` be simplified?**
Currently it's a separate message wrapping a single `bytes multiHash` field. Since multihash already self-describes the algorithm, `repeated bytes hashes` directly on `MFFilePath` would be simpler and reduce per-file protobuf overhead. Is the extra message layer intentional (e.g. planning to add per-hash metadata like `verified_at`)?
> *answer:*
**2. Should file permissions/mode be stored?**
The format stores mtime/ctime but not Unix file permissions. For archival use (ExFAT, filesystem-independent checksums) this may not matter, but for software distribution or filesystem restoration it's a gap. Should we reserve a field now (e.g. `optional uint32 mode = 305`) even if we don't populate it yet?
> *answer:*
**3. Should `atime` be removed from the schema?**
Access time is volatile, non-deterministic, and often disabled (`noatime`). Including it means two manifests of the same directory at different times will differ, which conflicts with the determinism goal. Remove it, or document it as "never set by default"?
> *answer:*
**4. What are the path normalization rules?**
The proto has `string path` with no specification about: always forward-slash? Must be relative? No `..` components allowed? UTF-8 NFC vs NFD normalization (macOS vs Linux)? Max path length? This is a security issue (path traversal) and a cross-platform compatibility issue. What rules should the spec mandate?
> *answer:*
**5. Should we add a version byte after the magic?**
Currently `ZNAVSRFG` is followed immediately by protobuf. Adding a version byte (`ZNAVSRFG\x01`) would allow future framing changes without requiring protobuf parsing to detect the version. `MFFileOuter.Version` serves this purpose but requires successful deserialization to read. Worth the extra byte?
> *answer:*
**6. Should we add a length-prefix after the magic?**
Protobuf is not self-delimiting. If we ever want to concatenate manifests or append data after the protobuf, the current framing is insufficient. Add a varint or fixed-width length-prefix?
> *answer:*
### Signature Design
**7. What does the outer SHA-256 hash cover — compressed or uncompressed data?**
The review notes it currently hashes compressed data (good for verifying before decompression), but this should be explicitly documented. Which is the intended behavior?
> *answer:*
**8. Should `signatureString()` sign raw bytes instead of a hex-encoded string?**
Currently the canonical string is `MAGIC-UUID-MULTIHASH` with hex encoding, which adds a transformation layer. Signing the raw `sha256` bytes (or compressed `innerMessage` directly) would be simpler. Keep the string format or switch to raw bytes?
> *answer:*
**9. Should we support detached signature files (`.mf.sig`)?**
Embedded signatures are better for single-file distribution. Detached `.mf.sig` files follow the familiar `SHASUMS`/`SHASUMS.asc` pattern and are simpler for HTTP serving. Support both modes?
> *answer:*
**10. GPG vs pure-Go crypto for signatures?**
Shelling out to `gpg` is fragile (may not be installed, version-dependent output). `github.com/ProtonMail/go-crypto` provides pure-Go OpenPGP, or we could go Ed25519/signify (simpler, no key management). Which direction?
> *answer:*
### Implementation Design
**11. Should manifests be deterministic by default?**
This means: sort file entries by path, omit `createdAt` timestamp (or make it opt-in), no `atime`. Should determinism be the default, with a `--include-timestamps` flag to opt in?
> *answer:*
**12. Should we consolidate or keep both scanner/checker implementations?**
There are two parallel implementations: `mfer/scanner.go` + `mfer/checker.go` (typed with `FileSize`, `RelFilePath`) and `internal/scanner/` + `internal/checker/` (raw `int64`, `string`). The `mfer/` versions are superior. Delete the `internal/` versions?
> *answer:*
**13. Should the `manifest` type be exported?**
Currently unexported with exported constructors (`New`, `NewFromPaths`, etc.). Consumers can't declare `var m *mfer.manifest`. Export the type, or define an interface?
> *answer:*
**14. What should the Go module path be for 1.0?**
Currently mixed between `sneak.berlin/go/mfer` and `git.eeqj.de/sneak/mfer`. Which is canonical?
> *answer:*
---
## Implementation Plan
### Phase 1: Foundation (format correctness)
- [ ] Delete `internal/scanner/` and `internal/checker/` — consolidate on `mfer/` package versions; update CLI code
- [ ] Add deterministic file ordering — sort entries by path (lexicographic, byte-order) in `Builder.Build()`; add test asserting byte-identical output from two runs
- [ ] Add decompression size limit — `io.LimitReader` in `deserializeInner()` with `m.pbOuter.Size` as bound
- [ ] Fix `errors.Is` dead code in checker — replace with `os.IsNotExist(err)` or `errors.Is(err, fs.ErrNotExist)`
- [ ] Fix `AddFile` to verify size — check `totalRead == size` after reading, return error on mismatch
- [ ] Specify path invariants — add proto comments (UTF-8, forward-slash, relative, no `..`, no leading `/`); validate in `Builder.AddFile` and `Builder.AddFileWithHash`
### Phase 2: CLI polish
- [ ] Fix flag naming — all CLI flags use kebab-case as primary (`--include-dotfiles`, `--follow-symlinks`)
- [ ] Fix URL construction in fetch — use `BaseURL.JoinPath()` or `url.JoinPath()` instead of string concatenation
- [ ] Add progress rate-limiting to Checker — throttle to once per second, matching Scanner
- [ ] Add `--deterministic` flag (or make it default) — omit `createdAt`, sort files
### Phase 3: Robustness
- [ ] Replace GPG subprocess with pure-Go crypto — `github.com/ProtonMail/go-crypto` or Ed25519/signify
- [ ] Add timeout to any remaining subprocess calls
- [ ] Add fuzzing tests for `NewManifestFromReader`
- [ ] Add retry logic to fetch — exponential backoff for transient HTTP errors
### Phase 4: Format finalization
- [ ] Remove or deprecate `atime` from proto (pending design question answer)
- [ ] Reserve `optional uint32 mode = 305` in `MFFilePath` for future file permissions
- [ ] Add version byte after magic — `ZNAVSRFG\x01` for format version 1
- [ ] Write format specification document — separate from README: magic, outer structure, compression, inner structure, path invariants, signature scheme, canonical serialization
### Phase 5: Release prep
- [ ] Finalize Go module path
- [ ] Audit all error messages for consistency and helpfulness
- [ ] Add `--version` output matching SemVer
- [ ] Tag v1.0.0

View File

@@ -1,10 +1,4 @@
#!/bin/bash
#
if [[ ! -z "$DRONE_COMMIT_SHA" ]]; then
echo "${DRONE_COMMIT_SHA:0:7}"
exit 0
fi
if [[ ! -z "$GITREV" ]]; then
echo $GITREV
else

21
buildimage/Dockerfile Normal file
View File

@@ -0,0 +1,21 @@
## build image:
# Toolchain image: Go 1.19.3 on Debian bullseye with make, protoc and
# protoc-gen-go installed, and the module dependencies from go.mod/go.sum
# downloaded.
FROM golang:1.19.3-bullseye AS builder
# keep apt from prompting for input during the build
ENV DEBIAN_FRONTEND noninteractive
RUN apt update && apt install -y make bzip2 curl unzip
RUN mkdir -p /build
WORKDIR /build
# install newer protoc
# NOTE(review): the archive fetched here is the linux-aarch_64 release, so
# this step presumably only yields a runnable protoc on arm64 hosts — confirm
# before building this image on another architecture.
RUN wget https://github.com/protocolbuffers/protobuf/releases/download/v21.10/protoc-21.10-linux-aarch_64.zip && \
unzip *.zip -d /usr/local && rm -v *.zip && protoc --version
# Go code generator plugin for protoc, pinned to v1.28.1
RUN go install -v google.golang.org/protobuf/cmd/protoc-gen-go@v1.28.1
# print the effective Go environment into the build log
RUN go env
COPY ./go.mod .
COPY ./go.sum .
# Download the module dependencies with /go/pkg mounted as a build cache.
# NOTE(review): files written under a cache mount are not stored in the image
# layer, so the downloaded modules are presumably absent from the final
# image — confirm this is intended.
RUN --mount=type=cache,target=/go/pkg go mod download -x
# drop leftover cache and temp files
RUN rm -rfv /var/cache/* /var/tmp/*

3
go.mod
View File

@@ -1,6 +1,6 @@
module git.eeqj.de/sneak/mfer
go 1.22
go 1.17
require (
github.com/apex/log v1.9.0
@@ -10,6 +10,7 @@ require (
github.com/stretchr/testify v1.8.1
github.com/urfave/cli/v2 v2.23.6
google.golang.org/protobuf v1.28.1
)
require (

1
go.sum
View File

@@ -37,6 +37,7 @@ cloud.google.com/go/storage v1.10.0/go.mod h1:FLPqc6j+Ki4BU591ie1oL6qBQGu2Bl/tZ9
cloud.google.com/go/storage v1.14.0/go.mod h1:GrKmX003DSIwi9o29oFT7YDnHYwZoctc3fOKtUw0Xmo=
dmitri.shuralyov.com/gpu/mtl v0.0.0-20190408044501-666a987793e9/go.mod h1:H6x//7gZCb22OMCxBHrMx7a5I7Hp++hsVxbQ4BYO7hU=
github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU=
github.com/BurntSushi/toml v1.2.1/go.mod h1:CxXYINrC8qIiEnFrOxCa7Jy5BFHlXnUU2pbicEuybxQ=
github.com/BurntSushi/xgb v0.0.0-20160522181843-27f122750802/go.mod h1:IVnqGOEym/WlBOVXweHU+Q+/VP0lqqI8lqeDx9IjBqo=
github.com/MarvinJWendt/testza v0.1.0/go.mod h1:7AxNvlfeHP7Z/hDQ5JtE3OKYT3XFUeLCDE2DQninSqs=
github.com/MarvinJWendt/testza v0.2.1/go.mod h1:God7bhG8n6uQxwdScay+gjm9/LnO4D3kkcZX4hv9Rp8=

View File

@@ -5,11 +5,9 @@ import (
"fmt"
)
var (
ErrMissingMagic = errors.New("missing magic bytes in file")
ErrFileTruncated = errors.New("file/stream is truncated abnormally")
)
var ErrMissingMagic = errors.New("missing magic bytes in file")
var ErrFileTruncated = errors.New("file/stream is truncated abnormally")
func Newf(format string, args ...interface{}) error {
return fmt.Errorf(format, args...)
return errors.New(fmt.Sprintf(format, args...))
}

View File

@@ -1,11 +0,0 @@
package bork
import (
"testing"
"github.com/stretchr/testify/assert"
)
func TestBuild(t *testing.T) {
assert.NotNil(t, ErrMissingMagic)
}

32
internal/cli/misc.go Normal file
View File

@@ -0,0 +1,32 @@
package cli
import "fmt"
// DumpByteSlice prints b to standard output as a hex dump with sixteen bytes
// per row. Each row shows the decimal offset of its first byte, the bytes as
// two-digit upper-case hex split into two groups of eight, and then the same
// bytes as text, with anything outside printable ASCII (32..126) shown as '.'.
// The final row is padded with blanks so that its text column lines up with
// the rows above it. An empty slice prints nothing.
//
// FIXME make this write to a bytes.Buffer with fprintf
func DumpByteSlice(b []byte) {
	const (
		rowLen    = 16
		groupLen  = 8
		hexDigits = "0123456789ABCDEF"
	)

	var text [rowLen]byte
	// One row is 72 bytes for offsets below 10000; append grows the buffer
	// if a wider offset ever needs more room.
	row := make([]byte, 0, 80)

	// Round the length up to a whole number of rows so the last row is
	// padded out to full width.
	n := (len(b) + rowLen - 1) &^ (rowLen - 1)
	for i := 0; i < n; i++ {
		col := i % rowLen
		if col == 0 {
			row = append(row[:0], fmt.Sprintf("%4d", i)...)
		}
		if i%groupLen == 0 {
			row = append(row, ' ')
		}

		if i < len(b) {
			c := b[i]
			row = append(row, ' ', hexDigits[c>>4], hexDigits[c&0x0f])
			if c < 32 || c > 126 {
				text[col] = '.'
			} else {
				text[col] = c
			}
		} else {
			// A missing byte takes the same three columns as a real one
			// (" XX"); anything narrower shifts the text column left.
			row = append(row, ' ', ' ', ' ')
			text[col] = ' '
		}

		if col == rowLen-1 {
			row = append(row, ' ')
			row = append(row, text[:]...)
			row = append(row, '\n')
			// Emit the finished row with a single write.
			fmt.Print(string(row))
		}
	}
}

View File

@@ -1,12 +0,0 @@
package log
import (
"testing"
"github.com/stretchr/testify/assert"
)
func TestBuild(t *testing.T) {
Init()
assert.True(t, true)
}

View File

@@ -9,6 +9,7 @@ import (
)
func TestAPIExample(t *testing.T) {
// read from filesystem
m, err := NewFromFS(&ManifestScanOptions{
IgnoreDotfiles: true,

View File

@@ -32,7 +32,7 @@ func init() {
af.WriteFile("/.hidden/hello2.txt", []byte("hello world\n"), 0o755)
big.MkdirAll("/home/user/Library", 0o755)
for i := range [25]int{} {
for i, _ := range [25]int{} {
big.WriteFile(fmt.Sprintf("/home/user/Library/hello%d.txt", i), []byte("hello world\n"), 0o755)
}
}

Binary file not shown.

BIN
vendor.tar Normal file

Binary file not shown.

Binary file not shown.