Compare commits
17 Commits
fix/remove
...
48-repo-st
| Author | SHA1 | Date | |
|---|---|---|---|
| 2f119ad289 | |||
| 6ba32f5b35 | |||
| e62c709d42 | |||
| 89903fa1cd | |||
| b3d10106e1 | |||
| 9712c10fe3 | |||
| 43916c7746 | |||
| bbab6e73f4 | |||
| 615eecff79 | |||
| 9b67de016d | |||
|
|
3c779465e2 | ||
|
|
5572a4901f | ||
|
|
2adc275278 | ||
|
|
6d9c07510a | ||
| 1f7ee256ec | |||
|
|
28c6fbd220 | ||
| 5aae442156 |
9
.gitea/workflows/check.yml
Normal file
9
.gitea/workflows/check.yml
Normal file
@@ -0,0 +1,9 @@
|
||||
name: check
|
||||
on: [push]
|
||||
jobs:
|
||||
check:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
# actions/checkout v4.2.2, 2026-03-16
|
||||
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
|
||||
- run: docker build .
|
||||
5
.gitignore
vendored
5
.gitignore
vendored
@@ -3,6 +3,11 @@
|
||||
*.tmp
|
||||
*.dockerimage
|
||||
/vendor
|
||||
vendor.tzst
|
||||
modcache.tzst
|
||||
|
||||
# Generated manifest files
|
||||
.index.mf
|
||||
|
||||
# Stale files
|
||||
.drone.yml
|
||||
|
||||
29
AGENTS.md
Normal file
29
AGENTS.md
Normal file
@@ -0,0 +1,29 @@
|
||||
# Agent Instructions
|
||||
|
||||
Read `REPO_POLICIES.md` before making any changes. It is the authoritative
|
||||
source for coding standards, formatting, linting, and workflow rules.
|
||||
|
||||
## Workflow
|
||||
|
||||
- When fixing a bug, write a failing test FIRST. Only after the test fails,
|
||||
write the code to fix the bug. Then ensure the test passes. Leave the test in
|
||||
place and commit it with the bugfix. Don't run shell commands to test bugfixes
|
||||
or reproduce bugs. Write tests!
|
||||
|
||||
- After each change, run `make fmt`, then `make test`, then `make lint`. Fix any
|
||||
failures before committing.
|
||||
|
||||
- After each change, commit only the files you've changed. Push after committing.
|
||||
|
||||
## Attribution
|
||||
|
||||
- Never mention Claude, Anthropic, or any AI/LLM tooling in commit messages. Do
|
||||
not use attribution.
|
||||
|
||||
## Repository-Specific Notes
|
||||
|
||||
- This is a Go library + CLI tool for generating `.mf` manifest files.
|
||||
- The proto definition is in `mfer/mf.proto`; generated `.pb.go` files are
|
||||
committed (required for `go get` compatibility).
|
||||
- The format specification is in `FORMAT.md`.
|
||||
- See `TODO.md` for the 1.0 implementation plan and open design questions.
|
||||
20
CLAUDE.md
20
CLAUDE.md
@@ -1,20 +0,0 @@
|
||||
# Important Rules
|
||||
|
||||
- when fixing a bug, write a failing test FIRST. only after the test fails, write
|
||||
the code to fix the bug. then ensure the test passes. leave the test in
|
||||
place and commit it with the bugfix. don't run shell commands to test
|
||||
bugfixes or reproduce bugs. write tests!
|
||||
|
||||
- never, ever mention claude or anthropic in commit messages. do not use attribution
|
||||
|
||||
- after each change, run "make fmt".
|
||||
|
||||
- after each change, run "make test" and ensure all tests pass.
|
||||
|
||||
- after each change, run "make lint" and ensure no linting errors. fix any
|
||||
you find, one by one.
|
||||
|
||||
- after each change, commit the files you've changed. push after
|
||||
committing.
|
||||
|
||||
- NEVER use `git add -A`. always add only individual files that you've changed.
|
||||
69
Dockerfile
69
Dockerfile
@@ -1,37 +1,38 @@
|
||||
################################################################################
|
||||
#2345678911234567892123456789312345678941234567895123456789612345678971234567898
|
||||
################################################################################
|
||||
FROM sneak/builder:2022-12-08 AS builder
|
||||
ENV DEBIAN_FRONTEND noninteractive
|
||||
WORKDIR /build
|
||||
COPY ./Makefile ./.golangci.yml ./go.mod ./go.sum /build/
|
||||
COPY ./vendor.tzst /build/vendor.tzst
|
||||
COPY ./modcache.tzst /build/modcache.tzst
|
||||
COPY ./internal ./internal
|
||||
COPY ./bin/gitrev.sh ./bin/gitrev.sh
|
||||
COPY ./mfer ./mfer
|
||||
COPY ./cmd ./cmd
|
||||
ARG GITREV unknown
|
||||
ARG DRONE_COMMIT_SHA unknown
|
||||
# Lint stage — fast feedback on formatting and lint issues
|
||||
# golangci/golangci-lint:v2.0.2 (2026-03-14)
|
||||
FROM golangci/golangci-lint@sha256:d55581f7797e7a0877a7c3aaa399b01bdc57d2874d6412601a046cc4062cb62e AS lint
|
||||
|
||||
WORKDIR /src
|
||||
COPY go.mod go.sum ./
|
||||
RUN go mod download
|
||||
|
||||
COPY . .
|
||||
|
||||
# Touch .pb.go so make does not try to regenerate via protoc (file is committed)
|
||||
RUN touch mfer/mf.pb.go
|
||||
|
||||
RUN make fmt-check
|
||||
RUN make lint
|
||||
|
||||
# Build stage — tests and compilation
|
||||
# golang:1.23 (2026-03-14)
|
||||
FROM golang@sha256:60deed95d3888cc5e4d9ff8a10c54e5edc008c6ae3fba6187be6fb592e19e8c0 AS builder
|
||||
|
||||
# Force BuildKit to run the lint stage by creating a stage dependency
|
||||
COPY --from=lint /src/go.sum /dev/null
|
||||
|
||||
WORKDIR /src
|
||||
COPY go.mod go.sum ./
|
||||
RUN go mod download
|
||||
|
||||
COPY . .
|
||||
|
||||
# Touch .pb.go so make does not try to regenerate via protoc (file is committed)
|
||||
RUN touch mfer/mf.pb.go
|
||||
|
||||
RUN make test
|
||||
RUN cd cmd/mfer && go build -tags urfave_cli_no_docs -o /mfer .
|
||||
|
||||
RUN mkdir -p "$(go env GOMODCACHE)" && cd "$(go env GOMODCACHE)" && \
|
||||
zstdmt -d --stdout /build/modcache.tzst | tar xf - && \
|
||||
rm /build/modcache.tzst && cd /build
|
||||
RUN \
|
||||
cd mfer && go generate . && cd .. && \
|
||||
GOPACKAGESDEBUG=true golangci-lint run ./... && \
|
||||
mkdir vendor && cd vendor && \
|
||||
zstdmt -d --stdout /build/vendor.tzst | tar xf - && rm /build/vendor.tzst && \
|
||||
cd .. && \
|
||||
make mfer.cmd
|
||||
RUN rm -rf /build/vendor && go mod vendor && tar -c . | zstdmt -19 > /src.tzst
|
||||
################################################################################
|
||||
#2345678911234567892123456789312345678941234567895123456789612345678971234567898
|
||||
################################################################################
|
||||
## final image
|
||||
################################################################################
|
||||
FROM scratch
|
||||
# we put all the source into the final image for posterity, it's small
|
||||
COPY --from=builder /src.tzst /src.tzst
|
||||
COPY --from=builder /build/mfer.cmd /mfer
|
||||
COPY --from=builder /mfer /mfer
|
||||
ENTRYPOINT ["/mfer"]
|
||||
|
||||
143
FORMAT.md
Normal file
143
FORMAT.md
Normal file
@@ -0,0 +1,143 @@
|
||||
# .mf File Format Specification
|
||||
|
||||
Version 1.0
|
||||
|
||||
## Overview
|
||||
|
||||
An `.mf` file is a binary manifest that describes a directory tree of files,
|
||||
including their paths, sizes, and cryptographic checksums. It supports
|
||||
optional GPG signatures for integrity verification and optional timestamps
|
||||
for metadata preservation.
|
||||
|
||||
## File Structure
|
||||
|
||||
An `.mf` file consists of two parts, concatenated:
|
||||
|
||||
1. **Magic bytes** (8 bytes): the ASCII string `ZNAVSRFG`
|
||||
2. **Outer message**: a Protocol Buffers serialized `MFFileOuter` message
|
||||
|
||||
There is no length prefix or version byte between the magic and the protobuf
|
||||
message. The protobuf message extends to the end of the file.
|
||||
|
||||
See [`mfer/mf.proto`](mfer/mf.proto) for exact field numbers and types.
|
||||
|
||||
## Outer Message (`MFFileOuter`)
|
||||
|
||||
The outer message contains:
|
||||
|
||||
| Field | Number | Type | Description |
|
||||
| ----------------- | ------ | ---------------- | ------------------------------------------------------------------------ |
|
||||
| `version` | 101 | enum | Must be `VERSION_ONE` (1) |
|
||||
| `compressionType` | 102 | enum | Compression of `innerMessage`; must be `COMPRESSION_ZSTD` (1) |
|
||||
| `size` | 103 | int64 | Uncompressed size of `innerMessage` (corruption detection) |
|
||||
| `sha256` | 104 | bytes | SHA-256 hash of the **compressed** `innerMessage` (corruption detection) |
|
||||
| `uuid` | 105 | bytes | Random v4 UUID; must match the inner message UUID |
|
||||
| `innerMessage` | 199 | bytes | Zstd-compressed serialized `MFFile` message |
|
||||
| `signature` | 201 | bytes (optional) | GPG signature (ASCII-armored or binary) |
|
||||
| `signer` | 202 | bytes (optional) | Full GPG key ID of the signer |
|
||||
| `signingPubKey` | 203 | bytes (optional) | Full GPG signing public key |
|
||||
|
||||
### SHA-256 Hash
|
||||
|
||||
The `sha256` field (104) covers the **compressed** `innerMessage` bytes.
|
||||
This allows verifying data integrity before decompression.
|
||||
|
||||
## Compression
|
||||
|
||||
The `innerMessage` field is compressed with [Zstandard (zstd)](https://facebook.github.io/zstd/).
|
||||
Implementations must enforce a decompression size limit to prevent
|
||||
decompression bombs. The reference implementation limits decompressed size to
|
||||
256 MB.
|
||||
|
||||
## Inner Message (`MFFile`)
|
||||
|
||||
After decompressing `innerMessage`, the result is a serialized `MFFile`
|
||||
(referred to as the manifest):
|
||||
|
||||
| Field | Number | Type | Description |
|
||||
| ----------- | ------ | --------------------- | ------------------------------------- |
|
||||
| `version` | 100 | enum | Must be `VERSION_ONE` (1) |
|
||||
| `files` | 101 | repeated `MFFilePath` | List of files in the manifest |
|
||||
| `uuid` | 102 | bytes | Random v4 UUID; must match outer UUID |
|
||||
| `createdAt` | 201 | Timestamp (optional) | When the manifest was created |
|
||||
|
||||
## File Entries (`MFFilePath`)
|
||||
|
||||
Each file entry contains:
|
||||
|
||||
| Field | Number | Type | Description |
|
||||
| ---------- | ------ | ------------------------- | ----------------------------------- |
|
||||
| `path` | 1 | string | Relative file path (see Path Rules) |
|
||||
| `size` | 2 | int64 | File size in bytes |
|
||||
| `hashes` | 3 | repeated `MFFileChecksum` | At least one hash required |
|
||||
| `mimeType` | 301 | string (optional) | MIME type |
|
||||
| `mtime` | 302 | Timestamp (optional) | Modification time |
|
||||
| `ctime` | 303 | Timestamp (optional) | Change time (inode metadata change) |
|
||||
|
||||
Field 304 (`atime`) has been removed from the specification. Access time is
|
||||
volatile and non-deterministic; it is not useful for integrity verification.
|
||||
|
||||
## Path Rules
|
||||
|
||||
All `path` values must satisfy these invariants:
|
||||
|
||||
- **UTF-8**: paths must be valid UTF-8
|
||||
- **Forward slashes**: use `/` as the path separator (never `\`)
|
||||
- **Relative only**: no leading `/`
|
||||
- **No parent traversal**: no `..` path segments
|
||||
- **No empty segments**: no `//` sequences
|
||||
- **No trailing slash**: paths refer to files, not directories
|
||||
|
||||
Implementations must validate these invariants when reading and writing
|
||||
manifests. Paths that violate these rules must be rejected.
|
||||
|
||||
## Hash Format (`MFFileChecksum`)
|
||||
|
||||
Each checksum is a single `bytes multiHash` field containing a
|
||||
[multihash](https://multiformats.io/multihash/)-encoded value. Multihash is
|
||||
self-describing: the encoded bytes include a varint algorithm identifier
|
||||
followed by a varint digest length followed by the digest itself.
|
||||
|
||||
The 1.0 implementation writes SHA-256 multihashes (`0x12` algorithm code).
|
||||
Implementations must be able to verify SHA-256 multihashes at minimum.
|
||||
|
||||
## Signature Scheme
|
||||
|
||||
Signing is optional. When present, the signature covers a canonical string
|
||||
constructed as:
|
||||
|
||||
```
|
||||
ZNAVSRFG-<UUID>-<SHA256>
|
||||
```
|
||||
|
||||
Where:
|
||||
|
||||
- `ZNAVSRFG` is the magic bytes string (literal ASCII)
|
||||
- `<UUID>` is the hex-encoded UUID from the outer message
|
||||
- `<SHA256>` is the hex-encoded SHA-256 hash from the outer message (covering compressed data)
|
||||
|
||||
Components are separated by hyphens. The signature is produced by GPG over
|
||||
this canonical string and stored in the `signature` field of the outer
|
||||
message.
|
||||
|
||||
## Deterministic Serialization
|
||||
|
||||
By default, manifests are generated deterministically:
|
||||
|
||||
- File entries are sorted by `path` in **lexicographic byte order**
|
||||
- `createdAt` is omitted unless explicitly requested
|
||||
- `atime` is never included (field removed from schema)
|
||||
|
||||
This ensures that two independent runs over the same directory tree produce
|
||||
byte-identical `.mf` files (assuming file contents and metadata have not
|
||||
changed).
|
||||
|
||||
## MIME Type
|
||||
|
||||
The recommended MIME type for `.mf` files is `application/octet-stream`.
|
||||
The `.mf` file extension is the canonical identifier.
|
||||
|
||||
## Reference
|
||||
|
||||
- Proto definition: [`mfer/mf.proto`](mfer/mf.proto)
|
||||
- Reference implementation: [git.eeqj.de/sneak/mfer](https://git.eeqj.de/sneak/mfer)
|
||||
15
Makefile
15
Makefile
@@ -5,7 +5,7 @@ export PATH := $(PATH):$(GOPATH)/bin
|
||||
PROTOC_GEN_GO := $(GOPATH)/bin/protoc-gen-go
|
||||
SOURCEFILES := mfer/*.go mfer/*.proto internal/*/*.go cmd/*/*.go go.mod go.sum
|
||||
ARCH := $(shell uname -m)
|
||||
GITREV_BUILD := $(shell bash $(PWD)/bin/gitrev.sh)
|
||||
GITREV_BUILD := $(shell bash $(PWD)/bin/gitrev.sh 2>/dev/null || echo unknown)
|
||||
APPNAME := mfer
|
||||
VERSION := 0.1.0
|
||||
export DOCKER_IMAGE_CACHE_DIR := $(HOME)/Library/Caches/Docker/$(APPNAME)-$(ARCH)
|
||||
@@ -13,7 +13,7 @@ GOLDFLAGS += -X main.Version=$(VERSION)
|
||||
GOLDFLAGS += -X main.Gitrev=$(GITREV_BUILD)
|
||||
GOFLAGS := -ldflags "$(GOLDFLAGS)"
|
||||
|
||||
.PHONY: docker default run ci test fixme
|
||||
.PHONY: docker default run ci test check lint fmt fmt-check hooks fixme
|
||||
|
||||
default: fmt test
|
||||
|
||||
@@ -32,8 +32,17 @@ $(PROTOC_GEN_GO):
|
||||
fixme:
|
||||
@grep -nir fixme . | grep -v Makefile
|
||||
|
||||
check: test lint fmt-check
|
||||
|
||||
fmt-check: mfer/mf.pb.go
|
||||
sh -c 'test -z "$$(gofmt -l .)"'
|
||||
|
||||
hooks:
|
||||
echo '#!/bin/sh\nmake check' > .git/hooks/pre-commit
|
||||
chmod +x .git/hooks/pre-commit
|
||||
|
||||
devprereqs:
|
||||
which golangci-lint || go install -v github.com/golangci/golangci-lint/cmd/golangci-lint@latest
|
||||
which golangci-lint || go install -v github.com/golangci/golangci-lint/cmd/golangci-lint@v2.0.2
|
||||
|
||||
mfer/mf.pb.go: mfer/mf.proto
|
||||
cd mfer && go generate .
|
||||
|
||||
46
README.md
46
README.md
@@ -23,35 +23,6 @@ itch in 2022 and is currently a one-person effort, though the goal is for
|
||||
this to emerge as a de-facto standard and be incorporated into other
|
||||
software. A compatible javascript library is planned.
|
||||
|
||||
# Phases
|
||||
|
||||
Manifest generation happens in two distinct phases:
|
||||
|
||||
## Phase 1: Enumeration
|
||||
|
||||
Walking directories and calling `stat()` on files to collect metadata (path, size, mtime, ctime). This builds the list of files to be scanned. Relatively fast as it only reads filesystem metadata, not file contents.
|
||||
|
||||
**Progress:** `EnumerateStatus` with `FilesFound` and `BytesFound`
|
||||
|
||||
## Phase 2: Scan (ToManifest)
|
||||
|
||||
Reading file contents and computing cryptographic hashes for manifest generation. This is the expensive phase that reads all file data from disk.
|
||||
|
||||
**Progress:** `ScanStatus` with `TotalFiles`, `ScannedFiles`, `TotalBytes`, `ScannedBytes`, `BytesPerSec`
|
||||
|
||||
# Code Conventions
|
||||
|
||||
- **Logging:** Never use `fmt.Printf` or write to stdout/stderr directly in normal code. Use the `internal/log` package for all output (`log.Info`, `log.Infof`, `log.Debug`, `log.Debugf`, `log.Progressf`, `log.ProgressDone`).
|
||||
- **Filesystem abstraction:** Use `github.com/spf13/afero` for filesystem operations to enable testing and flexibility.
|
||||
- **CLI framework:** Use `github.com/urfave/cli/v2` for command-line interface.
|
||||
- **Serialization:** Use Protocol Buffers for manifest file format.
|
||||
- **Internal packages:** Non-exported implementation details go in `internal/` subdirectories.
|
||||
- **Concurrency:** Use `sync.RWMutex` for protecting shared state; prefer channels for progress reporting.
|
||||
- **Progress channels:** Use buffered channels (size 1) with non-blocking sends to avoid blocking the main operation if the consumer is slow.
|
||||
- **Context support:** Long-running operations should accept `context.Context` for cancellation.
|
||||
- **NO_COLOR:** Respect the `NO_COLOR` environment variable for disabling colored output.
|
||||
- **Options pattern:** Use `NewWithOptions(opts *Options)` constructor pattern for configurable types.
|
||||
|
||||
# Build Status
|
||||
|
||||
[](https://drone.datavi.be/sneak/mfer)
|
||||
@@ -71,6 +42,9 @@ requests](https://git.eeqj.de/sneak/mfer/pulls) and pass CI to be merged.
|
||||
Any changes submitted to this project must also be
|
||||
[WTFPL-licensed](https://wtfpl.net) to be considered.
|
||||
|
||||
See [`REPO_POLICIES.md`](REPO_POLICIES.md) for detailed coding standards,
|
||||
tooling requirements, and workflow conventions.
|
||||
|
||||
# Problem Statement
|
||||
|
||||
Given a plain URL, there is no standard way to safely and programmatically
|
||||
@@ -148,14 +122,9 @@ The manifest file would do several important things:
|
||||
- metadata size should not be used as an excuse to sacrifice utility (such
|
||||
as providing checksums over each chunk of a large file)
|
||||
|
||||
# Limitations
|
||||
|
||||
- **Manifest size:** Manifests must fit entirely in system memory during reading and writing.
|
||||
|
||||
# Open Questions
|
||||
|
||||
- Should the manifest file include checksums of individual file chunks, or just for the whole assembled file?
|
||||
|
||||
- If so, should the chunksize be fixed or dynamic?
|
||||
|
||||
- Should the manifest signature format be GnuPG signatures, or those from
|
||||
@@ -239,6 +208,15 @@ regardless of filesystem format.
|
||||
Please email [`sneak@sneak.berlin`](mailto:sneak@sneak.berlin) with your
|
||||
desired username for an account on this Gitea instance.
|
||||
|
||||
# See Also
|
||||
|
||||
## Prior Art: Metalink
|
||||
|
||||
- [Metalink - Mozilla Wiki](https://wiki.mozilla.org/Metalink)
|
||||
- [Metalink - Wikipedia](https://en.wikipedia.org/wiki/Metalink)
|
||||
- [RFC 5854 - The Metalink Download Description Format](https://datatracker.ietf.org/doc/html/rfc5854)
|
||||
- [RFC 6249 - Metalink/HTTP: Mirrors and Hashes](https://www.rfc-editor.org/rfc/rfc6249.html)
|
||||
|
||||
## Links
|
||||
|
||||
- Repo: [https://git.eeqj.de/sneak/mfer](https://git.eeqj.de/sneak/mfer)
|
||||
|
||||
255
REPO_POLICIES.md
Normal file
255
REPO_POLICIES.md
Normal file
@@ -0,0 +1,255 @@
|
||||
---
|
||||
title: Repository Policies
|
||||
last_modified: 2026-03-10
|
||||
---
|
||||
|
||||
This document covers repository structure, tooling, and workflow standards. Code
|
||||
style conventions are in separate documents:
|
||||
|
||||
- [Code Styleguide](https://git.eeqj.de/sneak/prompts/raw/branch/main/prompts/CODE_STYLEGUIDE.md)
|
||||
(general, bash, Docker)
|
||||
- [Go](https://git.eeqj.de/sneak/prompts/raw/branch/main/prompts/CODE_STYLEGUIDE_GO.md)
|
||||
- [JavaScript](https://git.eeqj.de/sneak/prompts/raw/branch/main/prompts/CODE_STYLEGUIDE_JS.md)
|
||||
- [Python](https://git.eeqj.de/sneak/prompts/raw/branch/main/prompts/CODE_STYLEGUIDE_PYTHON.md)
|
||||
- [Go HTTP Server Conventions](https://git.eeqj.de/sneak/prompts/raw/branch/main/prompts/GO_HTTP_SERVER_CONVENTIONS.md)
|
||||
|
||||
---
|
||||
|
||||
- Cross-project documentation (such as this file) must include
|
||||
`last_modified: YYYY-MM-DD` in the YAML front matter so it can be kept in sync
|
||||
with the authoritative source as policies evolve.
|
||||
|
||||
- **ALL external references must be pinned by cryptographic hash.** This
|
||||
includes Docker base images, Go modules, npm packages, GitHub Actions, and
|
||||
anything else fetched from a remote source. Version tags (`@v4`, `@latest`,
|
||||
`:3.21`, etc.) are server-mutable and therefore remote code execution
|
||||
vulnerabilities. The ONLY acceptable way to reference an external dependency
|
||||
is by its content hash (Docker `@sha256:...`, Go module hash in `go.sum`, npm
|
||||
integrity hash in lockfile, GitHub Actions `@<commit-sha>`). No exceptions.
|
||||
This also means never `curl | bash` to install tools like pyenv, nvm, rustup,
|
||||
etc. Instead, download a specific release archive from GitHub, verify its hash
|
||||
(hardcoded in the Dockerfile or script), and only then install. Unverified
|
||||
install scripts are arbitrary remote code execution. This is the single most
|
||||
important rule in this document. Double-check every external reference in
|
||||
every file before committing. There are zero exceptions to this rule.
|
||||
|
||||
- Every repo with software must have a root `Makefile` with these targets:
|
||||
`make test`, `make lint`, `make fmt` (writes), `make fmt-check` (read-only),
|
||||
`make check` (prereqs: `test`, `lint`, `fmt-check`), `make docker`, and
|
||||
`make hooks` (installs pre-commit hook). A model Makefile is at
|
||||
`https://git.eeqj.de/sneak/prompts/raw/branch/main/Makefile`.
|
||||
|
||||
- Always use Makefile targets (`make fmt`, `make test`, `make lint`, etc.)
|
||||
instead of invoking the underlying tools directly. The Makefile is the single
|
||||
source of truth for how these operations are run.
|
||||
|
||||
- The Makefile is authoritative documentation for how the repo is used. Beyond
|
||||
the required targets above, it should have targets for every common operation:
|
||||
running a local development server (`make run`, `make dev`), re-initializing
|
||||
or migrating the database (`make db-reset`, `make migrate`), building
|
||||
artifacts (`make build`), generating code, seeding data, or anything else a
|
||||
developer would do regularly. If someone checks out the repo and types
|
||||
`make<tab>`, they should see every meaningful operation available. A new
|
||||
contributor should be able to understand the entire development workflow by
|
||||
reading the Makefile.
|
||||
|
||||
- Every repo should have a `Dockerfile`. All Dockerfiles must run `make check`
|
||||
as a build step so the build fails if the branch is not green. For non-server
|
||||
repos, the Dockerfile should bring up a development environment and run
|
||||
`make check`. For server repos, `make check` should run as an early build
|
||||
stage before the final image is assembled.
|
||||
|
||||
- Every repo should have a Gitea Actions workflow (`.gitea/workflows/`) that
|
||||
runs `docker build .` on push. Since the Dockerfile already runs `make check`,
|
||||
a successful build implies all checks pass.
|
||||
|
||||
- Use platform-standard formatters: `black` for Python, `prettier` for
|
||||
JS/CSS/Markdown/HTML, `go fmt` for Go. Always use default configuration with
|
||||
two exceptions: four-space indents (except Go), and `proseWrap: always` for
|
||||
Markdown (hard-wrap at 80 columns). Documentation and writing repos (Markdown,
|
||||
HTML, CSS) should also have `.prettierrc` and `.prettierignore`.
|
||||
|
||||
- Pre-commit hook: `make check` if local testing is possible, otherwise
|
||||
`make lint && make fmt-check`. The Makefile should provide a `make hooks`
|
||||
target to install the pre-commit hook.
|
||||
|
||||
- All repos with software must have tests that run via the platform-standard
|
||||
test framework (`go test`, `pytest`, `jest`/`vitest`, etc.). If no meaningful
|
||||
tests exist yet, add the most minimal test possible — e.g. importing the
|
||||
module under test to verify it compiles/parses. There is no excuse for
|
||||
`make test` to be a no-op.
|
||||
|
||||
- `make test` must complete in under 20 seconds. Add a 30-second timeout in the
|
||||
Makefile.
|
||||
|
||||
- Docker builds must complete in under 5 minutes.
|
||||
|
||||
- `make check` must not modify any files in the repo. Tests may use temporary
|
||||
directories.
|
||||
|
||||
- `main` must always pass `make check`, no exceptions.
|
||||
|
||||
- Never commit secrets. `.env` files, credentials, API keys, and private keys
|
||||
must be in `.gitignore`. No exceptions.
|
||||
|
||||
- `.gitignore` should be comprehensive from the start: OS files (`.DS_Store`),
|
||||
editor files (`.swp`, `*~`), language build artifacts, and `node_modules/`.
|
||||
Fetch the standard `.gitignore` from
|
||||
`https://git.eeqj.de/sneak/prompts/raw/branch/main/.gitignore` when setting up
|
||||
a new repo.
|
||||
|
||||
- **No build artifacts in version control.** Code-derived data (compiled
|
||||
bundles, minified output, generated assets) must never be committed to the
|
||||
repository if it can be avoided. The build process (e.g. Dockerfile, Makefile)
|
||||
should generate these at build time. Notable exception: Go protobuf generated
|
||||
files (`.pb.go`) ARE committed because repos need to work with `go get`, which
|
||||
downloads code but does not execute code generation.
|
||||
|
||||
- Never use `git add -A` or `git add .`. Always stage files explicitly by name.
|
||||
|
||||
- Never force-push to `main`.
|
||||
|
||||
- Make all changes on a feature branch. You can do whatever you want on a
|
||||
feature branch.
|
||||
|
||||
- `.golangci.yml` is standardized and must _NEVER_ be modified by an agent, only
|
||||
manually by the user. Fetch from
|
||||
`https://git.eeqj.de/sneak/prompts/raw/branch/main/.golangci.yml`.
|
||||
|
||||
- When pinning images or packages by hash, add a comment above the reference
|
||||
with the version and date (YYYY-MM-DD).
|
||||
|
||||
- Use `yarn`, not `npm`.
|
||||
|
||||
- Write all dates as YYYY-MM-DD (ISO 8601).
|
||||
|
||||
- Simple projects should be configured with environment variables.
|
||||
|
||||
- Dockerized web services listen on port 8080 by default, overridable with
|
||||
`PORT`.
|
||||
|
||||
- **HTTP/web services must be hardened for production internet exposure before
|
||||
tagging 1.0.** This means full compliance with security best practices
|
||||
including, without limitation, all of the following:
|
||||
- **Security headers** on every response:
|
||||
- `Strict-Transport-Security` (HSTS) with `max-age` of at least one year
|
||||
and `includeSubDomains`.
|
||||
- `Content-Security-Policy` (CSP) with a restrictive default policy
|
||||
(`default-src 'self'` as a baseline, tightened per-resource as
|
||||
needed). Never use `unsafe-inline` or `unsafe-eval` unless
|
||||
unavoidable, and document the reason.
|
||||
- `X-Frame-Options: DENY` (or `SAMEORIGIN` if framing is required).
|
||||
Prefer the `frame-ancestors` CSP directive as the primary control.
|
||||
- `X-Content-Type-Options: nosniff`.
|
||||
- `Referrer-Policy: strict-origin-when-cross-origin` (or stricter).
|
||||
- `Permissions-Policy` restricting access to browser features the
|
||||
application does not use (camera, microphone, geolocation, etc.).
|
||||
- **Request and response limits:**
|
||||
- Maximum request body size enforced on all endpoints (e.g. Go
|
||||
`http.MaxBytesReader`). Choose a sane default per-route; never accept
|
||||
unbounded input.
|
||||
- Maximum response body size where applicable (e.g. paginated APIs).
|
||||
- `ReadTimeout` and `ReadHeaderTimeout` on the `http.Server` to defend
|
||||
against slowloris attacks.
|
||||
- `WriteTimeout` on the `http.Server`.
|
||||
- `IdleTimeout` on the `http.Server`.
|
||||
- Per-handler execution time limits via `context.WithTimeout` or
|
||||
chi/stdlib `middleware.Timeout`.
|
||||
- **Authentication and session security:**
|
||||
- Rate limiting on password-based authentication endpoints. API keys are
|
||||
high-entropy and not susceptible to brute force, so they are exempt.
|
||||
- CSRF tokens on all state-mutating HTML forms. API endpoints
|
||||
authenticated via `Authorization` header (Bearer token, API key) are
|
||||
exempt because the browser does not attach these automatically.
|
||||
- Passwords stored using bcrypt, scrypt, or argon2 — never plain-text,
|
||||
MD5, or SHA.
|
||||
- Session cookies set with `HttpOnly`, `Secure`, and `SameSite=Lax` (or
|
||||
`Strict`) attributes.
|
||||
- **Reverse proxy awareness:**
|
||||
- True client IP detection when behind a reverse proxy
|
||||
(`X-Forwarded-For`, `X-Real-IP`). The application must accept
|
||||
forwarded headers only from a configured set of trusted proxy
|
||||
addresses — never trust `X-Forwarded-For` unconditionally.
|
||||
- **CORS:**
|
||||
- Authenticated endpoints must restrict `Access-Control-Allow-Origin` to
|
||||
an explicit allowlist of known origins. Wildcard (`*`) is acceptable
|
||||
only for public, unauthenticated read-only APIs.
|
||||
- **Error handling:**
|
||||
- Internal errors must never leak stack traces, SQL queries, file paths,
|
||||
or other implementation details to the client. Return generic error
|
||||
messages in production; detailed errors only when `DEBUG` is enabled.
|
||||
- **TLS:**
|
||||
- Services never terminate TLS directly. They are always deployed behind
|
||||
a TLS-terminating reverse proxy. The service itself listens on plain
|
||||
HTTP. However, HSTS headers and `Secure` cookie flags must still be
|
||||
set by the application so that the browser enforces HTTPS end-to-end.
|
||||
|
||||
This list is non-exhaustive. Apply defense-in-depth: if a standard security
|
||||
hardening measure exists for HTTP services and is not listed here, it is
|
||||
still expected. When in doubt, harden.
|
||||
|
||||
- `README.md` is the primary documentation. Required sections:
|
||||
- **Description**: First line must include the project name, purpose,
|
||||
category (web server, SPA, CLI tool, etc.), license, and author. Example:
|
||||
"µPaaS is an MIT-licensed Go web application by @sneak that receives
|
||||
git-frontend webhooks and deploys applications via Docker in realtime."
|
||||
- **Getting Started**: Copy-pasteable install/usage code block.
|
||||
- **Rationale**: Why does this exist?
|
||||
- **Design**: How is the program structured?
|
||||
- **TODO**: Update meticulously, even between commits. When planning, put
|
||||
the todo list in the README so a new agent can pick up where the last one
|
||||
left off.
|
||||
- **License**: MIT, GPL, or WTFPL. Ask the user for new projects. Include a
|
||||
`LICENSE` file in the repo root and a License section in the README.
|
||||
- **Author**: [@sneak](https://sneak.berlin).
|
||||
|
||||
- First commit of a new repo should contain only `README.md`.
|
||||
|
||||
- Go module root: `sneak.berlin/go/<name>`. Always run `go mod tidy` before
|
||||
committing.
|
||||
|
||||
- Use SemVer.
|
||||
|
||||
- Database migrations live in `internal/db/migrations/` and must be embedded in
|
||||
the binary.
|
||||
- `000_migration.sql` — contains ONLY the creation of the migrations
|
||||
tracking table itself. Nothing else.
|
||||
- `001_schema.sql` — the full application schema.
|
||||
- **Pre-1.0.0:** never add additional migration files (002, 003, etc.).
|
||||
There is no installed base to migrate. Edit `001_schema.sql` directly.
|
||||
- **Post-1.0.0:** add new numbered migration files for each schema change.
|
||||
Never edit existing migrations after release.
|
||||
|
||||
- All repos should have an `.editorconfig` enforcing the project's indentation
|
||||
settings.
|
||||
|
||||
- Avoid putting files in the repo root unless necessary. Root should contain
|
||||
only project-level config files (`README.md`, `Makefile`, `Dockerfile`,
|
||||
`LICENSE`, `.gitignore`, `.editorconfig`, `REPO_POLICIES.md`, and
|
||||
language-specific config). Everything else goes in a subdirectory. Canonical
|
||||
subdirectory names:
|
||||
- `bin/` — executable scripts and tools
|
||||
- `cmd/` — Go command entrypoints
|
||||
- `configs/` — configuration templates and examples
|
||||
- `deploy/` — deployment manifests (k8s, compose, terraform)
|
||||
- `docs/` — documentation and markdown (README.md stays in root)
|
||||
- `internal/` — Go internal packages
|
||||
- `internal/db/migrations/` — database migrations
|
||||
- `pkg/` — Go library packages
|
||||
- `share/` — systemd units, data files
|
||||
- `static/` — static assets (images, fonts, etc.)
|
||||
- `web/` — web frontend source
|
||||
|
||||
- When setting up a new repo, files from the `prompts` repo may be used as
|
||||
templates. Fetch them from
|
||||
`https://git.eeqj.de/sneak/prompts/raw/branch/main/<path>`.
|
||||
|
||||
- New repos must contain at minimum:
|
||||
- `README.md`, `.git`, `.gitignore`, `.editorconfig`
|
||||
- `LICENSE`, `REPO_POLICIES.md` (copy from the `prompts` repo)
|
||||
- `Makefile`
|
||||
- `Dockerfile`, `.dockerignore`
|
||||
- `.gitea/workflows/check.yml`
|
||||
- Go: `go.mod`, `go.sum`, `.golangci.yml`
|
||||
- JS: `package.json`, `yarn.lock`, `.prettierrc`, `.prettierignore`
|
||||
- Python: `pyproject.toml`
|
||||
30
TODO.md
30
TODO.md
@@ -2,83 +2,83 @@
|
||||
|
||||
## Design Questions
|
||||
|
||||
*sneak: please answer inline below each question. These are preserved for posterity.*
|
||||
_sneak: please answer inline below each question. These are preserved for posterity._
|
||||
|
||||
### Format Design
|
||||
|
||||
**1. Should `MFFileChecksum` be simplified?**
|
||||
Currently it's a separate message wrapping a single `bytes multiHash` field. Since multihash already self-describes the algorithm, `repeated bytes hashes` directly on `MFFilePath` would be simpler and reduce per-file protobuf overhead. Is the extra message layer intentional (e.g. planning to add per-hash metadata like `verified_at`)?
|
||||
|
||||
> *answer:*
|
||||
> _answer:_
|
||||
|
||||
**2. Should file permissions/mode be stored?**
|
||||
The format stores mtime/ctime but not Unix file permissions. For archival use (ExFAT, filesystem-independent checksums) this may not matter, but for software distribution or filesystem restoration it's a gap. Should we reserve a field now (e.g. `optional uint32 mode = 305`) even if we don't populate it yet?
|
||||
|
||||
> *answer:*
|
||||
> _answer:_
|
||||
|
||||
**3. Should `atime` be removed from the schema?**
|
||||
Access time is volatile, non-deterministic, and often disabled (`noatime`). Including it means two manifests of the same directory at different times will differ, which conflicts with the determinism goal. Remove it, or document it as "never set by default"?
|
||||
|
||||
> *answer:*
|
||||
> _answer:_
|
||||
|
||||
**4. What are the path normalization rules?**
|
||||
The proto has `string path` with no specification about: always forward-slash? Must be relative? No `..` components allowed? UTF-8 NFC vs NFD normalization (macOS vs Linux)? Max path length? This is a security issue (path traversal) and a cross-platform compatibility issue. What rules should the spec mandate?
|
||||
|
||||
> *answer:*
|
||||
> _answer:_
|
||||
|
||||
**5. Should we add a version byte after the magic?**
|
||||
Currently `ZNAVSRFG` is followed immediately by protobuf. Adding a version byte (`ZNAVSRFG\x01`) would allow future framing changes without requiring protobuf parsing to detect the version. `MFFileOuter.Version` serves this purpose but requires successful deserialization to read. Worth the extra byte?
|
||||
|
||||
> *answer:*
|
||||
> _answer:_
|
||||
|
||||
**6. Should we add a length-prefix after the magic?**
|
||||
Protobuf is not self-delimiting. If we ever want to concatenate manifests or append data after the protobuf, the current framing is insufficient. Add a varint or fixed-width length-prefix?
|
||||
|
||||
> *answer:*
|
||||
> _answer:_
|
||||
|
||||
### Signature Design
|
||||
|
||||
**7. What does the outer SHA-256 hash cover — compressed or uncompressed data?**
|
||||
The review notes it currently hashes compressed data (good for verifying before decompression), but this should be explicitly documented. Which is the intended behavior?
|
||||
|
||||
> *answer:*
|
||||
> _answer:_
|
||||
|
||||
**8. Should `signatureString()` sign raw bytes instead of a hex-encoded string?**
|
||||
Currently the canonical string is `MAGIC-UUID-MULTIHASH` with hex encoding, which adds a transformation layer. Signing the raw `sha256` bytes (or compressed `innerMessage` directly) would be simpler. Keep the string format or switch to raw bytes?
|
||||
|
||||
> *answer:*
|
||||
> _answer:_
|
||||
|
||||
**9. Should we support detached signature files (`.mf.sig`)?**
|
||||
Embedded signatures are better for single-file distribution. Detached `.mf.sig` files follow the familiar `SHASUMS`/`SHASUMS.asc` pattern and are simpler for HTTP serving. Support both modes?
|
||||
|
||||
> *answer:*
|
||||
> _answer:_
|
||||
|
||||
**10. GPG vs pure-Go crypto for signatures?**
|
||||
Shelling out to `gpg` is fragile (may not be installed, version-dependent output). `github.com/ProtonMail/go-crypto` provides pure-Go OpenPGP, or we could go Ed25519/signify (simpler, no key management). Which direction?
|
||||
|
||||
> *answer:*
|
||||
> _answer:_
|
||||
|
||||
### Implementation Design
|
||||
|
||||
**11. Should manifests be deterministic by default?**
|
||||
This means: sort file entries by path, omit `createdAt` timestamp (or make it opt-in), no `atime`. Should determinism be the default, with a `--include-timestamps` flag to opt in?
|
||||
|
||||
> *answer:*
|
||||
> _answer:_
|
||||
|
||||
**12. Should we consolidate or keep both scanner/checker implementations?**
|
||||
There are two parallel implementations: `mfer/scanner.go` + `mfer/checker.go` (typed with `FileSize`, `RelFilePath`) and `internal/scanner/` + `internal/checker/` (raw `int64`, `string`). The `mfer/` versions are superior. Delete the `internal/` versions?
|
||||
|
||||
> *answer:*
|
||||
> _answer:_
|
||||
|
||||
**13. Should the `manifest` type be exported?**
|
||||
Currently unexported with exported constructors (`New`, `NewFromPaths`, etc.). Consumers can't declare `var m *mfer.manifest`. Export the type, or define an interface?
|
||||
|
||||
> *answer:*
|
||||
> _answer:_
|
||||
|
||||
**14. What should the Go module path be for 1.0?**
|
||||
Currently mixed between `sneak.berlin/go/mfer` and `git.eeqj.de/sneak/mfer`. Which is canonical?
|
||||
|
||||
> *answer:*
|
||||
> _answer:_
|
||||
|
||||
---
|
||||
|
||||
|
||||
@@ -3,6 +3,7 @@ package cli
|
||||
import (
|
||||
"encoding/hex"
|
||||
"fmt"
|
||||
"io"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
"time"
|
||||
@@ -34,29 +35,32 @@ func findManifest(fs afero.Fs, dir string) (string, error) {
|
||||
func (mfa *CLIApp) checkManifestOperation(ctx *cli.Context) error {
|
||||
log.Debug("checkManifestOperation()")
|
||||
|
||||
var manifestPath string
|
||||
var err error
|
||||
manifestPath, err := mfa.resolveManifestArg(ctx)
|
||||
if err != nil {
|
||||
return fmt.Errorf("check: %w", err)
|
||||
}
|
||||
|
||||
if ctx.Args().Len() > 0 {
|
||||
arg := ctx.Args().Get(0)
|
||||
// Check if arg is a directory or a file
|
||||
info, statErr := mfa.Fs.Stat(arg)
|
||||
if statErr == nil && info.IsDir() {
|
||||
// It's a directory, look for manifest inside
|
||||
manifestPath, err = findManifest(mfa.Fs, arg)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
} else {
|
||||
// Treat as a file path
|
||||
manifestPath = arg
|
||||
// URL manifests need to be downloaded to a temp file for the checker
|
||||
if isHTTPURL(manifestPath) {
|
||||
rc, fetchErr := mfa.openManifestReader(manifestPath)
|
||||
if fetchErr != nil {
|
||||
return fmt.Errorf("check: %w", fetchErr)
|
||||
}
|
||||
} else {
|
||||
// No argument, look in current directory
|
||||
manifestPath, err = findManifest(mfa.Fs, ".")
|
||||
if err != nil {
|
||||
return err
|
||||
tmpFile, tmpErr := afero.TempFile(mfa.Fs, "", "mfer-manifest-*.mf")
|
||||
if tmpErr != nil {
|
||||
_ = rc.Close()
|
||||
return fmt.Errorf("check: failed to create temp file: %w", tmpErr)
|
||||
}
|
||||
tmpPath := tmpFile.Name()
|
||||
_, cpErr := io.Copy(tmpFile, rc)
|
||||
_ = rc.Close()
|
||||
_ = tmpFile.Close()
|
||||
if cpErr != nil {
|
||||
_ = mfa.Fs.Remove(tmpPath)
|
||||
return fmt.Errorf("check: failed to download manifest: %w", cpErr)
|
||||
}
|
||||
defer func() { _ = mfa.Fs.Remove(tmpPath) }()
|
||||
manifestPath = tmpPath
|
||||
}
|
||||
|
||||
basePath := ctx.String("base")
|
||||
|
||||
72
internal/cli/export.go
Normal file
72
internal/cli/export.go
Normal file
@@ -0,0 +1,72 @@
|
||||
package cli
|
||||
|
||||
import (
|
||||
"encoding/hex"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"time"
|
||||
|
||||
"github.com/urfave/cli/v2"
|
||||
"sneak.berlin/go/mfer/mfer"
|
||||
)
|
||||
|
||||
// ExportEntry represents a single file entry in the exported JSON output.
type ExportEntry struct {
	// Path is the file's manifest-relative path.
	Path string `json:"path"`
	// Size is the file size in bytes.
	Size int64 `json:"size"`
	// Hashes holds hex-encoded multihash values for the file.
	Hashes []string `json:"hashes"`
	// Mtime is the modification time in RFC3339Nano (UTC), omitted when absent.
	Mtime *string `json:"mtime,omitempty"`
	// Ctime is the change time in RFC3339Nano (UTC), omitted when absent.
	Ctime *string `json:"ctime,omitempty"`
}
|
||||
|
||||
func (mfa *CLIApp) exportManifestOperation(ctx *cli.Context) error {
|
||||
pathOrURL, err := mfa.resolveManifestArg(ctx)
|
||||
if err != nil {
|
||||
return fmt.Errorf("export: %w", err)
|
||||
}
|
||||
|
||||
rc, err := mfa.openManifestReader(pathOrURL)
|
||||
if err != nil {
|
||||
return fmt.Errorf("export: %w", err)
|
||||
}
|
||||
defer func() { _ = rc.Close() }()
|
||||
|
||||
manifest, err := mfer.NewManifestFromReader(rc)
|
||||
if err != nil {
|
||||
return fmt.Errorf("export: failed to parse manifest: %w", err)
|
||||
}
|
||||
|
||||
files := manifest.Files()
|
||||
entries := make([]ExportEntry, 0, len(files))
|
||||
|
||||
for _, f := range files {
|
||||
entry := ExportEntry{
|
||||
Path: f.Path,
|
||||
Size: f.Size,
|
||||
Hashes: make([]string, 0, len(f.Hashes)),
|
||||
}
|
||||
|
||||
for _, h := range f.Hashes {
|
||||
entry.Hashes = append(entry.Hashes, hex.EncodeToString(h.MultiHash))
|
||||
}
|
||||
|
||||
if f.Mtime != nil {
|
||||
t := time.Unix(f.Mtime.Seconds, int64(f.Mtime.Nanos)).UTC().Format(time.RFC3339Nano)
|
||||
entry.Mtime = &t
|
||||
}
|
||||
if f.Ctime != nil {
|
||||
t := time.Unix(f.Ctime.Seconds, int64(f.Ctime.Nanos)).UTC().Format(time.RFC3339Nano)
|
||||
entry.Ctime = &t
|
||||
}
|
||||
|
||||
entries = append(entries, entry)
|
||||
}
|
||||
|
||||
enc := json.NewEncoder(mfa.Stdout)
|
||||
enc.SetIndent("", " ")
|
||||
if err := enc.Encode(entries); err != nil {
|
||||
return fmt.Errorf("export: failed to encode JSON: %w", err)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
137
internal/cli/export_test.go
Normal file
137
internal/cli/export_test.go
Normal file
@@ -0,0 +1,137 @@
|
||||
package cli
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"context"
|
||||
"encoding/json"
|
||||
"net/http"
|
||||
"net/http/httptest"
|
||||
"testing"
|
||||
|
||||
"github.com/spf13/afero"
|
||||
"github.com/stretchr/testify/assert"
|
||||
"github.com/stretchr/testify/require"
|
||||
"sneak.berlin/go/mfer/mfer"
|
||||
)
|
||||
|
||||
// buildTestManifest creates a manifest from in-memory files and returns its bytes.
// Keys of files are paths relative to "/"; values are file contents. The
// manifest is produced by scanning an afero in-memory filesystem.
func buildTestManifest(t *testing.T, files map[string][]byte) []byte {
	t.Helper()
	sourceFs := afero.NewMemMapFs()
	for path, content := range files {
		// NOTE(review): MkdirAll("/") on every iteration looks redundant —
		// confirm whether WriteFile already creates parent directories here.
		require.NoError(t, sourceFs.MkdirAll("/", 0o755))
		require.NoError(t, afero.WriteFile(sourceFs, "/"+path, content, 0o644))
	}

	opts := &mfer.ScannerOptions{Fs: sourceFs}
	s := mfer.NewScannerWithOptions(opts)
	// Enumerate everything under the filesystem root; nil progress channel.
	require.NoError(t, s.EnumerateFS(sourceFs, "/", nil))

	var buf bytes.Buffer
	require.NoError(t, s.ToManifest(context.Background(), &buf, nil))
	return buf.Bytes()
}
|
||||
|
||||
// TestExportManifestOperation runs the "export" subcommand against a
// manifest file on an in-memory filesystem and verifies the JSON output
// contains one well-formed entry per input file.
func TestExportManifestOperation(t *testing.T) {
	testFiles := map[string][]byte{
		"hello.txt":    []byte("Hello, World!"),
		"sub/file.txt": []byte("nested content"),
	}
	manifestData := buildTestManifest(t, testFiles)

	// Write manifest to memfs
	fs := afero.NewMemMapFs()
	require.NoError(t, afero.WriteFile(fs, "/test.mf", manifestData, 0o644))

	var stdout, stderr bytes.Buffer
	exitCode := RunWithOptions(&RunOptions{
		Appname: "mfer",
		Args:    []string{"mfer", "export", "/test.mf"},
		Stdin:   &bytes.Buffer{},
		Stdout:  &stdout,
		Stderr:  &stderr,
		Fs:      fs,
	})

	require.Equal(t, 0, exitCode, "stderr: %s", stderr.String())

	// The command emits a JSON array of ExportEntry values on stdout.
	var entries []ExportEntry
	require.NoError(t, json.Unmarshal(stdout.Bytes(), &entries))
	assert.Len(t, entries, 2)

	// Verify entries have expected fields
	pathSet := make(map[string]bool)
	for _, e := range entries {
		pathSet[e.Path] = true
		assert.NotEmpty(t, e.Hashes, "entry %s should have hashes", e.Path)
		assert.Greater(t, e.Size, int64(0), "entry %s should have positive size", e.Path)
	}
	assert.True(t, pathSet["hello.txt"])
	assert.True(t, pathSet["sub/file.txt"])
}
|
||||
|
||||
// TestExportFromHTTPURL serves a manifest over a local httptest server and
// verifies the "export" subcommand can fetch and decode it from a URL.
func TestExportFromHTTPURL(t *testing.T) {
	testFiles := map[string][]byte{
		"a.txt": []byte("aaa"),
	}
	manifestData := buildTestManifest(t, testFiles)

	// Serve the raw manifest bytes at any path.
	server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		w.Header().Set("Content-Type", "application/octet-stream")
		_, _ = w.Write(manifestData)
	}))
	defer server.Close()

	var stdout, stderr bytes.Buffer
	exitCode := RunWithOptions(&RunOptions{
		Appname: "mfer",
		Args:    []string{"mfer", "export", server.URL + "/index.mf"},
		Stdin:   &bytes.Buffer{},
		Stdout:  &stdout,
		Stderr:  &stderr,
		Fs:      afero.NewMemMapFs(),
	})

	require.Equal(t, 0, exitCode, "stderr: %s", stderr.String())

	var entries []ExportEntry
	require.NoError(t, json.Unmarshal(stdout.Bytes(), &entries))
	assert.Len(t, entries, 1)
	assert.Equal(t, "a.txt", entries[0].Path)
}
|
||||
|
||||
// TestListFromHTTPURL serves a manifest over a local httptest server and
// verifies the "list" subcommand prints each contained file path.
func TestListFromHTTPURL(t *testing.T) {
	testFiles := map[string][]byte{
		"one.txt": []byte("1"),
		"two.txt": []byte("22"),
	}
	manifestData := buildTestManifest(t, testFiles)

	server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		_, _ = w.Write(manifestData)
	}))
	defer server.Close()

	var stdout, stderr bytes.Buffer
	exitCode := RunWithOptions(&RunOptions{
		Appname: "mfer",
		Args:    []string{"mfer", "list", server.URL + "/index.mf"},
		Stdin:   &bytes.Buffer{},
		Stdout:  &stdout,
		Stderr:  &stderr,
		Fs:      afero.NewMemMapFs(),
	})

	require.Equal(t, 0, exitCode, "stderr: %s", stderr.String())
	// The listing should name every file from the manifest.
	output := stdout.String()
	assert.Contains(t, output, "one.txt")
	assert.Contains(t, output, "two.txt")
}
|
||||
|
||||
func TestIsHTTPURL(t *testing.T) {
|
||||
assert.True(t, isHTTPURL("http://example.com/manifest.mf"))
|
||||
assert.True(t, isHTTPURL("https://example.com/manifest.mf"))
|
||||
assert.False(t, isHTTPURL("/local/path.mf"))
|
||||
assert.False(t, isHTTPURL("relative/path.mf"))
|
||||
assert.False(t, isHTTPURL("ftp://example.com/file"))
|
||||
}
|
||||
@@ -67,7 +67,7 @@ func (mfa *CLIApp) fetchManifestOperation(ctx *cli.Context) error {
|
||||
// Compute base URL (directory containing manifest)
|
||||
baseURL, err := url.Parse(manifestURL)
|
||||
if err != nil {
|
||||
return err
|
||||
return fmt.Errorf("fetch: invalid manifest URL: %w", err)
|
||||
}
|
||||
baseURL.Path = path.Dir(baseURL.Path)
|
||||
if !strings.HasSuffix(baseURL.Path, "/") {
|
||||
@@ -267,7 +267,7 @@ func downloadFile(fileURL, localPath string, entry *mfer.MFFilePath, progress ch
|
||||
dir := filepath.Dir(localPath)
|
||||
if dir != "" && dir != "." {
|
||||
if err := os.MkdirAll(dir, 0o755); err != nil {
|
||||
return err
|
||||
return fmt.Errorf("failed to create directory %s: %w", dir, err)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -287,9 +287,9 @@ func downloadFile(fileURL, localPath string, entry *mfer.MFFilePath, progress ch
|
||||
}
|
||||
|
||||
// Fetch file
|
||||
resp, err := http.Get(fileURL)
|
||||
resp, err := http.Get(fileURL) //nolint:gosec // URL constructed from manifest base
|
||||
if err != nil {
|
||||
return err
|
||||
return fmt.Errorf("HTTP request failed: %w", err)
|
||||
}
|
||||
defer func() { _ = resp.Body.Close() }()
|
||||
|
||||
@@ -307,7 +307,7 @@ func downloadFile(fileURL, localPath string, entry *mfer.MFFilePath, progress ch
|
||||
// Create temp file
|
||||
out, err := os.Create(tmpPath)
|
||||
if err != nil {
|
||||
return err
|
||||
return fmt.Errorf("failed to create temp file: %w", err)
|
||||
}
|
||||
|
||||
// Set up hash computation
|
||||
|
||||
@@ -41,8 +41,8 @@ func (mfa *CLIApp) freshenManifestOperation(ctx *cli.Context) error {
|
||||
|
||||
basePath := ctx.String("base")
|
||||
showProgress := ctx.Bool("progress")
|
||||
includeDotfiles := ctx.Bool("IncludeDotfiles")
|
||||
followSymlinks := ctx.Bool("FollowSymLinks")
|
||||
includeDotfiles := ctx.Bool("include-dotfiles")
|
||||
followSymlinks := ctx.Bool("follow-symlinks")
|
||||
|
||||
// Find manifest file
|
||||
var manifestPath string
|
||||
@@ -54,7 +54,7 @@ func (mfa *CLIApp) freshenManifestOperation(ctx *cli.Context) error {
|
||||
if statErr == nil && info.IsDir() {
|
||||
manifestPath, err = findManifest(mfa.Fs, arg)
|
||||
if err != nil {
|
||||
return err
|
||||
return fmt.Errorf("freshen: %w", err)
|
||||
}
|
||||
} else {
|
||||
manifestPath = arg
|
||||
@@ -62,7 +62,7 @@ func (mfa *CLIApp) freshenManifestOperation(ctx *cli.Context) error {
|
||||
} else {
|
||||
manifestPath, err = findManifest(mfa.Fs, ".")
|
||||
if err != nil {
|
||||
return err
|
||||
return fmt.Errorf("freshen: %w", err)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -93,7 +93,7 @@ func (mfa *CLIApp) freshenManifestOperation(ctx *cli.Context) error {
|
||||
|
||||
absBase, err := filepath.Abs(basePath)
|
||||
if err != nil {
|
||||
return err
|
||||
return fmt.Errorf("freshen: invalid base path: %w", err)
|
||||
}
|
||||
|
||||
err = afero.Walk(mfa.Fs, absBase, func(path string, info fs.FileInfo, walkErr error) error {
|
||||
@@ -104,7 +104,7 @@ func (mfa *CLIApp) freshenManifestOperation(ctx *cli.Context) error {
|
||||
// Get relative path
|
||||
relPath, err := filepath.Rel(absBase, path)
|
||||
if err != nil {
|
||||
return err
|
||||
return fmt.Errorf("freshen: failed to compute relative path for %s: %w", path, err)
|
||||
}
|
||||
|
||||
// Skip the manifest file itself
|
||||
@@ -226,6 +226,9 @@ func (mfa *CLIApp) freshenManifestOperation(ctx *cli.Context) error {
|
||||
var hashedBytes int64
|
||||
|
||||
builder := mfer.NewBuilder()
|
||||
if ctx.Bool("include-timestamps") {
|
||||
builder.SetIncludeTimestamps(true)
|
||||
}
|
||||
|
||||
// Set up signing options if sign-key is provided
|
||||
if signKey := ctx.String("sign-key"); signKey != "" {
|
||||
|
||||
@@ -20,9 +20,16 @@ func (mfa *CLIApp) generateManifestOperation(ctx *cli.Context) error {
|
||||
log.Debug("generateManifestOperation()")
|
||||
|
||||
opts := &mfer.ScannerOptions{
|
||||
IncludeDotfiles: ctx.Bool("IncludeDotfiles"),
|
||||
FollowSymLinks: ctx.Bool("FollowSymLinks"),
|
||||
Fs: mfa.Fs,
|
||||
IncludeDotfiles: ctx.Bool("include-dotfiles"),
|
||||
FollowSymLinks: ctx.Bool("follow-symlinks"),
|
||||
IncludeTimestamps: ctx.Bool("include-timestamps"),
|
||||
Fs: mfa.Fs,
|
||||
}
|
||||
|
||||
// Set seed for deterministic UUID if provided
|
||||
if seed := ctx.String("seed"); seed != "" {
|
||||
opts.Seed = seed
|
||||
log.Infof("using deterministic seed for manifest UUID")
|
||||
}
|
||||
|
||||
// Set up signing options if sign-key is provided
|
||||
@@ -59,7 +66,7 @@ func (mfa *CLIApp) generateManifestOperation(ctx *cli.Context) error {
|
||||
if args.Len() == 0 {
|
||||
// Default to current directory
|
||||
if err := s.EnumeratePath(".", enumProgress); err != nil {
|
||||
return err
|
||||
return fmt.Errorf("generate: failed to enumerate current directory: %w", err)
|
||||
}
|
||||
} else {
|
||||
// Collect and validate all paths first
|
||||
@@ -68,7 +75,7 @@ func (mfa *CLIApp) generateManifestOperation(ctx *cli.Context) error {
|
||||
inputPath := args.Get(i)
|
||||
ap, err := filepath.Abs(inputPath)
|
||||
if err != nil {
|
||||
return err
|
||||
return fmt.Errorf("generate: invalid path %q: %w", inputPath, err)
|
||||
}
|
||||
// Validate path exists before adding to list
|
||||
if exists, _ := afero.Exists(mfa.Fs, ap); !exists {
|
||||
@@ -78,7 +85,7 @@ func (mfa *CLIApp) generateManifestOperation(ctx *cli.Context) error {
|
||||
paths = append(paths, ap)
|
||||
}
|
||||
if err := s.EnumeratePaths(enumProgress, paths...); err != nil {
|
||||
return err
|
||||
return fmt.Errorf("generate: failed to enumerate paths: %w", err)
|
||||
}
|
||||
}
|
||||
enumWg.Wait()
|
||||
|
||||
@@ -16,32 +16,20 @@ func (mfa *CLIApp) listManifestOperation(ctx *cli.Context) error {
|
||||
longFormat := ctx.Bool("long")
|
||||
print0 := ctx.Bool("print0")
|
||||
|
||||
// Find manifest file
|
||||
var manifestPath string
|
||||
var err error
|
||||
|
||||
if ctx.Args().Len() > 0 {
|
||||
arg := ctx.Args().Get(0)
|
||||
info, statErr := mfa.Fs.Stat(arg)
|
||||
if statErr == nil && info.IsDir() {
|
||||
manifestPath, err = findManifest(mfa.Fs, arg)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
} else {
|
||||
manifestPath = arg
|
||||
}
|
||||
} else {
|
||||
manifestPath, err = findManifest(mfa.Fs, ".")
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
pathOrURL, err := mfa.resolveManifestArg(ctx)
|
||||
if err != nil {
|
||||
return fmt.Errorf("list: %w", err)
|
||||
}
|
||||
|
||||
// Load manifest
|
||||
manifest, err := mfer.NewManifestFromFile(mfa.Fs, manifestPath)
|
||||
rc, err := mfa.openManifestReader(pathOrURL)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to load manifest: %w", err)
|
||||
return fmt.Errorf("list: %w", err)
|
||||
}
|
||||
defer func() { _ = rc.Close() }()
|
||||
|
||||
manifest, err := mfer.NewManifestFromReader(rc)
|
||||
if err != nil {
|
||||
return fmt.Errorf("list: failed to parse manifest: %w", err)
|
||||
}
|
||||
|
||||
files := manifest.Files()
|
||||
|
||||
56
internal/cli/manifest_loader.go
Normal file
56
internal/cli/manifest_loader.go
Normal file
@@ -0,0 +1,56 @@
|
||||
package cli
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"io"
|
||||
"net/http"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/urfave/cli/v2"
|
||||
)
|
||||
|
||||
// isHTTPURL reports whether s is an http:// or https:// URL.
func isHTTPURL(s string) bool {
	for _, scheme := range []string{"http://", "https://"} {
		if strings.HasPrefix(s, scheme) {
			return true
		}
	}
	return false
}
|
||||
|
||||
// openManifestReader opens a manifest from a path or URL and returns a ReadCloser.
|
||||
// The caller must close the returned reader.
|
||||
func (mfa *CLIApp) openManifestReader(pathOrURL string) (io.ReadCloser, error) {
|
||||
if isHTTPURL(pathOrURL) {
|
||||
client := &http.Client{Timeout: 30 * time.Second}
|
||||
resp, err := client.Get(pathOrURL) //nolint:gosec // user-provided URL is intentional
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to fetch %s: %w", pathOrURL, err)
|
||||
}
|
||||
if resp.StatusCode != http.StatusOK {
|
||||
_ = resp.Body.Close()
|
||||
return nil, fmt.Errorf("failed to fetch %s: HTTP %d", pathOrURL, resp.StatusCode)
|
||||
}
|
||||
return resp.Body, nil
|
||||
}
|
||||
f, err := mfa.Fs.Open(pathOrURL)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return f, nil
|
||||
}
|
||||
|
||||
// resolveManifestArg resolves the manifest path from CLI arguments.
|
||||
// HTTP(S) URLs are returned as-is. Directories are searched for index.mf/.index.mf.
|
||||
// If no argument is given, the current directory is searched.
|
||||
func (mfa *CLIApp) resolveManifestArg(ctx *cli.Context) (string, error) {
|
||||
if ctx.Args().Len() > 0 {
|
||||
arg := ctx.Args().Get(0)
|
||||
if isHTTPURL(arg) {
|
||||
return arg, nil
|
||||
}
|
||||
info, statErr := mfa.Fs.Stat(arg)
|
||||
if statErr == nil && info.IsDir() {
|
||||
return findManifest(mfa.Fs, arg)
|
||||
}
|
||||
return arg, nil
|
||||
}
|
||||
return findManifest(mfa.Fs, ".")
|
||||
}
|
||||
@@ -123,14 +123,15 @@ func (mfa *CLIApp) run(args []string) {
|
||||
},
|
||||
Flags: append(commonFlags(),
|
||||
&cli.BoolFlag{
|
||||
Name: "FollowSymLinks",
|
||||
Aliases: []string{"follow-symlinks"},
|
||||
Name: "follow-symlinks",
|
||||
Aliases: []string{"L"},
|
||||
Usage: "Resolve encountered symlinks",
|
||||
},
|
||||
&cli.BoolFlag{
|
||||
Name: "IncludeDotfiles",
|
||||
Aliases: []string{"include-dotfiles"},
|
||||
Usage: "Include dot (hidden) files (excluded by default)",
|
||||
Name: "include-dotfiles",
|
||||
Aliases: []string{"IncludeDotfiles"},
|
||||
|
||||
Usage: "Include dot (hidden) files (excluded by default)",
|
||||
},
|
||||
&cli.StringFlag{
|
||||
Name: "output",
|
||||
@@ -154,6 +155,15 @@ func (mfa *CLIApp) run(args []string) {
|
||||
Usage: "GPG key ID to sign the manifest with",
|
||||
EnvVars: []string{"MFER_SIGN_KEY"},
|
||||
},
|
||||
&cli.StringFlag{
|
||||
Name: "seed",
|
||||
Usage: "Seed value for deterministic manifest UUID",
|
||||
EnvVars: []string{"MFER_SEED"},
|
||||
},
|
||||
&cli.BoolFlag{
|
||||
Name: "include-timestamps",
|
||||
Usage: "Include createdAt timestamp in manifest (omitted by default for determinism)",
|
||||
},
|
||||
),
|
||||
},
|
||||
{
|
||||
@@ -206,14 +216,15 @@ func (mfa *CLIApp) run(args []string) {
|
||||
Usage: "Base directory for resolving relative paths",
|
||||
},
|
||||
&cli.BoolFlag{
|
||||
Name: "FollowSymLinks",
|
||||
Aliases: []string{"follow-symlinks"},
|
||||
Name: "follow-symlinks",
|
||||
Aliases: []string{"L"},
|
||||
Usage: "Resolve encountered symlinks",
|
||||
},
|
||||
&cli.BoolFlag{
|
||||
Name: "IncludeDotfiles",
|
||||
Aliases: []string{"include-dotfiles"},
|
||||
Usage: "Include dot (hidden) files (excluded by default)",
|
||||
Name: "include-dotfiles",
|
||||
Aliases: []string{"IncludeDotfiles"},
|
||||
|
||||
Usage: "Include dot (hidden) files (excluded by default)",
|
||||
},
|
||||
&cli.BoolFlag{
|
||||
Name: "progress",
|
||||
@@ -226,8 +237,20 @@ func (mfa *CLIApp) run(args []string) {
|
||||
Usage: "GPG key ID to sign the manifest with",
|
||||
EnvVars: []string{"MFER_SIGN_KEY"},
|
||||
},
|
||||
&cli.BoolFlag{
|
||||
Name: "include-timestamps",
|
||||
Usage: "Include createdAt timestamp in manifest (omitted by default for determinism)",
|
||||
},
|
||||
),
|
||||
},
|
||||
{
|
||||
Name: "export",
|
||||
Usage: "Export manifest contents as JSON",
|
||||
ArgsUsage: "[manifest file or URL]",
|
||||
Action: func(c *cli.Context) error {
|
||||
return mfa.exportManifestOperation(c)
|
||||
},
|
||||
},
|
||||
{
|
||||
Name: "version",
|
||||
Usage: "Show version",
|
||||
@@ -269,7 +292,7 @@ func (mfa *CLIApp) run(args []string) {
|
||||
},
|
||||
}
|
||||
|
||||
mfa.app.HideVersion = true
|
||||
mfa.app.HideVersion = false
|
||||
err := mfa.app.Run(args)
|
||||
if err != nil {
|
||||
mfa.exitCode = 1
|
||||
|
||||
@@ -5,6 +5,7 @@ import (
|
||||
"errors"
|
||||
"fmt"
|
||||
"io"
|
||||
"sort"
|
||||
"strings"
|
||||
"sync"
|
||||
"time"
|
||||
@@ -84,10 +85,20 @@ type FileHashProgress struct {
|
||||
|
||||
// Builder constructs a manifest by adding files one at a time.
|
||||
type Builder struct {
|
||||
mu sync.Mutex
|
||||
files []*MFFilePath
|
||||
createdAt time.Time
|
||||
signingOptions *SigningOptions
|
||||
mu sync.Mutex
|
||||
files []*MFFilePath
|
||||
createdAt time.Time
|
||||
includeTimestamps bool
|
||||
signingOptions *SigningOptions
|
||||
fixedUUID []byte // if set, use this UUID instead of generating one
|
||||
}
|
||||
|
||||
// SetSeed derives a deterministic UUID from the given seed string.
|
||||
// The seed is hashed once with SHA-256 and the first 16 bytes are used
|
||||
// as a fixed UUID for the manifest.
|
||||
func (b *Builder) SetSeed(seed string) {
|
||||
hash := sha256.Sum256([]byte(seed))
|
||||
b.fixedUUID = hash[:16]
|
||||
}
|
||||
|
||||
// NewBuilder creates a new Builder.
|
||||
@@ -185,7 +196,7 @@ func (b *Builder) FileCount() int {
|
||||
// Returns an error if path is empty, size is negative, or hash is nil/empty.
|
||||
func (b *Builder) AddFileWithHash(path RelFilePath, size FileSize, mtime ModTime, hash Multihash) error {
|
||||
if err := ValidatePath(string(path)); err != nil {
|
||||
return err
|
||||
return fmt.Errorf("add file: %w", err)
|
||||
}
|
||||
if size < 0 {
|
||||
return errors.New("size cannot be negative")
|
||||
@@ -209,6 +220,14 @@ func (b *Builder) AddFileWithHash(path RelFilePath, size FileSize, mtime ModTime
|
||||
return nil
|
||||
}
|
||||
|
||||
// SetIncludeTimestamps controls whether the manifest includes a createdAt timestamp.
|
||||
// By default timestamps are omitted for deterministic output.
|
||||
func (b *Builder) SetIncludeTimestamps(include bool) {
|
||||
b.mu.Lock()
|
||||
defer b.mu.Unlock()
|
||||
b.includeTimestamps = include
|
||||
}
|
||||
|
||||
// SetSigningOptions sets the GPG signing options for the manifest.
|
||||
// If opts is non-nil, the manifest will be signed when Build() is called.
|
||||
func (b *Builder) SetSigningOptions(opts *SigningOptions) {
|
||||
@@ -222,30 +241,41 @@ func (b *Builder) Build(w io.Writer) error {
|
||||
b.mu.Lock()
|
||||
defer b.mu.Unlock()
|
||||
|
||||
// Sort files by path for deterministic output
|
||||
sort.Slice(b.files, func(i, j int) bool {
|
||||
return b.files[i].Path < b.files[j].Path
|
||||
})
|
||||
|
||||
// Create inner manifest
|
||||
inner := &MFFile{
|
||||
Version: MFFile_VERSION_ONE,
|
||||
CreatedAt: newTimestampFromTime(b.createdAt),
|
||||
Files: b.files,
|
||||
Version: MFFile_VERSION_ONE,
|
||||
Files: b.files,
|
||||
}
|
||||
if b.includeTimestamps {
|
||||
inner.CreatedAt = newTimestampFromTime(b.createdAt)
|
||||
}
|
||||
|
||||
// Create a temporary manifest to use existing serialization
|
||||
m := &manifest{
|
||||
pbInner: inner,
|
||||
signingOptions: b.signingOptions,
|
||||
fixedUUID: b.fixedUUID,
|
||||
}
|
||||
|
||||
// Generate outer wrapper
|
||||
if err := m.generateOuter(); err != nil {
|
||||
return err
|
||||
return fmt.Errorf("build: generate outer: %w", err)
|
||||
}
|
||||
|
||||
// Generate final output
|
||||
if err := m.generate(); err != nil {
|
||||
return err
|
||||
return fmt.Errorf("build: generate: %w", err)
|
||||
}
|
||||
|
||||
// Write to output
|
||||
_, err := w.Write(m.output.Bytes())
|
||||
return err
|
||||
if err != nil {
|
||||
return fmt.Errorf("build: write output: %w", err)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
@@ -115,6 +115,207 @@ func TestNewTimestampFromTimeExtremeDate(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
// TestBuilderDeterministicOutput builds the same manifest twice — with a
// pinned createdAt and UUID, and files added in non-sorted order — and
// asserts the two outputs are byte-identical.
func TestBuilderDeterministicOutput(t *testing.T) {
	buildManifest := func() []byte {
		b := NewBuilder()
		// Use a fixed createdAt and UUID so output is reproducible
		b.createdAt = time.Date(2025, 1, 1, 0, 0, 0, 0, time.UTC)
		b.fixedUUID = make([]byte, 16) // all zeros

		mtime := ModTime(time.Date(2025, 6, 1, 0, 0, 0, 0, time.UTC))

		// Add files in reverse order to test sorting
		files := []struct {
			path    string
			content string
		}{
			{"c/file.txt", "content c"},
			{"a/file.txt", "content a"},
			{"b/file.txt", "content b"},
		}
		for _, f := range files {
			r := bytes.NewReader([]byte(f.content))
			_, err := b.AddFile(RelFilePath(f.path), FileSize(len(f.content)), mtime, r, nil)
			require.NoError(t, err)
		}

		var buf bytes.Buffer
		err := b.Build(&buf)
		require.NoError(t, err)
		return buf.Bytes()
	}

	out1 := buildManifest()
	out2 := buildManifest()
	assert.Equal(t, out1, out2, "two builds with same input should produce byte-identical output")
}
|
||||
|
||||
// TestSetSeedDeterministic checks that SetSeed is a pure function of the
// seed: equal seeds produce equal 16-byte UUIDs, distinct seeds differ.
func TestSetSeedDeterministic(t *testing.T) {
	b1 := NewBuilder()
	b1.SetSeed("test-seed-value")
	b2 := NewBuilder()
	b2.SetSeed("test-seed-value")
	assert.Equal(t, b1.fixedUUID, b2.fixedUUID, "same seed should produce same UUID")
	assert.Len(t, b1.fixedUUID, 16, "UUID should be 16 bytes")

	b3 := NewBuilder()
	b3.SetSeed("different-seed")
	assert.NotEqual(t, b1.fixedUUID, b3.fixedUUID, "different seeds should produce different UUIDs")
}
|
||||
|
||||
func TestValidatePath(t *testing.T) {
|
||||
valid := []string{
|
||||
"file.txt",
|
||||
"dir/file.txt",
|
||||
"a/b/c/d.txt",
|
||||
"file with spaces.txt",
|
||||
"日本語.txt",
|
||||
}
|
||||
for _, p := range valid {
|
||||
t.Run("valid:"+p, func(t *testing.T) {
|
||||
assert.NoError(t, ValidatePath(p))
|
||||
})
|
||||
}
|
||||
|
||||
invalid := []struct {
|
||||
path string
|
||||
desc string
|
||||
}{
|
||||
{"", "empty"},
|
||||
{"/absolute", "absolute path"},
|
||||
{"has\\backslash", "backslash"},
|
||||
{"has/../traversal", "dot-dot segment"},
|
||||
{"has//double", "empty segment"},
|
||||
{"..", "just dot-dot"},
|
||||
{string([]byte{0xff, 0xfe}), "invalid UTF-8"},
|
||||
}
|
||||
for _, tt := range invalid {
|
||||
t.Run("invalid:"+tt.desc, func(t *testing.T) {
|
||||
assert.Error(t, ValidatePath(tt.path))
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestBuilderAddFileSizeMismatch(t *testing.T) {
|
||||
b := NewBuilder()
|
||||
content := []byte("short")
|
||||
reader := bytes.NewReader(content)
|
||||
|
||||
// Declare wrong size
|
||||
_, err := b.AddFile("test.txt", FileSize(100), ModTime(time.Now()), reader, nil)
|
||||
assert.Error(t, err)
|
||||
assert.Contains(t, err.Error(), "size mismatch")
|
||||
}
|
||||
|
||||
func TestBuilderAddFileInvalidPath(t *testing.T) {
|
||||
b := NewBuilder()
|
||||
content := []byte("data")
|
||||
reader := bytes.NewReader(content)
|
||||
|
||||
_, err := b.AddFile("", FileSize(len(content)), ModTime(time.Now()), reader, nil)
|
||||
assert.Error(t, err)
|
||||
|
||||
reader.Reset(content)
|
||||
_, err = b.AddFile("/absolute", FileSize(len(content)), ModTime(time.Now()), reader, nil)
|
||||
assert.Error(t, err)
|
||||
}
|
||||
|
||||
func TestBuilderAddFileWithProgress(t *testing.T) {
|
||||
b := NewBuilder()
|
||||
content := bytes.Repeat([]byte("x"), 1000)
|
||||
reader := bytes.NewReader(content)
|
||||
progress := make(chan FileHashProgress, 100)
|
||||
|
||||
bytesRead, err := b.AddFile("test.txt", FileSize(len(content)), ModTime(time.Now()), reader, progress)
|
||||
close(progress)
|
||||
require.NoError(t, err)
|
||||
assert.Equal(t, FileSize(1000), bytesRead)
|
||||
|
||||
var updates []FileHashProgress
|
||||
for p := range progress {
|
||||
updates = append(updates, p)
|
||||
}
|
||||
assert.NotEmpty(t, updates)
|
||||
// Last update should show all bytes
|
||||
assert.Equal(t, FileSize(1000), updates[len(updates)-1].BytesRead)
|
||||
}
|
||||
|
||||
func TestBuilderBuildRoundTrip(t *testing.T) {
|
||||
// Build a manifest, deserialize it, verify all fields survive round-trip
|
||||
b := NewBuilder()
|
||||
now := time.Date(2025, 6, 15, 12, 0, 0, 0, time.UTC)
|
||||
|
||||
files := []struct {
|
||||
path string
|
||||
content []byte
|
||||
}{
|
||||
{"alpha.txt", []byte("alpha content")},
|
||||
{"beta/gamma.txt", []byte("gamma content")},
|
||||
{"beta/delta.txt", []byte("delta content")},
|
||||
}
|
||||
|
||||
for _, f := range files {
|
||||
reader := bytes.NewReader(f.content)
|
||||
_, err := b.AddFile(RelFilePath(f.path), FileSize(len(f.content)), ModTime(now), reader, nil)
|
||||
require.NoError(t, err)
|
||||
}
|
||||
|
||||
var buf bytes.Buffer
|
||||
require.NoError(t, b.Build(&buf))
|
||||
|
||||
m, err := NewManifestFromReader(&buf)
|
||||
require.NoError(t, err)
|
||||
|
||||
mfiles := m.Files()
|
||||
require.Len(t, mfiles, 3)
|
||||
|
||||
// Verify sorted order
|
||||
assert.Equal(t, "alpha.txt", mfiles[0].Path)
|
||||
assert.Equal(t, "beta/delta.txt", mfiles[1].Path)
|
||||
assert.Equal(t, "beta/gamma.txt", mfiles[2].Path)
|
||||
|
||||
// Verify sizes
|
||||
assert.Equal(t, int64(len("alpha content")), mfiles[0].Size)
|
||||
|
||||
// Verify hashes are present
|
||||
for _, f := range mfiles {
|
||||
require.NotEmpty(t, f.Hashes, "file %s should have hashes", f.Path)
|
||||
assert.NotEmpty(t, f.Hashes[0].MultiHash)
|
||||
}
|
||||
}
|
||||
|
||||
func TestNewManifestFromReaderInvalidMagic(t *testing.T) {
|
||||
_, err := NewManifestFromReader(bytes.NewReader([]byte("NOT_VALID")))
|
||||
assert.Error(t, err)
|
||||
assert.Contains(t, err.Error(), "invalid file format")
|
||||
}
|
||||
|
||||
func TestNewManifestFromReaderEmpty(t *testing.T) {
|
||||
_, err := NewManifestFromReader(bytes.NewReader([]byte{}))
|
||||
assert.Error(t, err)
|
||||
}
|
||||
|
||||
func TestNewManifestFromReaderTruncated(t *testing.T) {
|
||||
// Just the magic with nothing after
|
||||
_, err := NewManifestFromReader(bytes.NewReader([]byte(MAGIC)))
|
||||
assert.Error(t, err)
|
||||
}
|
||||
|
||||
func TestManifestString(t *testing.T) {
|
||||
b := NewBuilder()
|
||||
content := []byte("test")
|
||||
reader := bytes.NewReader(content)
|
||||
_, err := b.AddFile("test.txt", FileSize(len(content)), ModTime(time.Now()), reader, nil)
|
||||
require.NoError(t, err)
|
||||
|
||||
var buf bytes.Buffer
|
||||
require.NoError(t, b.Build(&buf))
|
||||
|
||||
m, err := NewManifestFromReader(&buf)
|
||||
require.NoError(t, err)
|
||||
assert.Contains(t, m.String(), "count=1")
|
||||
}
|
||||
|
||||
func TestBuilderBuildEmpty(t *testing.T) {
|
||||
b := NewBuilder()
|
||||
|
||||
@@ -125,3 +326,62 @@ func TestBuilderBuildEmpty(t *testing.T) {
|
||||
// Should still produce valid manifest with 0 files
|
||||
assert.True(t, strings.HasPrefix(buf.String(), MAGIC))
|
||||
}
|
||||
|
||||
func TestBuilderOmitsCreatedAtByDefault(t *testing.T) {
|
||||
b := NewBuilder()
|
||||
content := []byte("hello")
|
||||
_, err := b.AddFile("test.txt", FileSize(len(content)), ModTime(time.Now()), bytes.NewReader(content), nil)
|
||||
require.NoError(t, err)
|
||||
|
||||
var buf bytes.Buffer
|
||||
require.NoError(t, b.Build(&buf))
|
||||
|
||||
m, err := NewManifestFromReader(&buf)
|
||||
require.NoError(t, err)
|
||||
assert.Nil(t, m.pbInner.CreatedAt, "createdAt should be nil by default for deterministic output")
|
||||
}
|
||||
|
||||
func TestBuilderIncludesCreatedAtWhenRequested(t *testing.T) {
|
||||
b := NewBuilder()
|
||||
b.SetIncludeTimestamps(true)
|
||||
content := []byte("hello")
|
||||
_, err := b.AddFile("test.txt", FileSize(len(content)), ModTime(time.Now()), bytes.NewReader(content), nil)
|
||||
require.NoError(t, err)
|
||||
|
||||
var buf bytes.Buffer
|
||||
require.NoError(t, b.Build(&buf))
|
||||
|
||||
m, err := NewManifestFromReader(&buf)
|
||||
require.NoError(t, err)
|
||||
assert.NotNil(t, m.pbInner.CreatedAt, "createdAt should be set when IncludeTimestamps is true")
|
||||
}
|
||||
|
||||
func TestBuilderDeterministicFileOrder(t *testing.T) {
|
||||
// Two builds with same files in different order should produce same file ordering.
|
||||
// Note: UUIDs differ per build, so we compare parsed file lists, not raw bytes.
|
||||
buildAndParse := func(order []string) []*MFFilePath {
|
||||
b := NewBuilder()
|
||||
for _, name := range order {
|
||||
content := []byte("content of " + name)
|
||||
_, err := b.AddFile(RelFilePath(name), FileSize(len(content)), ModTime(time.Unix(1000, 0)), bytes.NewReader(content), nil)
|
||||
require.NoError(t, err)
|
||||
}
|
||||
var buf bytes.Buffer
|
||||
require.NoError(t, b.Build(&buf))
|
||||
m, err := NewManifestFromReader(&buf)
|
||||
require.NoError(t, err)
|
||||
return m.Files()
|
||||
}
|
||||
|
||||
files1 := buildAndParse([]string{"b.txt", "a.txt"})
|
||||
files2 := buildAndParse([]string{"a.txt", "b.txt"})
|
||||
|
||||
require.Len(t, files1, 2)
|
||||
require.Len(t, files2, 2)
|
||||
for i := range files1 {
|
||||
assert.Equal(t, files1[i].Path, files2[i].Path)
|
||||
assert.Equal(t, files1[i].Size, files2[i].Size)
|
||||
}
|
||||
assert.Equal(t, "a.txt", files1[0].Path)
|
||||
assert.Equal(t, "b.txt", files1[1].Path)
|
||||
}
|
||||
|
||||
@@ -70,6 +70,8 @@ type Checker struct {
|
||||
fs afero.Fs
|
||||
// manifestPaths is a set of paths in the manifest for quick lookup
|
||||
manifestPaths map[RelFilePath]struct{}
|
||||
// manifestRelPath is the relative path of the manifest file from basePath (for exclusion)
|
||||
manifestRelPath RelFilePath
|
||||
// signature info from the manifest
|
||||
signature []byte
|
||||
signer []byte
|
||||
@@ -100,14 +102,25 @@ func NewChecker(manifestPath string, basePath string, fs afero.Fs) (*Checker, er
|
||||
manifestPaths[RelFilePath(f.Path)] = struct{}{}
|
||||
}
|
||||
|
||||
// Compute manifest's relative path from basePath for exclusion in FindExtraFiles
|
||||
absManifest, err := filepath.Abs(manifestPath)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
manifestRel, err := filepath.Rel(abs, absManifest)
|
||||
if err != nil {
|
||||
manifestRel = ""
|
||||
}
|
||||
|
||||
return &Checker{
|
||||
basePath: AbsFilePath(abs),
|
||||
files: files,
|
||||
fs: fs,
|
||||
manifestPaths: manifestPaths,
|
||||
signature: m.pbOuter.Signature,
|
||||
signer: m.pbOuter.Signer,
|
||||
signingPubKey: m.pbOuter.SigningPubKey,
|
||||
basePath: AbsFilePath(abs),
|
||||
files: files,
|
||||
fs: fs,
|
||||
manifestPaths: manifestPaths,
|
||||
manifestRelPath: RelFilePath(manifestRel),
|
||||
signature: m.pbOuter.Signature,
|
||||
signer: m.pbOuter.Signer,
|
||||
signingPubKey: m.pbOuter.SigningPubKey,
|
||||
}, nil
|
||||
}
|
||||
|
||||
@@ -170,6 +183,7 @@ func (c *Checker) Check(ctx context.Context, results chan<- Result, progress cha
|
||||
var failures FileCount
|
||||
|
||||
startTime := time.Now()
|
||||
lastProgressTime := time.Now()
|
||||
|
||||
for _, entry := range c.files {
|
||||
select {
|
||||
@@ -188,29 +202,34 @@ func (c *Checker) Check(ctx context.Context, results chan<- Result, progress cha
|
||||
results <- result
|
||||
}
|
||||
|
||||
// Send progress with rate and ETA calculation
|
||||
// Send progress at most once per second (rate-limited)
|
||||
if progress != nil {
|
||||
elapsed := time.Since(startTime)
|
||||
var bytesPerSec float64
|
||||
var eta time.Duration
|
||||
now := time.Now()
|
||||
isLast := checkedFiles == totalFiles
|
||||
if isLast || now.Sub(lastProgressTime) >= time.Second {
|
||||
elapsed := time.Since(startTime)
|
||||
var bytesPerSec float64
|
||||
var eta time.Duration
|
||||
|
||||
if elapsed > 0 && checkedBytes > 0 {
|
||||
bytesPerSec = float64(checkedBytes) / elapsed.Seconds()
|
||||
remainingBytes := totalBytes - checkedBytes
|
||||
if bytesPerSec > 0 {
|
||||
eta = time.Duration(float64(remainingBytes)/bytesPerSec) * time.Second
|
||||
if elapsed > 0 && checkedBytes > 0 {
|
||||
bytesPerSec = float64(checkedBytes) / elapsed.Seconds()
|
||||
remainingBytes := totalBytes - checkedBytes
|
||||
if bytesPerSec > 0 {
|
||||
eta = time.Duration(float64(remainingBytes)/bytesPerSec) * time.Second
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
sendCheckStatus(progress, CheckStatus{
|
||||
TotalFiles: totalFiles,
|
||||
CheckedFiles: checkedFiles,
|
||||
TotalBytes: totalBytes,
|
||||
CheckedBytes: checkedBytes,
|
||||
BytesPerSec: bytesPerSec,
|
||||
ETA: eta,
|
||||
Failures: failures,
|
||||
})
|
||||
sendCheckStatus(progress, CheckStatus{
|
||||
TotalFiles: totalFiles,
|
||||
CheckedFiles: checkedFiles,
|
||||
TotalBytes: totalBytes,
|
||||
CheckedBytes: checkedBytes,
|
||||
BytesPerSec: bytesPerSec,
|
||||
ETA: eta,
|
||||
Failures: failures,
|
||||
})
|
||||
lastProgressTime = now
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -309,14 +328,13 @@ func (c *Checker) FindExtraFiles(ctx context.Context, results chan<- Result) err
|
||||
return nil
|
||||
}
|
||||
|
||||
// Skip manifest files
|
||||
base := filepath.Base(rel)
|
||||
if base == "index.mf" || base == ".index.mf" {
|
||||
relPath := RelFilePath(rel)
|
||||
|
||||
// Skip the manifest file itself
|
||||
if relPath == c.manifestRelPath {
|
||||
return nil
|
||||
}
|
||||
|
||||
relPath := RelFilePath(rel)
|
||||
|
||||
// Check if path is in manifest
|
||||
if _, exists := c.manifestPaths[relPath]; !exists {
|
||||
if results != nil {
|
||||
|
||||
@@ -3,6 +3,7 @@ package mfer
|
||||
import (
|
||||
"bytes"
|
||||
"context"
|
||||
"fmt"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
@@ -452,6 +453,61 @@ func TestCheckMissingFileDetectedWithoutFallback(t *testing.T) {
|
||||
assert.Equal(t, 0, statusCounts[StatusError], "no files should be ERROR")
|
||||
}
|
||||
|
||||
func TestFindExtraFilesSkipsDotfiles(t *testing.T) {
|
||||
// Regression test for #16: FindExtraFiles should not report dotfiles
|
||||
// or the manifest file itself as extra files.
|
||||
fs := afero.NewMemMapFs()
|
||||
files := map[string][]byte{
|
||||
"file1.txt": []byte("in manifest"),
|
||||
}
|
||||
createTestManifest(t, fs, "/data/.index.mf", files)
|
||||
createFilesOnDisk(t, fs, "/data", files)
|
||||
|
||||
// Add dotfiles and manifest file on disk
|
||||
require.NoError(t, afero.WriteFile(fs, "/data/.hidden", []byte("dotfile"), 0o644))
|
||||
require.NoError(t, fs.MkdirAll("/data/.git", 0o755))
|
||||
require.NoError(t, afero.WriteFile(fs, "/data/.git/config", []byte("git config"), 0o644))
|
||||
|
||||
chk, err := NewChecker("/data/.index.mf", "/data", fs)
|
||||
require.NoError(t, err)
|
||||
|
||||
results := make(chan Result, 10)
|
||||
err = chk.FindExtraFiles(context.Background(), results)
|
||||
require.NoError(t, err)
|
||||
|
||||
var extras []Result
|
||||
for r := range results {
|
||||
extras = append(extras, r)
|
||||
}
|
||||
|
||||
// Should report NO extra files — dotfiles and manifest should be skipped
|
||||
assert.Empty(t, extras, "FindExtraFiles should not report dotfiles or manifest file as extra; got: %v", extras)
|
||||
}
|
||||
|
||||
func TestFindExtraFilesSkipsManifestFile(t *testing.T) {
|
||||
// The manifest file itself should never be reported as extra
|
||||
fs := afero.NewMemMapFs()
|
||||
files := map[string][]byte{
|
||||
"file1.txt": []byte("content"),
|
||||
}
|
||||
createTestManifest(t, fs, "/data/index.mf", files)
|
||||
createFilesOnDisk(t, fs, "/data", files)
|
||||
|
||||
chk, err := NewChecker("/data/index.mf", "/data", fs)
|
||||
require.NoError(t, err)
|
||||
|
||||
results := make(chan Result, 10)
|
||||
err = chk.FindExtraFiles(context.Background(), results)
|
||||
require.NoError(t, err)
|
||||
|
||||
var extras []Result
|
||||
for r := range results {
|
||||
extras = append(extras, r)
|
||||
}
|
||||
|
||||
assert.Empty(t, extras, "manifest file should not be reported as extra; got: %v", extras)
|
||||
}
|
||||
|
||||
func TestCheckEmptyManifest(t *testing.T) {
|
||||
fs := afero.NewMemMapFs()
|
||||
// Create manifest with no files
|
||||
@@ -473,3 +529,40 @@ func TestCheckEmptyManifest(t *testing.T) {
|
||||
}
|
||||
assert.Equal(t, 0, count)
|
||||
}
|
||||
|
||||
func TestCheckProgressRateLimited(t *testing.T) {
|
||||
// Create many small files - progress should be rate-limited, not one per file.
|
||||
// With rate-limiting to once per second, we should get far fewer progress
|
||||
// updates than files (plus one final update).
|
||||
fs := afero.NewMemMapFs()
|
||||
files := make(map[string][]byte, 100)
|
||||
for i := 0; i < 100; i++ {
|
||||
name := fmt.Sprintf("file%03d.txt", i)
|
||||
files[name] = []byte("content")
|
||||
}
|
||||
createTestManifest(t, fs, "/manifest.mf", files)
|
||||
createFilesOnDisk(t, fs, "/data", files)
|
||||
|
||||
chk, err := NewChecker("/manifest.mf", "/data", fs)
|
||||
require.NoError(t, err)
|
||||
|
||||
results := make(chan Result, 200)
|
||||
progress := make(chan CheckStatus, 200)
|
||||
err = chk.Check(context.Background(), results, progress)
|
||||
require.NoError(t, err)
|
||||
|
||||
// Drain results
|
||||
for range results {
|
||||
}
|
||||
|
||||
// Count progress updates
|
||||
var progressCount int
|
||||
for range progress {
|
||||
progressCount++
|
||||
}
|
||||
|
||||
// Should be far fewer than 100 (rate-limited to once per second)
|
||||
// At minimum we get the final update
|
||||
assert.GreaterOrEqual(t, progressCount, 1, "should get at least the final progress update")
|
||||
assert.Less(t, progressCount, 100, "progress should be rate-limited, not one per file")
|
||||
}
|
||||
|
||||
@@ -44,7 +44,7 @@ func (m *manifest) deserializeInner() error {
|
||||
// Verify hash of compressed data before decompression
|
||||
h := sha256.New()
|
||||
if _, err := h.Write(m.pbOuter.InnerMessage); err != nil {
|
||||
return err
|
||||
return fmt.Errorf("deserialize: hash write: %w", err)
|
||||
}
|
||||
sha256Hash := h.Sum(nil)
|
||||
if !bytes.Equal(sha256Hash, m.pbOuter.Sha256) {
|
||||
@@ -72,7 +72,7 @@ func (m *manifest) deserializeInner() error {
|
||||
|
||||
zr, err := zstd.NewReader(bb)
|
||||
if err != nil {
|
||||
return err
|
||||
return fmt.Errorf("deserialize: zstd reader: %w", err)
|
||||
}
|
||||
defer zr.Close()
|
||||
|
||||
@@ -85,7 +85,7 @@ func (m *manifest) deserializeInner() error {
|
||||
limitedReader := io.LimitReader(zr, maxSize)
|
||||
dat, err := io.ReadAll(limitedReader)
|
||||
if err != nil {
|
||||
return err
|
||||
return fmt.Errorf("deserialize: decompress: %w", err)
|
||||
}
|
||||
if int64(len(dat)) >= MaxDecompressedSize {
|
||||
return fmt.Errorf("decompressed data exceeds maximum allowed size of %d bytes", MaxDecompressedSize)
|
||||
@@ -100,7 +100,7 @@ func (m *manifest) deserializeInner() error {
|
||||
// Deserialize inner message
|
||||
m.pbInner = new(MFFile)
|
||||
if err := proto.Unmarshal(dat, m.pbInner); err != nil {
|
||||
return err
|
||||
return fmt.Errorf("deserialize: unmarshal inner: %w", err)
|
||||
}
|
||||
|
||||
// Validate inner UUID
|
||||
|
||||
14
mfer/gpg.go
14
mfer/gpg.go
@@ -20,7 +20,7 @@ type SigningOptions struct {
|
||||
// gpgSign creates a detached signature of the data using the specified key.
|
||||
// Returns the armored detached signature.
|
||||
func gpgSign(data []byte, keyID GPGKeyID) ([]byte, error) {
|
||||
cmd := exec.Command("gpg",
|
||||
cmd := exec.Command("gpg", "--batch", "--no-tty",
|
||||
"--detach-sign",
|
||||
"--armor",
|
||||
"--local-user", string(keyID),
|
||||
@@ -42,7 +42,7 @@ func gpgSign(data []byte, keyID GPGKeyID) ([]byte, error) {
|
||||
// gpgExportPublicKey exports the public key for the specified key ID.
|
||||
// Returns the armored public key.
|
||||
func gpgExportPublicKey(keyID GPGKeyID) ([]byte, error) {
|
||||
cmd := exec.Command("gpg",
|
||||
cmd := exec.Command("gpg", "--batch", "--no-tty",
|
||||
"--export",
|
||||
"--armor",
|
||||
string(keyID),
|
||||
@@ -65,7 +65,7 @@ func gpgExportPublicKey(keyID GPGKeyID) ([]byte, error) {
|
||||
|
||||
// gpgGetKeyFingerprint gets the full fingerprint for a key ID.
|
||||
func gpgGetKeyFingerprint(keyID GPGKeyID) ([]byte, error) {
|
||||
cmd := exec.Command("gpg",
|
||||
cmd := exec.Command("gpg", "--batch", "--no-tty",
|
||||
"--with-colons",
|
||||
"--fingerprint",
|
||||
string(keyID),
|
||||
@@ -114,7 +114,7 @@ func gpgExtractPubKeyFingerprint(pubKey []byte) (string, error) {
|
||||
}
|
||||
|
||||
// Import the public key into the temporary keyring
|
||||
importCmd := exec.Command("gpg",
|
||||
importCmd := exec.Command("gpg", "--batch", "--no-tty",
|
||||
"--homedir", tmpDir,
|
||||
"--import",
|
||||
pubKeyFile,
|
||||
@@ -126,7 +126,7 @@ func gpgExtractPubKeyFingerprint(pubKey []byte) (string, error) {
|
||||
}
|
||||
|
||||
// List keys to get fingerprint
|
||||
listCmd := exec.Command("gpg",
|
||||
listCmd := exec.Command("gpg", "--batch", "--no-tty",
|
||||
"--homedir", tmpDir,
|
||||
"--with-colons",
|
||||
"--fingerprint",
|
||||
@@ -184,7 +184,7 @@ func gpgVerify(data, signature, pubKey []byte) error {
|
||||
}
|
||||
|
||||
// Import the public key into the temporary keyring
|
||||
importCmd := exec.Command("gpg",
|
||||
importCmd := exec.Command("gpg", "--batch", "--no-tty",
|
||||
"--homedir", tmpDir,
|
||||
"--import",
|
||||
pubKeyFile,
|
||||
@@ -196,7 +196,7 @@ func gpgVerify(data, signature, pubKey []byte) error {
|
||||
}
|
||||
|
||||
// Verify the signature
|
||||
verifyCmd := exec.Command("gpg",
|
||||
verifyCmd := exec.Command("gpg", "--batch", "--no-tty",
|
||||
"--homedir", tmpDir,
|
||||
"--verify",
|
||||
sigFile,
|
||||
|
||||
@@ -17,6 +17,7 @@ type manifest struct {
|
||||
pbOuter *MFFileOuter
|
||||
output *bytes.Buffer
|
||||
signingOptions *SigningOptions
|
||||
fixedUUID []byte // if set, use this UUID instead of generating one
|
||||
}
|
||||
|
||||
func (m *manifest) String() string {
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
// Code generated by protoc-gen-go. DO NOT EDIT.
|
||||
// versions:
|
||||
// protoc-gen-go v1.36.11
|
||||
// protoc v6.33.0
|
||||
// protoc v6.33.4
|
||||
// source: mf.proto
|
||||
|
||||
package mfer
|
||||
@@ -329,6 +329,9 @@ func (x *MFFileOuter) GetSigningPubKey() []byte {
|
||||
type MFFilePath struct {
|
||||
state protoimpl.MessageState `protogen:"open.v1"`
|
||||
// required attributes:
|
||||
// Path invariants: must be valid UTF-8, use forward slashes only,
|
||||
// be relative (no leading /), contain no ".." segments, and no
|
||||
// empty segments (no "//").
|
||||
Path string `protobuf:"bytes,1,opt,name=path,proto3" json:"path,omitempty"`
|
||||
Size int64 `protobuf:"varint,2,opt,name=size,proto3" json:"size,omitempty"`
|
||||
// gotta have at least one:
|
||||
@@ -337,7 +340,6 @@ type MFFilePath struct {
|
||||
MimeType *string `protobuf:"bytes,301,opt,name=mimeType,proto3,oneof" json:"mimeType,omitempty"`
|
||||
Mtime *Timestamp `protobuf:"bytes,302,opt,name=mtime,proto3,oneof" json:"mtime,omitempty"`
|
||||
Ctime *Timestamp `protobuf:"bytes,303,opt,name=ctime,proto3,oneof" json:"ctime,omitempty"`
|
||||
Atime *Timestamp `protobuf:"bytes,304,opt,name=atime,proto3,oneof" json:"atime,omitempty"`
|
||||
unknownFields protoimpl.UnknownFields
|
||||
sizeCache protoimpl.SizeCache
|
||||
}
|
||||
@@ -414,13 +416,6 @@ func (x *MFFilePath) GetCtime() *Timestamp {
|
||||
return nil
|
||||
}
|
||||
|
||||
func (x *MFFilePath) GetAtime() *Timestamp {
|
||||
if x != nil {
|
||||
return x.Atime
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
type MFFileChecksum struct {
|
||||
state protoimpl.MessageState `protogen:"open.v1"`
|
||||
// 1.0 golang implementation must write a multihash here
|
||||
@@ -566,7 +561,7 @@ const file_mf_proto_rawDesc = "" +
|
||||
"\n" +
|
||||
"_signatureB\t\n" +
|
||||
"\a_signerB\x10\n" +
|
||||
"\x0e_signingPubKey\"\xa2\x02\n" +
|
||||
"\x0e_signingPubKey\"\xf0\x01\n" +
|
||||
"\n" +
|
||||
"MFFilePath\x12\x12\n" +
|
||||
"\x04path\x18\x01 \x01(\tR\x04path\x12\x12\n" +
|
||||
@@ -576,13 +571,10 @@ const file_mf_proto_rawDesc = "" +
|
||||
"\x05mtime\x18\xae\x02 \x01(\v2\n" +
|
||||
".TimestampH\x01R\x05mtime\x88\x01\x01\x12&\n" +
|
||||
"\x05ctime\x18\xaf\x02 \x01(\v2\n" +
|
||||
".TimestampH\x02R\x05ctime\x88\x01\x01\x12&\n" +
|
||||
"\x05atime\x18\xb0\x02 \x01(\v2\n" +
|
||||
".TimestampH\x03R\x05atime\x88\x01\x01B\v\n" +
|
||||
".TimestampH\x02R\x05ctime\x88\x01\x01B\v\n" +
|
||||
"\t_mimeTypeB\b\n" +
|
||||
"\x06_mtimeB\b\n" +
|
||||
"\x06_ctimeB\b\n" +
|
||||
"\x06_atime\".\n" +
|
||||
"\x06_ctime\".\n" +
|
||||
"\x0eMFFileChecksum\x12\x1c\n" +
|
||||
"\tmultiHash\x18\x01 \x01(\fR\tmultiHash\"\xd6\x01\n" +
|
||||
"\x06MFFile\x12)\n" +
|
||||
@@ -627,15 +619,14 @@ var file_mf_proto_depIdxs = []int32{
|
||||
6, // 2: MFFilePath.hashes:type_name -> MFFileChecksum
|
||||
3, // 3: MFFilePath.mtime:type_name -> Timestamp
|
||||
3, // 4: MFFilePath.ctime:type_name -> Timestamp
|
||||
3, // 5: MFFilePath.atime:type_name -> Timestamp
|
||||
2, // 6: MFFile.version:type_name -> MFFile.Version
|
||||
5, // 7: MFFile.files:type_name -> MFFilePath
|
||||
3, // 8: MFFile.createdAt:type_name -> Timestamp
|
||||
9, // [9:9] is the sub-list for method output_type
|
||||
9, // [9:9] is the sub-list for method input_type
|
||||
9, // [9:9] is the sub-list for extension type_name
|
||||
9, // [9:9] is the sub-list for extension extendee
|
||||
0, // [0:9] is the sub-list for field type_name
|
||||
2, // 5: MFFile.version:type_name -> MFFile.Version
|
||||
5, // 6: MFFile.files:type_name -> MFFilePath
|
||||
3, // 7: MFFile.createdAt:type_name -> Timestamp
|
||||
8, // [8:8] is the sub-list for method output_type
|
||||
8, // [8:8] is the sub-list for method input_type
|
||||
8, // [8:8] is the sub-list for extension type_name
|
||||
8, // [8:8] is the sub-list for extension extendee
|
||||
0, // [0:8] is the sub-list for field type_name
|
||||
}
|
||||
|
||||
func init() { file_mf_proto_init() }
|
||||
|
||||
@@ -59,7 +59,6 @@ message MFFilePath {
|
||||
optional string mimeType = 301;
|
||||
optional Timestamp mtime = 302;
|
||||
optional Timestamp ctime = 303;
|
||||
optional Timestamp atime = 304;
|
||||
}
|
||||
|
||||
message MFFileChecksum {
|
||||
|
||||
@@ -43,10 +43,12 @@ type ScanStatus struct {
|
||||
|
||||
// ScannerOptions configures scanner behavior.
|
||||
type ScannerOptions struct {
|
||||
IncludeDotfiles bool // Include files and directories starting with a dot (default: exclude)
|
||||
FollowSymLinks bool // Resolve symlinks instead of skipping them
|
||||
Fs afero.Fs // Filesystem to use, defaults to OsFs if nil
|
||||
SigningOptions *SigningOptions // GPG signing options (nil = no signing)
|
||||
IncludeDotfiles bool // Include files and directories starting with a dot (default: exclude)
|
||||
FollowSymLinks bool // Resolve symlinks instead of skipping them
|
||||
IncludeTimestamps bool // Include createdAt timestamp in manifest (default: omit for determinism)
|
||||
Fs afero.Fs // Filesystem to use, defaults to OsFs if nil
|
||||
SigningOptions *SigningOptions // GPG signing options (nil = no signing)
|
||||
Seed string // If set, derive a deterministic UUID from this seed
|
||||
}
|
||||
|
||||
// FileEntry represents a file that has been enumerated.
|
||||
@@ -273,9 +275,15 @@ func (s *Scanner) ToManifest(ctx context.Context, w io.Writer, progress chan<- S
|
||||
s.mu.RUnlock()
|
||||
|
||||
builder := NewBuilder()
|
||||
if s.options.IncludeTimestamps {
|
||||
builder.SetIncludeTimestamps(true)
|
||||
}
|
||||
if s.options.SigningOptions != nil {
|
||||
builder.SetSigningOptions(s.options.SigningOptions)
|
||||
}
|
||||
if s.options.Seed != "" {
|
||||
builder.SetSeed(s.options.Seed)
|
||||
}
|
||||
|
||||
var scannedFiles FileCount
|
||||
var scannedBytes FileSize
|
||||
|
||||
@@ -352,8 +352,10 @@ func TestIsHiddenPath(t *testing.T) {
|
||||
{"/absolute/.hidden", true},
|
||||
{"./relative", false}, // path.Clean removes leading ./
|
||||
{"a/b/c/.d/e", true},
|
||||
{".", false}, // current directory is not hidden
|
||||
{"/", false}, // root is not hidden
|
||||
{".", false}, // current directory is not hidden (#14)
|
||||
{"/", false}, // root is not hidden
|
||||
{"./", false}, // current directory with trailing slash
|
||||
{"./file.txt", false}, // file in current directory
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
|
||||
@@ -34,12 +34,12 @@ func (m *manifest) generate() error {
|
||||
}
|
||||
dat, err := proto.MarshalOptions{Deterministic: true}.Marshal(m.pbOuter)
|
||||
if err != nil {
|
||||
return err
|
||||
return fmt.Errorf("serialize: marshal outer: %w", err)
|
||||
}
|
||||
m.output = bytes.NewBuffer([]byte(MAGIC))
|
||||
_, err = m.output.Write(dat)
|
||||
if err != nil {
|
||||
return err
|
||||
return fmt.Errorf("serialize: write output: %w", err)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
@@ -49,24 +49,29 @@ func (m *manifest) generateOuter() error {
|
||||
return errors.New("internal error")
|
||||
}
|
||||
|
||||
// Generate UUID and set on inner message
|
||||
manifestUUID := uuid.New()
|
||||
// Use fixed UUID if provided, otherwise generate a new one
|
||||
var manifestUUID uuid.UUID
|
||||
if len(m.fixedUUID) == 16 {
|
||||
copy(manifestUUID[:], m.fixedUUID)
|
||||
} else {
|
||||
manifestUUID = uuid.New()
|
||||
}
|
||||
m.pbInner.Uuid = manifestUUID[:]
|
||||
|
||||
innerData, err := proto.MarshalOptions{Deterministic: true}.Marshal(m.pbInner)
|
||||
if err != nil {
|
||||
return err
|
||||
return fmt.Errorf("serialize: marshal inner: %w", err)
|
||||
}
|
||||
|
||||
// Compress the inner data
|
||||
idc := new(bytes.Buffer)
|
||||
zw, err := zstd.NewWriter(idc, zstd.WithEncoderLevel(zstd.SpeedBestCompression))
|
||||
if err != nil {
|
||||
return err
|
||||
return fmt.Errorf("serialize: create compressor: %w", err)
|
||||
}
|
||||
_, err = zw.Write(innerData)
|
||||
if err != nil {
|
||||
return err
|
||||
return fmt.Errorf("serialize: compress: %w", err)
|
||||
}
|
||||
_ = zw.Close()
|
||||
|
||||
@@ -75,7 +80,7 @@ func (m *manifest) generateOuter() error {
|
||||
// Hash the compressed data for integrity verification before decompression
|
||||
h := sha256.New()
|
||||
if _, err := h.Write(compressedData); err != nil {
|
||||
return err
|
||||
return fmt.Errorf("serialize: hash write: %w", err)
|
||||
}
|
||||
sha256Hash := h.Sum(nil)
|
||||
|
||||
|
||||
@@ -27,8 +27,12 @@ func (b BaseURL) JoinPath(path RelFilePath) (FileURL, error) {
|
||||
base.Path += "/"
|
||||
}
|
||||
|
||||
// Parse and encode the relative path
|
||||
ref, err := url.Parse(url.PathEscape(string(path)))
|
||||
// Encode each path segment individually to preserve slashes
|
||||
segments := strings.Split(string(path), "/")
|
||||
for i, seg := range segments {
|
||||
segments[i] = url.PathEscape(seg)
|
||||
}
|
||||
ref, err := url.Parse(strings.Join(segments, "/"))
|
||||
if err != nil {
|
||||
return "", err
|
||||
}
|
||||
|
||||
44
mfer/url_test.go
Normal file
44
mfer/url_test.go
Normal file
@@ -0,0 +1,44 @@
|
||||
package mfer
|
||||
|
||||
import (
|
||||
"testing"
|
||||
|
||||
"github.com/stretchr/testify/assert"
|
||||
"github.com/stretchr/testify/require"
|
||||
)
|
||||
|
||||
func TestBaseURLJoinPath(t *testing.T) {
|
||||
tests := []struct {
|
||||
base BaseURL
|
||||
path RelFilePath
|
||||
expected string
|
||||
}{
|
||||
{"https://example.com/dir/", "file.txt", "https://example.com/dir/file.txt"},
|
||||
{"https://example.com/dir", "file.txt", "https://example.com/dir/file.txt"},
|
||||
{"https://example.com/", "sub/file.txt", "https://example.com/sub/file.txt"},
|
||||
{"https://example.com/dir/", "file with spaces.txt", "https://example.com/dir/file%20with%20spaces.txt"},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(string(tt.base)+"+"+string(tt.path), func(t *testing.T) {
|
||||
result, err := tt.base.JoinPath(tt.path)
|
||||
require.NoError(t, err)
|
||||
assert.Equal(t, tt.expected, string(result))
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestBaseURLString(t *testing.T) {
|
||||
b := BaseURL("https://example.com/")
|
||||
assert.Equal(t, "https://example.com/", b.String())
|
||||
}
|
||||
|
||||
func TestFileURLString(t *testing.T) {
|
||||
f := FileURL("https://example.com/file.txt")
|
||||
assert.Equal(t, "https://example.com/file.txt", f.String())
|
||||
}
|
||||
|
||||
func TestManifestURLString(t *testing.T) {
|
||||
m := ManifestURL("https://example.com/index.mf")
|
||||
assert.Equal(t, "https://example.com/index.mf", m.String())
|
||||
}
|
||||
BIN
modcache.tzst
BIN
modcache.tzst
Binary file not shown.
BIN
vendor.tzst
BIN
vendor.tzst
Binary file not shown.
Reference in New Issue
Block a user