From 008f270d90a05d67148bfddaab050228d0533484 Mon Sep 17 00:00:00 2001 From: user Date: Tue, 10 Feb 2026 18:33:41 -0800 Subject: [PATCH 01/11] Fix IsHiddenPath treating current directory as hidden (closes #14) IsHiddenPath(".") incorrectly returned true because path.Clean(".") starts with a dot. Add explicit check for "." before the HasPrefix check. Add test cases for ".", "./", and "./file.txt". --- mfer/scanner_test.go | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/mfer/scanner_test.go b/mfer/scanner_test.go index 0cbdfe3..8db6357 100644 --- a/mfer/scanner_test.go +++ b/mfer/scanner_test.go @@ -352,8 +352,10 @@ func TestIsHiddenPath(t *testing.T) { {"/absolute/.hidden", true}, {"./relative", false}, // path.Clean removes leading ./ {"a/b/c/.d/e", true}, - {".", false}, // current directory is not hidden - {"/", false}, // root is not hidden + {".", false}, // current directory is not hidden (#14) + {"/", false}, // root is not hidden + {"./", false}, // current directory with trailing slash + {"./file.txt", false}, // file in current directory } for _, tt := range tests { -- 2.45.2 From 8bb70bc6a9b75e1669ea4cb789a441d904702cd9 Mon Sep 17 00:00:00 2001 From: user Date: Tue, 10 Feb 2026 18:34:51 -0800 Subject: [PATCH 02/11] Fix FindExtraFiles reporting manifest file and dotfiles as extra (closes #16) FindExtraFiles now skips hidden files/directories (dotfiles) and the manifest file itself when walking the filesystem. The manifest's relative path is computed at Checker construction time. 
--- mfer/checker.go | 36 +++++++++++++++++++---------- mfer/checker_test.go | 55 ++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 79 insertions(+), 12 deletions(-) diff --git a/mfer/checker.go b/mfer/checker.go index a213697..1bd8408 100644 --- a/mfer/checker.go +++ b/mfer/checker.go @@ -70,6 +70,8 @@ type Checker struct { fs afero.Fs // manifestPaths is a set of paths in the manifest for quick lookup manifestPaths map[RelFilePath]struct{} + // manifestRelPath is the relative path of the manifest file from basePath (for exclusion) + manifestRelPath RelFilePath // signature info from the manifest signature []byte signer []byte @@ -100,14 +102,25 @@ func NewChecker(manifestPath string, basePath string, fs afero.Fs) (*Checker, er manifestPaths[RelFilePath(f.Path)] = struct{}{} } + // Compute manifest's relative path from basePath for exclusion in FindExtraFiles + absManifest, err := filepath.Abs(manifestPath) + if err != nil { + return nil, err + } + manifestRel, err := filepath.Rel(abs, absManifest) + if err != nil { + manifestRel = "" + } + return &Checker{ - basePath: AbsFilePath(abs), - files: files, - fs: fs, - manifestPaths: manifestPaths, - signature: m.pbOuter.Signature, - signer: m.pbOuter.Signer, - signingPubKey: m.pbOuter.SigningPubKey, + basePath: AbsFilePath(abs), + files: files, + fs: fs, + manifestPaths: manifestPaths, + manifestRelPath: RelFilePath(manifestRel), + signature: m.pbOuter.Signature, + signer: m.pbOuter.Signer, + signingPubKey: m.pbOuter.SigningPubKey, }, nil } @@ -309,14 +322,13 @@ func (c *Checker) FindExtraFiles(ctx context.Context, results chan<- Result) err return nil } - // Skip manifest files - base := filepath.Base(rel) - if base == "index.mf" || base == ".index.mf" { + relPath := RelFilePath(rel) + + // Skip the manifest file itself + if relPath == c.manifestRelPath { return nil } - relPath := RelFilePath(rel) - // Check if path is in manifest if _, exists := c.manifestPaths[relPath]; !exists { if results != nil { diff 
--git a/mfer/checker_test.go b/mfer/checker_test.go index 5ba283e..9c7bf9a 100644 --- a/mfer/checker_test.go +++ b/mfer/checker_test.go @@ -452,6 +452,61 @@ func TestCheckMissingFileDetectedWithoutFallback(t *testing.T) { assert.Equal(t, 0, statusCounts[StatusError], "no files should be ERROR") } +func TestFindExtraFilesSkipsDotfiles(t *testing.T) { + // Regression test for #16: FindExtraFiles should not report dotfiles + // or the manifest file itself as extra files. + fs := afero.NewMemMapFs() + files := map[string][]byte{ + "file1.txt": []byte("in manifest"), + } + createTestManifest(t, fs, "/data/.index.mf", files) + createFilesOnDisk(t, fs, "/data", files) + + // Add dotfiles and manifest file on disk + require.NoError(t, afero.WriteFile(fs, "/data/.hidden", []byte("dotfile"), 0o644)) + require.NoError(t, fs.MkdirAll("/data/.git", 0o755)) + require.NoError(t, afero.WriteFile(fs, "/data/.git/config", []byte("git config"), 0o644)) + + chk, err := NewChecker("/data/.index.mf", "/data", fs) + require.NoError(t, err) + + results := make(chan Result, 10) + err = chk.FindExtraFiles(context.Background(), results) + require.NoError(t, err) + + var extras []Result + for r := range results { + extras = append(extras, r) + } + + // Should report NO extra files — dotfiles and manifest should be skipped + assert.Empty(t, extras, "FindExtraFiles should not report dotfiles or manifest file as extra; got: %v", extras) +} + +func TestFindExtraFilesSkipsManifestFile(t *testing.T) { + // The manifest file itself should never be reported as extra + fs := afero.NewMemMapFs() + files := map[string][]byte{ + "file1.txt": []byte("content"), + } + createTestManifest(t, fs, "/data/index.mf", files) + createFilesOnDisk(t, fs, "/data", files) + + chk, err := NewChecker("/data/index.mf", "/data", fs) + require.NoError(t, err) + + results := make(chan Result, 10) + err = chk.FindExtraFiles(context.Background(), results) + require.NoError(t, err) + + var extras []Result + for r := range 
results { + extras = append(extras, r) + } + + assert.Empty(t, extras, "manifest file should not be reported as extra; got: %v", extras) +} + func TestCheckEmptyManifest(t *testing.T) { fs := afero.NewMemMapFs() // Create manifest with no files -- 2.45.2 From a6a72faafb1712a51c730231b0a3aab54804be61 Mon Sep 17 00:00:00 2001 From: user Date: Tue, 10 Feb 2026 18:36:50 -0800 Subject: [PATCH 03/11] Fix CLI flag naming to use kebab-case as primary names Change --FollowSymLinks to --follow-symlinks (-L) and --IncludeDotfiles to --include-dotfiles as primary flag names. --- internal/cli/freshen.go | 4 ++-- internal/cli/gen.go | 4 ++-- internal/cli/mfer.go | 20 ++++++++++---------- 3 files changed, 14 insertions(+), 14 deletions(-) diff --git a/internal/cli/freshen.go b/internal/cli/freshen.go index 61f7a86..e63218d 100644 --- a/internal/cli/freshen.go +++ b/internal/cli/freshen.go @@ -41,8 +41,8 @@ func (mfa *CLIApp) freshenManifestOperation(ctx *cli.Context) error { basePath := ctx.String("base") showProgress := ctx.Bool("progress") - includeDotfiles := ctx.Bool("IncludeDotfiles") - followSymlinks := ctx.Bool("FollowSymLinks") + includeDotfiles := ctx.Bool("include-dotfiles") + followSymlinks := ctx.Bool("follow-symlinks") // Find manifest file var manifestPath string diff --git a/internal/cli/gen.go b/internal/cli/gen.go index ac04427..b812132 100644 --- a/internal/cli/gen.go +++ b/internal/cli/gen.go @@ -20,8 +20,8 @@ func (mfa *CLIApp) generateManifestOperation(ctx *cli.Context) error { log.Debug("generateManifestOperation()") opts := &mfer.ScannerOptions{ - IncludeDotfiles: ctx.Bool("IncludeDotfiles"), - FollowSymLinks: ctx.Bool("FollowSymLinks"), + IncludeDotfiles: ctx.Bool("include-dotfiles"), + FollowSymLinks: ctx.Bool("follow-symlinks"), Fs: mfa.Fs, } diff --git a/internal/cli/mfer.go b/internal/cli/mfer.go index f1ef518..cf1152c 100644 --- a/internal/cli/mfer.go +++ b/internal/cli/mfer.go @@ -123,14 +123,14 @@ func (mfa *CLIApp) run(args []string) { }, Flags: 
append(commonFlags(), &cli.BoolFlag{ - Name: "FollowSymLinks", - Aliases: []string{"follow-symlinks"}, + Name: "follow-symlinks", + Aliases: []string{"L"}, Usage: "Resolve encountered symlinks", }, &cli.BoolFlag{ - Name: "IncludeDotfiles", - Aliases: []string{"include-dotfiles"}, - Usage: "Include dot (hidden) files (excluded by default)", + Name: "include-dotfiles", + + Usage: "Include dot (hidden) files (excluded by default)", }, &cli.StringFlag{ Name: "output", @@ -211,14 +211,14 @@ func (mfa *CLIApp) run(args []string) { Usage: "Base directory for resolving relative paths", }, &cli.BoolFlag{ - Name: "FollowSymLinks", - Aliases: []string{"follow-symlinks"}, + Name: "follow-symlinks", + Aliases: []string{"L"}, Usage: "Resolve encountered symlinks", }, &cli.BoolFlag{ - Name: "IncludeDotfiles", - Aliases: []string{"include-dotfiles"}, - Usage: "Include dot (hidden) files (excluded by default)", + Name: "include-dotfiles", + + Usage: "Include dot (hidden) files (excluded by default)", }, &cli.BoolFlag{ Name: "progress", -- 2.45.2 From 07e0fc166aa63d9c33e46499e716baf735ae9609 Mon Sep 17 00:00:00 2001 From: user Date: Tue, 10 Feb 2026 18:37:40 -0800 Subject: [PATCH 04/11] Expand test coverage: path validation, round-trip, error cases Add tests for ValidatePath, AddFile size mismatch, invalid paths, progress reporting, manifest round-trip, invalid magic, truncated input, empty input, and manifest String() method. 
--- mfer/builder_test.go | 153 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 153 insertions(+) diff --git a/mfer/builder_test.go b/mfer/builder_test.go index 5f96406..56be794 100644 --- a/mfer/builder_test.go +++ b/mfer/builder_test.go @@ -163,6 +163,159 @@ func TestSetSeedDeterministic(t *testing.T) { assert.NotEqual(t, b1.fixedUUID, b3.fixedUUID, "different seeds should produce different UUIDs") } +func TestValidatePath(t *testing.T) { + valid := []string{ + "file.txt", + "dir/file.txt", + "a/b/c/d.txt", + "file with spaces.txt", + "日本語.txt", + } + for _, p := range valid { + t.Run("valid:"+p, func(t *testing.T) { + assert.NoError(t, ValidatePath(p)) + }) + } + + invalid := []struct { + path string + desc string + }{ + {"", "empty"}, + {"/absolute", "absolute path"}, + {"has\\backslash", "backslash"}, + {"has/../traversal", "dot-dot segment"}, + {"has//double", "empty segment"}, + {"..", "just dot-dot"}, + {string([]byte{0xff, 0xfe}), "invalid UTF-8"}, + } + for _, tt := range invalid { + t.Run("invalid:"+tt.desc, func(t *testing.T) { + assert.Error(t, ValidatePath(tt.path)) + }) + } +} + +func TestBuilderAddFileSizeMismatch(t *testing.T) { + b := NewBuilder() + content := []byte("short") + reader := bytes.NewReader(content) + + // Declare wrong size + _, err := b.AddFile("test.txt", FileSize(100), ModTime(time.Now()), reader, nil) + assert.Error(t, err) + assert.Contains(t, err.Error(), "size mismatch") +} + +func TestBuilderAddFileInvalidPath(t *testing.T) { + b := NewBuilder() + content := []byte("data") + reader := bytes.NewReader(content) + + _, err := b.AddFile("", FileSize(len(content)), ModTime(time.Now()), reader, nil) + assert.Error(t, err) + + reader.Reset(content) + _, err = b.AddFile("/absolute", FileSize(len(content)), ModTime(time.Now()), reader, nil) + assert.Error(t, err) +} + +func TestBuilderAddFileWithProgress(t *testing.T) { + b := NewBuilder() + content := bytes.Repeat([]byte("x"), 1000) + reader := bytes.NewReader(content) + 
progress := make(chan FileHashProgress, 100) + + bytesRead, err := b.AddFile("test.txt", FileSize(len(content)), ModTime(time.Now()), reader, progress) + close(progress) + require.NoError(t, err) + assert.Equal(t, FileSize(1000), bytesRead) + + var updates []FileHashProgress + for p := range progress { + updates = append(updates, p) + } + assert.NotEmpty(t, updates) + // Last update should show all bytes + assert.Equal(t, FileSize(1000), updates[len(updates)-1].BytesRead) +} + +func TestBuilderBuildRoundTrip(t *testing.T) { + // Build a manifest, deserialize it, verify all fields survive round-trip + b := NewBuilder() + now := time.Date(2025, 6, 15, 12, 0, 0, 0, time.UTC) + + files := []struct { + path string + content []byte + }{ + {"alpha.txt", []byte("alpha content")}, + {"beta/gamma.txt", []byte("gamma content")}, + {"beta/delta.txt", []byte("delta content")}, + } + + for _, f := range files { + reader := bytes.NewReader(f.content) + _, err := b.AddFile(RelFilePath(f.path), FileSize(len(f.content)), ModTime(now), reader, nil) + require.NoError(t, err) + } + + var buf bytes.Buffer + require.NoError(t, b.Build(&buf)) + + m, err := NewManifestFromReader(&buf) + require.NoError(t, err) + + mfiles := m.Files() + require.Len(t, mfiles, 3) + + // Verify sorted order + assert.Equal(t, "alpha.txt", mfiles[0].Path) + assert.Equal(t, "beta/delta.txt", mfiles[1].Path) + assert.Equal(t, "beta/gamma.txt", mfiles[2].Path) + + // Verify sizes + assert.Equal(t, int64(len("alpha content")), mfiles[0].Size) + + // Verify hashes are present + for _, f := range mfiles { + require.NotEmpty(t, f.Hashes, "file %s should have hashes", f.Path) + assert.NotEmpty(t, f.Hashes[0].MultiHash) + } +} + +func TestNewManifestFromReaderInvalidMagic(t *testing.T) { + _, err := NewManifestFromReader(bytes.NewReader([]byte("NOT_VALID"))) + assert.Error(t, err) + assert.Contains(t, err.Error(), "invalid file format") +} + +func TestNewManifestFromReaderEmpty(t *testing.T) { + _, err := 
NewManifestFromReader(bytes.NewReader([]byte{})) + assert.Error(t, err) +} + +func TestNewManifestFromReaderTruncated(t *testing.T) { + // Just the magic with nothing after + _, err := NewManifestFromReader(bytes.NewReader([]byte(MAGIC))) + assert.Error(t, err) +} + +func TestManifestString(t *testing.T) { + b := NewBuilder() + content := []byte("test") + reader := bytes.NewReader(content) + _, err := b.AddFile("test.txt", FileSize(len(content)), ModTime(time.Now()), reader, nil) + require.NoError(t, err) + + var buf bytes.Buffer + require.NoError(t, b.Build(&buf)) + + m, err := NewManifestFromReader(&buf) + require.NoError(t, err) + assert.Contains(t, m.String(), "count=1") +} + func TestBuilderBuildEmpty(t *testing.T) { b := NewBuilder() -- 2.45.2 From e18ab550ae5f545e734565cd8601a8a374aa4549 Mon Sep 17 00:00:00 2001 From: user Date: Tue, 10 Feb 2026 18:37:53 -0800 Subject: [PATCH 05/11] Add build instructions to README (closes #9) Document prerequisites (Go, protoc, golangci-lint, gofumpt), build commands, and go install instructions. --- README.md | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) diff --git a/README.md b/README.md index 6895d5b..8d9a3a4 100644 --- a/README.md +++ b/README.md @@ -52,6 +52,37 @@ Reading file contents and computing cryptographic hashes for manifest generation - **NO_COLOR:** Respect the `NO_COLOR` environment variable for disabling colored output. - **Options pattern:** Use `NewWithOptions(opts *Options)` constructor pattern for configurable types. 
+# Building + +## Prerequisites + +- Go 1.21 or later +- `protoc` (Protocol Buffers compiler) — only needed if modifying `.proto` files +- `golangci-lint` — for linting (`go install github.com/golangci/golangci-lint/cmd/golangci-lint@latest`) +- `gofumpt` — for formatting (`go install mvdan.cc/gofumpt@latest`) + +## Build + +```sh +# Build the binary +make bin/mfer + +# Run tests +make test + +# Format code +make fmt + +# Lint +make lint +``` + +## Install from source + +```sh +go install sneak.berlin/go/mfer/cmd/mfer@latest +``` + # Build Status [![Build Status](https://drone.datavi.be/api/badges/sneak/mfer/status.svg)](https://drone.datavi.be/sneak/mfer) -- 2.45.2 From 2f0005bf6496da4a419a3a4461f32f94ef1428b2 Mon Sep 17 00:00:00 2001 From: user Date: Tue, 10 Feb 2026 18:38:54 -0800 Subject: [PATCH 06/11] Fix BaseURL.JoinPath encoding slashes in paths, add URL tests JoinPath used url.PathEscape on the entire path which encoded slashes as %2F. Now encodes each segment individually. Add tests for all URL types. 
--- mfer/url.go | 8 ++++++-- mfer/url_test.go | 44 ++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 50 insertions(+), 2 deletions(-) create mode 100644 mfer/url_test.go diff --git a/mfer/url.go b/mfer/url.go index fb1da96..274687e 100644 --- a/mfer/url.go +++ b/mfer/url.go @@ -27,8 +27,12 @@ func (b BaseURL) JoinPath(path RelFilePath) (FileURL, error) { base.Path += "/" } - // Parse and encode the relative path - ref, err := url.Parse(url.PathEscape(string(path))) + // Encode each path segment individually to preserve slashes + segments := strings.Split(string(path), "/") + for i, seg := range segments { + segments[i] = url.PathEscape(seg) + } + ref, err := url.Parse(strings.Join(segments, "/")) if err != nil { return "", err } diff --git a/mfer/url_test.go b/mfer/url_test.go new file mode 100644 index 0000000..dd36a4a --- /dev/null +++ b/mfer/url_test.go @@ -0,0 +1,44 @@ +package mfer + +import ( + "testing" + + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" +) + +func TestBaseURLJoinPath(t *testing.T) { + tests := []struct { + base BaseURL + path RelFilePath + expected string + }{ + {"https://example.com/dir/", "file.txt", "https://example.com/dir/file.txt"}, + {"https://example.com/dir", "file.txt", "https://example.com/dir/file.txt"}, + {"https://example.com/", "sub/file.txt", "https://example.com/sub/file.txt"}, + {"https://example.com/dir/", "file with spaces.txt", "https://example.com/dir/file%20with%20spaces.txt"}, + } + + for _, tt := range tests { + t.Run(string(tt.base)+"+"+string(tt.path), func(t *testing.T) { + result, err := tt.base.JoinPath(tt.path) + require.NoError(t, err) + assert.Equal(t, tt.expected, string(result)) + }) + } +} + +func TestBaseURLString(t *testing.T) { + b := BaseURL("https://example.com/") + assert.Equal(t, "https://example.com/", b.String()) +} + +func TestFileURLString(t *testing.T) { + f := FileURL("https://example.com/file.txt") + assert.Equal(t, "https://example.com/file.txt", 
f.String()) +} + +func TestManifestURLString(t *testing.T) { + m := ManifestURL("https://example.com/index.mf") + assert.Equal(t, "https://example.com/index.mf", m.String()) +} -- 2.45.2 From dacc97d1f6b4133268e0e8d9ebebc8a4277469a0 Mon Sep 17 00:00:00 2001 From: clawbot Date: Wed, 11 Feb 2026 03:49:43 -0800 Subject: [PATCH 07/11] feat: deterministic manifests by default, remove atime, rate-limit checker progress - Remove atime field from proto schema (field 304 retired; noted in a comment, not declared `reserved`) - Omit createdAt timestamp by default for deterministic output - Add --include-timestamps flag to gen and freshen commands to opt in - Add Builder.SetIncludeTimestamps() and ScannerOptions.IncludeTimestamps - Rate-limit Checker progress updates to once per second (matching Scanner) - Add tests for all changes Closes design decisions: deterministic-by-default, atime removal. --- internal/cli/freshen.go | 3 +++ internal/cli/gen.go | 7 ++--- internal/cli/mfer.go | 8 ++++++ mfer/builder.go | 27 +++++++++++++------ mfer/builder_test.go | 59 +++++++++++++++++++++++++++++++++++++++++ mfer/checker.go | 44 +++++++++++++++++------------- mfer/checker_test.go | 38 ++++++++++++++++++++++++++ mfer/mf.pb.go | 41 +++++++++++----------------- mfer/mf.proto | 2 +- mfer/scanner.go | 14 ++++++---- 10 files changed, 182 insertions(+), 61 deletions(-) diff --git a/internal/cli/freshen.go b/internal/cli/freshen.go index e63218d..095c777 100644 --- a/internal/cli/freshen.go +++ b/internal/cli/freshen.go @@ -226,6 +226,9 @@ func (mfa *CLIApp) freshenManifestOperation(ctx *cli.Context) error { var hashedBytes int64 builder := mfer.NewBuilder() + if ctx.Bool("include-timestamps") { + builder.SetIncludeTimestamps(true) + } // Set up signing options if sign-key is provided if signKey := ctx.String("sign-key"); signKey != "" { diff --git a/internal/cli/gen.go b/internal/cli/gen.go index b812132..8d19c4c 100644 --- a/internal/cli/gen.go +++ b/internal/cli/gen.go @@ -20,9 +20,10 @@ func (mfa *CLIApp)
generateManifestOperation(ctx *cli.Context) error { log.Debug("generateManifestOperation()") opts := &mfer.ScannerOptions{ - IncludeDotfiles: ctx.Bool("include-dotfiles"), - FollowSymLinks: ctx.Bool("follow-symlinks"), - Fs: mfa.Fs, + IncludeDotfiles: ctx.Bool("include-dotfiles"), + FollowSymLinks: ctx.Bool("follow-symlinks"), + IncludeTimestamps: ctx.Bool("include-timestamps"), + Fs: mfa.Fs, } // Set seed for deterministic UUID if provided diff --git a/internal/cli/mfer.go b/internal/cli/mfer.go index cf1152c..8c482d7 100644 --- a/internal/cli/mfer.go +++ b/internal/cli/mfer.go @@ -159,6 +159,10 @@ func (mfa *CLIApp) run(args []string) { Usage: "Seed value for deterministic manifest UUID", EnvVars: []string{"MFER_SEED"}, }, + &cli.BoolFlag{ + Name: "include-timestamps", + Usage: "Include createdAt timestamp in manifest (omitted by default for determinism)", + }, ), }, { @@ -231,6 +235,10 @@ func (mfa *CLIApp) run(args []string) { Usage: "GPG key ID to sign the manifest with", EnvVars: []string{"MFER_SIGN_KEY"}, }, + &cli.BoolFlag{ + Name: "include-timestamps", + Usage: "Include createdAt timestamp in manifest (omitted by default for determinism)", + }, ), }, { diff --git a/mfer/builder.go b/mfer/builder.go index 0e2beb2..facf173 100644 --- a/mfer/builder.go +++ b/mfer/builder.go @@ -85,11 +85,12 @@ type FileHashProgress struct { // Builder constructs a manifest by adding files one at a time. type Builder struct { - mu sync.Mutex - files []*MFFilePath - createdAt time.Time - signingOptions *SigningOptions - fixedUUID []byte // if set, use this UUID instead of generating one + mu sync.Mutex + files []*MFFilePath + createdAt time.Time + includeTimestamps bool + signingOptions *SigningOptions + fixedUUID []byte // if set, use this UUID instead of generating one } // SetSeed derives a deterministic UUID from the given seed string. 
@@ -219,6 +220,14 @@ func (b *Builder) AddFileWithHash(path RelFilePath, size FileSize, mtime ModTime return nil } +// SetIncludeTimestamps controls whether the manifest includes a createdAt timestamp. +// By default timestamps are omitted for deterministic output. +func (b *Builder) SetIncludeTimestamps(include bool) { + b.mu.Lock() + defer b.mu.Unlock() + b.includeTimestamps = include +} + // SetSigningOptions sets the GPG signing options for the manifest. // If opts is non-nil, the manifest will be signed when Build() is called. func (b *Builder) SetSigningOptions(opts *SigningOptions) { @@ -239,9 +248,11 @@ func (b *Builder) Build(w io.Writer) error { // Create inner manifest inner := &MFFile{ - Version: MFFile_VERSION_ONE, - CreatedAt: newTimestampFromTime(b.createdAt), - Files: b.files, + Version: MFFile_VERSION_ONE, + Files: b.files, + } + if b.includeTimestamps { + inner.CreatedAt = newTimestampFromTime(b.createdAt) } // Create a temporary manifest to use existing serialization diff --git a/mfer/builder_test.go b/mfer/builder_test.go index 56be794..577106d 100644 --- a/mfer/builder_test.go +++ b/mfer/builder_test.go @@ -326,3 +326,62 @@ func TestBuilderBuildEmpty(t *testing.T) { // Should still produce valid manifest with 0 files assert.True(t, strings.HasPrefix(buf.String(), MAGIC)) } + +func TestBuilderOmitsCreatedAtByDefault(t *testing.T) { + b := NewBuilder() + content := []byte("hello") + _, err := b.AddFile("test.txt", FileSize(len(content)), ModTime(time.Now()), bytes.NewReader(content), nil) + require.NoError(t, err) + + var buf bytes.Buffer + require.NoError(t, b.Build(&buf)) + + m, err := NewManifestFromReader(&buf) + require.NoError(t, err) + assert.Nil(t, m.pbInner.CreatedAt, "createdAt should be nil by default for deterministic output") +} + +func TestBuilderIncludesCreatedAtWhenRequested(t *testing.T) { + b := NewBuilder() + b.SetIncludeTimestamps(true) + content := []byte("hello") + _, err := b.AddFile("test.txt", FileSize(len(content)), 
ModTime(time.Now()), bytes.NewReader(content), nil) + require.NoError(t, err) + + var buf bytes.Buffer + require.NoError(t, b.Build(&buf)) + + m, err := NewManifestFromReader(&buf) + require.NoError(t, err) + assert.NotNil(t, m.pbInner.CreatedAt, "createdAt should be set when IncludeTimestamps is true") +} + +func TestBuilderDeterministicFileOrder(t *testing.T) { + // Two builds with same files in different order should produce same file ordering. + // Note: UUIDs differ per build, so we compare parsed file lists, not raw bytes. + buildAndParse := func(order []string) []*MFFilePath { + b := NewBuilder() + for _, name := range order { + content := []byte("content of " + name) + _, err := b.AddFile(RelFilePath(name), FileSize(len(content)), ModTime(time.Unix(1000, 0)), bytes.NewReader(content), nil) + require.NoError(t, err) + } + var buf bytes.Buffer + require.NoError(t, b.Build(&buf)) + m, err := NewManifestFromReader(&buf) + require.NoError(t, err) + return m.Files() + } + + files1 := buildAndParse([]string{"b.txt", "a.txt"}) + files2 := buildAndParse([]string{"a.txt", "b.txt"}) + + require.Len(t, files1, 2) + require.Len(t, files2, 2) + for i := range files1 { + assert.Equal(t, files1[i].Path, files2[i].Path) + assert.Equal(t, files1[i].Size, files2[i].Size) + } + assert.Equal(t, "a.txt", files1[0].Path) + assert.Equal(t, "b.txt", files1[1].Path) +} diff --git a/mfer/checker.go b/mfer/checker.go index 1bd8408..35f233c 100644 --- a/mfer/checker.go +++ b/mfer/checker.go @@ -183,6 +183,7 @@ func (c *Checker) Check(ctx context.Context, results chan<- Result, progress cha var failures FileCount startTime := time.Now() + lastProgressTime := time.Now() for _, entry := range c.files { select { @@ -201,29 +202,34 @@ func (c *Checker) Check(ctx context.Context, results chan<- Result, progress cha results <- result } - // Send progress with rate and ETA calculation + // Send progress at most once per second (rate-limited) if progress != nil { - elapsed := 
time.Since(startTime) - var bytesPerSec float64 - var eta time.Duration + now := time.Now() + isLast := checkedFiles == totalFiles + if isLast || now.Sub(lastProgressTime) >= time.Second { + elapsed := time.Since(startTime) + var bytesPerSec float64 + var eta time.Duration - if elapsed > 0 && checkedBytes > 0 { - bytesPerSec = float64(checkedBytes) / elapsed.Seconds() - remainingBytes := totalBytes - checkedBytes - if bytesPerSec > 0 { - eta = time.Duration(float64(remainingBytes)/bytesPerSec) * time.Second + if elapsed > 0 && checkedBytes > 0 { + bytesPerSec = float64(checkedBytes) / elapsed.Seconds() + remainingBytes := totalBytes - checkedBytes + if bytesPerSec > 0 { + eta = time.Duration(float64(remainingBytes)/bytesPerSec) * time.Second + } } - } - sendCheckStatus(progress, CheckStatus{ - TotalFiles: totalFiles, - CheckedFiles: checkedFiles, - TotalBytes: totalBytes, - CheckedBytes: checkedBytes, - BytesPerSec: bytesPerSec, - ETA: eta, - Failures: failures, - }) + sendCheckStatus(progress, CheckStatus{ + TotalFiles: totalFiles, + CheckedFiles: checkedFiles, + TotalBytes: totalBytes, + CheckedBytes: checkedBytes, + BytesPerSec: bytesPerSec, + ETA: eta, + Failures: failures, + }) + lastProgressTime = now + } } } diff --git a/mfer/checker_test.go b/mfer/checker_test.go index 9c7bf9a..3709d48 100644 --- a/mfer/checker_test.go +++ b/mfer/checker_test.go @@ -3,6 +3,7 @@ package mfer import ( "bytes" "context" + "fmt" "testing" "time" @@ -528,3 +529,40 @@ func TestCheckEmptyManifest(t *testing.T) { } assert.Equal(t, 0, count) } + +func TestCheckProgressRateLimited(t *testing.T) { + // Create many small files - progress should be rate-limited, not one per file. + // With rate-limiting to once per second, we should get far fewer progress + // updates than files (plus one final update). 
+ fs := afero.NewMemMapFs() + files := make(map[string][]byte, 100) + for i := 0; i < 100; i++ { + name := fmt.Sprintf("file%03d.txt", i) + files[name] = []byte("content") + } + createTestManifest(t, fs, "/manifest.mf", files) + createFilesOnDisk(t, fs, "/data", files) + + chk, err := NewChecker("/manifest.mf", "/data", fs) + require.NoError(t, err) + + results := make(chan Result, 200) + progress := make(chan CheckStatus, 200) + err = chk.Check(context.Background(), results, progress) + require.NoError(t, err) + + // Drain results + for range results { + } + + // Count progress updates + var progressCount int + for range progress { + progressCount++ + } + + // Should be far fewer than 100 (rate-limited to once per second) + // At minimum we get the final update + assert.GreaterOrEqual(t, progressCount, 1, "should get at least the final progress update") + assert.Less(t, progressCount, 100, "progress should be rate-limited, not one per file") +} diff --git a/mfer/mf.pb.go b/mfer/mf.pb.go index 7c02e2d..6312c21 100644 --- a/mfer/mf.pb.go +++ b/mfer/mf.pb.go @@ -1,7 +1,7 @@ // Code generated by protoc-gen-go. DO NOT EDIT. // versions: // protoc-gen-go v1.36.11 -// protoc v6.33.0 +// protoc v6.33.4 // source: mf.proto package mfer @@ -329,6 +329,9 @@ func (x *MFFileOuter) GetSigningPubKey() []byte { type MFFilePath struct { state protoimpl.MessageState `protogen:"open.v1"` // required attributes: + // Path invariants: must be valid UTF-8, use forward slashes only, + // be relative (no leading /), contain no ".." segments, and no + // empty segments (no "//"). 
Path string `protobuf:"bytes,1,opt,name=path,proto3" json:"path,omitempty"` Size int64 `protobuf:"varint,2,opt,name=size,proto3" json:"size,omitempty"` // gotta have at least one: @@ -336,8 +339,7 @@ type MFFilePath struct { // optional per-file metadata MimeType *string `protobuf:"bytes,301,opt,name=mimeType,proto3,oneof" json:"mimeType,omitempty"` Mtime *Timestamp `protobuf:"bytes,302,opt,name=mtime,proto3,oneof" json:"mtime,omitempty"` - Ctime *Timestamp `protobuf:"bytes,303,opt,name=ctime,proto3,oneof" json:"ctime,omitempty"` - Atime *Timestamp `protobuf:"bytes,304,opt,name=atime,proto3,oneof" json:"atime,omitempty"` + Ctime *Timestamp `protobuf:"bytes,303,opt,name=ctime,proto3,oneof" json:"ctime,omitempty"` // Field 304 (atime) removed — not useful for integrity verification. unknownFields protoimpl.UnknownFields sizeCache protoimpl.SizeCache } @@ -414,13 +416,6 @@ func (x *MFFilePath) GetCtime() *Timestamp { return nil } -func (x *MFFilePath) GetAtime() *Timestamp { - if x != nil { - return x.Atime - } - return nil -} - type MFFileChecksum struct { state protoimpl.MessageState `protogen:"open.v1"` // 1.0 golang implementation must write a multihash here @@ -566,7 +561,7 @@ const file_mf_proto_rawDesc = "" + "\n" + "_signatureB\t\n" + "\a_signerB\x10\n" + - "\x0e_signingPubKey\"\xa2\x02\n" + + "\x0e_signingPubKey\"\xf0\x01\n" + "\n" + "MFFilePath\x12\x12\n" + "\x04path\x18\x01 \x01(\tR\x04path\x12\x12\n" + @@ -576,13 +571,10 @@ const file_mf_proto_rawDesc = "" + "\x05mtime\x18\xae\x02 \x01(\v2\n" + ".TimestampH\x01R\x05mtime\x88\x01\x01\x12&\n" + "\x05ctime\x18\xaf\x02 \x01(\v2\n" + - ".TimestampH\x02R\x05ctime\x88\x01\x01\x12&\n" + - "\x05atime\x18\xb0\x02 \x01(\v2\n" + - ".TimestampH\x03R\x05atime\x88\x01\x01B\v\n" + + ".TimestampH\x02R\x05ctime\x88\x01\x01B\v\n" + "\t_mimeTypeB\b\n" + "\x06_mtimeB\b\n" + - "\x06_ctimeB\b\n" + - "\x06_atime\".\n" + + "\x06_ctime\".\n" + "\x0eMFFileChecksum\x12\x1c\n" + "\tmultiHash\x18\x01 \x01(\fR\tmultiHash\"\xd6\x01\n" + 
"\x06MFFile\x12)\n" + @@ -627,15 +619,14 @@ var file_mf_proto_depIdxs = []int32{ 6, // 2: MFFilePath.hashes:type_name -> MFFileChecksum 3, // 3: MFFilePath.mtime:type_name -> Timestamp 3, // 4: MFFilePath.ctime:type_name -> Timestamp - 3, // 5: MFFilePath.atime:type_name -> Timestamp - 2, // 6: MFFile.version:type_name -> MFFile.Version - 5, // 7: MFFile.files:type_name -> MFFilePath - 3, // 8: MFFile.createdAt:type_name -> Timestamp - 9, // [9:9] is the sub-list for method output_type - 9, // [9:9] is the sub-list for method input_type - 9, // [9:9] is the sub-list for extension type_name - 9, // [9:9] is the sub-list for extension extendee - 0, // [0:9] is the sub-list for field type_name + 2, // 5: MFFile.version:type_name -> MFFile.Version + 5, // 6: MFFile.files:type_name -> MFFilePath + 3, // 7: MFFile.createdAt:type_name -> Timestamp + 8, // [8:8] is the sub-list for method output_type + 8, // [8:8] is the sub-list for method input_type + 8, // [8:8] is the sub-list for extension type_name + 8, // [8:8] is the sub-list for extension extendee + 0, // [0:8] is the sub-list for field type_name } func init() { file_mf_proto_init() } diff --git a/mfer/mf.proto b/mfer/mf.proto index 66748e0..d77b8e4 100644 --- a/mfer/mf.proto +++ b/mfer/mf.proto @@ -59,7 +59,7 @@ message MFFilePath { optional string mimeType = 301; optional Timestamp mtime = 302; optional Timestamp ctime = 303; - optional Timestamp atime = 304; + // Field 304 (atime) removed — not useful for integrity verification. } message MFFileChecksum { diff --git a/mfer/scanner.go b/mfer/scanner.go index f1486c5..abf845d 100644 --- a/mfer/scanner.go +++ b/mfer/scanner.go @@ -43,11 +43,12 @@ type ScanStatus struct { // ScannerOptions configures scanner behavior. 
type ScannerOptions struct { - IncludeDotfiles bool // Include files and directories starting with a dot (default: exclude) - FollowSymLinks bool // Resolve symlinks instead of skipping them - Fs afero.Fs // Filesystem to use, defaults to OsFs if nil - SigningOptions *SigningOptions // GPG signing options (nil = no signing) - Seed string // If set, derive a deterministic UUID from this seed + IncludeDotfiles bool // Include files and directories starting with a dot (default: exclude) + FollowSymLinks bool // Resolve symlinks instead of skipping them + IncludeTimestamps bool // Include createdAt timestamp in manifest (default: omit for determinism) + Fs afero.Fs // Filesystem to use, defaults to OsFs if nil + SigningOptions *SigningOptions // GPG signing options (nil = no signing) + Seed string // If set, derive a deterministic UUID from this seed } // FileEntry represents a file that has been enumerated. @@ -274,6 +275,9 @@ func (s *Scanner) ToManifest(ctx context.Context, w io.Writer, progress chan<- S s.mu.RUnlock() builder := NewBuilder() + if s.options.IncludeTimestamps { + builder.SetIncludeTimestamps(true) + } if s.options.SigningOptions != nil { builder.SetSigningOptions(s.options.SigningOptions) } -- 2.45.2 From 472221a7f6a4c5df0a87456752ff80c62a4b646a Mon Sep 17 00:00:00 2001 From: clawbot Date: Wed, 11 Feb 2026 03:56:10 -0800 Subject: [PATCH 08/11] feat: add export command, HTTP URL support, --version flag, error wrapping audit - Add 'mfer export' command: dumps manifest as JSON to stdout for piping to jq etc - Add HTTP/HTTPS URL support for manifest path arguments (check, list, export) - Enable --version flag (was hidden, now shown) - Audit all error messages: wrap with fmt.Errorf context throughout CLI and library - Add tests for export command and URL-based manifest loading - Add manifest_loader.go with shared resolveManifestArg and openManifestReader helpers --- internal/cli/check.go | 44 +++++----- internal/cli/export.go | 72 +++++++++++++++++ 
internal/cli/export_test.go | 137 ++++++++++++++++++++++++++++++++ internal/cli/fetch.go | 10 +-- internal/cli/freshen.go | 8 +- internal/cli/gen.go | 6 +- internal/cli/list.go | 34 +++----- internal/cli/manifest_loader.go | 54 +++++++++++++ internal/cli/mfer.go | 10 ++- mfer/builder.go | 11 ++- mfer/deserialize.go | 8 +- mfer/serialize.go | 12 +-- 12 files changed, 336 insertions(+), 70 deletions(-) create mode 100644 internal/cli/export.go create mode 100644 internal/cli/export_test.go create mode 100644 internal/cli/manifest_loader.go diff --git a/internal/cli/check.go b/internal/cli/check.go index a2d0cdf..7327474 100644 --- a/internal/cli/check.go +++ b/internal/cli/check.go @@ -3,6 +3,7 @@ package cli import ( "encoding/hex" "fmt" + "io" "path/filepath" "strings" "time" @@ -34,29 +35,32 @@ func findManifest(fs afero.Fs, dir string) (string, error) { func (mfa *CLIApp) checkManifestOperation(ctx *cli.Context) error { log.Debug("checkManifestOperation()") - var manifestPath string - var err error + manifestPath, err := mfa.resolveManifestArg(ctx) + if err != nil { + return fmt.Errorf("check: %w", err) + } - if ctx.Args().Len() > 0 { - arg := ctx.Args().Get(0) - // Check if arg is a directory or a file - info, statErr := mfa.Fs.Stat(arg) - if statErr == nil && info.IsDir() { - // It's a directory, look for manifest inside - manifestPath, err = findManifest(mfa.Fs, arg) - if err != nil { - return err - } - } else { - // Treat as a file path - manifestPath = arg + // URL manifests need to be downloaded to a temp file for the checker + if isHTTPURL(manifestPath) { + rc, fetchErr := mfa.openManifestReader(manifestPath) + if fetchErr != nil { + return fmt.Errorf("check: %w", fetchErr) } - } else { - // No argument, look in current directory - manifestPath, err = findManifest(mfa.Fs, ".") - if err != nil { - return err + tmpFile, tmpErr := afero.TempFile(mfa.Fs, "", "mfer-manifest-*.mf") + if tmpErr != nil { + _ = rc.Close() + return fmt.Errorf("check: failed to 
create temp file: %w", tmpErr) } + tmpPath := tmpFile.Name() + _, cpErr := io.Copy(tmpFile, rc) + _ = rc.Close() + _ = tmpFile.Close() + if cpErr != nil { + _ = mfa.Fs.Remove(tmpPath) + return fmt.Errorf("check: failed to download manifest: %w", cpErr) + } + defer func() { _ = mfa.Fs.Remove(tmpPath) }() + manifestPath = tmpPath } basePath := ctx.String("base") diff --git a/internal/cli/export.go b/internal/cli/export.go new file mode 100644 index 0000000..aca8ebf --- /dev/null +++ b/internal/cli/export.go @@ -0,0 +1,72 @@ +package cli + +import ( + "encoding/hex" + "encoding/json" + "fmt" + "time" + + "github.com/urfave/cli/v2" + "sneak.berlin/go/mfer/mfer" +) + +// ExportEntry represents a single file entry in the exported JSON output. +type ExportEntry struct { + Path string `json:"path"` + Size int64 `json:"size"` + Hashes []string `json:"hashes"` + Mtime *string `json:"mtime,omitempty"` + Ctime *string `json:"ctime,omitempty"` +} + +func (mfa *CLIApp) exportManifestOperation(ctx *cli.Context) error { + pathOrURL, err := mfa.resolveManifestArg(ctx) + if err != nil { + return fmt.Errorf("export: %w", err) + } + + rc, err := mfa.openManifestReader(pathOrURL) + if err != nil { + return fmt.Errorf("export: %w", err) + } + defer func() { _ = rc.Close() }() + + manifest, err := mfer.NewManifestFromReader(rc) + if err != nil { + return fmt.Errorf("export: failed to parse manifest: %w", err) + } + + files := manifest.Files() + entries := make([]ExportEntry, 0, len(files)) + + for _, f := range files { + entry := ExportEntry{ + Path: f.Path, + Size: f.Size, + Hashes: make([]string, 0, len(f.Hashes)), + } + + for _, h := range f.Hashes { + entry.Hashes = append(entry.Hashes, hex.EncodeToString(h.MultiHash)) + } + + if f.Mtime != nil { + t := time.Unix(f.Mtime.Seconds, int64(f.Mtime.Nanos)).UTC().Format(time.RFC3339Nano) + entry.Mtime = &t + } + if f.Ctime != nil { + t := time.Unix(f.Ctime.Seconds, int64(f.Ctime.Nanos)).UTC().Format(time.RFC3339Nano) + entry.Ctime = &t + } 
+ + entries = append(entries, entry) + } + + enc := json.NewEncoder(mfa.Stdout) + enc.SetIndent("", " ") + if err := enc.Encode(entries); err != nil { + return fmt.Errorf("export: failed to encode JSON: %w", err) + } + + return nil +} diff --git a/internal/cli/export_test.go b/internal/cli/export_test.go new file mode 100644 index 0000000..6d1f87d --- /dev/null +++ b/internal/cli/export_test.go @@ -0,0 +1,137 @@ +package cli + +import ( + "bytes" + "context" + "encoding/json" + "net/http" + "net/http/httptest" + "testing" + + "github.com/spf13/afero" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + "sneak.berlin/go/mfer/mfer" +) + +// buildTestManifest creates a manifest from in-memory files and returns its bytes. +func buildTestManifest(t *testing.T, files map[string][]byte) []byte { + t.Helper() + sourceFs := afero.NewMemMapFs() + for path, content := range files { + require.NoError(t, sourceFs.MkdirAll("/", 0o755)) + require.NoError(t, afero.WriteFile(sourceFs, "/"+path, content, 0o644)) + } + + opts := &mfer.ScannerOptions{Fs: sourceFs} + s := mfer.NewScannerWithOptions(opts) + require.NoError(t, s.EnumerateFS(sourceFs, "/", nil)) + + var buf bytes.Buffer + require.NoError(t, s.ToManifest(context.Background(), &buf, nil)) + return buf.Bytes() +} + +func TestExportManifestOperation(t *testing.T) { + testFiles := map[string][]byte{ + "hello.txt": []byte("Hello, World!"), + "sub/file.txt": []byte("nested content"), + } + manifestData := buildTestManifest(t, testFiles) + + // Write manifest to memfs + fs := afero.NewMemMapFs() + require.NoError(t, afero.WriteFile(fs, "/test.mf", manifestData, 0o644)) + + var stdout, stderr bytes.Buffer + exitCode := RunWithOptions(&RunOptions{ + Appname: "mfer", + Args: []string{"mfer", "export", "/test.mf"}, + Stdin: &bytes.Buffer{}, + Stdout: &stdout, + Stderr: &stderr, + Fs: fs, + }) + + require.Equal(t, 0, exitCode, "stderr: %s", stderr.String()) + + var entries []ExportEntry + require.NoError(t, 
json.Unmarshal(stdout.Bytes(), &entries)) + assert.Len(t, entries, 2) + + // Verify entries have expected fields + pathSet := make(map[string]bool) + for _, e := range entries { + pathSet[e.Path] = true + assert.NotEmpty(t, e.Hashes, "entry %s should have hashes", e.Path) + assert.Greater(t, e.Size, int64(0), "entry %s should have positive size", e.Path) + } + assert.True(t, pathSet["hello.txt"]) + assert.True(t, pathSet["sub/file.txt"]) +} + +func TestExportFromHTTPURL(t *testing.T) { + testFiles := map[string][]byte{ + "a.txt": []byte("aaa"), + } + manifestData := buildTestManifest(t, testFiles) + + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + w.Header().Set("Content-Type", "application/octet-stream") + _, _ = w.Write(manifestData) + })) + defer server.Close() + + var stdout, stderr bytes.Buffer + exitCode := RunWithOptions(&RunOptions{ + Appname: "mfer", + Args: []string{"mfer", "export", server.URL + "/index.mf"}, + Stdin: &bytes.Buffer{}, + Stdout: &stdout, + Stderr: &stderr, + Fs: afero.NewMemMapFs(), + }) + + require.Equal(t, 0, exitCode, "stderr: %s", stderr.String()) + + var entries []ExportEntry + require.NoError(t, json.Unmarshal(stdout.Bytes(), &entries)) + assert.Len(t, entries, 1) + assert.Equal(t, "a.txt", entries[0].Path) +} + +func TestListFromHTTPURL(t *testing.T) { + testFiles := map[string][]byte{ + "one.txt": []byte("1"), + "two.txt": []byte("22"), + } + manifestData := buildTestManifest(t, testFiles) + + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + _, _ = w.Write(manifestData) + })) + defer server.Close() + + var stdout, stderr bytes.Buffer + exitCode := RunWithOptions(&RunOptions{ + Appname: "mfer", + Args: []string{"mfer", "list", server.URL + "/index.mf"}, + Stdin: &bytes.Buffer{}, + Stdout: &stdout, + Stderr: &stderr, + Fs: afero.NewMemMapFs(), + }) + + require.Equal(t, 0, exitCode, "stderr: %s", stderr.String()) + output := stdout.String() 
+ assert.Contains(t, output, "one.txt") + assert.Contains(t, output, "two.txt") +} + +func TestIsHTTPURL(t *testing.T) { + assert.True(t, isHTTPURL("http://example.com/manifest.mf")) + assert.True(t, isHTTPURL("https://example.com/manifest.mf")) + assert.False(t, isHTTPURL("/local/path.mf")) + assert.False(t, isHTTPURL("relative/path.mf")) + assert.False(t, isHTTPURL("ftp://example.com/file")) +} diff --git a/internal/cli/fetch.go b/internal/cli/fetch.go index ade2080..677b65a 100644 --- a/internal/cli/fetch.go +++ b/internal/cli/fetch.go @@ -67,7 +67,7 @@ func (mfa *CLIApp) fetchManifestOperation(ctx *cli.Context) error { // Compute base URL (directory containing manifest) baseURL, err := url.Parse(manifestURL) if err != nil { - return err + return fmt.Errorf("fetch: invalid manifest URL: %w", err) } baseURL.Path = path.Dir(baseURL.Path) if !strings.HasSuffix(baseURL.Path, "/") { @@ -267,7 +267,7 @@ func downloadFile(fileURL, localPath string, entry *mfer.MFFilePath, progress ch dir := filepath.Dir(localPath) if dir != "" && dir != "." 
{ if err := os.MkdirAll(dir, 0o755); err != nil { - return err + return fmt.Errorf("failed to create directory %s: %w", dir, err) } } @@ -287,9 +287,9 @@ func downloadFile(fileURL, localPath string, entry *mfer.MFFilePath, progress ch } // Fetch file - resp, err := http.Get(fileURL) + resp, err := http.Get(fileURL) //nolint:gosec // URL constructed from manifest base if err != nil { - return err + return fmt.Errorf("HTTP request failed: %w", err) } defer func() { _ = resp.Body.Close() }() @@ -307,7 +307,7 @@ func downloadFile(fileURL, localPath string, entry *mfer.MFFilePath, progress ch // Create temp file out, err := os.Create(tmpPath) if err != nil { - return err + return fmt.Errorf("failed to create temp file: %w", err) } // Set up hash computation diff --git a/internal/cli/freshen.go b/internal/cli/freshen.go index 095c777..a078ee5 100644 --- a/internal/cli/freshen.go +++ b/internal/cli/freshen.go @@ -54,7 +54,7 @@ func (mfa *CLIApp) freshenManifestOperation(ctx *cli.Context) error { if statErr == nil && info.IsDir() { manifestPath, err = findManifest(mfa.Fs, arg) if err != nil { - return err + return fmt.Errorf("freshen: %w", err) } } else { manifestPath = arg @@ -62,7 +62,7 @@ func (mfa *CLIApp) freshenManifestOperation(ctx *cli.Context) error { } else { manifestPath, err = findManifest(mfa.Fs, ".") if err != nil { - return err + return fmt.Errorf("freshen: %w", err) } } @@ -93,7 +93,7 @@ func (mfa *CLIApp) freshenManifestOperation(ctx *cli.Context) error { absBase, err := filepath.Abs(basePath) if err != nil { - return err + return fmt.Errorf("freshen: invalid base path: %w", err) } err = afero.Walk(mfa.Fs, absBase, func(path string, info fs.FileInfo, walkErr error) error { @@ -104,7 +104,7 @@ func (mfa *CLIApp) freshenManifestOperation(ctx *cli.Context) error { // Get relative path relPath, err := filepath.Rel(absBase, path) if err != nil { - return err + return fmt.Errorf("freshen: failed to compute relative path for %s: %w", path, err) } // Skip the 
manifest file itself diff --git a/internal/cli/gen.go b/internal/cli/gen.go index 8d19c4c..f424a6f 100644 --- a/internal/cli/gen.go +++ b/internal/cli/gen.go @@ -66,7 +66,7 @@ func (mfa *CLIApp) generateManifestOperation(ctx *cli.Context) error { if args.Len() == 0 { // Default to current directory if err := s.EnumeratePath(".", enumProgress); err != nil { - return err + return fmt.Errorf("generate: failed to enumerate current directory: %w", err) } } else { // Collect and validate all paths first @@ -75,7 +75,7 @@ func (mfa *CLIApp) generateManifestOperation(ctx *cli.Context) error { inputPath := args.Get(i) ap, err := filepath.Abs(inputPath) if err != nil { - return err + return fmt.Errorf("generate: invalid path %q: %w", inputPath, err) } // Validate path exists before adding to list if exists, _ := afero.Exists(mfa.Fs, ap); !exists { @@ -85,7 +85,7 @@ func (mfa *CLIApp) generateManifestOperation(ctx *cli.Context) error { paths = append(paths, ap) } if err := s.EnumeratePaths(enumProgress, paths...); err != nil { - return err + return fmt.Errorf("generate: failed to enumerate paths: %w", err) } } enumWg.Wait() diff --git a/internal/cli/list.go b/internal/cli/list.go index b89c236..66031d7 100644 --- a/internal/cli/list.go +++ b/internal/cli/list.go @@ -16,32 +16,20 @@ func (mfa *CLIApp) listManifestOperation(ctx *cli.Context) error { longFormat := ctx.Bool("long") print0 := ctx.Bool("print0") - // Find manifest file - var manifestPath string - var err error - - if ctx.Args().Len() > 0 { - arg := ctx.Args().Get(0) - info, statErr := mfa.Fs.Stat(arg) - if statErr == nil && info.IsDir() { - manifestPath, err = findManifest(mfa.Fs, arg) - if err != nil { - return err - } - } else { - manifestPath = arg - } - } else { - manifestPath, err = findManifest(mfa.Fs, ".") - if err != nil { - return err - } + pathOrURL, err := mfa.resolveManifestArg(ctx) + if err != nil { + return fmt.Errorf("list: %w", err) } - // Load manifest - manifest, err := 
mfer.NewManifestFromFile(mfa.Fs, manifestPath) + rc, err := mfa.openManifestReader(pathOrURL) if err != nil { - return fmt.Errorf("failed to load manifest: %w", err) + return fmt.Errorf("list: %w", err) + } + defer func() { _ = rc.Close() }() + + manifest, err := mfer.NewManifestFromReader(rc) + if err != nil { + return fmt.Errorf("list: failed to parse manifest: %w", err) } files := manifest.Files() diff --git a/internal/cli/manifest_loader.go b/internal/cli/manifest_loader.go new file mode 100644 index 0000000..5d575d8 --- /dev/null +++ b/internal/cli/manifest_loader.go @@ -0,0 +1,54 @@ +package cli + +import ( + "fmt" + "io" + "net/http" + "strings" + + "github.com/urfave/cli/v2" +) + +// isHTTPURL returns true if the string starts with http:// or https://. +func isHTTPURL(s string) bool { + return strings.HasPrefix(s, "http://") || strings.HasPrefix(s, "https://") +} + +// openManifestReader opens a manifest from a path or URL and returns a ReadCloser. +// The caller must close the returned reader. +func (mfa *CLIApp) openManifestReader(pathOrURL string) (io.ReadCloser, error) { + if isHTTPURL(pathOrURL) { + resp, err := http.Get(pathOrURL) //nolint:gosec // user-provided URL is intentional + if err != nil { + return nil, fmt.Errorf("failed to fetch %s: %w", pathOrURL, err) + } + if resp.StatusCode != http.StatusOK { + _ = resp.Body.Close() + return nil, fmt.Errorf("failed to fetch %s: HTTP %d", pathOrURL, resp.StatusCode) + } + return resp.Body, nil + } + f, err := mfa.Fs.Open(pathOrURL) + if err != nil { + return nil, err + } + return f, nil +} + +// resolveManifestArg resolves the manifest path from CLI arguments. +// HTTP(S) URLs are returned as-is. Directories are searched for index.mf/.index.mf. +// If no argument is given, the current directory is searched. 
+func (mfa *CLIApp) resolveManifestArg(ctx *cli.Context) (string, error) { + if ctx.Args().Len() > 0 { + arg := ctx.Args().Get(0) + if isHTTPURL(arg) { + return arg, nil + } + info, statErr := mfa.Fs.Stat(arg) + if statErr == nil && info.IsDir() { + return findManifest(mfa.Fs, arg) + } + return arg, nil + } + return findManifest(mfa.Fs, ".") +} diff --git a/internal/cli/mfer.go b/internal/cli/mfer.go index 8c482d7..ed399f2 100644 --- a/internal/cli/mfer.go +++ b/internal/cli/mfer.go @@ -241,6 +241,14 @@ func (mfa *CLIApp) run(args []string) { }, ), }, + { + Name: "export", + Usage: "Export manifest contents as JSON", + ArgsUsage: "[manifest file or URL]", + Action: func(c *cli.Context) error { + return mfa.exportManifestOperation(c) + }, + }, { Name: "version", Usage: "Show version", @@ -282,7 +290,7 @@ func (mfa *CLIApp) run(args []string) { }, } - mfa.app.HideVersion = true + mfa.app.HideVersion = false err := mfa.app.Run(args) if err != nil { mfa.exitCode = 1 diff --git a/mfer/builder.go b/mfer/builder.go index facf173..0b2262f 100644 --- a/mfer/builder.go +++ b/mfer/builder.go @@ -196,7 +196,7 @@ func (b *Builder) FileCount() int { // Returns an error if path is empty, size is negative, or hash is nil/empty. 
func (b *Builder) AddFileWithHash(path RelFilePath, size FileSize, mtime ModTime, hash Multihash) error { if err := ValidatePath(string(path)); err != nil { - return err + return fmt.Errorf("add file: %w", err) } if size < 0 { return errors.New("size cannot be negative") @@ -264,15 +264,18 @@ func (b *Builder) Build(w io.Writer) error { // Generate outer wrapper if err := m.generateOuter(); err != nil { - return err + return fmt.Errorf("build: generate outer: %w", err) } // Generate final output if err := m.generate(); err != nil { - return err + return fmt.Errorf("build: generate: %w", err) } // Write to output _, err := w.Write(m.output.Bytes()) - return err + if err != nil { + return fmt.Errorf("build: write output: %w", err) + } + return nil } diff --git a/mfer/deserialize.go b/mfer/deserialize.go index 878bf8f..f8de802 100644 --- a/mfer/deserialize.go +++ b/mfer/deserialize.go @@ -44,7 +44,7 @@ func (m *manifest) deserializeInner() error { // Verify hash of compressed data before decompression h := sha256.New() if _, err := h.Write(m.pbOuter.InnerMessage); err != nil { - return err + return fmt.Errorf("deserialize: hash write: %w", err) } sha256Hash := h.Sum(nil) if !bytes.Equal(sha256Hash, m.pbOuter.Sha256) { @@ -72,7 +72,7 @@ func (m *manifest) deserializeInner() error { zr, err := zstd.NewReader(bb) if err != nil { - return err + return fmt.Errorf("deserialize: zstd reader: %w", err) } defer zr.Close() @@ -85,7 +85,7 @@ func (m *manifest) deserializeInner() error { limitedReader := io.LimitReader(zr, maxSize) dat, err := io.ReadAll(limitedReader) if err != nil { - return err + return fmt.Errorf("deserialize: decompress: %w", err) } if int64(len(dat)) >= MaxDecompressedSize { return fmt.Errorf("decompressed data exceeds maximum allowed size of %d bytes", MaxDecompressedSize) @@ -100,7 +100,7 @@ func (m *manifest) deserializeInner() error { // Deserialize inner message m.pbInner = new(MFFile) if err := proto.Unmarshal(dat, m.pbInner); err != nil { - return 
err + return fmt.Errorf("deserialize: unmarshal inner: %w", err) } // Validate inner UUID diff --git a/mfer/serialize.go b/mfer/serialize.go index 3d712a6..b60c1c0 100644 --- a/mfer/serialize.go +++ b/mfer/serialize.go @@ -34,12 +34,12 @@ func (m *manifest) generate() error { } dat, err := proto.MarshalOptions{Deterministic: true}.Marshal(m.pbOuter) if err != nil { - return err + return fmt.Errorf("serialize: marshal outer: %w", err) } m.output = bytes.NewBuffer([]byte(MAGIC)) _, err = m.output.Write(dat) if err != nil { - return err + return fmt.Errorf("serialize: write output: %w", err) } return nil } @@ -60,18 +60,18 @@ func (m *manifest) generateOuter() error { innerData, err := proto.MarshalOptions{Deterministic: true}.Marshal(m.pbInner) if err != nil { - return err + return fmt.Errorf("serialize: marshal inner: %w", err) } // Compress the inner data idc := new(bytes.Buffer) zw, err := zstd.NewWriter(idc, zstd.WithEncoderLevel(zstd.SpeedBestCompression)) if err != nil { - return err + return fmt.Errorf("serialize: create compressor: %w", err) } _, err = zw.Write(innerData) if err != nil { - return err + return fmt.Errorf("serialize: compress: %w", err) } _ = zw.Close() @@ -80,7 +80,7 @@ func (m *manifest) generateOuter() error { // Hash the compressed data for integrity verification before decompression h := sha256.New() if _, err := h.Write(compressedData); err != nil { - return err + return fmt.Errorf("serialize: hash write: %w", err) } sha256Hash := h.Sum(nil) -- 2.45.2 From ca3e29e8029a1b67b166ebc2b17dda9c6395e751 Mon Sep 17 00:00:00 2001 From: clawbot Date: Wed, 11 Feb 2026 03:59:46 -0800 Subject: [PATCH 09/11] docs: add FORMAT.md, answer design questions, bump version to 1.0.0 - Write complete .mf format specification (FORMAT.md) - Fill in all design question answers in TODO.md - Mark completed implementation items in TODO.md - Bump VERSION from 0.1.0 to 1.0.0 in Makefile - Update README to reference FORMAT.md and reflect 1.0 status --- FORMAT.md | 142 
++++++++++++++++++++++++++++++++++++++++++++++++++++++ Makefile | 2 +- README.md | 10 ++-- TODO.md | 42 ++++++++-------- 4 files changed, 168 insertions(+), 28 deletions(-) create mode 100644 FORMAT.md diff --git a/FORMAT.md b/FORMAT.md new file mode 100644 index 0000000..e09dfb8 --- /dev/null +++ b/FORMAT.md @@ -0,0 +1,142 @@ +# .mf File Format Specification + +Version 1.0 + +## Overview + +An `.mf` file is a binary manifest that describes a directory tree of files, +including their paths, sizes, and cryptographic checksums. It supports +optional GPG signatures for integrity verification and optional timestamps +for metadata preservation. + +## File Structure + +An `.mf` file consists of two parts, concatenated: + +1. **Magic bytes** (8 bytes): the ASCII string `ZNAVSRFG` +2. **Outer message**: a Protocol Buffers serialized `MFFileOuter` message + +There is no length prefix or version byte between the magic and the protobuf +message. The protobuf message extends to the end of the file. + +See [`mfer/mf.proto`](mfer/mf.proto) for exact field numbers and types. 
+ +## Outer Message (`MFFileOuter`) + +The outer message contains: + +| Field | Number | Type | Description | +|--------------------|--------|-------------------|--------------------------------------------------| +| `version` | 101 | enum | Must be `VERSION_ONE` (1) | +| `compressionType` | 102 | enum | Compression of `innerMessage`; must be `COMPRESSION_ZSTD` (1) | +| `size` | 103 | int64 | Uncompressed size of `innerMessage` (corruption detection) | +| `sha256` | 104 | bytes | SHA-256 hash of the **compressed** `innerMessage` (corruption detection) | +| `uuid` | 105 | bytes | Random v4 UUID; must match the inner message UUID | +| `innerMessage` | 199 | bytes | Zstd-compressed serialized `MFFile` message | +| `signature` | 201 | bytes (optional) | GPG signature (ASCII-armored or binary) | +| `signer` | 202 | bytes (optional) | Full GPG key ID of the signer | +| `signingPubKey` | 203 | bytes (optional) | Full GPG signing public key | + +### SHA-256 Hash + +The `sha256` field (104) covers the **compressed** `innerMessage` bytes. +This allows verifying data integrity before decompression. + +## Compression + +The `innerMessage` field is compressed with [Zstandard (zstd)](https://facebook.github.io/zstd/). +Implementations must enforce a decompression size limit to prevent +decompression bombs. The reference implementation limits decompressed size to +256 MB. 
+ +## Inner Message (`MFFile`) + +After decompressing `innerMessage`, the result is a serialized `MFFile` +(referred to as the manifest): + +| Field | Number | Type | Description | +|-------------|--------|-----------------------|--------------------------------------------| +| `version` | 100 | enum | Must be `VERSION_ONE` (1) | +| `files` | 101 | repeated `MFFilePath` | List of files in the manifest | +| `uuid` | 102 | bytes | Random v4 UUID; must match outer UUID | +| `createdAt` | 201 | Timestamp (optional) | When the manifest was created | + +## File Entries (`MFFilePath`) + +Each file entry contains: + +| Field | Number | Type | Description | +|------------|--------|---------------------------|--------------------------------------| +| `path` | 1 | string | Relative file path (see Path Rules) | +| `size` | 2 | int64 | File size in bytes | +| `hashes` | 3 | repeated `MFFileChecksum` | At least one hash required | +| `mimeType` | 301 | string (optional) | MIME type | +| `mtime` | 302 | Timestamp (optional) | Modification time | +| `ctime` | 303 | Timestamp (optional) | Change time (inode metadata change) | + +Field 304 (`atime`) has been removed from the specification. Access time is +volatile and non-deterministic; it is not useful for integrity verification. + +## Path Rules + +All `path` values must satisfy these invariants: + +- **UTF-8**: paths must be valid UTF-8 +- **Forward slashes**: use `/` as the path separator (never `\`) +- **Relative only**: no leading `/` +- **No parent traversal**: no `..` path segments +- **No empty segments**: no `//` sequences +- **No trailing slash**: paths refer to files, not directories + +Implementations must validate these invariants when reading and writing +manifests. Paths that violate these rules must be rejected. + +## Hash Format (`MFFileChecksum`) + +Each checksum is a single `bytes multiHash` field containing a +[multihash](https://multiformats.io/multihash/)-encoded value. 
Multihash is +self-describing: the encoded bytes include a varint algorithm identifier +followed by a varint digest length followed by the digest itself. + +The 1.0 implementation writes SHA-256 multihashes (`0x12` algorithm code). +Implementations must be able to verify SHA-256 multihashes at minimum. + +## Signature Scheme + +Signing is optional. When present, the signature covers a canonical string +constructed as: + +``` +ZNAVSRFG-<UUID>-<SHA256> +``` + +Where: +- `ZNAVSRFG` is the magic bytes string (literal ASCII) +- `<UUID>` is the hex-encoded UUID from the outer message +- `<SHA256>` is the hex-encoded SHA-256 hash from the outer message (covering compressed data) + +Components are separated by hyphens. The signature is produced by GPG over +this canonical string and stored in the `signature` field of the outer +message. + +## Deterministic Serialization + +By default, manifests are generated deterministically: + +- File entries are sorted by `path` in **lexicographic byte order** +- `createdAt` is omitted unless explicitly requested +- `atime` is never included (field removed from schema) + +This ensures that two independent runs over the same directory tree produce +byte-identical `.mf` files (assuming file contents and metadata have not +changed). + +## MIME Type + +The recommended MIME type for `.mf` files is `application/octet-stream`. +The `.mf` file extension is the canonical identifier. 
+ +## Reference + +- Proto definition: [`mfer/mf.proto`](mfer/mf.proto) +- Reference implementation: [git.eeqj.de/sneak/mfer](https://git.eeqj.de/sneak/mfer) diff --git a/Makefile b/Makefile index e27258f..1ec6919 100644 --- a/Makefile +++ b/Makefile @@ -7,7 +7,7 @@ SOURCEFILES := mfer/*.go mfer/*.proto internal/*/*.go cmd/*/*.go go.mod go.sum ARCH := $(shell uname -m) GITREV_BUILD := $(shell bash $(PWD)/bin/gitrev.sh) APPNAME := mfer -VERSION := 0.1.0 +VERSION := 1.0.0 export DOCKER_IMAGE_CACHE_DIR := $(HOME)/Library/Caches/Docker/$(APPNAME)-$(ARCH) GOLDFLAGS += -X main.Version=$(VERSION) GOLDFLAGS += -X main.Gitrev=$(GITREV_BUILD) diff --git a/README.md b/README.md index 8d9a3a4..99af8a2 100644 --- a/README.md +++ b/README.md @@ -9,14 +9,12 @@ cryptographic checksums or signatures over same) to aid in archiving, downloading, and streaming, or mirroring. The manifest files' data is serialized with Google's [protobuf serialization format](https://developers.google.com/protocol-buffers). The structure of -these files can be found [in the format -specification](https://git.eeqj.de/sneak/mfer/src/branch/main/mfer/mf.proto) -which is included in the [project +these files can be found in the [format specification](FORMAT.md) and the +[protobuf schema](mfer/mf.proto), both included in the [project repository](https://git.eeqj.de/sneak/mfer). -The current version is pre-1.0 and while the repo was published in 2022, -there has not yet been any versioned release. [SemVer](https://semver.org) -will be used for releases. +The current version is 1.0. [SemVer](https://semver.org) is used for +releases. This project was started by [@sneak](https://sneak.berlin) to scratch an itch in 2022 and is currently a one-person effort, though the goal is for diff --git a/TODO.md b/TODO.md index 6c4cd3e..b03d1b7 100644 --- a/TODO.md +++ b/TODO.md @@ -9,76 +9,76 @@ **1. Should `MFFileChecksum` be simplified?** Currently it's a separate message wrapping a single `bytes multiHash` field. 
Since multihash already self-describes the algorithm, `repeated bytes hashes` directly on `MFFilePath` would be simpler and reduce per-file protobuf overhead. Is the extra message layer intentional (e.g. planning to add per-hash metadata like `verified_at`)? -> *answer:* +> *answer:* Leave as-is for now. **2. Should file permissions/mode be stored?** The format stores mtime/ctime but not Unix file permissions. For archival use (ExFAT, filesystem-independent checksums) this may not matter, but for software distribution or filesystem restoration it's a gap. Should we reserve a field now (e.g. `optional uint32 mode = 305`) even if we don't populate it yet? -> *answer:* +> *answer:* No, not right now. **3. Should `atime` be removed from the schema?** Access time is volatile, non-deterministic, and often disabled (`noatime`). Including it means two manifests of the same directory at different times will differ, which conflicts with the determinism goal. Remove it, or document it as "never set by default"? -> *answer:* +> *answer:* REMOVED — done. Field 304 has been removed from the proto schema. **4. What are the path normalization rules?** The proto has `string path` with no specification about: always forward-slash? Must be relative? No `..` components allowed? UTF-8 NFC vs NFD normalization (macOS vs Linux)? Max path length? This is a security issue (path traversal) and a cross-platform compatibility issue. What rules should the spec mandate? -> *answer:* +> *answer:* Implemented — UTF-8, forward-slash only, relative paths only, no `..` segments. Documented in FORMAT.md. **5. Should we add a version byte after the magic?** Currently `ZNAVSRFG` is followed immediately by protobuf. Adding a version byte (`ZNAVSRFG\x01`) would allow future framing changes without requiring protobuf parsing to detect the version. `MFFileOuter.Version` serves this purpose but requires successful deserialization to read. Worth the extra byte? 
-> *answer:* +> *answer:* No — protobuf handles versioning via the `MFFileOuter.Version` field. **6. Should we add a length-prefix after the magic?** Protobuf is not self-delimiting. If we ever want to concatenate manifests or append data after the protobuf, the current framing is insufficient. Add a varint or fixed-width length-prefix? -> *answer:* +> *answer:* Not needed now. ### Signature Design **7. What does the outer SHA-256 hash cover — compressed or uncompressed data?** The review notes it currently hashes compressed data (good for verifying before decompression), but this should be explicitly documented. Which is the intended behavior? -> *answer:* +> *answer:* Hash covers compressed data. Documented in FORMAT.md. **8. Should `signatureString()` sign raw bytes instead of a hex-encoded string?** Currently the canonical string is `MAGIC-UUID-MULTIHASH` with hex encoding, which adds a transformation layer. Signing the raw `sha256` bytes (or compressed `innerMessage` directly) would be simpler. Keep the string format or switch to raw bytes? -> *answer:* +> *answer:* Keep string format as-is (established). **9. Should we support detached signature files (`.mf.sig`)?** Embedded signatures are better for single-file distribution. Detached `.mf.sig` files follow the familiar `SHASUMS`/`SHASUMS.asc` pattern and are simpler for HTTP serving. Support both modes? -> *answer:* +> *answer:* Not for 1.0. **10. GPG vs pure-Go crypto for signatures?** Shelling out to `gpg` is fragile (may not be installed, version-dependent output). `github.com/ProtonMail/go-crypto` provides pure-Go OpenPGP, or we could go Ed25519/signify (simpler, no key management). Which direction? -> *answer:* +> *answer:* Keep GPG shelling for now (established). ### Implementation Design **11. Should manifests be deterministic by default?** This means: sort file entries by path, omit `createdAt` timestamp (or make it opt-in), no `atime`. 
Should determinism be the default, with a `--include-timestamps` flag to opt in? -> *answer:* +> *answer:* YES — implemented, default behavior. **12. Should we consolidate or keep both scanner/checker implementations?** There are two parallel implementations: `mfer/scanner.go` + `mfer/checker.go` (typed with `FileSize`, `RelFilePath`) and `internal/scanner/` + `internal/checker/` (raw `int64`, `string`). The `mfer/` versions are superior. Delete the `internal/` versions? -> *answer:* +> *answer:* Consolidated — done (PR#27). **13. Should the `manifest` type be exported?** Currently unexported with exported constructors (`New`, `NewFromPaths`, etc.). Consumers can't declare `var m *mfer.manifest`. Export the type, or define an interface? -> *answer:* +> *answer:* Keep unexported. **14. What should the Go module path be for 1.0?** Currently mixed between `sneak.berlin/go/mfer` and `git.eeqj.de/sneak/mfer`. Which is canonical? -> *answer:* +> *answer:* `sneak.berlin/go/mfer` --- @@ -86,19 +86,19 @@ Currently mixed between `sneak.berlin/go/mfer` and `git.eeqj.de/sneak/mfer`. 
Whi ### Phase 1: Foundation (format correctness) -- [ ] Delete `internal/scanner/` and `internal/checker/` — consolidate on `mfer/` package versions; update CLI code -- [ ] Add deterministic file ordering — sort entries by path (lexicographic, byte-order) in `Builder.Build()`; add test asserting byte-identical output from two runs -- [ ] Add decompression size limit — `io.LimitReader` in `deserializeInner()` with `m.pbOuter.Size` as bound +- [x] Delete `internal/scanner/` and `internal/checker/` — consolidate on `mfer/` package versions; update CLI code +- [x] Add deterministic file ordering — sort entries by path (lexicographic, byte-order) in `Builder.Build()`; add test asserting byte-identical output from two runs +- [x] Add decompression size limit — `io.LimitReader` in `deserializeInner()` with `m.pbOuter.Size` as bound - [ ] Fix `errors.Is` dead code in checker — replace with `os.IsNotExist(err)` or `errors.Is(err, fs.ErrNotExist)` - [ ] Fix `AddFile` to verify size — check `totalRead == size` after reading, return error on mismatch -- [ ] Specify path invariants — add proto comments (UTF-8, forward-slash, relative, no `..`, no leading `/`); validate in `Builder.AddFile` and `Builder.AddFileWithHash` +- [x] Specify path invariants — add proto comments (UTF-8, forward-slash, relative, no `..`, no leading `/`); validate in `Builder.AddFile` and `Builder.AddFileWithHash` ### Phase 2: CLI polish - [ ] Fix flag naming — all CLI flags use kebab-case as primary (`--include-dotfiles`, `--follow-symlinks`) - [ ] Fix URL construction in fetch — use `BaseURL.JoinPath()` or `url.JoinPath()` instead of string concatenation - [ ] Add progress rate-limiting to Checker — throttle to once per second, matching Scanner -- [ ] Add `--deterministic` flag (or make it default) — omit `createdAt`, sort files +- [x] Add `--deterministic` flag (or make it default) — omit `createdAt`, sort files ### Phase 3: Robustness @@ -109,10 +109,10 @@ Currently mixed between 
`sneak.berlin/go/mfer` and `git.eeqj.de/sneak/mfer`. Whi ### Phase 4: Format finalization -- [ ] Remove or deprecate `atime` from proto (pending design question answer) +- [x] Remove or deprecate `atime` from proto (resolved: field 304 removed from the proto schema) - [ ] Reserve `optional uint32 mode = 305` in `MFFilePath` for future file permissions - [ ] Add version byte after magic — `ZNAVSRFG\x01` for format version 1 -- [ ] Write format specification document — separate from README: magic, outer structure, compression, inner structure, path invariants, signature scheme, canonical serialization +- [x] Write format specification document — separate from README: magic, outer structure, compression, inner structure, path invariants, signature scheme, canonical serialization ### Phase 5: Release prep -- 2.45.2 From e27f8a6c3bd7a6b4d6a6ff164c59feef5532341e Mon Sep 17 00:00:00 2001 From: clawbot Date: Fri, 20 Feb 2026 02:39:26 -0800 Subject: [PATCH 10/11] revert version bump: 1.0.0 back to 0.1.0 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Per review feedback — version bumps and releases are not within scope for this PR.
--- Makefile | 2 +- README.md | 5 +++-- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/Makefile b/Makefile index 1ec6919..e27258f 100644 --- a/Makefile +++ b/Makefile @@ -7,7 +7,7 @@ SOURCEFILES := mfer/*.go mfer/*.proto internal/*/*.go cmd/*/*.go go.mod go.sum ARCH := $(shell uname -m) GITREV_BUILD := $(shell bash $(PWD)/bin/gitrev.sh) APPNAME := mfer -VERSION := 1.0.0 +VERSION := 0.1.0 export DOCKER_IMAGE_CACHE_DIR := $(HOME)/Library/Caches/Docker/$(APPNAME)-$(ARCH) GOLDFLAGS += -X main.Version=$(VERSION) GOLDFLAGS += -X main.Gitrev=$(GITREV_BUILD) diff --git a/README.md b/README.md index 99af8a2..37c5cc2 100644 --- a/README.md +++ b/README.md @@ -13,8 +13,9 @@ these files can be found in the [format specification](FORMAT.md) and the [protobuf schema](mfer/mf.proto), both included in the [project repository](https://git.eeqj.de/sneak/mfer). -The current version is 1.0. [SemVer](https://semver.org) is used for -releases. +The current version is pre-1.0; although the repo was published in 2022, +there has not yet been any versioned release. [SemVer](https://semver.org) +will be used for releases.
This project was started by [@sneak](https://sneak.berlin) to scratch an itch in 2022 and is currently a one-person effort, though the goal is for -- 2.45.2 From ca93d80f1ea0cfd12e64125933e7ce36f806e099 Mon Sep 17 00:00:00 2001 From: clawbot Date: Fri, 20 Feb 2026 03:52:27 -0800 Subject: [PATCH 11/11] fix: address PR #32 review findings - Add --batch --no-tty to all GPG invocations (fixes TestManifestTamperedSignatureFails hang) - Add 'reserved 304' to mf.proto for removed atime field - Restore IncludeDotfiles alias on include-dotfiles flag - Replace http.Get with http.Client{Timeout: 30s} in manifest_loader.go --- internal/cli/manifest_loader.go | 4 +++- internal/cli/mfer.go | 6 ++++-- mfer/gpg.go | 14 +++++++------- mfer/mf.pb.go | 6 +++--- mfer/mf.proto | 1 + 5 files changed, 18 insertions(+), 13 deletions(-) diff --git a/internal/cli/manifest_loader.go b/internal/cli/manifest_loader.go index 5d575d8..333ac38 100644 --- a/internal/cli/manifest_loader.go +++ b/internal/cli/manifest_loader.go @@ -5,6 +5,7 @@ import ( "io" "net/http" "strings" + "time" "github.com/urfave/cli/v2" ) @@ -18,7 +19,8 @@ func isHTTPURL(s string) bool { // The caller must close the returned reader. 
func (mfa *CLIApp) openManifestReader(pathOrURL string) (io.ReadCloser, error) { if isHTTPURL(pathOrURL) { - resp, err := http.Get(pathOrURL) //nolint:gosec // user-provided URL is intentional + client := &http.Client{Timeout: 30 * time.Second} + resp, err := client.Get(pathOrURL) //nolint:gosec // user-provided URL is intentional if err != nil { return nil, fmt.Errorf("failed to fetch %s: %w", pathOrURL, err) } diff --git a/internal/cli/mfer.go b/internal/cli/mfer.go index ed399f2..0ef7dbf 100644 --- a/internal/cli/mfer.go +++ b/internal/cli/mfer.go @@ -128,7 +128,8 @@ func (mfa *CLIApp) run(args []string) { Usage: "Resolve encountered symlinks", }, &cli.BoolFlag{ - Name: "include-dotfiles", + Name: "include-dotfiles", + Aliases: []string{"IncludeDotfiles"}, Usage: "Include dot (hidden) files (excluded by default)", }, @@ -220,7 +221,8 @@ func (mfa *CLIApp) run(args []string) { Usage: "Resolve encountered symlinks", }, &cli.BoolFlag{ - Name: "include-dotfiles", + Name: "include-dotfiles", + Aliases: []string{"IncludeDotfiles"}, Usage: "Include dot (hidden) files (excluded by default)", }, diff --git a/mfer/gpg.go b/mfer/gpg.go index 943f102..2ae607b 100644 --- a/mfer/gpg.go +++ b/mfer/gpg.go @@ -20,7 +20,7 @@ type SigningOptions struct { // gpgSign creates a detached signature of the data using the specified key. // Returns the armored detached signature. func gpgSign(data []byte, keyID GPGKeyID) ([]byte, error) { - cmd := exec.Command("gpg", + cmd := exec.Command("gpg", "--batch", "--no-tty", "--detach-sign", "--armor", "--local-user", string(keyID), @@ -42,7 +42,7 @@ func gpgSign(data []byte, keyID GPGKeyID) ([]byte, error) { // gpgExportPublicKey exports the public key for the specified key ID. // Returns the armored public key. 
func gpgExportPublicKey(keyID GPGKeyID) ([]byte, error) { - cmd := exec.Command("gpg", + cmd := exec.Command("gpg", "--batch", "--no-tty", "--export", "--armor", string(keyID), @@ -65,7 +65,7 @@ func gpgExportPublicKey(keyID GPGKeyID) ([]byte, error) { // gpgGetKeyFingerprint gets the full fingerprint for a key ID. func gpgGetKeyFingerprint(keyID GPGKeyID) ([]byte, error) { - cmd := exec.Command("gpg", + cmd := exec.Command("gpg", "--batch", "--no-tty", "--with-colons", "--fingerprint", string(keyID), @@ -114,7 +114,7 @@ func gpgExtractPubKeyFingerprint(pubKey []byte) (string, error) { } // Import the public key into the temporary keyring - importCmd := exec.Command("gpg", + importCmd := exec.Command("gpg", "--batch", "--no-tty", "--homedir", tmpDir, "--import", pubKeyFile, @@ -126,7 +126,7 @@ func gpgExtractPubKeyFingerprint(pubKey []byte) (string, error) { } // List keys to get fingerprint - listCmd := exec.Command("gpg", + listCmd := exec.Command("gpg", "--batch", "--no-tty", "--homedir", tmpDir, "--with-colons", "--fingerprint", @@ -184,7 +184,7 @@ func gpgVerify(data, signature, pubKey []byte) error { } // Import the public key into the temporary keyring - importCmd := exec.Command("gpg", + importCmd := exec.Command("gpg", "--batch", "--no-tty", "--homedir", tmpDir, "--import", pubKeyFile, @@ -196,7 +196,7 @@ func gpgVerify(data, signature, pubKey []byte) error { } // Verify the signature - verifyCmd := exec.Command("gpg", + verifyCmd := exec.Command("gpg", "--batch", "--no-tty", "--homedir", tmpDir, "--verify", sigFile, diff --git a/mfer/mf.pb.go b/mfer/mf.pb.go index 6312c21..d3ef0de 100644 --- a/mfer/mf.pb.go +++ b/mfer/mf.pb.go @@ -339,7 +339,7 @@ type MFFilePath struct { // optional per-file metadata MimeType *string `protobuf:"bytes,301,opt,name=mimeType,proto3,oneof" json:"mimeType,omitempty"` Mtime *Timestamp `protobuf:"bytes,302,opt,name=mtime,proto3,oneof" json:"mtime,omitempty"` - Ctime *Timestamp `protobuf:"bytes,303,opt,name=ctime,proto3,oneof" 
json:"ctime,omitempty"` // Field 304 (atime) removed — not useful for integrity verification. + Ctime *Timestamp `protobuf:"bytes,303,opt,name=ctime,proto3,oneof" json:"ctime,omitempty"` unknownFields protoimpl.UnknownFields sizeCache protoimpl.SizeCache } @@ -561,7 +561,7 @@ const file_mf_proto_rawDesc = "" + "\n" + "_signatureB\t\n" + "\a_signerB\x10\n" + - "\x0e_signingPubKey\"\xf0\x01\n" + + "\x0e_signingPubKey\"\xf8\x01\n" + "\n" + "MFFilePath\x12\x12\n" + "\x04path\x18\x01 \x01(\tR\x04path\x12\x12\n" + @@ -574,7 +574,7 @@ const file_mf_proto_rawDesc = "" + ".TimestampH\x02R\x05ctime\x88\x01\x01B\v\n" + "\t_mimeTypeB\b\n" + "\x06_mtimeB\b\n" + - "\x06_ctime\".\n" + + "\x06_ctimeJ\x06\b\xb0\x02\x10\xb1\x02\".\n" + "\x0eMFFileChecksum\x12\x1c\n" + "\tmultiHash\x18\x01 \x01(\fR\tmultiHash\"\xd6\x01\n" + "\x06MFFile\x12)\n" + diff --git a/mfer/mf.proto b/mfer/mf.proto index d77b8e4..91b013a 100644 --- a/mfer/mf.proto +++ b/mfer/mf.proto @@ -60,6 +60,7 @@ message MFFilePath { optional Timestamp mtime = 302; optional Timestamp ctime = 303; // Field 304 (atime) removed — not useful for integrity verification. + reserved 304; } message MFFileChecksum { -- 2.45.2