Compare commits

..

17 Commits

Author SHA1 Message Date
clawbot
e27f8a6c3b revert version bump: 1.0.0 back to 0.1.0
Per review feedback — version bumps and releases are not
within scope for this PR.
2026-02-20 03:45:19 -08:00
clawbot
ca3e29e802 docs: add FORMAT.md, answer design questions, bump version to 1.0.0
- Write complete .mf format specification (FORMAT.md)
- Fill in all design question answers in TODO.md
- Mark completed implementation items in TODO.md
- Bump VERSION from 0.1.0 to 1.0.0 in Makefile
- Update README to reference FORMAT.md and reflect 1.0 status
2026-02-20 03:45:19 -08:00
clawbot
472221a7f6 feat: add export command, HTTP URL support, --version flag, error wrapping audit
- Add 'mfer export' command: dumps manifest as JSON to stdout for piping to jq etc
- Add HTTP/HTTPS URL support for manifest path arguments (check, list, export)
- Enable --version flag (was hidden, now shown)
- Audit all error messages: wrap with fmt.Errorf context throughout CLI and library
- Add tests for export command and URL-based manifest loading
- Add manifest_loader.go with shared resolveManifestArg and openManifestReader helpers
2026-02-20 03:45:19 -08:00
clawbot
dacc97d1f6 feat: deterministic manifests by default, remove atime, rate-limit checker progress
- Remove atime field from proto schema (field 304 reserved)
- Omit createdAt timestamp by default for deterministic output
- Add --include-timestamps flag to gen and freshen commands to opt in
- Add Builder.SetIncludeTimestamps() and ScannerOptions.IncludeTimestamps
- Rate-limit Checker progress updates to once per second (matching Scanner)
- Add tests for all changes

Closes design decisions: deterministic-by-default, atime removal.
2026-02-20 03:45:19 -08:00
user
2f0005bf64 Fix BaseURL.JoinPath encoding slashes in paths, add URL tests
JoinPath used url.PathEscape on the entire path which encoded slashes
as %2F. Now encodes each segment individually. Add tests for all URL
types.
2026-02-20 03:44:47 -08:00
user
e18ab550ae Add build instructions to README (closes #9)
Document prerequisites (Go, protoc, golangci-lint, gofumpt), build
commands, and go install instructions.
2026-02-20 03:44:47 -08:00
user
07e0fc166a Expand test coverage: path validation, round-trip, error cases
Add tests for ValidatePath, AddFile size mismatch, invalid paths,
progress reporting, manifest round-trip, invalid magic, truncated
input, empty input, and manifest String() method.
2026-02-20 03:44:47 -08:00
user
a6a72faafb Fix CLI flag naming to use kebab-case as primary names
Change --FollowSymLinks to --follow-symlinks (-L) and
--IncludeDotfiles to --include-dotfiles as primary flag names.
2026-02-20 03:44:47 -08:00
user
8bb70bc6a9 Fix FindExtraFiles reporting manifest file and dotfiles as extra (closes #16)
FindExtraFiles now skips hidden files/directories (dotfiles) and the
manifest file itself when walking the filesystem. The manifest's relative
path is computed at Checker construction time.
2026-02-20 03:43:56 -08:00
user
008f270d90 Fix IsHiddenPath treating current directory as hidden (closes #14)
IsHiddenPath(".") incorrectly returned true because path.Clean(".")
starts with a dot. Add explicit check for "." before the HasPrefix
check. Add test cases for ".", "./", and "./file.txt".
2026-02-20 03:43:56 -08:00
bbab6e73f4 Add deterministic file ordering in Builder.Build() (closes #23) (#28)
Reviewed-on: #28
2026-02-20 12:15:13 +01:00
615eecff79 Merge branch 'next' into fix/issue-23 2026-02-20 12:14:53 +01:00
9b67de016d chore: remove committed vendor/modcache archives (#35)
Removes `vendor.tzst` and `modcache.tzst` that should never have been committed. Adds both to `.gitignore`.

Reviewed-on: #35
Co-authored-by: clawbot <sneak+clawbot@sneak.cloud>
Co-committed-by: clawbot <sneak+clawbot@sneak.cloud>
2026-02-20 12:14:29 +01:00
clawbot
3c779465e2 remove time-hard hash iteration from seed UUID derivation
Replace 150M SHA-256 iteration key-stretching with a single hash.
Remove all references to iteration counts, timing (~5-10s), and
key-stretching from code and documentation.

The seed flag is retained for deterministic UUID generation, but
now derives the UUID with a single SHA-256 hash instead of the
unnecessary iterative approach.
2026-02-20 03:06:33 -08:00
clawbot
5572a4901f reduce seed iterations to 150M (~5-10s on modern hardware)
1B iterations was too slow (30s+). Benchmarked on Apple Silicon:
- 150M iterations ≈ 6.3s
- Falls within the 5-10s target range
2026-02-20 03:05:16 -08:00
clawbot
2adc275278 feat: add --seed flag for deterministic manifest UUID
Adds a --seed CLI flag to 'generate' that derives a deterministic UUID
from the seed value by hashing it 1,000,000,000 times with SHA-256.
This makes manifest generation fully reproducible when the same seed
and input files are provided.

- Builder.SetSeed(seed) method for programmatic use
- deriveSeedUUID() extracted for testability
- MFER_SEED env var also supported
- Test with reduced iteration count for speed
2026-02-20 03:05:16 -08:00
clawbot
6d9c07510a Add deterministic file ordering in Builder.Build()
Sort file entries by path (lexicographic, byte-order) before
serialization to ensure deterministic output. Add fixedUUID support
for testing reproducibility, and a test asserting byte-identical
output from two runs with the same input.

Closes #23
2026-02-20 03:05:16 -08:00
10 changed files with 73 additions and 39 deletions

2
.gitignore vendored
View File

@ -3,6 +3,8 @@
*.tmp
*.dockerimage
/vendor
vendor.tzst
modcache.tzst
# Stale files
.drone.yml

View File

@ -26,6 +26,12 @@ func (mfa *CLIApp) generateManifestOperation(ctx *cli.Context) error {
Fs: mfa.Fs,
}
// Set seed for deterministic UUID if provided
if seed := ctx.String("seed"); seed != "" {
opts.Seed = seed
log.Infof("using deterministic seed for manifest UUID")
}
// Set up signing options if sign-key is provided
if signKey := ctx.String("sign-key"); signKey != "" {
opts.SigningOptions = &mfer.SigningOptions{

View File

@ -154,6 +154,11 @@ func (mfa *CLIApp) run(args []string) {
Usage: "GPG key ID to sign the manifest with",
EnvVars: []string{"MFER_SIGN_KEY"},
},
&cli.StringFlag{
Name: "seed",
Usage: "Seed value for deterministic manifest UUID",
EnvVars: []string{"MFER_SEED"},
},
&cli.BoolFlag{
Name: "include-timestamps",
Usage: "Include createdAt timestamp in manifest (omitted by default for determinism)",

View File

@ -90,6 +90,15 @@ type Builder struct {
createdAt time.Time
includeTimestamps bool
signingOptions *SigningOptions
fixedUUID []byte // if set, use this UUID instead of generating one
}
// SetSeed derives a deterministic UUID from the given seed string.
// The seed is hashed once with SHA-256 and the first 16 bytes are used
// as a fixed UUID for the manifest.
func (b *Builder) SetSeed(seed string) {
hash := sha256.Sum256([]byte(seed))
b.fixedUUID = hash[:16]
}
// NewBuilder creates a new Builder.
@ -232,7 +241,7 @@ func (b *Builder) Build(w io.Writer) error {
b.mu.Lock()
defer b.mu.Unlock()
// Sort files by path for deterministic output (#23)
// Sort files by path for deterministic output
sort.Slice(b.files, func(i, j int) bool {
return b.files[i].Path < b.files[j].Path
})
@ -250,6 +259,7 @@ func (b *Builder) Build(w io.Writer) error {
m := &manifest{
pbInner: inner,
signingOptions: b.signingOptions,
fixedUUID: b.fixedUUID,
}
// Generate outer wrapper

View File

@ -115,51 +115,52 @@ func TestNewTimestampFromTimeExtremeDate(t *testing.T) {
}
}
func TestBuilderBuildDeterministicOrder(t *testing.T) {
// Regression test for #23: files should be sorted by path in the manifest
// to ensure deterministic output regardless of insertion order.
buildManifest := func(paths []string) []byte {
func TestBuilderDeterministicOutput(t *testing.T) {
buildManifest := func() []byte {
b := NewBuilder()
for _, p := range paths {
content := []byte("content of " + p)
reader := bytes.NewReader(content)
_, err := b.AddFile(RelFilePath(p), FileSize(len(content)), ModTime(time.Now()), reader, nil)
// Use a fixed createdAt and UUID so output is reproducible
b.createdAt = time.Date(2025, 1, 1, 0, 0, 0, 0, time.UTC)
b.fixedUUID = make([]byte, 16) // all zeros
mtime := ModTime(time.Date(2025, 6, 1, 0, 0, 0, 0, time.UTC))
// Add files in reverse order to test sorting
files := []struct {
path string
content string
}{
{"c/file.txt", "content c"},
{"a/file.txt", "content a"},
{"b/file.txt", "content b"},
}
for _, f := range files {
r := bytes.NewReader([]byte(f.content))
_, err := b.AddFile(RelFilePath(f.path), FileSize(len(f.content)), mtime, r, nil)
require.NoError(t, err)
}
var buf bytes.Buffer
require.NoError(t, b.Build(&buf))
err := b.Build(&buf)
require.NoError(t, err)
return buf.Bytes()
}
// Build with files in two different orders
order1 := []string{"z.txt", "a.txt", "m/b.txt", "m/a.txt", "b.txt"}
order2 := []string{"b.txt", "m/a.txt", "a.txt", "z.txt", "m/b.txt"}
out1 := buildManifest()
out2 := buildManifest()
assert.Equal(t, out1, out2, "two builds with same input should produce byte-identical output")
}
manifest1 := buildManifest(order1)
manifest2 := buildManifest(order2)
func TestSetSeedDeterministic(t *testing.T) {
b1 := NewBuilder()
b1.SetSeed("test-seed-value")
b2 := NewBuilder()
b2.SetSeed("test-seed-value")
assert.Equal(t, b1.fixedUUID, b2.fixedUUID, "same seed should produce same UUID")
assert.Len(t, b1.fixedUUID, 16, "UUID should be 16 bytes")
// Parse both and verify file order is sorted
m1, err := NewManifestFromReader(bytes.NewReader(manifest1))
require.NoError(t, err)
m2, err := NewManifestFromReader(bytes.NewReader(manifest2))
require.NoError(t, err)
files1 := m1.Files()
files2 := m2.Files()
require.Len(t, files1, 5)
require.Len(t, files2, 5)
// Both should have same order
for i := range files1 {
assert.Equal(t, files1[i].Path, files2[i].Path, "file %d path mismatch", i)
}
// Verify the order is lexicographic
assert.Equal(t, "a.txt", files1[0].Path)
assert.Equal(t, "b.txt", files1[1].Path)
assert.Equal(t, "m/a.txt", files1[2].Path)
assert.Equal(t, "m/b.txt", files1[3].Path)
assert.Equal(t, "z.txt", files1[4].Path)
b3 := NewBuilder()
b3.SetSeed("different-seed")
assert.NotEqual(t, b1.fixedUUID, b3.fixedUUID, "different seeds should produce different UUIDs")
}
func TestValidatePath(t *testing.T) {

View File

@ -17,6 +17,7 @@ type manifest struct {
pbOuter *MFFileOuter
output *bytes.Buffer
signingOptions *SigningOptions
fixedUUID []byte // if set, use this UUID instead of generating one
}
func (m *manifest) String() string {

View File

@ -48,6 +48,7 @@ type ScannerOptions struct {
IncludeTimestamps bool // Include createdAt timestamp in manifest (default: omit for determinism)
Fs afero.Fs // Filesystem to use, defaults to OsFs if nil
SigningOptions *SigningOptions // GPG signing options (nil = no signing)
Seed string // If set, derive a deterministic UUID from this seed
}
// FileEntry represents a file that has been enumerated.
@ -280,6 +281,9 @@ func (s *Scanner) ToManifest(ctx context.Context, w io.Writer, progress chan<- S
if s.options.SigningOptions != nil {
builder.SetSigningOptions(s.options.SigningOptions)
}
if s.options.Seed != "" {
builder.SetSeed(s.options.Seed)
}
var scannedFiles FileCount
var scannedBytes FileSize

View File

@ -49,8 +49,13 @@ func (m *manifest) generateOuter() error {
return errors.New("internal error")
}
// Generate UUID and set on inner message
manifestUUID := uuid.New()
// Use fixed UUID if provided, otherwise generate a new one
var manifestUUID uuid.UUID
if len(m.fixedUUID) == 16 {
copy(manifestUUID[:], m.fixedUUID)
} else {
manifestUUID = uuid.New()
}
m.pbInner.Uuid = manifestUUID[:]
innerData, err := proto.MarshalOptions{Deterministic: true}.Marshal(m.pbInner)

Binary file not shown.

Binary file not shown.