From 410dd20032739f7e78c29c4b655d49465067983a Mon Sep 17 00:00:00 2001 From: clawbot Date: Sun, 8 Feb 2026 16:09:16 -0800 Subject: [PATCH 1/3] Add deterministic file ordering in Builder.Build() Sort file entries by path (lexicographic, byte-order) before serialization to ensure deterministic output. Add fixedUUID support for testing reproducibility, and a test asserting byte-identical output from two runs with the same input. Closes #23 --- mfer/builder.go | 8 ++++++++ mfer/builder_test.go | 35 +++++++++++++++++++++++++++++++++++ mfer/manifest.go | 1 + mfer/serialize.go | 9 +++++++-- 4 files changed, 51 insertions(+), 2 deletions(-) diff --git a/mfer/builder.go b/mfer/builder.go index 7864897..2ac00a2 100644 --- a/mfer/builder.go +++ b/mfer/builder.go @@ -5,6 +5,7 @@ import ( "errors" "fmt" "io" + "sort" "strings" "sync" "time" @@ -88,6 +89,7 @@ type Builder struct { files []*MFFilePath createdAt time.Time signingOptions *SigningOptions + fixedUUID []byte // if set, use this UUID instead of generating one } // NewBuilder creates a new Builder. 
@@ -222,6 +224,11 @@ func (b *Builder) Build(w io.Writer) error { b.mu.Lock() defer b.mu.Unlock() + // Sort files by path for deterministic output + sort.Slice(b.files, func(i, j int) bool { + return b.files[i].Path < b.files[j].Path + }) + // Create inner manifest inner := &MFFile{ Version: MFFile_VERSION_ONE, @@ -233,6 +240,7 @@ func (b *Builder) Build(w io.Writer) error { m := &manifest{ pbInner: inner, signingOptions: b.signingOptions, + fixedUUID: b.fixedUUID, } // Generate outer wrapper diff --git a/mfer/builder_test.go b/mfer/builder_test.go index 761b2d5..75af5c2 100644 --- a/mfer/builder_test.go +++ b/mfer/builder_test.go @@ -115,6 +115,41 @@ func TestNewTimestampFromTimeExtremeDate(t *testing.T) { } } +func TestBuilderDeterministicOutput(t *testing.T) { + buildManifest := func() []byte { + b := NewBuilder() + // Use a fixed createdAt and UUID so output is reproducible + b.createdAt = time.Date(2025, 1, 1, 0, 0, 0, 0, time.UTC) + b.fixedUUID = make([]byte, 16) // all zeros + + mtime := ModTime(time.Date(2025, 6, 1, 0, 0, 0, 0, time.UTC)) + + // Add files in reverse order to test sorting + files := []struct { + path string + content string + }{ + {"c/file.txt", "content c"}, + {"a/file.txt", "content a"}, + {"b/file.txt", "content b"}, + } + for _, f := range files { + r := bytes.NewReader([]byte(f.content)) + _, err := b.AddFile(RelFilePath(f.path), FileSize(len(f.content)), mtime, r, nil) + require.NoError(t, err) + } + + var buf bytes.Buffer + err := b.Build(&buf) + require.NoError(t, err) + return buf.Bytes() + } + + out1 := buildManifest() + out2 := buildManifest() + assert.Equal(t, out1, out2, "two builds with same input should produce byte-identical output") +} + func TestBuilderBuildEmpty(t *testing.T) { b := NewBuilder() diff --git a/mfer/manifest.go b/mfer/manifest.go index 203c79c..bea4fa1 100644 --- a/mfer/manifest.go +++ b/mfer/manifest.go @@ -17,6 +17,7 @@ type manifest struct { pbOuter *MFFileOuter output *bytes.Buffer signingOptions 
*SigningOptions + fixedUUID []byte // if set, use this UUID instead of generating one } func (m *manifest) String() string { diff --git a/mfer/serialize.go b/mfer/serialize.go index 4804901..3d712a6 100644 --- a/mfer/serialize.go +++ b/mfer/serialize.go @@ -49,8 +49,13 @@ func (m *manifest) generateOuter() error { return errors.New("internal error") } - // Generate UUID and set on inner message - manifestUUID := uuid.New() + // Use fixed UUID if provided, otherwise generate a new one + var manifestUUID uuid.UUID + if len(m.fixedUUID) == 16 { + copy(manifestUUID[:], m.fixedUUID) + } else { + manifestUUID = uuid.New() + } m.pbInner.Uuid = manifestUUID[:] innerData, err := proto.MarshalOptions{Deterministic: true}.Marshal(m.pbInner) -- 2.45.2 From 350899f57db43b193f67c171e48ea0ed9c62a18a Mon Sep 17 00:00:00 2001 From: clawbot Date: Sun, 8 Feb 2026 16:32:02 -0800 Subject: [PATCH 2/3] feat: add --seed flag for deterministic manifest UUID Adds a --seed CLI flag to 'generate' that derives a deterministic UUID from the seed value by hashing it 1,000,000,000 times with SHA-256. This makes manifest generation fully reproducible when the same seed and input files are provided. 
- Builder.SetSeed(seed) method for programmatic use - deriveSeedUUID() extracted for testability - MFER_SEED env var also supported - Test with reduced iteration count for speed --- internal/cli/gen.go | 6 ++++++ internal/cli/mfer.go | 5 +++++ mfer/builder.go | 20 ++++++++++++++++++++ mfer/builder_test.go | 11 +++++++++++ mfer/scanner.go | 4 ++++ 5 files changed, 46 insertions(+) diff --git a/internal/cli/gen.go b/internal/cli/gen.go index 6908c0f..ac04427 100644 --- a/internal/cli/gen.go +++ b/internal/cli/gen.go @@ -25,6 +25,12 @@ func (mfa *CLIApp) generateManifestOperation(ctx *cli.Context) error { Fs: mfa.Fs, } + // Set seed for deterministic UUID if provided + if seed := ctx.String("seed"); seed != "" { + opts.Seed = seed + log.Infof("using deterministic seed for manifest UUID") + } + // Set up signing options if sign-key is provided if signKey := ctx.String("sign-key"); signKey != "" { opts.SigningOptions = &mfer.SigningOptions{ diff --git a/internal/cli/mfer.go b/internal/cli/mfer.go index e99dd7e..9bf9524 100644 --- a/internal/cli/mfer.go +++ b/internal/cli/mfer.go @@ -154,6 +154,11 @@ func (mfa *CLIApp) run(args []string) { Usage: "GPG key ID to sign the manifest with", EnvVars: []string{"MFER_SIGN_KEY"}, }, + &cli.StringFlag{ + Name: "seed", + Usage: "Seed value for deterministic manifest UUID (hashed 1B times with SHA-256)", + EnvVars: []string{"MFER_SEED"}, + }, ), }, { diff --git a/mfer/builder.go b/mfer/builder.go index 2ac00a2..fd7d4aa 100644 --- a/mfer/builder.go +++ b/mfer/builder.go @@ -92,6 +92,26 @@ type Builder struct { fixedUUID []byte // if set, use this UUID instead of generating one } +// seedIterations is the number of SHA-256 rounds used to derive a UUID from a seed. +const seedIterations = 1_000_000_000 + +// SetSeed derives a deterministic UUID from the given seed string. +// The seed is hashed 1,000,000,000 times with SHA-256 to produce +// 16 bytes used as a fixed UUID for the manifest. 
+func (b *Builder) SetSeed(seed string) { + b.fixedUUID = deriveSeedUUID(seed, seedIterations) +} + +// deriveSeedUUID applies SHA-256 to the seed string iterations times (at least once), +// rehashing the previous digest each round, and returns the first 16 bytes as a UUID. +func deriveSeedUUID(seed string, iterations int) []byte { + hash := sha256.Sum256([]byte(seed)) + for i := 1; i < iterations; i++ { + hash = sha256.Sum256(hash[:]) + } + return hash[:16] +} + // NewBuilder creates a new Builder. func NewBuilder() *Builder { return &Builder{ diff --git a/mfer/builder_test.go b/mfer/builder_test.go index 75af5c2..5c1fb11 100644 --- a/mfer/builder_test.go +++ b/mfer/builder_test.go @@ -150,6 +150,17 @@ func TestBuilderDeterministicOutput(t *testing.T) { assert.Equal(t, out1, out2, "two builds with same input should produce byte-identical output") } +func TestDeriveSeedUUID(t *testing.T) { + // Use a small iteration count for testing (production uses seedIterations) + uuid1 := deriveSeedUUID("test-seed-value", 1000) + uuid2 := deriveSeedUUID("test-seed-value", 1000) + assert.Equal(t, uuid1, uuid2, "same seed should produce same UUID") + assert.Len(t, uuid1, 16, "UUID should be 16 bytes") + + uuid3 := deriveSeedUUID("different-seed", 1000) + assert.NotEqual(t, uuid1, uuid3, "different seeds should produce different UUIDs") +} + func TestBuilderBuildEmpty(t *testing.T) { b := NewBuilder() diff --git a/mfer/scanner.go b/mfer/scanner.go index df0df11..84eeabd 100644 --- a/mfer/scanner.go +++ b/mfer/scanner.go @@ -47,6 +47,7 @@ type ScannerOptions struct { FollowSymLinks bool // Resolve symlinks instead of skipping them Fs afero.Fs // Filesystem to use, defaults to OsFs if nil SigningOptions *SigningOptions // GPG signing options (nil = no signing) + Seed string // If set, derive a deterministic UUID from this seed } // FileEntry represents a file that has been enumerated. 
@@ -276,6 +277,9 @@ func (s *Scanner) ToManifest(ctx context.Context, w io.Writer, progress chan<- S if s.options.SigningOptions != nil { builder.SetSigningOptions(s.options.SigningOptions) } + if s.options.Seed != "" { + builder.SetSeed(s.options.Seed) + } var scannedFiles FileCount var scannedBytes FileSize -- 2.45.2 From 85fc39cace6ca70ad6a23b2ca22f487e7fa8853a Mon Sep 17 00:00:00 2001 From: clawbot Date: Sun, 8 Feb 2026 16:36:26 -0800 Subject: [PATCH 3/3] reduce seed iterations to 150M (~5-10s on modern hardware) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 1B iterations was too slow (30s+). Benchmarked on Apple Silicon: - 150M iterations ≈ 6.3s - Falls within the 5-10s target range --- internal/cli/mfer.go | 2 +- mfer/builder.go | 7 ++++--- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/internal/cli/mfer.go b/internal/cli/mfer.go index 9bf9524..2277e8c 100644 --- a/internal/cli/mfer.go +++ b/internal/cli/mfer.go @@ -156,7 +156,7 @@ func (mfa *CLIApp) run(args []string) { }, &cli.StringFlag{ Name: "seed", - Usage: "Seed value for deterministic manifest UUID (hashed 1B times with SHA-256)", + Usage: "Seed value for deterministic manifest UUID (hashed 150M times with SHA-256, ~5-10s)", EnvVars: []string{"MFER_SEED"}, }, ), diff --git a/mfer/builder.go b/mfer/builder.go index fd7d4aa..2696744 100644 --- a/mfer/builder.go +++ b/mfer/builder.go @@ -93,11 +93,12 @@ type Builder struct { } // seedIterations is the number of SHA-256 rounds used to derive a UUID from a seed. -const seedIterations = 1_000_000_000 +// Tuned to take approximately 5-10 seconds on modern hardware. +const seedIterations = 150_000_000 // SetSeed derives a deterministic UUID from the given seed string. -// The seed is hashed 1,000,000,000 times with SHA-256 to produce -// 16 bytes used as a fixed UUID for the manifest. 
+// The seed is hashed 150,000,000 times with SHA-256 to produce +// 16 bytes used as a fixed UUID for the manifest (~5-10s on modern hardware). func (b *Builder) SetSeed(seed string) { b.fixedUUID = deriveSeedUUID(seed, seedIterations) } -- 2.45.2