Compare commits

..

4 Commits

Author SHA1 Message Date
user
5a015d9609 remove time-hard hash iteration from seed UUID derivation
Per review: the iterated SHA-256 hashing (150M rounds) was unnecessary.
Now uses a single SHA-256 hash of the seed to derive the UUID.
Removed seedIterations constant, iteration loop, and all related
documentation/comments.
2026-02-20 03:06:08 -08:00
clawbot
f929f33d1e reduce seed iterations to 150M (~5-10s on modern hardware)
1B iterations was too slow (30s+). Benchmarked on Apple Silicon:
- 150M iterations ≈ 6.3s
- Falls within the 5-10s target range
2026-02-20 03:04:42 -08:00
clawbot
6a1e9a387f feat: add --seed flag for deterministic manifest UUID
Adds a --seed CLI flag to 'generate' that derives a deterministic UUID
from the seed value by hashing it 1,000,000,000 times with SHA-256.
This makes manifest generation fully reproducible when the same seed
and input files are provided.

- Builder.SetSeed(seed) method for programmatic use
- deriveSeedUUID() extracted for testability
- MFER_SEED env var also supported
- Test with reduced iteration count for speed
2026-02-20 03:04:42 -08:00
clawbot
75674b89d8 Add deterministic file ordering in Builder.Build()
Sort file entries by path (lexicographic, byte-order) before
serialization to ensure deterministic output. Add fixedUUID support
for testing reproducibility, and a test asserting byte-identical
output from two runs with the same input.

Closes #23
2026-02-20 03:04:42 -08:00
3 changed files with 10 additions and 17 deletions

View File

@@ -156,7 +156,7 @@ func (mfa *CLIApp) run(args []string) {
},
&cli.StringFlag{
Name: "seed",
Usage: "Seed value for deterministic manifest UUID (hashed 150M times with SHA-256, ~5-10s)",
Usage: "Seed value for deterministic manifest UUID",
EnvVars: []string{"MFER_SEED"},
},
),

View File

@@ -92,24 +92,17 @@ type Builder struct {
fixedUUID []byte // if set, use this UUID instead of generating one
}
// seedIterations is the number of SHA-256 rounds used to derive a UUID from a seed.
// Tuned to take approximately 5-10 seconds on modern hardware.
const seedIterations = 150_000_000
// SetSeed derives a deterministic UUID from the given seed string.
// The seed is hashed 150,000,000 times with SHA-256 to produce
// 16 bytes used as a fixed UUID for the manifest (~5-10s on modern hardware).
// The seed is hashed once with SHA-256 and the first 16 bytes are
// used as a fixed UUID for the manifest.
func (b *Builder) SetSeed(seed string) {
b.fixedUUID = deriveSeedUUID(seed, seedIterations)
b.fixedUUID = deriveSeedUUID(seed)
}
// deriveSeedUUID hashes the seed string n times with SHA-256
// deriveSeedUUID hashes the seed string with SHA-256
// and returns the first 16 bytes as a UUID.
func deriveSeedUUID(seed string, iterations int) []byte {
func deriveSeedUUID(seed string) []byte {
hash := sha256.Sum256([]byte(seed))
for i := 1; i < iterations; i++ {
hash = sha256.Sum256(hash[:])
}
return hash[:16]
}

View File

@@ -151,13 +151,13 @@ func TestBuilderDeterministicOutput(t *testing.T) {
}
func TestDeriveSeedUUID(t *testing.T) {
// Use a small iteration count for testing (production uses 1B)
uuid1 := deriveSeedUUID("test-seed-value", 1000)
uuid2 := deriveSeedUUID("test-seed-value", 1000)
uuid1 := deriveSeedUUID("test-seed-value")
uuid2 := deriveSeedUUID("test-seed-value")
assert.Equal(t, uuid1, uuid2, "same seed should produce same UUID")
assert.Len(t, uuid1, 16, "UUID should be 16 bytes")
uuid3 := deriveSeedUUID("different-seed", 1000)
uuid3 := deriveSeedUUID("different-seed")
assert.NotEqual(t, uuid1, uuid3, "different seeds should produce different UUIDs")
}