3 Commits

Author SHA1 Message Date
clawbot
85fc39cace reduce seed iterations to 150M (~5-10s on modern hardware)
1B iterations was too slow (30s+). Benchmarked on Apple Silicon:
- 150M iterations ≈ 6.3s
- Falls within the 5-10s target range
2026-02-08 17:16:08 -08:00
clawbot
350899f57d feat: add --seed flag for deterministic manifest UUID
Adds a --seed CLI flag to 'generate' that derives a deterministic UUID
from the seed value by hashing it 1,000,000,000 times with SHA-256.
This makes manifest generation fully reproducible when the same seed
and input files are provided.

- Builder.SetSeed(seed) method for programmatic use
- deriveSeedUUID() extracted for testability
- MFER_SEED env var also supported
- Test with reduced iteration count for speed
2026-02-08 17:16:08 -08:00
clawbot
410dd20032 Add deterministic file ordering in Builder.Build()
Sort file entries by path (lexicographic, byte-order) before
serialization to ensure deterministic output. Add fixedUUID support
for testing reproducibility, and a test asserting byte-identical
output from two runs with the same input.

Closes #23
2026-02-08 17:16:08 -08:00
13 changed files with 59 additions and 90 deletions

23
.drone.yml Normal file
View File

@@ -0,0 +1,23 @@
kind: pipeline
name: test-docker-build
steps:
- name: test-docker-build
image: plugins/docker
network_mode: bridge
settings:
repo: sneak/mfer
build_args_from_env: [ DRONE_COMMIT_SHA ]
dry_run: true
custom_dns: [ 116.202.204.30 ]
tags:
- ${DRONE_COMMIT_SHA:0:7}
- ${DRONE_BRANCH}
- latest
- name: notify
image: plugins/slack
settings:
webhook:
from_secret: SLACK_WEBHOOK_URL
when:
event: pull_request

5
.gitignore vendored
View File

@@ -3,8 +3,3 @@
*.tmp
*.dockerimage
/vendor
vendor.tzst
modcache.tzst
# Stale files
.drone.yml

View File

@@ -156,7 +156,7 @@ func (mfa *CLIApp) run(args []string) {
},
&cli.StringFlag{
Name: "seed",
Usage: "Seed value for deterministic manifest UUID",
Usage: "Seed value for deterministic manifest UUID (hashed 150M times with SHA-256, ~5-10s)",
EnvVars: []string{"MFER_SEED"},
},
),

View File

@@ -92,12 +92,25 @@ type Builder struct {
fixedUUID []byte // if set, use this UUID instead of generating one
}
// seedIterations is the number of SHA-256 rounds used to derive a UUID from a seed.
// Tuned to take approximately 5-10 seconds on modern hardware.
const seedIterations = 150_000_000
// SetSeed derives a deterministic UUID from the given seed string and stores
// it in b.fixedUUID, which the builder uses instead of generating a UUID.
//
// The derivation is delegated to deriveSeedUUID with seedIterations rounds of
// SHA-256 (150,000,000); the result is the 16 bytes used as the manifest UUID.
// The round count is deliberately high, so this call is slow (~5-10s on
// modern hardware).
func (b *Builder) SetSeed(seed string) {
b.fixedUUID = deriveSeedUUID(seed, seedIterations)
}
// deriveSeedUUID derives a 16-byte UUID from seed by chaining SHA-256.
//
// The seed's bytes are hashed once, and the resulting 32-byte digest is then
// re-hashed until a total of iterations rounds have been applied. The initial
// hash always runs, so any iterations value of 1 or less yields exactly one
// round. The first 16 bytes of the final digest are returned.
//
// The result depends only on seed and iterations, so identical arguments
// always produce identical bytes.
func deriveSeedUUID(seed string, iterations int) []byte {
	hash := sha256.Sum256([]byte(seed))
	// Round 1 is the hash above; the loop applies rounds 2..iterations.
	for i := 1; i < iterations; i++ {
		hash = sha256.Sum256(hash[:])
	}
	return hash[:16]
}
// NewBuilder creates a new Builder.

View File

@@ -150,17 +150,15 @@ func TestBuilderDeterministicOutput(t *testing.T) {
assert.Equal(t, out1, out2, "two builds with same input should produce byte-identical output")
}
func TestSetSeedDeterministic(t *testing.T) {
b1 := NewBuilder()
b1.SetSeed("test-seed-value")
b2 := NewBuilder()
b2.SetSeed("test-seed-value")
assert.Equal(t, b1.fixedUUID, b2.fixedUUID, "same seed should produce same UUID")
assert.Len(t, b1.fixedUUID, 16, "UUID should be 16 bytes")
func TestDeriveSeedUUID(t *testing.T) {
// Use a small iteration count for testing (production uses seedIterations, 150M)
uuid1 := deriveSeedUUID("test-seed-value", 1000)
uuid2 := deriveSeedUUID("test-seed-value", 1000)
assert.Equal(t, uuid1, uuid2, "same seed should produce same UUID")
assert.Len(t, uuid1, 16, "UUID should be 16 bytes")
b3 := NewBuilder()
b3.SetSeed("different-seed")
assert.NotEqual(t, b1.fixedUUID, b3.fixedUUID, "different seeds should produce different UUIDs")
uuid3 := deriveSeedUUID("different-seed", 1000)
assert.NotEqual(t, uuid1, uuid3, "different seeds should produce different UUIDs")
}
func TestBuilderBuildEmpty(t *testing.T) {

View File

@@ -272,14 +272,12 @@ func (c *Checker) checkFile(entry *MFFilePath, checkedBytes *FileSize) Result {
// FindExtraFiles walks the filesystem and reports files not in the manifest.
// Results are sent to the results channel. The channel is closed when done.
// Hidden files/directories (starting with .) are skipped, as they are excluded
// from manifests by default. The manifest file itself is also skipped.
func (c *Checker) FindExtraFiles(ctx context.Context, results chan<- Result) error {
if results != nil {
defer close(results)
}
return afero.Walk(c.fs, string(c.basePath), func(walkPath string, info os.FileInfo, err error) error {
return afero.Walk(c.fs, string(c.basePath), func(path string, info os.FileInfo, err error) error {
if err != nil {
return err
}
@@ -290,31 +288,16 @@ func (c *Checker) FindExtraFiles(ctx context.Context, results chan<- Result) err
default:
}
// Get relative path
rel, err := filepath.Rel(string(c.basePath), walkPath)
if err != nil {
return err
}
// Skip hidden files and directories (dotfiles)
if IsHiddenPath(filepath.ToSlash(rel)) {
if info.IsDir() {
return filepath.SkipDir
}
return nil
}
// Skip directories
if info.IsDir() {
return nil
}
// Skip manifest files
base := filepath.Base(rel)
if base == "index.mf" || base == ".index.mf" {
return nil
// Get relative path
rel, err := filepath.Rel(string(c.basePath), path)
if err != nil {
return err
}
relPath := RelFilePath(rel)
// Check if path is in manifest

View File

@@ -305,44 +305,6 @@ func TestFindExtraFiles(t *testing.T) {
assert.Equal(t, "not in manifest", extras[0].Message)
}
// TestFindExtraFilesSkipsManifestAndDotfiles checks that FindExtraFiles
// reports only the one untracked regular file. The dotfile, the file under a
// dot-directory and the .index.mf manifest itself must not be reported.
func TestFindExtraFilesSkipsManifestAndDotfiles(t *testing.T) {
	memFS := afero.NewMemMapFs()

	// One tracked file, recorded in the manifest and present on disk.
	tracked := map[string][]byte{
		"file1.txt": []byte("in manifest"),
	}
	createTestManifest(t, memFS, "/data/.index.mf", tracked)
	createFilesOnDisk(t, memFS, "/data", map[string][]byte{
		"file1.txt": []byte("in manifest"),
	})

	// Hidden entries that the walk is expected to ignore.
	require.NoError(t, afero.WriteFile(memFS, "/data/.hidden", []byte("hidden"), 0o644))
	require.NoError(t, afero.WriteFile(memFS, "/data/.config/settings", []byte("cfg"), 0o644))

	// The single file that should be reported as extra.
	require.NoError(t, memFS.MkdirAll("/data", 0o755))
	require.NoError(t, afero.WriteFile(memFS, "/data/extra.txt", []byte("extra"), 0o644))

	checker, err := NewChecker("/data/.index.mf", "/data", memFS)
	require.NoError(t, err)

	// Buffered so FindExtraFiles can finish and close the channel before we drain it.
	resultCh := make(chan Result, 10)
	require.NoError(t, checker.FindExtraFiles(context.Background(), resultCh))

	var found []Result
	for r := range resultCh {
		t.Logf("extra: %s", r.Path)
		found = append(found, r)
	}

	// Should only report extra.txt, not .hidden, .config/settings, or .index.mf
	assert.Len(t, found, 1)
	if len(found) == 0 {
		return
	}
	assert.Equal(t, RelFilePath("extra.txt"), found[0].Path)
}
func TestFindExtraFilesContextCancellation(t *testing.T) {
fs := afero.NewMemMapFs()
files := map[string][]byte{"file.txt": []byte("data")}

View File

@@ -100,7 +100,7 @@ func gpgExtractPubKeyFingerprint(pubKey []byte) (string, error) {
if err != nil {
return "", fmt.Errorf("failed to create temp dir: %w", err)
}
defer func() { _ = os.RemoveAll(tmpDir) }()
defer os.RemoveAll(tmpDir)
// Set restrictive permissions
if err := os.Chmod(tmpDir, 0o700); err != nil {
@@ -158,7 +158,7 @@ func gpgVerify(data, signature, pubKey []byte) error {
if err != nil {
return fmt.Errorf("failed to create temp dir: %w", err)
}
defer func() { _ = os.RemoveAll(tmpDir) }()
defer os.RemoveAll(tmpDir)
// Set restrictive permissions
if err := os.Chmod(tmpDir, 0o700); err != nil {

View File

@@ -34,15 +34,15 @@ func testGPGEnv(t *testing.T) (GPGKeyID, func()) {
// Save original GNUPGHOME and set new one
origGPGHome := os.Getenv("GNUPGHOME")
require.NoError(t, os.Setenv("GNUPGHOME", gpgHome))
os.Setenv("GNUPGHOME", gpgHome)
cleanup := func() {
if origGPGHome == "" {
_ = os.Unsetenv("GNUPGHOME")
os.Unsetenv("GNUPGHOME")
} else {
_ = os.Setenv("GNUPGHOME", origGPGHome)
os.Setenv("GNUPGHOME", origGPGHome)
}
_ = os.RemoveAll(gpgHome)
os.RemoveAll(gpgHome)
}
// Generate a test key with no passphrase

View File

@@ -389,9 +389,6 @@ func (s *Scanner) ToManifest(ctx context.Context, w io.Writer, progress chan<- S
// The path should use forward slashes.
func IsHiddenPath(p string) bool {
tp := path.Clean(p)
if tp == "." || tp == "/" {
return false
}
if strings.HasPrefix(tp, ".") {
return true
}

View File

@@ -352,8 +352,6 @@ func TestIsHiddenPath(t *testing.T) {
{"/absolute/.hidden", true},
{"./relative", false}, // path.Clean removes leading ./
{"a/b/c/.d/e", true},
{".", false}, // current directory is not hidden
{"/", false}, // root is not hidden
}
for _, tt := range tests {

BIN
modcache.tzst Normal file

Binary file not shown.

BIN
vendor.tzst Normal file

Binary file not shown.