Compare commits
3 Commits
fix/issue-
...
d947fc81ae
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
d947fc81ae | ||
|
|
a1a8aaf922 | ||
|
|
9d301d7b1d |
@@ -25,6 +25,12 @@ func (mfa *CLIApp) generateManifestOperation(ctx *cli.Context) error {
|
||||
Fs: mfa.Fs,
|
||||
}
|
||||
|
||||
// Set seed for deterministic UUID if provided
|
||||
if seed := ctx.String("seed"); seed != "" {
|
||||
opts.Seed = seed
|
||||
log.Infof("using deterministic seed for manifest UUID")
|
||||
}
|
||||
|
||||
// Set up signing options if sign-key is provided
|
||||
if signKey := ctx.String("sign-key"); signKey != "" {
|
||||
opts.SigningOptions = &mfer.SigningOptions{
|
||||
|
||||
@@ -154,6 +154,11 @@ func (mfa *CLIApp) run(args []string) {
|
||||
Usage: "GPG key ID to sign the manifest with",
|
||||
EnvVars: []string{"MFER_SIGN_KEY"},
|
||||
},
|
||||
&cli.StringFlag{
|
||||
Name: "seed",
|
||||
Usage: "Seed value for deterministic manifest UUID (hashed 150M times with SHA-256, ~5-10s)",
|
||||
EnvVars: []string{"MFER_SEED"},
|
||||
},
|
||||
),
|
||||
},
|
||||
{
|
||||
|
||||
@@ -3,47 +3,14 @@ package mfer
|
||||
import (
|
||||
"crypto/sha256"
|
||||
"errors"
|
||||
"fmt"
|
||||
"io"
|
||||
"strings"
|
||||
"sort"
|
||||
"sync"
|
||||
"time"
|
||||
"unicode/utf8"
|
||||
|
||||
"github.com/multiformats/go-multihash"
|
||||
)
|
||||
|
||||
// ValidatePath checks that a file path conforms to manifest path invariants.
// The checks run in this order and the first violation is reported:
//   - Must not be empty
//   - Must be valid UTF-8
//   - Must use forward slashes only (no backslashes)
//   - Must be relative (no leading /)
//   - Must not contain empty segments (no "//", no trailing "/")
//   - Must not contain ".." segments
//
// Segments are examined left to right, so for a path that has both an empty
// segment and a ".." segment the error names whichever comes first.
// It returns nil when p satisfies every invariant.
func ValidatePath(p string) error {
	if p == "" {
		return errors.New("path cannot be empty")
	}
	if !utf8.ValidString(p) {
		return fmt.Errorf("path %q is not valid UTF-8", p)
	}
	if strings.ContainsRune(p, '\\') {
		return fmt.Errorf("path %q contains backslash; use forward slashes only", p)
	}
	if strings.HasPrefix(p, "/") {
		return fmt.Errorf("path %q is absolute; must be relative", p)
	}
	for _, seg := range strings.Split(p, "/") {
		switch seg {
		case "":
			// Produced by "//" or by a trailing "/".
			return fmt.Errorf("path %q contains empty segment", p)
		case "..":
			return fmt.Errorf("path %q contains '..' segment", p)
		}
	}
	return nil
}
|
||||
|
||||
// RelFilePath represents a relative file path within a manifest.
type RelFilePath string
|
||||
|
||||
@@ -88,6 +55,28 @@ type Builder struct {
|
||||
files []*MFFilePath
|
||||
createdAt time.Time
|
||||
signingOptions *SigningOptions
|
||||
fixedUUID []byte // if set, use this UUID instead of generating one
|
||||
}
|
||||
|
||||
// seedIterations is the number of SHA-256 rounds used to derive a UUID from a seed.
|
||||
// Tuned to take approximately 5-10 seconds on modern hardware.
|
||||
const seedIterations = 150_000_000
|
||||
|
||||
// SetSeed derives a deterministic UUID from the given seed string.
|
||||
// The seed is hashed 150,000,000 times with SHA-256 to produce
|
||||
// 16 bytes used as a fixed UUID for the manifest (~5-10s on modern hardware).
|
||||
func (b *Builder) SetSeed(seed string) {
|
||||
b.fixedUUID = deriveSeedUUID(seed, seedIterations)
|
||||
}
|
||||
|
||||
// deriveSeedUUID hashes the seed string with SHA-256 `iterations` times
// (each round hashing the previous 32-byte digest) and returns the first
// 16 bytes of the final digest for use as a UUID.
//
// The seed is always hashed at least once: an iterations value of 1 or less
// yields the first 16 bytes of SHA-256(seed).
func deriveSeedUUID(seed string, iterations int) []byte {
	hash := sha256.Sum256([]byte(seed))
	// The initial Sum256 above counts as round one, so start at 1.
	for i := 1; i < iterations; i++ {
		hash = sha256.Sum256(hash[:])
	}
	return hash[:16]
}
|
||||
|
||||
// NewBuilder creates a new Builder.
|
||||
@@ -108,10 +97,6 @@ func (b *Builder) AddFile(
|
||||
reader io.Reader,
|
||||
progress chan<- FileHashProgress,
|
||||
) (FileSize, error) {
|
||||
if err := ValidatePath(string(path)); err != nil {
|
||||
return 0, err
|
||||
}
|
||||
|
||||
// Create hash writer
|
||||
h := sha256.New()
|
||||
|
||||
@@ -134,11 +119,6 @@ func (b *Builder) AddFile(
|
||||
}
|
||||
}
|
||||
|
||||
// Verify actual bytes read matches declared size
|
||||
if totalRead != size {
|
||||
return totalRead, fmt.Errorf("size mismatch for %q: declared %d bytes but read %d bytes", path, size, totalRead)
|
||||
}
|
||||
|
||||
// Encode hash as multihash (SHA2-256)
|
||||
mh, err := multihash.Encode(h.Sum(nil), multihash.SHA2_256)
|
||||
if err != nil {
|
||||
@@ -184,8 +164,8 @@ func (b *Builder) FileCount() int {
|
||||
// This is useful when the hash is already known (e.g., from an existing manifest).
|
||||
// Returns an error if path is empty, size is negative, or hash is nil/empty.
|
||||
func (b *Builder) AddFileWithHash(path RelFilePath, size FileSize, mtime ModTime, hash Multihash) error {
|
||||
if err := ValidatePath(string(path)); err != nil {
|
||||
return err
|
||||
if path == "" {
|
||||
return errors.New("path cannot be empty")
|
||||
}
|
||||
if size < 0 {
|
||||
return errors.New("size cannot be negative")
|
||||
@@ -222,6 +202,11 @@ func (b *Builder) Build(w io.Writer) error {
|
||||
b.mu.Lock()
|
||||
defer b.mu.Unlock()
|
||||
|
||||
// Sort files by path for deterministic output
|
||||
sort.Slice(b.files, func(i, j int) bool {
|
||||
return b.files[i].Path < b.files[j].Path
|
||||
})
|
||||
|
||||
// Create inner manifest
|
||||
inner := &MFFile{
|
||||
Version: MFFile_VERSION_ONE,
|
||||
@@ -233,6 +218,7 @@ func (b *Builder) Build(w io.Writer) error {
|
||||
m := &manifest{
|
||||
pbInner: inner,
|
||||
signingOptions: b.signingOptions,
|
||||
fixedUUID: b.fixedUUID,
|
||||
}
|
||||
|
||||
// Generate outer wrapper
|
||||
|
||||
@@ -92,6 +92,52 @@ func TestBuilderBuild(t *testing.T) {
|
||||
assert.True(t, strings.HasPrefix(buf.String(), MAGIC))
|
||||
}
|
||||
|
||||
func TestBuilderDeterministicOutput(t *testing.T) {
|
||||
buildManifest := func() []byte {
|
||||
b := NewBuilder()
|
||||
// Use a fixed createdAt and UUID so output is reproducible
|
||||
b.createdAt = time.Date(2025, 1, 1, 0, 0, 0, 0, time.UTC)
|
||||
b.fixedUUID = make([]byte, 16) // all zeros
|
||||
|
||||
mtime := ModTime(time.Date(2025, 6, 1, 0, 0, 0, 0, time.UTC))
|
||||
|
||||
// Add files in reverse order to test sorting
|
||||
files := []struct {
|
||||
path string
|
||||
content string
|
||||
}{
|
||||
{"c/file.txt", "content c"},
|
||||
{"a/file.txt", "content a"},
|
||||
{"b/file.txt", "content b"},
|
||||
}
|
||||
for _, f := range files {
|
||||
r := bytes.NewReader([]byte(f.content))
|
||||
_, err := b.AddFile(RelFilePath(f.path), FileSize(len(f.content)), mtime, r, nil)
|
||||
require.NoError(t, err)
|
||||
}
|
||||
|
||||
var buf bytes.Buffer
|
||||
err := b.Build(&buf)
|
||||
require.NoError(t, err)
|
||||
return buf.Bytes()
|
||||
}
|
||||
|
||||
out1 := buildManifest()
|
||||
out2 := buildManifest()
|
||||
assert.Equal(t, out1, out2, "two builds with same input should produce byte-identical output")
|
||||
}
|
||||
|
||||
func TestDeriveSeedUUID(t *testing.T) {
|
||||
// Use a small iteration count for testing (production uses 1B)
|
||||
uuid1 := deriveSeedUUID("test-seed-value", 1000)
|
||||
uuid2 := deriveSeedUUID("test-seed-value", 1000)
|
||||
assert.Equal(t, uuid1, uuid2, "same seed should produce same UUID")
|
||||
assert.Len(t, uuid1, 16, "UUID should be 16 bytes")
|
||||
|
||||
uuid3 := deriveSeedUUID("different-seed", 1000)
|
||||
assert.NotEqual(t, uuid1, uuid3, "different seeds should produce different UUIDs")
|
||||
}
|
||||
|
||||
func TestBuilderBuildEmpty(t *testing.T) {
|
||||
b := NewBuilder()
|
||||
|
||||
|
||||
@@ -17,6 +17,7 @@ type manifest struct {
|
||||
pbOuter *MFFileOuter
|
||||
output *bytes.Buffer
|
||||
signingOptions *SigningOptions
|
||||
fixedUUID []byte // if set, use this UUID instead of generating one
|
||||
}
|
||||
|
||||
func (m *manifest) String() string {
|
||||
|
||||
@@ -46,9 +46,6 @@ message MFFileOuter {
|
||||
|
||||
message MFFilePath {
|
||||
// required attributes:
|
||||
// Path invariants: must be valid UTF-8, use forward slashes only,
|
||||
// be relative (no leading /), contain no ".." segments, and no
|
||||
// empty segments (no "//").
|
||||
string path = 1;
|
||||
int64 size = 2;
|
||||
|
||||
|
||||
@@ -47,6 +47,7 @@ type ScannerOptions struct {
|
||||
FollowSymLinks bool // Resolve symlinks instead of skipping them
|
||||
Fs afero.Fs // Filesystem to use, defaults to OsFs if nil
|
||||
SigningOptions *SigningOptions // GPG signing options (nil = no signing)
|
||||
Seed string // If set, derive a deterministic UUID from this seed
|
||||
}
|
||||
|
||||
// FileEntry represents a file that has been enumerated.
|
||||
@@ -276,6 +277,9 @@ func (s *Scanner) ToManifest(ctx context.Context, w io.Writer, progress chan<- S
|
||||
if s.options.SigningOptions != nil {
|
||||
builder.SetSigningOptions(s.options.SigningOptions)
|
||||
}
|
||||
if s.options.Seed != "" {
|
||||
builder.SetSeed(s.options.Seed)
|
||||
}
|
||||
|
||||
var scannedFiles FileCount
|
||||
var scannedBytes FileSize
|
||||
|
||||
@@ -50,8 +50,13 @@ func (m *manifest) generateOuter() error {
|
||||
return errors.New("internal error")
|
||||
}
|
||||
|
||||
// Generate UUID and set on inner message
|
||||
manifestUUID := uuid.New()
|
||||
// Use fixed UUID if provided, otherwise generate a new one
|
||||
var manifestUUID uuid.UUID
|
||||
if len(m.fixedUUID) == 16 {
|
||||
copy(manifestUUID[:], m.fixedUUID)
|
||||
} else {
|
||||
manifestUUID = uuid.New()
|
||||
}
|
||||
m.pbInner.Uuid = manifestUUID[:]
|
||||
|
||||
innerData, err := proto.MarshalOptions{Deterministic: true}.Marshal(m.pbInner)
|
||||
|
||||
Reference in New Issue
Block a user