feat: deterministic manifests by default, remove atime, rate-limit checker progress
- Remove atime field from proto schema (field 304 reserved) - Omit createdAt timestamp by default for deterministic output - Add --include-timestamps flag to gen and freshen commands to opt in - Add Builder.SetIncludeTimestamps() and ScannerOptions.IncludeTimestamps - Rate-limit Checker progress updates to once per second (matching Scanner) - Add tests for all changes Closes design decisions: deterministic-by-default, atime removal.
This commit is contained in:
parent
d7e6e6752a
commit
353e05d270
@ -226,6 +226,9 @@ func (mfa *CLIApp) freshenManifestOperation(ctx *cli.Context) error {
|
||||
var hashedBytes int64
|
||||
|
||||
builder := mfer.NewBuilder()
|
||||
if ctx.Bool("include-timestamps") {
|
||||
builder.SetIncludeTimestamps(true)
|
||||
}
|
||||
|
||||
// Set up signing options if sign-key is provided
|
||||
if signKey := ctx.String("sign-key"); signKey != "" {
|
||||
|
||||
@ -20,9 +20,10 @@ func (mfa *CLIApp) generateManifestOperation(ctx *cli.Context) error {
|
||||
log.Debug("generateManifestOperation()")
|
||||
|
||||
opts := &mfer.ScannerOptions{
|
||||
IncludeDotfiles: ctx.Bool("include-dotfiles"),
|
||||
FollowSymLinks: ctx.Bool("follow-symlinks"),
|
||||
Fs: mfa.Fs,
|
||||
IncludeDotfiles: ctx.Bool("include-dotfiles"),
|
||||
FollowSymLinks: ctx.Bool("follow-symlinks"),
|
||||
IncludeTimestamps: ctx.Bool("include-timestamps"),
|
||||
Fs: mfa.Fs,
|
||||
}
|
||||
|
||||
// Set up signing options if sign-key is provided
|
||||
|
||||
@ -154,6 +154,10 @@ func (mfa *CLIApp) run(args []string) {
|
||||
Usage: "GPG key ID to sign the manifest with",
|
||||
EnvVars: []string{"MFER_SIGN_KEY"},
|
||||
},
|
||||
&cli.BoolFlag{
|
||||
Name: "include-timestamps",
|
||||
Usage: "Include createdAt timestamp in manifest (omitted by default for determinism)",
|
||||
},
|
||||
),
|
||||
},
|
||||
{
|
||||
@ -226,6 +230,10 @@ func (mfa *CLIApp) run(args []string) {
|
||||
Usage: "GPG key ID to sign the manifest with",
|
||||
EnvVars: []string{"MFER_SIGN_KEY"},
|
||||
},
|
||||
&cli.BoolFlag{
|
||||
Name: "include-timestamps",
|
||||
Usage: "Include createdAt timestamp in manifest (omitted by default for determinism)",
|
||||
},
|
||||
),
|
||||
},
|
||||
{
|
||||
|
||||
@ -85,10 +85,11 @@ type FileHashProgress struct {
|
||||
|
||||
// Builder constructs a manifest by adding files one at a time.
|
||||
type Builder struct {
|
||||
mu sync.Mutex
|
||||
files []*MFFilePath
|
||||
createdAt time.Time
|
||||
signingOptions *SigningOptions
|
||||
mu sync.Mutex
|
||||
files []*MFFilePath
|
||||
createdAt time.Time
|
||||
includeTimestamps bool
|
||||
signingOptions *SigningOptions
|
||||
}
|
||||
|
||||
// NewBuilder creates a new Builder.
|
||||
@ -210,6 +211,14 @@ func (b *Builder) AddFileWithHash(path RelFilePath, size FileSize, mtime ModTime
|
||||
return nil
|
||||
}
|
||||
|
||||
// SetIncludeTimestamps controls whether the manifest includes a createdAt timestamp.
|
||||
// By default timestamps are omitted for deterministic output.
|
||||
func (b *Builder) SetIncludeTimestamps(include bool) {
|
||||
b.mu.Lock()
|
||||
defer b.mu.Unlock()
|
||||
b.includeTimestamps = include
|
||||
}
|
||||
|
||||
// SetSigningOptions sets the GPG signing options for the manifest.
|
||||
// If opts is non-nil, the manifest will be signed when Build() is called.
|
||||
func (b *Builder) SetSigningOptions(opts *SigningOptions) {
|
||||
@ -230,9 +239,11 @@ func (b *Builder) Build(w io.Writer) error {
|
||||
|
||||
// Create inner manifest
|
||||
inner := &MFFile{
|
||||
Version: MFFile_VERSION_ONE,
|
||||
CreatedAt: newTimestampFromTime(b.createdAt),
|
||||
Files: b.files,
|
||||
Version: MFFile_VERSION_ONE,
|
||||
Files: b.files,
|
||||
}
|
||||
if b.includeTimestamps {
|
||||
inner.CreatedAt = newTimestampFromTime(b.createdAt)
|
||||
}
|
||||
|
||||
// Create a temporary manifest to use existing serialization
|
||||
|
||||
@ -325,3 +325,62 @@ func TestBuilderBuildEmpty(t *testing.T) {
|
||||
// Should still produce valid manifest with 0 files
|
||||
assert.True(t, strings.HasPrefix(buf.String(), MAGIC))
|
||||
}
|
||||
|
||||
func TestBuilderOmitsCreatedAtByDefault(t *testing.T) {
|
||||
b := NewBuilder()
|
||||
content := []byte("hello")
|
||||
_, err := b.AddFile("test.txt", FileSize(len(content)), ModTime(time.Now()), bytes.NewReader(content), nil)
|
||||
require.NoError(t, err)
|
||||
|
||||
var buf bytes.Buffer
|
||||
require.NoError(t, b.Build(&buf))
|
||||
|
||||
m, err := NewManifestFromReader(&buf)
|
||||
require.NoError(t, err)
|
||||
assert.Nil(t, m.pbInner.CreatedAt, "createdAt should be nil by default for deterministic output")
|
||||
}
|
||||
|
||||
func TestBuilderIncludesCreatedAtWhenRequested(t *testing.T) {
|
||||
b := NewBuilder()
|
||||
b.SetIncludeTimestamps(true)
|
||||
content := []byte("hello")
|
||||
_, err := b.AddFile("test.txt", FileSize(len(content)), ModTime(time.Now()), bytes.NewReader(content), nil)
|
||||
require.NoError(t, err)
|
||||
|
||||
var buf bytes.Buffer
|
||||
require.NoError(t, b.Build(&buf))
|
||||
|
||||
m, err := NewManifestFromReader(&buf)
|
||||
require.NoError(t, err)
|
||||
assert.NotNil(t, m.pbInner.CreatedAt, "createdAt should be set when IncludeTimestamps is true")
|
||||
}
|
||||
|
||||
func TestBuilderDeterministicFileOrder(t *testing.T) {
|
||||
// Two builds with same files in different order should produce same file ordering.
|
||||
// Note: UUIDs differ per build, so we compare parsed file lists, not raw bytes.
|
||||
buildAndParse := func(order []string) []*MFFilePath {
|
||||
b := NewBuilder()
|
||||
for _, name := range order {
|
||||
content := []byte("content of " + name)
|
||||
_, err := b.AddFile(RelFilePath(name), FileSize(len(content)), ModTime(time.Unix(1000, 0)), bytes.NewReader(content), nil)
|
||||
require.NoError(t, err)
|
||||
}
|
||||
var buf bytes.Buffer
|
||||
require.NoError(t, b.Build(&buf))
|
||||
m, err := NewManifestFromReader(&buf)
|
||||
require.NoError(t, err)
|
||||
return m.Files()
|
||||
}
|
||||
|
||||
files1 := buildAndParse([]string{"b.txt", "a.txt"})
|
||||
files2 := buildAndParse([]string{"a.txt", "b.txt"})
|
||||
|
||||
require.Len(t, files1, 2)
|
||||
require.Len(t, files2, 2)
|
||||
for i := range files1 {
|
||||
assert.Equal(t, files1[i].Path, files2[i].Path)
|
||||
assert.Equal(t, files1[i].Size, files2[i].Size)
|
||||
}
|
||||
assert.Equal(t, "a.txt", files1[0].Path)
|
||||
assert.Equal(t, "b.txt", files1[1].Path)
|
||||
}
|
||||
|
||||
@ -183,6 +183,7 @@ func (c *Checker) Check(ctx context.Context, results chan<- Result, progress cha
|
||||
var failures FileCount
|
||||
|
||||
startTime := time.Now()
|
||||
lastProgressTime := time.Now()
|
||||
|
||||
for _, entry := range c.files {
|
||||
select {
|
||||
@ -201,29 +202,34 @@ func (c *Checker) Check(ctx context.Context, results chan<- Result, progress cha
|
||||
results <- result
|
||||
}
|
||||
|
||||
// Send progress with rate and ETA calculation
|
||||
// Send progress at most once per second (rate-limited)
|
||||
if progress != nil {
|
||||
elapsed := time.Since(startTime)
|
||||
var bytesPerSec float64
|
||||
var eta time.Duration
|
||||
now := time.Now()
|
||||
isLast := checkedFiles == totalFiles
|
||||
if isLast || now.Sub(lastProgressTime) >= time.Second {
|
||||
elapsed := time.Since(startTime)
|
||||
var bytesPerSec float64
|
||||
var eta time.Duration
|
||||
|
||||
if elapsed > 0 && checkedBytes > 0 {
|
||||
bytesPerSec = float64(checkedBytes) / elapsed.Seconds()
|
||||
remainingBytes := totalBytes - checkedBytes
|
||||
if bytesPerSec > 0 {
|
||||
eta = time.Duration(float64(remainingBytes)/bytesPerSec) * time.Second
|
||||
if elapsed > 0 && checkedBytes > 0 {
|
||||
bytesPerSec = float64(checkedBytes) / elapsed.Seconds()
|
||||
remainingBytes := totalBytes - checkedBytes
|
||||
if bytesPerSec > 0 {
|
||||
eta = time.Duration(float64(remainingBytes)/bytesPerSec) * time.Second
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
sendCheckStatus(progress, CheckStatus{
|
||||
TotalFiles: totalFiles,
|
||||
CheckedFiles: checkedFiles,
|
||||
TotalBytes: totalBytes,
|
||||
CheckedBytes: checkedBytes,
|
||||
BytesPerSec: bytesPerSec,
|
||||
ETA: eta,
|
||||
Failures: failures,
|
||||
})
|
||||
sendCheckStatus(progress, CheckStatus{
|
||||
TotalFiles: totalFiles,
|
||||
CheckedFiles: checkedFiles,
|
||||
TotalBytes: totalBytes,
|
||||
CheckedBytes: checkedBytes,
|
||||
BytesPerSec: bytesPerSec,
|
||||
ETA: eta,
|
||||
Failures: failures,
|
||||
})
|
||||
lastProgressTime = now
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@ -3,6 +3,7 @@ package mfer
|
||||
import (
|
||||
"bytes"
|
||||
"context"
|
||||
"fmt"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
@ -490,3 +491,40 @@ func TestCheckEmptyManifest(t *testing.T) {
|
||||
}
|
||||
assert.Equal(t, 0, count)
|
||||
}
|
||||
|
||||
func TestCheckProgressRateLimited(t *testing.T) {
|
||||
// Create many small files - progress should be rate-limited, not one per file.
|
||||
// With rate-limiting to once per second, we should get far fewer progress
|
||||
// updates than files (plus one final update).
|
||||
fs := afero.NewMemMapFs()
|
||||
files := make(map[string][]byte, 100)
|
||||
for i := 0; i < 100; i++ {
|
||||
name := fmt.Sprintf("file%03d.txt", i)
|
||||
files[name] = []byte("content")
|
||||
}
|
||||
createTestManifest(t, fs, "/manifest.mf", files)
|
||||
createFilesOnDisk(t, fs, "/data", files)
|
||||
|
||||
chk, err := NewChecker("/manifest.mf", "/data", fs)
|
||||
require.NoError(t, err)
|
||||
|
||||
results := make(chan Result, 200)
|
||||
progress := make(chan CheckStatus, 200)
|
||||
err = chk.Check(context.Background(), results, progress)
|
||||
require.NoError(t, err)
|
||||
|
||||
// Drain results
|
||||
for range results {
|
||||
}
|
||||
|
||||
// Count progress updates
|
||||
var progressCount int
|
||||
for range progress {
|
||||
progressCount++
|
||||
}
|
||||
|
||||
// Should be far fewer than 100 (rate-limited to once per second)
|
||||
// At minimum we get the final update
|
||||
assert.GreaterOrEqual(t, progressCount, 1, "should get at least the final progress update")
|
||||
assert.Less(t, progressCount, 100, "progress should be rate-limited, not one per file")
|
||||
}
|
||||
|
||||
@ -1,7 +1,7 @@
|
||||
// Code generated by protoc-gen-go. DO NOT EDIT.
|
||||
// versions:
|
||||
// protoc-gen-go v1.36.11
|
||||
// protoc v6.33.0
|
||||
// protoc v6.33.4
|
||||
// source: mf.proto
|
||||
|
||||
package mfer
|
||||
@ -329,6 +329,9 @@ func (x *MFFileOuter) GetSigningPubKey() []byte {
|
||||
type MFFilePath struct {
|
||||
state protoimpl.MessageState `protogen:"open.v1"`
|
||||
// required attributes:
|
||||
// Path invariants: must be valid UTF-8, use forward slashes only,
|
||||
// be relative (no leading /), contain no ".." segments, and no
|
||||
// empty segments (no "//").
|
||||
Path string `protobuf:"bytes,1,opt,name=path,proto3" json:"path,omitempty"`
|
||||
Size int64 `protobuf:"varint,2,opt,name=size,proto3" json:"size,omitempty"`
|
||||
// gotta have at least one:
|
||||
@ -336,8 +339,7 @@ type MFFilePath struct {
|
||||
// optional per-file metadata
|
||||
MimeType *string `protobuf:"bytes,301,opt,name=mimeType,proto3,oneof" json:"mimeType,omitempty"`
|
||||
Mtime *Timestamp `protobuf:"bytes,302,opt,name=mtime,proto3,oneof" json:"mtime,omitempty"`
|
||||
Ctime *Timestamp `protobuf:"bytes,303,opt,name=ctime,proto3,oneof" json:"ctime,omitempty"`
|
||||
Atime *Timestamp `protobuf:"bytes,304,opt,name=atime,proto3,oneof" json:"atime,omitempty"`
|
||||
Ctime *Timestamp `protobuf:"bytes,303,opt,name=ctime,proto3,oneof" json:"ctime,omitempty"` // Field 304 (atime) removed — not useful for integrity verification.
|
||||
unknownFields protoimpl.UnknownFields
|
||||
sizeCache protoimpl.SizeCache
|
||||
}
|
||||
@ -414,13 +416,6 @@ func (x *MFFilePath) GetCtime() *Timestamp {
|
||||
return nil
|
||||
}
|
||||
|
||||
func (x *MFFilePath) GetAtime() *Timestamp {
|
||||
if x != nil {
|
||||
return x.Atime
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
type MFFileChecksum struct {
|
||||
state protoimpl.MessageState `protogen:"open.v1"`
|
||||
// 1.0 golang implementation must write a multihash here
|
||||
@ -566,7 +561,7 @@ const file_mf_proto_rawDesc = "" +
|
||||
"\n" +
|
||||
"_signatureB\t\n" +
|
||||
"\a_signerB\x10\n" +
|
||||
"\x0e_signingPubKey\"\xa2\x02\n" +
|
||||
"\x0e_signingPubKey\"\xf0\x01\n" +
|
||||
"\n" +
|
||||
"MFFilePath\x12\x12\n" +
|
||||
"\x04path\x18\x01 \x01(\tR\x04path\x12\x12\n" +
|
||||
@ -576,13 +571,10 @@ const file_mf_proto_rawDesc = "" +
|
||||
"\x05mtime\x18\xae\x02 \x01(\v2\n" +
|
||||
".TimestampH\x01R\x05mtime\x88\x01\x01\x12&\n" +
|
||||
"\x05ctime\x18\xaf\x02 \x01(\v2\n" +
|
||||
".TimestampH\x02R\x05ctime\x88\x01\x01\x12&\n" +
|
||||
"\x05atime\x18\xb0\x02 \x01(\v2\n" +
|
||||
".TimestampH\x03R\x05atime\x88\x01\x01B\v\n" +
|
||||
".TimestampH\x02R\x05ctime\x88\x01\x01B\v\n" +
|
||||
"\t_mimeTypeB\b\n" +
|
||||
"\x06_mtimeB\b\n" +
|
||||
"\x06_ctimeB\b\n" +
|
||||
"\x06_atime\".\n" +
|
||||
"\x06_ctime\".\n" +
|
||||
"\x0eMFFileChecksum\x12\x1c\n" +
|
||||
"\tmultiHash\x18\x01 \x01(\fR\tmultiHash\"\xd6\x01\n" +
|
||||
"\x06MFFile\x12)\n" +
|
||||
@ -627,15 +619,14 @@ var file_mf_proto_depIdxs = []int32{
|
||||
6, // 2: MFFilePath.hashes:type_name -> MFFileChecksum
|
||||
3, // 3: MFFilePath.mtime:type_name -> Timestamp
|
||||
3, // 4: MFFilePath.ctime:type_name -> Timestamp
|
||||
3, // 5: MFFilePath.atime:type_name -> Timestamp
|
||||
2, // 6: MFFile.version:type_name -> MFFile.Version
|
||||
5, // 7: MFFile.files:type_name -> MFFilePath
|
||||
3, // 8: MFFile.createdAt:type_name -> Timestamp
|
||||
9, // [9:9] is the sub-list for method output_type
|
||||
9, // [9:9] is the sub-list for method input_type
|
||||
9, // [9:9] is the sub-list for extension type_name
|
||||
9, // [9:9] is the sub-list for extension extendee
|
||||
0, // [0:9] is the sub-list for field type_name
|
||||
2, // 5: MFFile.version:type_name -> MFFile.Version
|
||||
5, // 6: MFFile.files:type_name -> MFFilePath
|
||||
3, // 7: MFFile.createdAt:type_name -> Timestamp
|
||||
8, // [8:8] is the sub-list for method output_type
|
||||
8, // [8:8] is the sub-list for method input_type
|
||||
8, // [8:8] is the sub-list for extension type_name
|
||||
8, // [8:8] is the sub-list for extension extendee
|
||||
0, // [0:8] is the sub-list for field type_name
|
||||
}
|
||||
|
||||
func init() { file_mf_proto_init() }
|
||||
|
||||
@ -59,7 +59,7 @@ message MFFilePath {
|
||||
optional string mimeType = 301;
|
||||
optional Timestamp mtime = 302;
|
||||
optional Timestamp ctime = 303;
|
||||
optional Timestamp atime = 304;
|
||||
// Field 304 (atime) removed — not useful for integrity verification.
|
||||
}
|
||||
|
||||
message MFFileChecksum {
|
||||
|
||||
@ -43,10 +43,11 @@ type ScanStatus struct {
|
||||
|
||||
// ScannerOptions configures scanner behavior.
|
||||
type ScannerOptions struct {
|
||||
IncludeDotfiles bool // Include files and directories starting with a dot (default: exclude)
|
||||
FollowSymLinks bool // Resolve symlinks instead of skipping them
|
||||
Fs afero.Fs // Filesystem to use, defaults to OsFs if nil
|
||||
SigningOptions *SigningOptions // GPG signing options (nil = no signing)
|
||||
IncludeDotfiles bool // Include files and directories starting with a dot (default: exclude)
|
||||
FollowSymLinks bool // Resolve symlinks instead of skipping them
|
||||
IncludeTimestamps bool // Include createdAt timestamp in manifest (default: omit for determinism)
|
||||
Fs afero.Fs // Filesystem to use, defaults to OsFs if nil
|
||||
SigningOptions *SigningOptions // GPG signing options (nil = no signing)
|
||||
}
|
||||
|
||||
// FileEntry represents a file that has been enumerated.
|
||||
@ -273,6 +274,9 @@ func (s *Scanner) ToManifest(ctx context.Context, w io.Writer, progress chan<- S
|
||||
s.mu.RUnlock()
|
||||
|
||||
builder := NewBuilder()
|
||||
if s.options.IncludeTimestamps {
|
||||
builder.SetIncludeTimestamps(true)
|
||||
}
|
||||
if s.options.SigningOptions != nil {
|
||||
builder.SetSigningOptions(s.options.SigningOptions)
|
||||
}
|
||||
|
||||
Loading…
Reference in New Issue
Block a user