Compare commits
6 Commits
fa99bdc5ee
...
85fc39cace
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
85fc39cace | ||
|
|
350899f57d | ||
|
|
410dd20032 | ||
| 1f12d10cb7 | |||
| 7f25970dd3 | |||
| 70af055d4e |
@ -113,7 +113,7 @@ func (mfa *CLIApp) fetchManifestOperation(ctx *cli.Context) error {
|
|||||||
return fmt.Errorf("invalid path in manifest: %w", err)
|
return fmt.Errorf("invalid path in manifest: %w", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
fileURL := baseURL.String() + f.Path
|
fileURL := baseURL.String() + encodeFilePath(f.Path)
|
||||||
log.Infof("fetching %s", f.Path)
|
log.Infof("fetching %s", f.Path)
|
||||||
|
|
||||||
if err := downloadFile(fileURL, localPath, f, progress); err != nil {
|
if err := downloadFile(fileURL, localPath, f, progress); err != nil {
|
||||||
@ -139,6 +139,15 @@ func (mfa *CLIApp) fetchManifestOperation(ctx *cli.Context) error {
|
|||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// encodeFilePath percent-encodes a slash-separated file path for use in a URL.
// Each segment between "/" separators is escaped on its own with
// url.PathEscape, so the separators themselves are emitted unchanged while
// characters such as spaces, '#', '?' and '%' inside a segment are encoded.
func encodeFilePath(p string) string {
	parts := strings.Split(p, "/")
	escaped := make([]string, 0, len(parts))
	for _, part := range parts {
		escaped = append(escaped, url.PathEscape(part))
	}
	return strings.Join(escaped, "/")
}
|
||||||
|
|
||||||
// sanitizePath validates and sanitizes a file path from the manifest.
|
// sanitizePath validates and sanitizes a file path from the manifest.
|
||||||
// It prevents path traversal attacks and rejects unsafe paths.
|
// It prevents path traversal attacks and rejects unsafe paths.
|
||||||
func sanitizePath(p string) (string, error) {
|
func sanitizePath(p string) (string, error) {
|
||||||
|
|||||||
@ -16,6 +16,29 @@ import (
|
|||||||
"sneak.berlin/go/mfer/mfer"
|
"sneak.berlin/go/mfer/mfer"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
func TestEncodeFilePath(t *testing.T) {
|
||||||
|
tests := []struct {
|
||||||
|
input string
|
||||||
|
expected string
|
||||||
|
}{
|
||||||
|
{"file.txt", "file.txt"},
|
||||||
|
{"dir/file.txt", "dir/file.txt"},
|
||||||
|
{"my file.txt", "my%20file.txt"},
|
||||||
|
{"dir/my file.txt", "dir/my%20file.txt"},
|
||||||
|
{"file#1.txt", "file%231.txt"},
|
||||||
|
{"file?v=1.txt", "file%3Fv=1.txt"},
|
||||||
|
{"path/to/file with spaces.txt", "path/to/file%20with%20spaces.txt"},
|
||||||
|
{"100%done.txt", "100%25done.txt"},
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, tt := range tests {
|
||||||
|
t.Run(tt.input, func(t *testing.T) {
|
||||||
|
result := encodeFilePath(tt.input)
|
||||||
|
assert.Equal(t, tt.expected, result)
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
func TestSanitizePath(t *testing.T) {
|
func TestSanitizePath(t *testing.T) {
|
||||||
// Valid paths that should be accepted
|
// Valid paths that should be accepted
|
||||||
validTests := []struct {
|
validTests := []struct {
|
||||||
|
|||||||
@ -25,6 +25,12 @@ func (mfa *CLIApp) generateManifestOperation(ctx *cli.Context) error {
|
|||||||
Fs: mfa.Fs,
|
Fs: mfa.Fs,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Set seed for deterministic UUID if provided
|
||||||
|
if seed := ctx.String("seed"); seed != "" {
|
||||||
|
opts.Seed = seed
|
||||||
|
log.Infof("using deterministic seed for manifest UUID")
|
||||||
|
}
|
||||||
|
|
||||||
// Set up signing options if sign-key is provided
|
// Set up signing options if sign-key is provided
|
||||||
if signKey := ctx.String("sign-key"); signKey != "" {
|
if signKey := ctx.String("sign-key"); signKey != "" {
|
||||||
opts.SigningOptions = &mfer.SigningOptions{
|
opts.SigningOptions = &mfer.SigningOptions{
|
||||||
|
|||||||
@ -154,6 +154,11 @@ func (mfa *CLIApp) run(args []string) {
|
|||||||
Usage: "GPG key ID to sign the manifest with",
|
Usage: "GPG key ID to sign the manifest with",
|
||||||
EnvVars: []string{"MFER_SIGN_KEY"},
|
EnvVars: []string{"MFER_SIGN_KEY"},
|
||||||
},
|
},
|
||||||
|
&cli.StringFlag{
|
||||||
|
Name: "seed",
|
||||||
|
Usage: "Seed value for deterministic manifest UUID (hashed 150M times with SHA-256, ~5-10s)",
|
||||||
|
EnvVars: []string{"MFER_SEED"},
|
||||||
|
},
|
||||||
),
|
),
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
|||||||
@ -5,6 +5,7 @@ import (
|
|||||||
"errors"
|
"errors"
|
||||||
"fmt"
|
"fmt"
|
||||||
"io"
|
"io"
|
||||||
|
"sort"
|
||||||
"strings"
|
"strings"
|
||||||
"sync"
|
"sync"
|
||||||
"time"
|
"time"
|
||||||
@ -88,6 +89,28 @@ type Builder struct {
|
|||||||
files []*MFFilePath
|
files []*MFFilePath
|
||||||
createdAt time.Time
|
createdAt time.Time
|
||||||
signingOptions *SigningOptions
|
signingOptions *SigningOptions
|
||||||
|
fixedUUID []byte // if set, use this UUID instead of generating one
|
||||||
|
}
|
||||||
|
|
||||||
|
// seedIterations is the number of SHA-256 rounds used to derive a UUID from a seed.
|
||||||
|
// Tuned to take approximately 5-10 seconds on modern hardware.
|
||||||
|
const seedIterations = 150_000_000
|
||||||
|
|
||||||
|
// SetSeed derives a deterministic UUID from the given seed string.
|
||||||
|
// The seed is hashed 150,000,000 times with SHA-256 to produce
|
||||||
|
// 16 bytes used as a fixed UUID for the manifest (~5-10s on modern hardware).
|
||||||
|
func (b *Builder) SetSeed(seed string) {
|
||||||
|
b.fixedUUID = deriveSeedUUID(seed, seedIterations)
|
||||||
|
}
|
||||||
|
|
||||||
|
// deriveSeedUUID stretches seed into 16 bytes suitable for use as a UUID.
// The seed is hashed once with SHA-256 and the digest is then re-hashed until
// iterations rounds have been applied in total. One round is always applied,
// even when iterations is less than one. The return value is the first
// 16 bytes of the final digest.
func deriveSeedUUID(seed string, iterations int) []byte {
	digest := sha256.Sum256([]byte(seed))
	// The initial hash above counts as round one; run the remaining rounds.
	for remaining := iterations - 1; remaining > 0; remaining-- {
		digest = sha256.Sum256(digest[:])
	}
	return digest[:16]
}
|
||||||
|
|
||||||
// NewBuilder creates a new Builder.
|
// NewBuilder creates a new Builder.
|
||||||
@ -222,6 +245,11 @@ func (b *Builder) Build(w io.Writer) error {
|
|||||||
b.mu.Lock()
|
b.mu.Lock()
|
||||||
defer b.mu.Unlock()
|
defer b.mu.Unlock()
|
||||||
|
|
||||||
|
// Sort files by path for deterministic output
|
||||||
|
sort.Slice(b.files, func(i, j int) bool {
|
||||||
|
return b.files[i].Path < b.files[j].Path
|
||||||
|
})
|
||||||
|
|
||||||
// Create inner manifest
|
// Create inner manifest
|
||||||
inner := &MFFile{
|
inner := &MFFile{
|
||||||
Version: MFFile_VERSION_ONE,
|
Version: MFFile_VERSION_ONE,
|
||||||
@ -233,6 +261,7 @@ func (b *Builder) Build(w io.Writer) error {
|
|||||||
m := &manifest{
|
m := &manifest{
|
||||||
pbInner: inner,
|
pbInner: inner,
|
||||||
signingOptions: b.signingOptions,
|
signingOptions: b.signingOptions,
|
||||||
|
fixedUUID: b.fixedUUID,
|
||||||
}
|
}
|
||||||
|
|
||||||
// Generate outer wrapper
|
// Generate outer wrapper
|
||||||
|
|||||||
@ -92,6 +92,75 @@ func TestBuilderBuild(t *testing.T) {
|
|||||||
assert.True(t, strings.HasPrefix(buf.String(), MAGIC))
|
assert.True(t, strings.HasPrefix(buf.String(), MAGIC))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestNewTimestampFromTimeExtremeDate(t *testing.T) {
|
||||||
|
// Regression test: newTimestampFromTime used UnixNano() which panics
|
||||||
|
// for dates outside ~1678-2262. Now uses Nanosecond() which is safe.
|
||||||
|
tests := []struct {
|
||||||
|
name string
|
||||||
|
time time.Time
|
||||||
|
}{
|
||||||
|
{"zero time", time.Time{}},
|
||||||
|
{"year 1000", time.Date(1000, 1, 1, 0, 0, 0, 0, time.UTC)},
|
||||||
|
{"year 3000", time.Date(3000, 1, 1, 0, 0, 0, 123456789, time.UTC)},
|
||||||
|
{"unix epoch", time.Unix(0, 0)},
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, tt := range tests {
|
||||||
|
t.Run(tt.name, func(t *testing.T) {
|
||||||
|
// Should not panic
|
||||||
|
ts := newTimestampFromTime(tt.time)
|
||||||
|
assert.Equal(t, tt.time.Unix(), ts.Seconds)
|
||||||
|
assert.Equal(t, int32(tt.time.Nanosecond()), ts.Nanos)
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestBuilderDeterministicOutput(t *testing.T) {
|
||||||
|
buildManifest := func() []byte {
|
||||||
|
b := NewBuilder()
|
||||||
|
// Use a fixed createdAt and UUID so output is reproducible
|
||||||
|
b.createdAt = time.Date(2025, 1, 1, 0, 0, 0, 0, time.UTC)
|
||||||
|
b.fixedUUID = make([]byte, 16) // all zeros
|
||||||
|
|
||||||
|
mtime := ModTime(time.Date(2025, 6, 1, 0, 0, 0, 0, time.UTC))
|
||||||
|
|
||||||
|
// Add files in reverse order to test sorting
|
||||||
|
files := []struct {
|
||||||
|
path string
|
||||||
|
content string
|
||||||
|
}{
|
||||||
|
{"c/file.txt", "content c"},
|
||||||
|
{"a/file.txt", "content a"},
|
||||||
|
{"b/file.txt", "content b"},
|
||||||
|
}
|
||||||
|
for _, f := range files {
|
||||||
|
r := bytes.NewReader([]byte(f.content))
|
||||||
|
_, err := b.AddFile(RelFilePath(f.path), FileSize(len(f.content)), mtime, r, nil)
|
||||||
|
require.NoError(t, err)
|
||||||
|
}
|
||||||
|
|
||||||
|
var buf bytes.Buffer
|
||||||
|
err := b.Build(&buf)
|
||||||
|
require.NoError(t, err)
|
||||||
|
return buf.Bytes()
|
||||||
|
}
|
||||||
|
|
||||||
|
out1 := buildManifest()
|
||||||
|
out2 := buildManifest()
|
||||||
|
assert.Equal(t, out1, out2, "two builds with same input should produce byte-identical output")
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestDeriveSeedUUID(t *testing.T) {
|
||||||
|
// Use a small iteration count for testing (production uses 1B)
|
||||||
|
uuid1 := deriveSeedUUID("test-seed-value", 1000)
|
||||||
|
uuid2 := deriveSeedUUID("test-seed-value", 1000)
|
||||||
|
assert.Equal(t, uuid1, uuid2, "same seed should produce same UUID")
|
||||||
|
assert.Len(t, uuid1, 16, "UUID should be 16 bytes")
|
||||||
|
|
||||||
|
uuid3 := deriveSeedUUID("different-seed", 1000)
|
||||||
|
assert.NotEqual(t, uuid1, uuid3, "different seeds should produce different UUIDs")
|
||||||
|
}
|
||||||
|
|
||||||
func TestBuilderBuildEmpty(t *testing.T) {
|
func TestBuilderBuildEmpty(t *testing.T) {
|
||||||
b := NewBuilder()
|
b := NewBuilder()
|
||||||
|
|
||||||
|
|||||||
@ -224,12 +224,7 @@ func (c *Checker) checkFile(entry *MFFilePath, checkedBytes *FileSize) Result {
|
|||||||
// Check if file exists
|
// Check if file exists
|
||||||
info, err := c.fs.Stat(absPath)
|
info, err := c.fs.Stat(absPath)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
if errors.Is(err, afero.ErrFileNotFound) || errors.Is(err, errors.New("file does not exist")) {
|
if errors.Is(err, os.ErrNotExist) || errors.Is(err, afero.ErrFileNotFound) {
|
||||||
return Result{Path: relPath, Status: StatusMissing, Message: "file not found"}
|
|
||||||
}
|
|
||||||
// Check for "file does not exist" style errors
|
|
||||||
exists, _ := afero.Exists(c.fs, absPath)
|
|
||||||
if !exists {
|
|
||||||
return Result{Path: relPath, Status: StatusMissing, Message: "file not found"}
|
return Result{Path: relPath, Status: StatusMissing, Message: "file not found"}
|
||||||
}
|
}
|
||||||
return Result{Path: relPath, Status: StatusError, Message: err.Error()}
|
return Result{Path: relPath, Status: StatusError, Message: err.Error()}
|
||||||
|
|||||||
@ -381,6 +381,39 @@ func TestCheckSubdirectories(t *testing.T) {
|
|||||||
assert.Equal(t, 3, okCount)
|
assert.Equal(t, 3, okCount)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestCheckMissingFileDetectedWithoutFallback(t *testing.T) {
|
||||||
|
// Regression test: errors.Is(err, errors.New("...")) never matches because
|
||||||
|
// errors.New creates a new value each time. The fix uses os.ErrNotExist instead.
|
||||||
|
fs := afero.NewMemMapFs()
|
||||||
|
files := map[string][]byte{
|
||||||
|
"exists.txt": []byte("here"),
|
||||||
|
"missing.txt": []byte("not on disk"),
|
||||||
|
}
|
||||||
|
createTestManifest(t, fs, "/manifest.mf", files)
|
||||||
|
// Only create one file on disk
|
||||||
|
createFilesOnDisk(t, fs, "/data", map[string][]byte{
|
||||||
|
"exists.txt": []byte("here"),
|
||||||
|
})
|
||||||
|
|
||||||
|
chk, err := NewChecker("/manifest.mf", "/data", fs)
|
||||||
|
require.NoError(t, err)
|
||||||
|
|
||||||
|
results := make(chan Result, 10)
|
||||||
|
err = chk.Check(context.Background(), results, nil)
|
||||||
|
require.NoError(t, err)
|
||||||
|
|
||||||
|
statusCounts := map[Status]int{}
|
||||||
|
for r := range results {
|
||||||
|
statusCounts[r.Status]++
|
||||||
|
if r.Status == StatusMissing {
|
||||||
|
assert.Equal(t, RelFilePath("missing.txt"), r.Path)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
assert.Equal(t, 1, statusCounts[StatusOK], "one file should be OK")
|
||||||
|
assert.Equal(t, 1, statusCounts[StatusMissing], "one file should be MISSING")
|
||||||
|
assert.Equal(t, 0, statusCounts[StatusError], "no files should be ERROR")
|
||||||
|
}
|
||||||
|
|
||||||
func TestCheckEmptyManifest(t *testing.T) {
|
func TestCheckEmptyManifest(t *testing.T) {
|
||||||
fs := afero.NewMemMapFs()
|
fs := afero.NewMemMapFs()
|
||||||
// Create manifest with no files
|
// Create manifest with no files
|
||||||
|
|||||||
@ -17,6 +17,7 @@ type manifest struct {
|
|||||||
pbOuter *MFFileOuter
|
pbOuter *MFFileOuter
|
||||||
output *bytes.Buffer
|
output *bytes.Buffer
|
||||||
signingOptions *SigningOptions
|
signingOptions *SigningOptions
|
||||||
|
fixedUUID []byte // if set, use this UUID instead of generating one
|
||||||
}
|
}
|
||||||
|
|
||||||
func (m *manifest) String() string {
|
func (m *manifest) String() string {
|
||||||
|
|||||||
@ -47,6 +47,7 @@ type ScannerOptions struct {
|
|||||||
FollowSymLinks bool // Resolve symlinks instead of skipping them
|
FollowSymLinks bool // Resolve symlinks instead of skipping them
|
||||||
Fs afero.Fs // Filesystem to use, defaults to OsFs if nil
|
Fs afero.Fs // Filesystem to use, defaults to OsFs if nil
|
||||||
SigningOptions *SigningOptions // GPG signing options (nil = no signing)
|
SigningOptions *SigningOptions // GPG signing options (nil = no signing)
|
||||||
|
Seed string // If set, derive a deterministic UUID from this seed
|
||||||
}
|
}
|
||||||
|
|
||||||
// FileEntry represents a file that has been enumerated.
|
// FileEntry represents a file that has been enumerated.
|
||||||
@ -276,6 +277,9 @@ func (s *Scanner) ToManifest(ctx context.Context, w io.Writer, progress chan<- S
|
|||||||
if s.options.SigningOptions != nil {
|
if s.options.SigningOptions != nil {
|
||||||
builder.SetSigningOptions(s.options.SigningOptions)
|
builder.SetSigningOptions(s.options.SigningOptions)
|
||||||
}
|
}
|
||||||
|
if s.options.Seed != "" {
|
||||||
|
builder.SetSeed(s.options.Seed)
|
||||||
|
}
|
||||||
|
|
||||||
var scannedFiles FileCount
|
var scannedFiles FileCount
|
||||||
var scannedBytes FileSize
|
var scannedBytes FileSize
|
||||||
|
|||||||
@ -16,11 +16,10 @@ import (
|
|||||||
const MAGIC string = "ZNAVSRFG"
|
const MAGIC string = "ZNAVSRFG"
|
||||||
|
|
||||||
func newTimestampFromTime(t time.Time) *Timestamp {
|
func newTimestampFromTime(t time.Time) *Timestamp {
|
||||||
out := &Timestamp{
|
return &Timestamp{
|
||||||
Seconds: t.Unix(),
|
Seconds: t.Unix(),
|
||||||
Nanos: int32(t.UnixNano() - (t.Unix() * 1000000000)),
|
Nanos: int32(t.Nanosecond()),
|
||||||
}
|
}
|
||||||
return out
|
|
||||||
}
|
}
|
||||||
|
|
||||||
func (m *manifest) generate() error {
|
func (m *manifest) generate() error {
|
||||||
@ -50,8 +49,13 @@ func (m *manifest) generateOuter() error {
|
|||||||
return errors.New("internal error")
|
return errors.New("internal error")
|
||||||
}
|
}
|
||||||
|
|
||||||
// Generate UUID and set on inner message
|
// Use fixed UUID if provided, otherwise generate a new one
|
||||||
manifestUUID := uuid.New()
|
var manifestUUID uuid.UUID
|
||||||
|
if len(m.fixedUUID) == 16 {
|
||||||
|
copy(manifestUUID[:], m.fixedUUID)
|
||||||
|
} else {
|
||||||
|
manifestUUID = uuid.New()
|
||||||
|
}
|
||||||
m.pbInner.Uuid = manifestUUID[:]
|
m.pbInner.Uuid = manifestUUID[:]
|
||||||
|
|
||||||
innerData, err := proto.MarshalOptions{Deterministic: true}.Marshal(m.pbInner)
|
innerData, err := proto.MarshalOptions{Deterministic: true}.Marshal(m.pbInner)
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user