mfer/mfer/builder.go
clawbot 2efffd9da8 Specify and enforce path invariants (closes #26) (#31)
Add `ValidatePath()` enforcing UTF-8, forward-slash, relative, no `..`, no empty segments. Applied in `AddFile` and `AddFileWithHash`. Proto comments document the rules.

Co-authored-by: clawbot <clawbot@openclaw>
Co-authored-by: Jeffrey Paul <sneak@noreply.example.org>
Reviewed-on: #31
Co-authored-by: clawbot <clawbot@noreply.example.org>
Co-committed-by: clawbot <clawbot@noreply.example.org>
2026-02-09 01:45:29 +01:00

252 lines
5.9 KiB
Go

package mfer
import (
"crypto/sha256"
"errors"
"fmt"
"io"
"strings"
"sync"
"time"
"unicode/utf8"
"github.com/multiformats/go-multihash"
)
// ValidatePath checks that a file path conforms to manifest path invariants:
// - Must be valid UTF-8
// - Must use forward slashes only (no backslashes)
// - Must be relative (no leading /)
// - Must not contain ".." segments
// - Must not contain empty segments (no "//")
// - Must not be empty
func ValidatePath(p string) error {
if p == "" {
return errors.New("path cannot be empty")
}
if !utf8.ValidString(p) {
return fmt.Errorf("path %q is not valid UTF-8", p)
}
if strings.ContainsRune(p, '\\') {
return fmt.Errorf("path %q contains backslash; use forward slashes only", p)
}
if strings.HasPrefix(p, "/") {
return fmt.Errorf("path %q is absolute; must be relative", p)
}
for _, seg := range strings.Split(p, "/") {
if seg == "" {
return fmt.Errorf("path %q contains empty segment", p)
}
if seg == ".." {
return fmt.Errorf("path %q contains '..' segment", p)
}
}
return nil
}
// RelFilePath represents a relative file path within a manifest.
type RelFilePath string
// AbsFilePath represents an absolute file path on the filesystem.
type AbsFilePath string
// FileSize represents the size of a file in bytes.
type FileSize int64
// FileCount represents a count of files.
type FileCount int64
// ModTime represents a file's modification time.
type ModTime time.Time
// UnixSeconds represents seconds since Unix epoch.
type UnixSeconds int64
// UnixNanos represents the nanosecond component of a timestamp (0-999999999).
type UnixNanos int32
// Timestamp converts ModTime to a protobuf Timestamp.
func (m ModTime) Timestamp() *Timestamp {
t := time.Time(m)
return &Timestamp{
Seconds: t.Unix(),
Nanos: int32(t.Nanosecond()),
}
}
// Multihash represents a multihash-encoded file hash (typically SHA2-256).
type Multihash []byte
// FileHashProgress reports progress during file hashing.
type FileHashProgress struct {
BytesRead FileSize // Total bytes read so far for the current file
}
// Builder constructs a manifest by adding files one at a time.
type Builder struct {
mu sync.Mutex
files []*MFFilePath
createdAt time.Time
signingOptions *SigningOptions
}
// NewBuilder creates a new Builder.
func NewBuilder() *Builder {
return &Builder{
files: make([]*MFFilePath, 0),
createdAt: time.Now(),
}
}
// AddFile reads file content from reader, computes hashes, and adds to manifest.
// Progress updates are sent to the progress channel (if non-nil) without blocking.
// Returns the number of bytes read.
func (b *Builder) AddFile(
path RelFilePath,
size FileSize,
mtime ModTime,
reader io.Reader,
progress chan<- FileHashProgress,
) (FileSize, error) {
if err := ValidatePath(string(path)); err != nil {
return 0, err
}
// Create hash writer
h := sha256.New()
// Read file in chunks, updating hash and progress
var totalRead FileSize
buf := make([]byte, 64*1024) // 64KB chunks
for {
n, err := reader.Read(buf)
if n > 0 {
h.Write(buf[:n])
totalRead += FileSize(n)
sendFileHashProgress(progress, FileHashProgress{BytesRead: totalRead})
}
if err == io.EOF {
break
}
if err != nil {
return totalRead, err
}
}
// Verify actual bytes read matches declared size
if totalRead != size {
return totalRead, fmt.Errorf("size mismatch for %q: declared %d bytes but read %d bytes", path, size, totalRead)
}
// Encode hash as multihash (SHA2-256)
mh, err := multihash.Encode(h.Sum(nil), multihash.SHA2_256)
if err != nil {
return totalRead, err
}
// Create file entry
entry := &MFFilePath{
Path: string(path),
Size: int64(size),
Hashes: []*MFFileChecksum{
{MultiHash: mh},
},
Mtime: mtime.Timestamp(),
}
b.mu.Lock()
b.files = append(b.files, entry)
b.mu.Unlock()
return totalRead, nil
}
// sendFileHashProgress sends a progress update without blocking.
func sendFileHashProgress(ch chan<- FileHashProgress, p FileHashProgress) {
if ch == nil {
return
}
select {
case ch <- p:
default:
}
}
// FileCount returns the number of files added to the builder.
func (b *Builder) FileCount() int {
b.mu.Lock()
defer b.mu.Unlock()
return len(b.files)
}
// AddFileWithHash adds a file entry with a pre-computed hash.
// This is useful when the hash is already known (e.g., from an existing manifest).
// Returns an error if path is empty, size is negative, or hash is nil/empty.
func (b *Builder) AddFileWithHash(path RelFilePath, size FileSize, mtime ModTime, hash Multihash) error {
if err := ValidatePath(string(path)); err != nil {
return err
}
if size < 0 {
return errors.New("size cannot be negative")
}
if len(hash) == 0 {
return errors.New("hash cannot be nil or empty")
}
entry := &MFFilePath{
Path: string(path),
Size: int64(size),
Hashes: []*MFFileChecksum{
{MultiHash: hash},
},
Mtime: mtime.Timestamp(),
}
b.mu.Lock()
b.files = append(b.files, entry)
b.mu.Unlock()
return nil
}
// SetSigningOptions sets the GPG signing options for the manifest.
// If opts is non-nil, the manifest will be signed when Build() is called.
func (b *Builder) SetSigningOptions(opts *SigningOptions) {
b.mu.Lock()
defer b.mu.Unlock()
b.signingOptions = opts
}
// Build finalizes the manifest and writes it to the writer.
func (b *Builder) Build(w io.Writer) error {
b.mu.Lock()
defer b.mu.Unlock()
// Create inner manifest
inner := &MFFile{
Version: MFFile_VERSION_ONE,
CreatedAt: newTimestampFromTime(b.createdAt),
Files: b.files,
}
// Create a temporary manifest to use existing serialization
m := &manifest{
pbInner: inner,
signingOptions: b.signingOptions,
}
// Generate outer wrapper
if err := m.generateOuter(); err != nil {
return err
}
// Generate final output
if err := m.generate(); err != nil {
return err
}
// Write to output
_, err := w.Write(m.output.Bytes())
return err
}