Sort file entries by path (lexicographic byte-order) before serializing the manifest. This ensures identical output regardless of file insertion order. Add test verifying two different insertion orders produce the same manifest file order.
258 lines
6.0 KiB
Go
258 lines
6.0 KiB
Go
package mfer
|
|
|
|
import (
|
|
"crypto/sha256"
|
|
"errors"
|
|
"fmt"
|
|
"io"
|
|
"sort"
|
|
"strings"
|
|
"sync"
|
|
"time"
|
|
"unicode/utf8"
|
|
|
|
"github.com/multiformats/go-multihash"
|
|
)
|
|
|
|
// ValidatePath reports whether p is acceptable as a manifest file path.
//
// It returns nil when p satisfies every rule below and otherwise an error
// describing the first rule that was violated. Rules are checked in this order:
//
//  1. p is not empty
//  2. p is valid UTF-8
//  3. p contains no backslash (forward slashes are the only separator)
//  4. p does not start with "/" (it must be relative)
//  5. no "/"-separated segment is empty (rules out "//" and a trailing "/")
//  6. no "/"-separated segment is ".."
//
// Rules 5 and 6 are evaluated segment by segment from left to right, so the
// leftmost offending segment decides which of the two errors is reported.
// A single-dot segment (".") is not rejected.
func ValidatePath(p string) error {
	// Whole-string checks; the first matching case wins.
	switch {
	case p == "":
		return errors.New("path cannot be empty")
	case !utf8.ValidString(p):
		return fmt.Errorf("path %q is not valid UTF-8", p)
	case strings.Contains(p, "\\"):
		return fmt.Errorf("path %q contains backslash; use forward slashes only", p)
	case strings.HasPrefix(p, "/"):
		return fmt.Errorf("path %q is absolute; must be relative", p)
	}

	// Per-segment checks.
	for _, segment := range strings.Split(p, "/") {
		switch segment {
		case "":
			return fmt.Errorf("path %q contains empty segment", p)
		case "..":
			return fmt.Errorf("path %q contains '..' segment", p)
		}
	}

	return nil
}
|
|
|
|
// Named scalar types used by the manifest builder API. Each one wraps a
// built-in (or time.Time) so that values with different meanings cannot be
// mixed up without an explicit conversion.
type (
	// RelFilePath is a relative file path within a manifest.
	RelFilePath string

	// AbsFilePath is an absolute file path on the filesystem.
	AbsFilePath string

	// FileSize is the size of a file in bytes.
	FileSize int64

	// FileCount is a count of files.
	FileCount int64

	// ModTime is a file's modification time.
	ModTime time.Time

	// UnixSeconds is a number of seconds since the Unix epoch.
	UnixSeconds int64

	// UnixNanos is the nanosecond component of a timestamp (0-999999999).
	UnixNanos int32
)
|
|
|
|
// Timestamp converts ModTime to a protobuf Timestamp.
|
|
func (m ModTime) Timestamp() *Timestamp {
|
|
t := time.Time(m)
|
|
return &Timestamp{
|
|
Seconds: t.Unix(),
|
|
Nanos: int32(t.Nanosecond()),
|
|
}
|
|
}
|
|
|
|
// Multihash holds the raw bytes of a multihash-encoded file hash. Hashes
// produced by Builder.AddFile use the SHA2-256 multihash code.
type Multihash []byte
|
|
|
|
// FileHashProgress reports progress during file hashing.
|
|
type FileHashProgress struct {
|
|
BytesRead FileSize // Total bytes read so far for the current file
|
|
}
|
|
|
|
// Builder constructs a manifest by adding files one at a time.
|
|
type Builder struct {
|
|
mu sync.Mutex
|
|
files []*MFFilePath
|
|
createdAt time.Time
|
|
signingOptions *SigningOptions
|
|
}
|
|
|
|
// NewBuilder creates a new Builder.
|
|
func NewBuilder() *Builder {
|
|
return &Builder{
|
|
files: make([]*MFFilePath, 0),
|
|
createdAt: time.Now(),
|
|
}
|
|
}
|
|
|
|
// AddFile reads file content from reader, computes hashes, and adds to manifest.
|
|
// Progress updates are sent to the progress channel (if non-nil) without blocking.
|
|
// Returns the number of bytes read.
|
|
func (b *Builder) AddFile(
|
|
path RelFilePath,
|
|
size FileSize,
|
|
mtime ModTime,
|
|
reader io.Reader,
|
|
progress chan<- FileHashProgress,
|
|
) (FileSize, error) {
|
|
if err := ValidatePath(string(path)); err != nil {
|
|
return 0, err
|
|
}
|
|
|
|
// Create hash writer
|
|
h := sha256.New()
|
|
|
|
// Read file in chunks, updating hash and progress
|
|
var totalRead FileSize
|
|
buf := make([]byte, 64*1024) // 64KB chunks
|
|
|
|
for {
|
|
n, err := reader.Read(buf)
|
|
if n > 0 {
|
|
h.Write(buf[:n])
|
|
totalRead += FileSize(n)
|
|
sendFileHashProgress(progress, FileHashProgress{BytesRead: totalRead})
|
|
}
|
|
if err == io.EOF {
|
|
break
|
|
}
|
|
if err != nil {
|
|
return totalRead, err
|
|
}
|
|
}
|
|
|
|
// Verify actual bytes read matches declared size
|
|
if totalRead != size {
|
|
return totalRead, fmt.Errorf("size mismatch for %q: declared %d bytes but read %d bytes", path, size, totalRead)
|
|
}
|
|
|
|
// Encode hash as multihash (SHA2-256)
|
|
mh, err := multihash.Encode(h.Sum(nil), multihash.SHA2_256)
|
|
if err != nil {
|
|
return totalRead, err
|
|
}
|
|
|
|
// Create file entry
|
|
entry := &MFFilePath{
|
|
Path: string(path),
|
|
Size: int64(size),
|
|
Hashes: []*MFFileChecksum{
|
|
{MultiHash: mh},
|
|
},
|
|
Mtime: mtime.Timestamp(),
|
|
}
|
|
|
|
b.mu.Lock()
|
|
b.files = append(b.files, entry)
|
|
b.mu.Unlock()
|
|
|
|
return totalRead, nil
|
|
}
|
|
|
|
// sendFileHashProgress sends a progress update without blocking.
|
|
func sendFileHashProgress(ch chan<- FileHashProgress, p FileHashProgress) {
|
|
if ch == nil {
|
|
return
|
|
}
|
|
select {
|
|
case ch <- p:
|
|
default:
|
|
}
|
|
}
|
|
|
|
// FileCount returns the number of files added to the builder.
|
|
func (b *Builder) FileCount() int {
|
|
b.mu.Lock()
|
|
defer b.mu.Unlock()
|
|
return len(b.files)
|
|
}
|
|
|
|
// AddFileWithHash adds a file entry with a pre-computed hash.
|
|
// This is useful when the hash is already known (e.g., from an existing manifest).
|
|
// Returns an error if path is empty, size is negative, or hash is nil/empty.
|
|
func (b *Builder) AddFileWithHash(path RelFilePath, size FileSize, mtime ModTime, hash Multihash) error {
|
|
if err := ValidatePath(string(path)); err != nil {
|
|
return err
|
|
}
|
|
if size < 0 {
|
|
return errors.New("size cannot be negative")
|
|
}
|
|
if len(hash) == 0 {
|
|
return errors.New("hash cannot be nil or empty")
|
|
}
|
|
|
|
entry := &MFFilePath{
|
|
Path: string(path),
|
|
Size: int64(size),
|
|
Hashes: []*MFFileChecksum{
|
|
{MultiHash: hash},
|
|
},
|
|
Mtime: mtime.Timestamp(),
|
|
}
|
|
|
|
b.mu.Lock()
|
|
b.files = append(b.files, entry)
|
|
b.mu.Unlock()
|
|
return nil
|
|
}
|
|
|
|
// SetSigningOptions sets the GPG signing options for the manifest.
|
|
// If opts is non-nil, the manifest will be signed when Build() is called.
|
|
func (b *Builder) SetSigningOptions(opts *SigningOptions) {
|
|
b.mu.Lock()
|
|
defer b.mu.Unlock()
|
|
b.signingOptions = opts
|
|
}
|
|
|
|
// Build finalizes the manifest and writes it to the writer.
|
|
func (b *Builder) Build(w io.Writer) error {
|
|
b.mu.Lock()
|
|
defer b.mu.Unlock()
|
|
|
|
// Sort files by path for deterministic output (#23)
|
|
sort.Slice(b.files, func(i, j int) bool {
|
|
return b.files[i].Path < b.files[j].Path
|
|
})
|
|
|
|
// Create inner manifest
|
|
inner := &MFFile{
|
|
Version: MFFile_VERSION_ONE,
|
|
CreatedAt: newTimestampFromTime(b.createdAt),
|
|
Files: b.files,
|
|
}
|
|
|
|
// Create a temporary manifest to use existing serialization
|
|
m := &manifest{
|
|
pbInner: inner,
|
|
signingOptions: b.signingOptions,
|
|
}
|
|
|
|
// Generate outer wrapper
|
|
if err := m.generateOuter(); err != nil {
|
|
return err
|
|
}
|
|
|
|
// Generate final output
|
|
if err := m.generate(); err != nil {
|
|
return err
|
|
}
|
|
|
|
// Write to output
|
|
_, err := w.Write(m.output.Bytes())
|
|
return err
|
|
}
|