Compare commits
7 Commits
d947fc81ae
...
fa99bdc5ee
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
fa99bdc5ee | ||
|
|
b8506ad043 | ||
|
|
ec3f6cb7c1 | ||
| 04b05e01e8 | |||
| 7144617d0e | |||
| 2efffd9da8 | |||
| ebaf2a65ca |
@ -1,281 +0,0 @@
|
|||||||
package checker
|
|
||||||
|
|
||||||
import (
|
|
||||||
"bytes"
|
|
||||||
"context"
|
|
||||||
"crypto/sha256"
|
|
||||||
"errors"
|
|
||||||
"io"
|
|
||||||
"os"
|
|
||||||
"path/filepath"
|
|
||||||
|
|
||||||
"github.com/multiformats/go-multihash"
|
|
||||||
"github.com/spf13/afero"
|
|
||||||
"sneak.berlin/go/mfer/mfer"
|
|
||||||
)
|
|
||||||
|
|
||||||
// Result represents the outcome of checking a single file.
// Check emits one Result per manifest entry; FindExtraFiles emits one per
// on-disk file that is absent from the manifest.
type Result struct {
	Path    string // Relative path: the manifest path for checked files, or the path relative to the base path for extra files
	Status  Status // Verification result status
	Message string // Human-readable description of the result; left empty when Status is StatusOK
}
|
|
||||||
|
|
||||||
// Status represents the verification status of a file.
// The zero value is StatusOK.
type Status int

// Verification statuses, numbered from zero in declaration order.
const (
	StatusOK           Status = iota // File matches manifest (size and hash verified)
	StatusMissing                    // File not found on disk
	StatusSizeMismatch               // File size differs from manifest
	StatusHashMismatch               // File hash differs from manifest
	StatusExtra                      // File exists on disk but not in manifest
	StatusError                      // Error occurred during verification
)
|
|
||||||
|
|
||||||
func (s Status) String() string {
|
|
||||||
switch s {
|
|
||||||
case StatusOK:
|
|
||||||
return "OK"
|
|
||||||
case StatusMissing:
|
|
||||||
return "MISSING"
|
|
||||||
case StatusSizeMismatch:
|
|
||||||
return "SIZE_MISMATCH"
|
|
||||||
case StatusHashMismatch:
|
|
||||||
return "HASH_MISMATCH"
|
|
||||||
case StatusExtra:
|
|
||||||
return "EXTRA"
|
|
||||||
case StatusError:
|
|
||||||
return "ERROR"
|
|
||||||
default:
|
|
||||||
return "UNKNOWN"
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// CheckStatus contains progress information for the check operation.
// Check fills every field except BytesPerSec.
type CheckStatus struct {
	TotalFiles   int64   // Total number of files in manifest
	CheckedFiles int64   // Number of files checked so far
	TotalBytes   int64   // Total bytes to verify (sum of all file sizes)
	CheckedBytes int64   // Bytes verified so far
	BytesPerSec  float64 // Current throughput rate. NOTE(review): Check never sets this, so it is always zero there
	Failures     int64   // Number of verification failures encountered (any result other than StatusOK)
}
|
|
||||||
|
|
||||||
// Checker verifies files against a manifest.
// Construct one with NewChecker; the fields are populated there.
type Checker struct {
	basePath string             // absolute directory that manifest paths are joined onto
	files    []*mfer.MFFilePath // file entries loaded from the manifest
	fs       afero.Fs           // filesystem used for every Stat, Open and Walk
	// manifestPaths is a set of paths in the manifest for quick lookup
	manifestPaths map[string]struct{}
}
|
|
||||||
|
|
||||||
// NewChecker creates a new Checker for the given manifest, base path, and filesystem.
|
|
||||||
// The basePath is the directory relative to which manifest paths are resolved.
|
|
||||||
// If fs is nil, the real filesystem (OsFs) is used.
|
|
||||||
func NewChecker(manifestPath string, basePath string, fs afero.Fs) (*Checker, error) {
|
|
||||||
if fs == nil {
|
|
||||||
fs = afero.NewOsFs()
|
|
||||||
}
|
|
||||||
|
|
||||||
m, err := mfer.NewManifestFromFile(fs, manifestPath)
|
|
||||||
if err != nil {
|
|
||||||
return nil, err
|
|
||||||
}
|
|
||||||
|
|
||||||
abs, err := filepath.Abs(basePath)
|
|
||||||
if err != nil {
|
|
||||||
return nil, err
|
|
||||||
}
|
|
||||||
|
|
||||||
files := m.Files()
|
|
||||||
manifestPaths := make(map[string]struct{}, len(files))
|
|
||||||
for _, f := range files {
|
|
||||||
manifestPaths[f.Path] = struct{}{}
|
|
||||||
}
|
|
||||||
|
|
||||||
return &Checker{
|
|
||||||
basePath: abs,
|
|
||||||
files: files,
|
|
||||||
fs: fs,
|
|
||||||
manifestPaths: manifestPaths,
|
|
||||||
}, nil
|
|
||||||
}
|
|
||||||
|
|
||||||
// FileCount returns the number of files in the manifest.
|
|
||||||
func (c *Checker) FileCount() int64 {
|
|
||||||
return int64(len(c.files))
|
|
||||||
}
|
|
||||||
|
|
||||||
// TotalBytes returns the total size of all files in the manifest.
|
|
||||||
func (c *Checker) TotalBytes() int64 {
|
|
||||||
var total int64
|
|
||||||
for _, f := range c.files {
|
|
||||||
total += f.Size
|
|
||||||
}
|
|
||||||
return total
|
|
||||||
}
|
|
||||||
|
|
||||||
// Check verifies all files against the manifest.
|
|
||||||
// Results are sent to the results channel as files are checked.
|
|
||||||
// Progress updates are sent to the progress channel approximately once per second.
|
|
||||||
// Both channels are closed when the method returns.
|
|
||||||
func (c *Checker) Check(ctx context.Context, results chan<- Result, progress chan<- CheckStatus) error {
|
|
||||||
if results != nil {
|
|
||||||
defer close(results)
|
|
||||||
}
|
|
||||||
if progress != nil {
|
|
||||||
defer close(progress)
|
|
||||||
}
|
|
||||||
|
|
||||||
totalFiles := int64(len(c.files))
|
|
||||||
totalBytes := c.TotalBytes()
|
|
||||||
|
|
||||||
var checkedFiles int64
|
|
||||||
var checkedBytes int64
|
|
||||||
var failures int64
|
|
||||||
|
|
||||||
for _, entry := range c.files {
|
|
||||||
select {
|
|
||||||
case <-ctx.Done():
|
|
||||||
return ctx.Err()
|
|
||||||
default:
|
|
||||||
}
|
|
||||||
|
|
||||||
result := c.checkFile(entry, &checkedBytes)
|
|
||||||
if result.Status != StatusOK {
|
|
||||||
failures++
|
|
||||||
}
|
|
||||||
checkedFiles++
|
|
||||||
|
|
||||||
if results != nil {
|
|
||||||
results <- result
|
|
||||||
}
|
|
||||||
|
|
||||||
// Send progress (simplified - every file for now)
|
|
||||||
if progress != nil {
|
|
||||||
sendCheckStatus(progress, CheckStatus{
|
|
||||||
TotalFiles: totalFiles,
|
|
||||||
CheckedFiles: checkedFiles,
|
|
||||||
TotalBytes: totalBytes,
|
|
||||||
CheckedBytes: checkedBytes,
|
|
||||||
Failures: failures,
|
|
||||||
})
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
|
|
||||||
func (c *Checker) checkFile(entry *mfer.MFFilePath, checkedBytes *int64) Result {
|
|
||||||
absPath := filepath.Join(c.basePath, entry.Path)
|
|
||||||
|
|
||||||
// Check if file exists
|
|
||||||
info, err := c.fs.Stat(absPath)
|
|
||||||
if err != nil {
|
|
||||||
if errors.Is(err, afero.ErrFileNotFound) || errors.Is(err, errors.New("file does not exist")) {
|
|
||||||
return Result{Path: entry.Path, Status: StatusMissing, Message: "file not found"}
|
|
||||||
}
|
|
||||||
// Check for "file does not exist" style errors
|
|
||||||
exists, _ := afero.Exists(c.fs, absPath)
|
|
||||||
if !exists {
|
|
||||||
return Result{Path: entry.Path, Status: StatusMissing, Message: "file not found"}
|
|
||||||
}
|
|
||||||
return Result{Path: entry.Path, Status: StatusError, Message: err.Error()}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Check size
|
|
||||||
if info.Size() != entry.Size {
|
|
||||||
*checkedBytes += info.Size()
|
|
||||||
return Result{
|
|
||||||
Path: entry.Path,
|
|
||||||
Status: StatusSizeMismatch,
|
|
||||||
Message: "size mismatch",
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Open and hash file
|
|
||||||
f, err := c.fs.Open(absPath)
|
|
||||||
if err != nil {
|
|
||||||
return Result{Path: entry.Path, Status: StatusError, Message: err.Error()}
|
|
||||||
}
|
|
||||||
defer f.Close()
|
|
||||||
|
|
||||||
h := sha256.New()
|
|
||||||
n, err := io.Copy(h, f)
|
|
||||||
if err != nil {
|
|
||||||
return Result{Path: entry.Path, Status: StatusError, Message: err.Error()}
|
|
||||||
}
|
|
||||||
*checkedBytes += n
|
|
||||||
|
|
||||||
// Encode as multihash and compare
|
|
||||||
computed, err := multihash.Encode(h.Sum(nil), multihash.SHA2_256)
|
|
||||||
if err != nil {
|
|
||||||
return Result{Path: entry.Path, Status: StatusError, Message: err.Error()}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Check against all hashes in manifest (at least one must match)
|
|
||||||
for _, hash := range entry.Hashes {
|
|
||||||
if bytes.Equal(computed, hash.MultiHash) {
|
|
||||||
return Result{Path: entry.Path, Status: StatusOK}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return Result{Path: entry.Path, Status: StatusHashMismatch, Message: "hash mismatch"}
|
|
||||||
}
|
|
||||||
|
|
||||||
// FindExtraFiles walks the filesystem and reports files not in the manifest.
|
|
||||||
// Results are sent to the results channel. The channel is closed when done.
|
|
||||||
func (c *Checker) FindExtraFiles(ctx context.Context, results chan<- Result) error {
|
|
||||||
if results != nil {
|
|
||||||
defer close(results)
|
|
||||||
}
|
|
||||||
|
|
||||||
return afero.Walk(c.fs, c.basePath, func(path string, info os.FileInfo, err error) error {
|
|
||||||
if err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
|
|
||||||
select {
|
|
||||||
case <-ctx.Done():
|
|
||||||
return ctx.Err()
|
|
||||||
default:
|
|
||||||
}
|
|
||||||
|
|
||||||
// Skip directories
|
|
||||||
if info.IsDir() {
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
|
|
||||||
// Get relative path
|
|
||||||
relPath, err := filepath.Rel(c.basePath, path)
|
|
||||||
if err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
|
|
||||||
// Check if path is in manifest
|
|
||||||
if _, exists := c.manifestPaths[relPath]; !exists {
|
|
||||||
if results != nil {
|
|
||||||
results <- Result{
|
|
||||||
Path: relPath,
|
|
||||||
Status: StatusExtra,
|
|
||||||
Message: "not in manifest",
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return nil
|
|
||||||
})
|
|
||||||
}
|
|
||||||
|
|
||||||
// sendCheckStatus sends a status update without blocking.
|
|
||||||
func sendCheckStatus(ch chan<- CheckStatus, status CheckStatus) {
|
|
||||||
if ch == nil {
|
|
||||||
return
|
|
||||||
}
|
|
||||||
select {
|
|
||||||
case ch <- status:
|
|
||||||
default:
|
|
||||||
}
|
|
||||||
}
|
|
||||||
@ -1,373 +0,0 @@
|
|||||||
package scanner
|
|
||||||
|
|
||||||
import (
|
|
||||||
"context"
|
|
||||||
"io"
|
|
||||||
"io/fs"
|
|
||||||
"path"
|
|
||||||
"path/filepath"
|
|
||||||
"strings"
|
|
||||||
"sync"
|
|
||||||
"time"
|
|
||||||
|
|
||||||
"github.com/spf13/afero"
|
|
||||||
"sneak.berlin/go/mfer/mfer"
|
|
||||||
)
|
|
||||||
|
|
||||||
// Phase 1: Enumeration
|
|
||||||
// ---------------------
|
|
||||||
// Walking directories and calling stat() on files to collect metadata.
|
|
||||||
// Builds the list of files to be scanned. Relatively fast (metadata only).
|
|
||||||
|
|
||||||
// EnumerateStatus contains progress information for the enumeration phase.
// Both fields are cumulative over every file the Scanner holds.
type EnumerateStatus struct {
	FilesFound int64 // Number of files discovered so far
	BytesFound int64 // Total size of discovered files (from stat)
}
|
|
||||||
|
|
||||||
// Phase 2: Scan (ToManifest)
|
|
||||||
// --------------------------
|
|
||||||
// Reading file contents and computing hashes for manifest generation.
|
|
||||||
// This is the expensive phase that reads all file data.
|
|
||||||
|
|
||||||
// ScanStatus contains progress information for the scan phase.
type ScanStatus struct {
	TotalFiles   int64   // Total number of files to scan
	ScannedFiles int64   // Number of files scanned so far
	TotalBytes   int64   // Total bytes to read (sum of all file sizes)
	ScannedBytes int64   // Bytes read so far
	BytesPerSec  float64 // Current throughput rate: bytes read so far divided by seconds since the scan started
}
|
|
||||||
|
|
||||||
// Options configures scanner behavior.
// A nil *Options passed to NewWithOptions is treated as the zero value.
type Options struct {
	IgnoreDotfiles bool     // Skip files and directories starting with a dot
	FollowSymLinks bool     // Resolve symlinks instead of skipping them. NOTE(review): no code visible in this file reads this field
	Fs             afero.Fs // Filesystem to use, defaults to OsFs if nil
}
|
|
||||||
|
|
||||||
// FileEntry represents a file that has been enumerated.
type FileEntry struct {
	Path    string    // Relative path (used in manifest)
	AbsPath string    // Absolute path (used for reading file content)
	Size    int64     // File size in bytes
	Mtime   time.Time // Last modification time
	Ctime   time.Time // Creation time (platform-dependent). NOTE(review): enumeration in this file leaves it at the zero time
}
|
|
||||||
|
|
||||||
// Scanner accumulates files and generates manifests from them.
// Create one with New or NewWithOptions.
type Scanner struct {
	mu      sync.RWMutex // guards files
	files   []*FileEntry // enumerated files, in discovery order
	options *Options     // never nil after construction via NewWithOptions
	fs      afero.Fs     // filesystem used to stat and open files
}
|
|
||||||
|
|
||||||
// New creates a new Scanner with default options.
|
|
||||||
func New() *Scanner {
|
|
||||||
return NewWithOptions(nil)
|
|
||||||
}
|
|
||||||
|
|
||||||
// NewWithOptions creates a new Scanner with the given options.
|
|
||||||
func NewWithOptions(opts *Options) *Scanner {
|
|
||||||
if opts == nil {
|
|
||||||
opts = &Options{}
|
|
||||||
}
|
|
||||||
fs := opts.Fs
|
|
||||||
if fs == nil {
|
|
||||||
fs = afero.NewOsFs()
|
|
||||||
}
|
|
||||||
return &Scanner{
|
|
||||||
files: make([]*FileEntry, 0),
|
|
||||||
options: opts,
|
|
||||||
fs: fs,
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// EnumerateFile adds a single file to the scanner, calling stat() to get metadata.
|
|
||||||
func (s *Scanner) EnumerateFile(filePath string) error {
|
|
||||||
abs, err := filepath.Abs(filePath)
|
|
||||||
if err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
info, err := s.fs.Stat(abs)
|
|
||||||
if err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
// For single files, use the filename as the relative path
|
|
||||||
basePath := filepath.Dir(abs)
|
|
||||||
return s.enumerateFileWithInfo(filepath.Base(abs), basePath, info, nil)
|
|
||||||
}
|
|
||||||
|
|
||||||
// EnumeratePath walks a directory path and adds all files to the scanner.
|
|
||||||
// If progress is non-nil, status updates are sent as files are discovered.
|
|
||||||
// The progress channel is closed when the method returns.
|
|
||||||
func (s *Scanner) EnumeratePath(inputPath string, progress chan<- EnumerateStatus) error {
|
|
||||||
if progress != nil {
|
|
||||||
defer close(progress)
|
|
||||||
}
|
|
||||||
abs, err := filepath.Abs(inputPath)
|
|
||||||
if err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
afs := afero.NewReadOnlyFs(afero.NewBasePathFs(s.fs, abs))
|
|
||||||
return s.enumerateFS(afs, abs, progress)
|
|
||||||
}
|
|
||||||
|
|
||||||
// EnumeratePaths walks multiple directory paths and adds all files to the scanner.
|
|
||||||
// If progress is non-nil, status updates are sent as files are discovered.
|
|
||||||
// The progress channel is closed when the method returns.
|
|
||||||
func (s *Scanner) EnumeratePaths(progress chan<- EnumerateStatus, inputPaths ...string) error {
|
|
||||||
if progress != nil {
|
|
||||||
defer close(progress)
|
|
||||||
}
|
|
||||||
for _, p := range inputPaths {
|
|
||||||
abs, err := filepath.Abs(p)
|
|
||||||
if err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
afs := afero.NewReadOnlyFs(afero.NewBasePathFs(s.fs, abs))
|
|
||||||
if err := s.enumerateFS(afs, abs, progress); err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
}
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
|
|
||||||
// EnumerateFS walks an afero filesystem and adds all files to the scanner.
|
|
||||||
// If progress is non-nil, status updates are sent as files are discovered.
|
|
||||||
// The progress channel is closed when the method returns.
|
|
||||||
// basePath is used to compute absolute paths for file reading.
|
|
||||||
func (s *Scanner) EnumerateFS(afs afero.Fs, basePath string, progress chan<- EnumerateStatus) error {
|
|
||||||
if progress != nil {
|
|
||||||
defer close(progress)
|
|
||||||
}
|
|
||||||
return s.enumerateFS(afs, basePath, progress)
|
|
||||||
}
|
|
||||||
|
|
||||||
// enumerateFS is the internal implementation that doesn't close the progress channel.
|
|
||||||
func (s *Scanner) enumerateFS(afs afero.Fs, basePath string, progress chan<- EnumerateStatus) error {
|
|
||||||
return afero.Walk(afs, "/", func(p string, info fs.FileInfo, err error) error {
|
|
||||||
if err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
if s.options.IgnoreDotfiles && pathIsHidden(p) {
|
|
||||||
if info.IsDir() {
|
|
||||||
return filepath.SkipDir
|
|
||||||
}
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
return s.enumerateFileWithInfo(p, basePath, info, progress)
|
|
||||||
})
|
|
||||||
}
|
|
||||||
|
|
||||||
// enumerateFileWithInfo adds a file with pre-existing fs.FileInfo.
|
|
||||||
func (s *Scanner) enumerateFileWithInfo(filePath string, basePath string, info fs.FileInfo, progress chan<- EnumerateStatus) error {
|
|
||||||
if info.IsDir() {
|
|
||||||
// Manifests contain only files, directories are implied
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
|
|
||||||
// Clean the path - remove leading slash if present
|
|
||||||
cleanPath := filePath
|
|
||||||
if len(cleanPath) > 0 && cleanPath[0] == '/' {
|
|
||||||
cleanPath = cleanPath[1:]
|
|
||||||
}
|
|
||||||
|
|
||||||
// Compute absolute path for file reading
|
|
||||||
absPath := filepath.Join(basePath, cleanPath)
|
|
||||||
|
|
||||||
entry := &FileEntry{
|
|
||||||
Path: cleanPath,
|
|
||||||
AbsPath: absPath,
|
|
||||||
Size: info.Size(),
|
|
||||||
Mtime: info.ModTime(),
|
|
||||||
// Note: Ctime not available from fs.FileInfo on all platforms
|
|
||||||
// Will need platform-specific code to extract it
|
|
||||||
}
|
|
||||||
|
|
||||||
s.mu.Lock()
|
|
||||||
s.files = append(s.files, entry)
|
|
||||||
filesFound := int64(len(s.files))
|
|
||||||
var bytesFound int64
|
|
||||||
for _, f := range s.files {
|
|
||||||
bytesFound += f.Size
|
|
||||||
}
|
|
||||||
s.mu.Unlock()
|
|
||||||
|
|
||||||
sendEnumerateStatus(progress, EnumerateStatus{
|
|
||||||
FilesFound: filesFound,
|
|
||||||
BytesFound: bytesFound,
|
|
||||||
})
|
|
||||||
|
|
||||||
return nil
|
|
||||||
}
|
|
||||||
|
|
||||||
// Files returns a copy of all files added to the scanner.
|
|
||||||
func (s *Scanner) Files() []*FileEntry {
|
|
||||||
s.mu.RLock()
|
|
||||||
defer s.mu.RUnlock()
|
|
||||||
out := make([]*FileEntry, len(s.files))
|
|
||||||
copy(out, s.files)
|
|
||||||
return out
|
|
||||||
}
|
|
||||||
|
|
||||||
// FileCount returns the number of files in the scanner.
|
|
||||||
func (s *Scanner) FileCount() int64 {
|
|
||||||
s.mu.RLock()
|
|
||||||
defer s.mu.RUnlock()
|
|
||||||
return int64(len(s.files))
|
|
||||||
}
|
|
||||||
|
|
||||||
// TotalBytes returns the total size of all files in the scanner.
|
|
||||||
func (s *Scanner) TotalBytes() int64 {
|
|
||||||
s.mu.RLock()
|
|
||||||
defer s.mu.RUnlock()
|
|
||||||
var total int64
|
|
||||||
for _, f := range s.files {
|
|
||||||
total += f.Size
|
|
||||||
}
|
|
||||||
return total
|
|
||||||
}
|
|
||||||
|
|
||||||
// ToManifest reads all file contents, computes hashes, and generates a manifest.
|
|
||||||
// If progress is non-nil, status updates are sent approximately once per second.
|
|
||||||
// The progress channel is closed when the method returns.
|
|
||||||
// The manifest is written to the provided io.Writer.
|
|
||||||
func (s *Scanner) ToManifest(ctx context.Context, w io.Writer, progress chan<- ScanStatus) error {
|
|
||||||
if progress != nil {
|
|
||||||
defer close(progress)
|
|
||||||
}
|
|
||||||
|
|
||||||
s.mu.RLock()
|
|
||||||
files := make([]*FileEntry, len(s.files))
|
|
||||||
copy(files, s.files)
|
|
||||||
totalFiles := int64(len(files))
|
|
||||||
var totalBytes int64
|
|
||||||
for _, f := range files {
|
|
||||||
totalBytes += f.Size
|
|
||||||
}
|
|
||||||
s.mu.RUnlock()
|
|
||||||
|
|
||||||
builder := mfer.NewBuilder()
|
|
||||||
|
|
||||||
var scannedFiles int64
|
|
||||||
var scannedBytes int64
|
|
||||||
lastProgressTime := time.Now()
|
|
||||||
startTime := time.Now()
|
|
||||||
|
|
||||||
for _, entry := range files {
|
|
||||||
// Check for cancellation
|
|
||||||
select {
|
|
||||||
case <-ctx.Done():
|
|
||||||
return ctx.Err()
|
|
||||||
default:
|
|
||||||
}
|
|
||||||
|
|
||||||
// Open file
|
|
||||||
f, err := s.fs.Open(entry.AbsPath)
|
|
||||||
if err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
|
|
||||||
// Add to manifest with progress callback
|
|
||||||
bytesRead, err := builder.AddFile(
|
|
||||||
entry.Path,
|
|
||||||
entry.Size,
|
|
||||||
entry.Mtime,
|
|
||||||
f,
|
|
||||||
func(fileBytes int64) {
|
|
||||||
// Send progress at most once per second
|
|
||||||
now := time.Now()
|
|
||||||
if progress != nil && now.Sub(lastProgressTime) >= time.Second {
|
|
||||||
elapsed := now.Sub(startTime).Seconds()
|
|
||||||
currentBytes := scannedBytes + fileBytes
|
|
||||||
var rate float64
|
|
||||||
if elapsed > 0 {
|
|
||||||
rate = float64(currentBytes) / elapsed
|
|
||||||
}
|
|
||||||
sendScanStatus(progress, ScanStatus{
|
|
||||||
TotalFiles: totalFiles,
|
|
||||||
ScannedFiles: scannedFiles,
|
|
||||||
TotalBytes: totalBytes,
|
|
||||||
ScannedBytes: currentBytes,
|
|
||||||
BytesPerSec: rate,
|
|
||||||
})
|
|
||||||
lastProgressTime = now
|
|
||||||
}
|
|
||||||
},
|
|
||||||
)
|
|
||||||
f.Close()
|
|
||||||
|
|
||||||
if err != nil {
|
|
||||||
return err
|
|
||||||
}
|
|
||||||
|
|
||||||
scannedFiles++
|
|
||||||
scannedBytes += bytesRead
|
|
||||||
}
|
|
||||||
|
|
||||||
// Send final progress
|
|
||||||
if progress != nil {
|
|
||||||
elapsed := time.Since(startTime).Seconds()
|
|
||||||
var rate float64
|
|
||||||
if elapsed > 0 {
|
|
||||||
rate = float64(scannedBytes) / elapsed
|
|
||||||
}
|
|
||||||
sendScanStatus(progress, ScanStatus{
|
|
||||||
TotalFiles: totalFiles,
|
|
||||||
ScannedFiles: scannedFiles,
|
|
||||||
TotalBytes: totalBytes,
|
|
||||||
ScannedBytes: scannedBytes,
|
|
||||||
BytesPerSec: rate,
|
|
||||||
})
|
|
||||||
}
|
|
||||||
|
|
||||||
// Build and write manifest
|
|
||||||
return builder.Build(w)
|
|
||||||
}
|
|
||||||
|
|
||||||
// pathIsHidden reports whether the cleaned, slash-separated form of p has any
// segment that starts with a dot, i.e. the file itself or one of its parent
// directories is hidden.
// Because the path is cleaned first, an empty path or "." (which clean to
// ".") and paths that still begin with ".." also count as hidden.
func pathIsHidden(p string) bool {
	for _, segment := range strings.Split(path.Clean(p), "/") {
		if strings.HasPrefix(segment, ".") {
			return true
		}
	}
	return false
}
|
|
||||||
|
|
||||||
// sendEnumerateStatus sends a status update without blocking.
|
|
||||||
// If the channel is full, the update is dropped.
|
|
||||||
func sendEnumerateStatus(ch chan<- EnumerateStatus, status EnumerateStatus) {
|
|
||||||
if ch == nil {
|
|
||||||
return
|
|
||||||
}
|
|
||||||
select {
|
|
||||||
case ch <- status:
|
|
||||||
default:
|
|
||||||
// Channel full, drop this update
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// sendScanStatus sends a status update without blocking.
|
|
||||||
// If the channel is full, the update is dropped.
|
|
||||||
func sendScanStatus(ch chan<- ScanStatus, status ScanStatus) {
|
|
||||||
if ch == nil {
|
|
||||||
return
|
|
||||||
}
|
|
||||||
select {
|
|
||||||
case ch <- status:
|
|
||||||
default:
|
|
||||||
// Channel full, drop this update
|
|
||||||
}
|
|
||||||
}
|
|
||||||
@ -3,14 +3,48 @@ package mfer
|
|||||||
import (
|
import (
|
||||||
"crypto/sha256"
|
"crypto/sha256"
|
||||||
"errors"
|
"errors"
|
||||||
|
"fmt"
|
||||||
"io"
|
"io"
|
||||||
"sort"
|
"sort"
|
||||||
|
"strings"
|
||||||
"sync"
|
"sync"
|
||||||
"time"
|
"time"
|
||||||
|
"unicode/utf8"
|
||||||
|
|
||||||
"github.com/multiformats/go-multihash"
|
"github.com/multiformats/go-multihash"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
// ValidatePath reports whether p satisfies the manifest path invariants,
// returning nil when it does and a descriptive error for the first rule it
// breaks. Rules are checked in this order:
//   - Must not be empty
//   - Must be valid UTF-8
//   - Must use forward slashes only (no backslashes)
//   - Must be relative (no leading /)
//   - Must not contain empty segments (no "//", no trailing "/")
//   - Must not contain ".." segments
func ValidatePath(p string) error {
	switch {
	case p == "":
		return errors.New("path cannot be empty")
	case !utf8.ValidString(p):
		return fmt.Errorf("path %q is not valid UTF-8", p)
	case strings.Contains(p, `\`):
		return fmt.Errorf("path %q contains backslash; use forward slashes only", p)
	case strings.HasPrefix(p, "/"):
		return fmt.Errorf("path %q is absolute; must be relative", p)
	}

	// Segment rules are applied left to right, so the earliest offending
	// segment decides which error is returned.
	for _, segment := range strings.Split(p, "/") {
		switch segment {
		case "":
			return fmt.Errorf("path %q contains empty segment", p)
		case "..":
			return fmt.Errorf("path %q contains '..' segment", p)
		}
	}
	return nil
}
|
||||||
|
|
||||||
// RelFilePath represents a relative file path within a manifest.
|
// RelFilePath represents a relative file path within a manifest.
|
||||||
type RelFilePath string
|
type RelFilePath string
|
||||||
|
|
||||||
@ -97,6 +131,10 @@ func (b *Builder) AddFile(
|
|||||||
reader io.Reader,
|
reader io.Reader,
|
||||||
progress chan<- FileHashProgress,
|
progress chan<- FileHashProgress,
|
||||||
) (FileSize, error) {
|
) (FileSize, error) {
|
||||||
|
if err := ValidatePath(string(path)); err != nil {
|
||||||
|
return 0, err
|
||||||
|
}
|
||||||
|
|
||||||
// Create hash writer
|
// Create hash writer
|
||||||
h := sha256.New()
|
h := sha256.New()
|
||||||
|
|
||||||
@ -119,6 +157,11 @@ func (b *Builder) AddFile(
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Verify actual bytes read matches declared size
|
||||||
|
if totalRead != size {
|
||||||
|
return totalRead, fmt.Errorf("size mismatch for %q: declared %d bytes but read %d bytes", path, size, totalRead)
|
||||||
|
}
|
||||||
|
|
||||||
// Encode hash as multihash (SHA2-256)
|
// Encode hash as multihash (SHA2-256)
|
||||||
mh, err := multihash.Encode(h.Sum(nil), multihash.SHA2_256)
|
mh, err := multihash.Encode(h.Sum(nil), multihash.SHA2_256)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
@ -164,8 +207,8 @@ func (b *Builder) FileCount() int {
|
|||||||
// This is useful when the hash is already known (e.g., from an existing manifest).
|
// This is useful when the hash is already known (e.g., from an existing manifest).
|
||||||
// Returns an error if path is empty, size is negative, or hash is nil/empty.
|
// Returns an error if path is empty, size is negative, or hash is nil/empty.
|
||||||
func (b *Builder) AddFileWithHash(path RelFilePath, size FileSize, mtime ModTime, hash Multihash) error {
|
func (b *Builder) AddFileWithHash(path RelFilePath, size FileSize, mtime ModTime, hash Multihash) error {
|
||||||
if path == "" {
|
if err := ValidatePath(string(path)); err != nil {
|
||||||
return errors.New("path cannot be empty")
|
return err
|
||||||
}
|
}
|
||||||
if size < 0 {
|
if size < 0 {
|
||||||
return errors.New("size cannot be negative")
|
return errors.New("size cannot be negative")
|
||||||
|
|||||||
@ -3,4 +3,9 @@ package mfer
|
|||||||
const (
|
const (
|
||||||
Version = "0.1.0"
|
Version = "0.1.0"
|
||||||
ReleaseDate = "2025-12-17"
|
ReleaseDate = "2025-12-17"
|
||||||
|
|
||||||
|
// MaxDecompressedSize is the maximum allowed size of decompressed manifest
|
||||||
|
// data (256 MB). This prevents decompression bombs from consuming excessive
|
||||||
|
// memory.
|
||||||
|
MaxDecompressedSize int64 = 256 * 1024 * 1024
|
||||||
)
|
)
|
||||||
|
|||||||
@ -76,10 +76,20 @@ func (m *manifest) deserializeInner() error {
|
|||||||
}
|
}
|
||||||
defer zr.Close()
|
defer zr.Close()
|
||||||
|
|
||||||
dat, err := io.ReadAll(zr)
|
// Limit decompressed size to prevent decompression bombs.
|
||||||
|
// Use declared size + 1 byte to detect overflow, capped at MaxDecompressedSize.
|
||||||
|
maxSize := MaxDecompressedSize
|
||||||
|
if m.pbOuter.Size > 0 && m.pbOuter.Size < int64(maxSize) {
|
||||||
|
maxSize = int64(m.pbOuter.Size) + 1
|
||||||
|
}
|
||||||
|
limitedReader := io.LimitReader(zr, maxSize)
|
||||||
|
dat, err := io.ReadAll(limitedReader)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
if int64(len(dat)) >= MaxDecompressedSize {
|
||||||
|
return fmt.Errorf("decompressed data exceeds maximum allowed size of %d bytes", MaxDecompressedSize)
|
||||||
|
}
|
||||||
|
|
||||||
isize := len(dat)
|
isize := len(dat)
|
||||||
if int64(isize) != m.pbOuter.Size {
|
if int64(isize) != m.pbOuter.Size {
|
||||||
|
|||||||
@ -46,6 +46,9 @@ message MFFileOuter {
|
|||||||
|
|
||||||
message MFFilePath {
|
message MFFilePath {
|
||||||
// required attributes:
|
// required attributes:
|
||||||
|
// Path invariants: must be valid UTF-8, use forward slashes only,
|
||||||
|
// be relative (no leading /), contain no ".." segments, and no
|
||||||
|
// empty segments (no "//").
|
||||||
string path = 1;
|
string path = 1;
|
||||||
int64 size = 2;
|
int64 size = 2;
|
||||||
|
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user