Compare commits
3 Commits
fa99bdc5ee
...
d947fc81ae
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
d947fc81ae | ||
|
|
a1a8aaf922 | ||
|
|
9d301d7b1d |
281
internal/checker/checker.go
Normal file
281
internal/checker/checker.go
Normal file
@ -0,0 +1,281 @@
|
|||||||
|
package checker
|
||||||
|
|
||||||
|
import (
|
||||||
|
"bytes"
|
||||||
|
"context"
|
||||||
|
"crypto/sha256"
|
||||||
|
"errors"
|
||||||
|
"io"
|
||||||
|
"os"
|
||||||
|
"path/filepath"
|
||||||
|
|
||||||
|
"github.com/multiformats/go-multihash"
|
||||||
|
"github.com/spf13/afero"
|
||||||
|
"sneak.berlin/go/mfer/mfer"
|
||||||
|
)
|
||||||
|
|
||||||
|
// Result represents the outcome of checking a single file.
//
// Check sends one Result per manifest entry; FindExtraFiles sends one Result
// for each file on disk that has no manifest entry.
type Result struct {
	// Path is the file's path relative to the checker's base path (for
	// manifest entries this is the path recorded in the manifest).
	Path string
	// Status is the verification result status.
	Status Status
	// Message is a human-readable description of the result. It is left
	// empty when Status is StatusOK.
	Message string
}
|
||||||
|
|
||||||
|
// Status represents the verification status of a file.
type Status int

// Verification outcomes, numbered consecutively from zero.
const (
	// StatusOK means the file matches the manifest (size and hash verified).
	StatusOK Status = iota
	// StatusMissing means the file was not found on disk.
	StatusMissing
	// StatusSizeMismatch means the file size differs from the manifest.
	StatusSizeMismatch
	// StatusHashMismatch means the file hash differs from the manifest.
	StatusHashMismatch
	// StatusExtra means the file exists on disk but not in the manifest.
	StatusExtra
	// StatusError means an error occurred during verification.
	StatusError
)

// String returns the upper-case label for s, or "UNKNOWN" when s is not one
// of the declared Status constants.
func (s Status) String() string {
	labels := [...]string{
		StatusOK:           "OK",
		StatusMissing:      "MISSING",
		StatusSizeMismatch: "SIZE_MISMATCH",
		StatusHashMismatch: "HASH_MISMATCH",
		StatusExtra:        "EXTRA",
		StatusError:        "ERROR",
	}
	if s < 0 || int(s) >= len(labels) {
		return "UNKNOWN"
	}
	return labels[s]
}
|
||||||
|
|
||||||
|
// CheckStatus contains progress information for the check operation.
// Check sends a snapshot of these counters on its progress channel.
type CheckStatus struct {
	TotalFiles   int64 // Total number of files in manifest
	CheckedFiles int64 // Number of files checked so far
	TotalBytes   int64 // Total bytes to verify (sum of all file sizes)
	CheckedBytes int64 // Bytes verified so far
	// BytesPerSec is the current throughput rate.
	// NOTE(review): Check in this file never assigns this field, so it is
	// always zero at present.
	BytesPerSec float64
	Failures    int64 // Number of verification failures encountered
}
|
||||||
|
|
||||||
|
// Checker verifies files against a manifest.
// Construct one with NewChecker; all fields are populated there.
type Checker struct {
	// basePath is the absolute directory that manifest paths are joined onto.
	basePath string
	// files holds the manifest's file entries, in the order the manifest
	// returned them.
	files []*mfer.MFFilePath
	// fs is the filesystem used for every Stat/Open/Walk call.
	fs afero.Fs
	// manifestPaths is a set of paths in the manifest for quick lookup
	manifestPaths map[string]struct{}
}
|
||||||
|
|
||||||
|
// NewChecker creates a new Checker for the given manifest, base path, and filesystem.
|
||||||
|
// The basePath is the directory relative to which manifest paths are resolved.
|
||||||
|
// If fs is nil, the real filesystem (OsFs) is used.
|
||||||
|
func NewChecker(manifestPath string, basePath string, fs afero.Fs) (*Checker, error) {
|
||||||
|
if fs == nil {
|
||||||
|
fs = afero.NewOsFs()
|
||||||
|
}
|
||||||
|
|
||||||
|
m, err := mfer.NewManifestFromFile(fs, manifestPath)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
abs, err := filepath.Abs(basePath)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
files := m.Files()
|
||||||
|
manifestPaths := make(map[string]struct{}, len(files))
|
||||||
|
for _, f := range files {
|
||||||
|
manifestPaths[f.Path] = struct{}{}
|
||||||
|
}
|
||||||
|
|
||||||
|
return &Checker{
|
||||||
|
basePath: abs,
|
||||||
|
files: files,
|
||||||
|
fs: fs,
|
||||||
|
manifestPaths: manifestPaths,
|
||||||
|
}, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// FileCount returns the number of files in the manifest.
|
||||||
|
func (c *Checker) FileCount() int64 {
|
||||||
|
return int64(len(c.files))
|
||||||
|
}
|
||||||
|
|
||||||
|
// TotalBytes returns the total size of all files in the manifest.
|
||||||
|
func (c *Checker) TotalBytes() int64 {
|
||||||
|
var total int64
|
||||||
|
for _, f := range c.files {
|
||||||
|
total += f.Size
|
||||||
|
}
|
||||||
|
return total
|
||||||
|
}
|
||||||
|
|
||||||
|
// Check verifies all files against the manifest.
|
||||||
|
// Results are sent to the results channel as files are checked.
|
||||||
|
// Progress updates are sent to the progress channel approximately once per second.
|
||||||
|
// Both channels are closed when the method returns.
|
||||||
|
func (c *Checker) Check(ctx context.Context, results chan<- Result, progress chan<- CheckStatus) error {
|
||||||
|
if results != nil {
|
||||||
|
defer close(results)
|
||||||
|
}
|
||||||
|
if progress != nil {
|
||||||
|
defer close(progress)
|
||||||
|
}
|
||||||
|
|
||||||
|
totalFiles := int64(len(c.files))
|
||||||
|
totalBytes := c.TotalBytes()
|
||||||
|
|
||||||
|
var checkedFiles int64
|
||||||
|
var checkedBytes int64
|
||||||
|
var failures int64
|
||||||
|
|
||||||
|
for _, entry := range c.files {
|
||||||
|
select {
|
||||||
|
case <-ctx.Done():
|
||||||
|
return ctx.Err()
|
||||||
|
default:
|
||||||
|
}
|
||||||
|
|
||||||
|
result := c.checkFile(entry, &checkedBytes)
|
||||||
|
if result.Status != StatusOK {
|
||||||
|
failures++
|
||||||
|
}
|
||||||
|
checkedFiles++
|
||||||
|
|
||||||
|
if results != nil {
|
||||||
|
results <- result
|
||||||
|
}
|
||||||
|
|
||||||
|
// Send progress (simplified - every file for now)
|
||||||
|
if progress != nil {
|
||||||
|
sendCheckStatus(progress, CheckStatus{
|
||||||
|
TotalFiles: totalFiles,
|
||||||
|
CheckedFiles: checkedFiles,
|
||||||
|
TotalBytes: totalBytes,
|
||||||
|
CheckedBytes: checkedBytes,
|
||||||
|
Failures: failures,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (c *Checker) checkFile(entry *mfer.MFFilePath, checkedBytes *int64) Result {
|
||||||
|
absPath := filepath.Join(c.basePath, entry.Path)
|
||||||
|
|
||||||
|
// Check if file exists
|
||||||
|
info, err := c.fs.Stat(absPath)
|
||||||
|
if err != nil {
|
||||||
|
if errors.Is(err, afero.ErrFileNotFound) || errors.Is(err, errors.New("file does not exist")) {
|
||||||
|
return Result{Path: entry.Path, Status: StatusMissing, Message: "file not found"}
|
||||||
|
}
|
||||||
|
// Check for "file does not exist" style errors
|
||||||
|
exists, _ := afero.Exists(c.fs, absPath)
|
||||||
|
if !exists {
|
||||||
|
return Result{Path: entry.Path, Status: StatusMissing, Message: "file not found"}
|
||||||
|
}
|
||||||
|
return Result{Path: entry.Path, Status: StatusError, Message: err.Error()}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Check size
|
||||||
|
if info.Size() != entry.Size {
|
||||||
|
*checkedBytes += info.Size()
|
||||||
|
return Result{
|
||||||
|
Path: entry.Path,
|
||||||
|
Status: StatusSizeMismatch,
|
||||||
|
Message: "size mismatch",
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Open and hash file
|
||||||
|
f, err := c.fs.Open(absPath)
|
||||||
|
if err != nil {
|
||||||
|
return Result{Path: entry.Path, Status: StatusError, Message: err.Error()}
|
||||||
|
}
|
||||||
|
defer f.Close()
|
||||||
|
|
||||||
|
h := sha256.New()
|
||||||
|
n, err := io.Copy(h, f)
|
||||||
|
if err != nil {
|
||||||
|
return Result{Path: entry.Path, Status: StatusError, Message: err.Error()}
|
||||||
|
}
|
||||||
|
*checkedBytes += n
|
||||||
|
|
||||||
|
// Encode as multihash and compare
|
||||||
|
computed, err := multihash.Encode(h.Sum(nil), multihash.SHA2_256)
|
||||||
|
if err != nil {
|
||||||
|
return Result{Path: entry.Path, Status: StatusError, Message: err.Error()}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Check against all hashes in manifest (at least one must match)
|
||||||
|
for _, hash := range entry.Hashes {
|
||||||
|
if bytes.Equal(computed, hash.MultiHash) {
|
||||||
|
return Result{Path: entry.Path, Status: StatusOK}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return Result{Path: entry.Path, Status: StatusHashMismatch, Message: "hash mismatch"}
|
||||||
|
}
|
||||||
|
|
||||||
|
// FindExtraFiles walks the filesystem and reports files not in the manifest.
|
||||||
|
// Results are sent to the results channel. The channel is closed when done.
|
||||||
|
func (c *Checker) FindExtraFiles(ctx context.Context, results chan<- Result) error {
|
||||||
|
if results != nil {
|
||||||
|
defer close(results)
|
||||||
|
}
|
||||||
|
|
||||||
|
return afero.Walk(c.fs, c.basePath, func(path string, info os.FileInfo, err error) error {
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
select {
|
||||||
|
case <-ctx.Done():
|
||||||
|
return ctx.Err()
|
||||||
|
default:
|
||||||
|
}
|
||||||
|
|
||||||
|
// Skip directories
|
||||||
|
if info.IsDir() {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// Get relative path
|
||||||
|
relPath, err := filepath.Rel(c.basePath, path)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
// Check if path is in manifest
|
||||||
|
if _, exists := c.manifestPaths[relPath]; !exists {
|
||||||
|
if results != nil {
|
||||||
|
results <- Result{
|
||||||
|
Path: relPath,
|
||||||
|
Status: StatusExtra,
|
||||||
|
Message: "not in manifest",
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return nil
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
// sendCheckStatus sends a status update without blocking.
|
||||||
|
func sendCheckStatus(ch chan<- CheckStatus, status CheckStatus) {
|
||||||
|
if ch == nil {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
select {
|
||||||
|
case ch <- status:
|
||||||
|
default:
|
||||||
|
}
|
||||||
|
}
|
||||||
373
internal/scanner/scanner.go
Normal file
373
internal/scanner/scanner.go
Normal file
@ -0,0 +1,373 @@
|
|||||||
|
package scanner
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"io"
|
||||||
|
"io/fs"
|
||||||
|
"path"
|
||||||
|
"path/filepath"
|
||||||
|
"strings"
|
||||||
|
"sync"
|
||||||
|
"time"
|
||||||
|
|
||||||
|
"github.com/spf13/afero"
|
||||||
|
"sneak.berlin/go/mfer/mfer"
|
||||||
|
)
|
||||||
|
|
||||||
|
// Phase 1: Enumeration
|
||||||
|
// ---------------------
|
||||||
|
// Walking directories and calling stat() on files to collect metadata.
|
||||||
|
// Builds the list of files to be scanned. Relatively fast (metadata only).
|
||||||
|
|
||||||
|
// EnumerateStatus contains progress information for the enumeration phase.
// Both counters are cumulative over every file the Scanner currently holds,
// not just the files found by the current call.
type EnumerateStatus struct {
	FilesFound int64 // Number of files discovered so far
	BytesFound int64 // Total size of discovered files (from stat)
}
|
||||||
|
|
||||||
|
// Phase 2: Scan (ToManifest)
|
||||||
|
// --------------------------
|
||||||
|
// Reading file contents and computing hashes for manifest generation.
|
||||||
|
// This is the expensive phase that reads all file data.
|
||||||
|
|
||||||
|
// ScanStatus contains progress information for the scan phase.
// ToManifest sends snapshots of this struct on its progress channel.
type ScanStatus struct {
	TotalFiles   int64 // Total number of files to scan
	ScannedFiles int64 // Number of files scanned so far
	TotalBytes   int64 // Total bytes to read (sum of all file sizes)
	ScannedBytes int64 // Bytes read so far
	// BytesPerSec is the throughput rate. ToManifest computes it as bytes
	// read so far divided by seconds elapsed since the scan started, i.e. an
	// average over the whole scan.
	BytesPerSec float64
}
|
||||||
|
|
||||||
|
// Options configures scanner behavior.
// The zero value is valid: nothing is ignored and the OS filesystem is used.
type Options struct {
	// IgnoreDotfiles skips files and directories starting with a dot.
	IgnoreDotfiles bool
	// FollowSymLinks resolves symlinks instead of skipping them.
	// NOTE(review): no scanner code visible in this file reads this field —
	// confirm whether it is implemented elsewhere.
	FollowSymLinks bool
	// Fs is the filesystem to use, defaults to OsFs if nil.
	Fs afero.Fs
}
|
||||||
|
|
||||||
|
// FileEntry represents a file that has been enumerated.
// Entries are created during enumeration from stat metadata only; file
// contents are not read until ToManifest.
type FileEntry struct {
	Path    string    // Relative path (used in manifest)
	AbsPath string    // Absolute path (used for reading file content)
	Size    int64     // File size in bytes
	Mtime   time.Time // Last modification time
	// Ctime is the creation time (platform-dependent). The enumeration code
	// in this file does not populate it, so it is the zero time.
	Ctime time.Time
}
|
||||||
|
|
||||||
|
// Scanner accumulates files and generates manifests from them.
// Create one with New or NewWithOptions.
type Scanner struct {
	// mu guards files.
	mu sync.RWMutex
	// files is the list of enumerated entries, in discovery order.
	files []*FileEntry
	// options is never nil after construction.
	options *Options
	// fs is the filesystem used to stat and open files.
	fs afero.Fs
}
|
||||||
|
|
||||||
|
// New creates a new Scanner with default options.
|
||||||
|
func New() *Scanner {
|
||||||
|
return NewWithOptions(nil)
|
||||||
|
}
|
||||||
|
|
||||||
|
// NewWithOptions creates a new Scanner with the given options.
|
||||||
|
func NewWithOptions(opts *Options) *Scanner {
|
||||||
|
if opts == nil {
|
||||||
|
opts = &Options{}
|
||||||
|
}
|
||||||
|
fs := opts.Fs
|
||||||
|
if fs == nil {
|
||||||
|
fs = afero.NewOsFs()
|
||||||
|
}
|
||||||
|
return &Scanner{
|
||||||
|
files: make([]*FileEntry, 0),
|
||||||
|
options: opts,
|
||||||
|
fs: fs,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// EnumerateFile adds a single file to the scanner, calling stat() to get metadata.
|
||||||
|
func (s *Scanner) EnumerateFile(filePath string) error {
|
||||||
|
abs, err := filepath.Abs(filePath)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
info, err := s.fs.Stat(abs)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
// For single files, use the filename as the relative path
|
||||||
|
basePath := filepath.Dir(abs)
|
||||||
|
return s.enumerateFileWithInfo(filepath.Base(abs), basePath, info, nil)
|
||||||
|
}
|
||||||
|
|
||||||
|
// EnumeratePath walks a directory path and adds all files to the scanner.
|
||||||
|
// If progress is non-nil, status updates are sent as files are discovered.
|
||||||
|
// The progress channel is closed when the method returns.
|
||||||
|
func (s *Scanner) EnumeratePath(inputPath string, progress chan<- EnumerateStatus) error {
|
||||||
|
if progress != nil {
|
||||||
|
defer close(progress)
|
||||||
|
}
|
||||||
|
abs, err := filepath.Abs(inputPath)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
afs := afero.NewReadOnlyFs(afero.NewBasePathFs(s.fs, abs))
|
||||||
|
return s.enumerateFS(afs, abs, progress)
|
||||||
|
}
|
||||||
|
|
||||||
|
// EnumeratePaths walks multiple directory paths and adds all files to the scanner.
|
||||||
|
// If progress is non-nil, status updates are sent as files are discovered.
|
||||||
|
// The progress channel is closed when the method returns.
|
||||||
|
func (s *Scanner) EnumeratePaths(progress chan<- EnumerateStatus, inputPaths ...string) error {
|
||||||
|
if progress != nil {
|
||||||
|
defer close(progress)
|
||||||
|
}
|
||||||
|
for _, p := range inputPaths {
|
||||||
|
abs, err := filepath.Abs(p)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
afs := afero.NewReadOnlyFs(afero.NewBasePathFs(s.fs, abs))
|
||||||
|
if err := s.enumerateFS(afs, abs, progress); err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// EnumerateFS walks an afero filesystem and adds all files to the scanner.
|
||||||
|
// If progress is non-nil, status updates are sent as files are discovered.
|
||||||
|
// The progress channel is closed when the method returns.
|
||||||
|
// basePath is used to compute absolute paths for file reading.
|
||||||
|
func (s *Scanner) EnumerateFS(afs afero.Fs, basePath string, progress chan<- EnumerateStatus) error {
|
||||||
|
if progress != nil {
|
||||||
|
defer close(progress)
|
||||||
|
}
|
||||||
|
return s.enumerateFS(afs, basePath, progress)
|
||||||
|
}
|
||||||
|
|
||||||
|
// enumerateFS is the internal implementation that doesn't close the progress channel.
|
||||||
|
func (s *Scanner) enumerateFS(afs afero.Fs, basePath string, progress chan<- EnumerateStatus) error {
|
||||||
|
return afero.Walk(afs, "/", func(p string, info fs.FileInfo, err error) error {
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
if s.options.IgnoreDotfiles && pathIsHidden(p) {
|
||||||
|
if info.IsDir() {
|
||||||
|
return filepath.SkipDir
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
return s.enumerateFileWithInfo(p, basePath, info, progress)
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
// enumerateFileWithInfo adds a file with pre-existing fs.FileInfo.
|
||||||
|
func (s *Scanner) enumerateFileWithInfo(filePath string, basePath string, info fs.FileInfo, progress chan<- EnumerateStatus) error {
|
||||||
|
if info.IsDir() {
|
||||||
|
// Manifests contain only files, directories are implied
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// Clean the path - remove leading slash if present
|
||||||
|
cleanPath := filePath
|
||||||
|
if len(cleanPath) > 0 && cleanPath[0] == '/' {
|
||||||
|
cleanPath = cleanPath[1:]
|
||||||
|
}
|
||||||
|
|
||||||
|
// Compute absolute path for file reading
|
||||||
|
absPath := filepath.Join(basePath, cleanPath)
|
||||||
|
|
||||||
|
entry := &FileEntry{
|
||||||
|
Path: cleanPath,
|
||||||
|
AbsPath: absPath,
|
||||||
|
Size: info.Size(),
|
||||||
|
Mtime: info.ModTime(),
|
||||||
|
// Note: Ctime not available from fs.FileInfo on all platforms
|
||||||
|
// Will need platform-specific code to extract it
|
||||||
|
}
|
||||||
|
|
||||||
|
s.mu.Lock()
|
||||||
|
s.files = append(s.files, entry)
|
||||||
|
filesFound := int64(len(s.files))
|
||||||
|
var bytesFound int64
|
||||||
|
for _, f := range s.files {
|
||||||
|
bytesFound += f.Size
|
||||||
|
}
|
||||||
|
s.mu.Unlock()
|
||||||
|
|
||||||
|
sendEnumerateStatus(progress, EnumerateStatus{
|
||||||
|
FilesFound: filesFound,
|
||||||
|
BytesFound: bytesFound,
|
||||||
|
})
|
||||||
|
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// Files returns a copy of all files added to the scanner.
|
||||||
|
func (s *Scanner) Files() []*FileEntry {
|
||||||
|
s.mu.RLock()
|
||||||
|
defer s.mu.RUnlock()
|
||||||
|
out := make([]*FileEntry, len(s.files))
|
||||||
|
copy(out, s.files)
|
||||||
|
return out
|
||||||
|
}
|
||||||
|
|
||||||
|
// FileCount returns the number of files in the scanner.
|
||||||
|
func (s *Scanner) FileCount() int64 {
|
||||||
|
s.mu.RLock()
|
||||||
|
defer s.mu.RUnlock()
|
||||||
|
return int64(len(s.files))
|
||||||
|
}
|
||||||
|
|
||||||
|
// TotalBytes returns the total size of all files in the scanner.
|
||||||
|
func (s *Scanner) TotalBytes() int64 {
|
||||||
|
s.mu.RLock()
|
||||||
|
defer s.mu.RUnlock()
|
||||||
|
var total int64
|
||||||
|
for _, f := range s.files {
|
||||||
|
total += f.Size
|
||||||
|
}
|
||||||
|
return total
|
||||||
|
}
|
||||||
|
|
||||||
|
// ToManifest reads all file contents, computes hashes, and generates a manifest.
// If progress is non-nil, status updates are sent approximately once per second.
// The progress channel is closed when the method returns.
// The manifest is written to the provided io.Writer.
//
// The file list is snapshotted under the read lock at the start, so files
// enumerated after ToManifest begins are not included. Files are processed
// in order; the first open or AddFile error aborts the scan and is returned.
// ctx is checked before each file and ctx.Err() is returned once it is done.
// Progress sends never block: an update is dropped if the receiver is not
// ready.
func (s *Scanner) ToManifest(ctx context.Context, w io.Writer, progress chan<- ScanStatus) error {
	if progress != nil {
		defer close(progress)
	}

	// Snapshot the file list and its totals so the lock is not held while
	// file contents are read.
	s.mu.RLock()
	files := make([]*FileEntry, len(s.files))
	copy(files, s.files)
	totalFiles := int64(len(files))
	var totalBytes int64
	for _, f := range files {
		totalBytes += f.Size
	}
	s.mu.RUnlock()

	builder := mfer.NewBuilder()

	var scannedFiles int64
	var scannedBytes int64
	lastProgressTime := time.Now()
	startTime := time.Now()

	for _, entry := range files {
		// Check for cancellation
		select {
		case <-ctx.Done():
			return ctx.Err()
		default:
		}

		// Open file
		f, err := s.fs.Open(entry.AbsPath)
		if err != nil {
			return err
		}

		// Add to manifest with progress callback
		// NOTE(review): presumably AddFile invokes the callback with the
		// number of bytes read so far from the current file — confirm
		// against mfer.Builder.AddFile.
		bytesRead, err := builder.AddFile(
			entry.Path,
			entry.Size,
			entry.Mtime,
			f,
			func(fileBytes int64) {
				// Send progress at most once per second
				now := time.Now()
				if progress != nil && now.Sub(lastProgressTime) >= time.Second {
					elapsed := now.Sub(startTime).Seconds()
					// Bytes from completed files plus the callback's count
					// for the file in flight.
					currentBytes := scannedBytes + fileBytes
					// Average rate since the scan started.
					var rate float64
					if elapsed > 0 {
						rate = float64(currentBytes) / elapsed
					}
					sendScanStatus(progress, ScanStatus{
						TotalFiles:   totalFiles,
						ScannedFiles: scannedFiles,
						TotalBytes:   totalBytes,
						ScannedBytes: currentBytes,
						BytesPerSec:  rate,
					})
					lastProgressTime = now
				}
			},
		)
		// Closed explicitly rather than deferred so that open handles do
		// not accumulate across loop iterations.
		f.Close()

		if err != nil {
			return err
		}

		scannedFiles++
		scannedBytes += bytesRead
	}

	// Send final progress
	if progress != nil {
		elapsed := time.Since(startTime).Seconds()
		var rate float64
		if elapsed > 0 {
			rate = float64(scannedBytes) / elapsed
		}
		sendScanStatus(progress, ScanStatus{
			TotalFiles:   totalFiles,
			ScannedFiles: scannedFiles,
			TotalBytes:   totalBytes,
			ScannedBytes: scannedBytes,
			BytesPerSec:  rate,
		})
	}

	// Build and write manifest
	return builder.Build(w)
}
|
||||||
|
|
||||||
|
// pathIsHidden reports whether any component of the cleaned, slash-separated
// path p begins with a dot. Because the path is cleaned first, the empty
// path and "." both count as hidden.
func pathIsHidden(p string) bool {
	cleaned := path.Clean(p)
	for _, component := range strings.Split(cleaned, "/") {
		if strings.HasPrefix(component, ".") {
			return true
		}
	}
	return false
}
|
||||||
|
|
||||||
|
// sendEnumerateStatus sends a status update without blocking.
|
||||||
|
// If the channel is full, the update is dropped.
|
||||||
|
func sendEnumerateStatus(ch chan<- EnumerateStatus, status EnumerateStatus) {
|
||||||
|
if ch == nil {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
select {
|
||||||
|
case ch <- status:
|
||||||
|
default:
|
||||||
|
// Channel full, drop this update
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// sendScanStatus sends a status update without blocking.
|
||||||
|
// If the channel is full, the update is dropped.
|
||||||
|
func sendScanStatus(ch chan<- ScanStatus, status ScanStatus) {
|
||||||
|
if ch == nil {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
select {
|
||||||
|
case ch <- status:
|
||||||
|
default:
|
||||||
|
// Channel full, drop this update
|
||||||
|
}
|
||||||
|
}
|
||||||
@ -3,48 +3,14 @@ package mfer
|
|||||||
import (
|
import (
|
||||||
"crypto/sha256"
|
"crypto/sha256"
|
||||||
"errors"
|
"errors"
|
||||||
"fmt"
|
|
||||||
"io"
|
"io"
|
||||||
"sort"
|
"sort"
|
||||||
"strings"
|
|
||||||
"sync"
|
"sync"
|
||||||
"time"
|
"time"
|
||||||
"unicode/utf8"
|
|
||||||
|
|
||||||
"github.com/multiformats/go-multihash"
|
"github.com/multiformats/go-multihash"
|
||||||
)
|
)
|
||||||
|
|
||||||
// ValidatePath reports an error when p violates a manifest path invariant.
// Checks run in this order and the first failure is returned:
//   - p must not be empty
//   - p must be valid UTF-8
//   - p must not contain a backslash (forward slashes only)
//   - p must be relative (no leading "/")
//   - no "/"-separated segment may be empty (no "//", no trailing "/")
//   - no segment may be ".."
//
// It returns nil when p satisfies all of them.
func ValidatePath(p string) error {
	switch {
	case p == "":
		return errors.New("path cannot be empty")
	case !utf8.ValidString(p):
		return fmt.Errorf("path %q is not valid UTF-8", p)
	case strings.ContainsRune(p, '\\'):
		return fmt.Errorf("path %q contains backslash; use forward slashes only", p)
	case strings.HasPrefix(p, "/"):
		return fmt.Errorf("path %q is absolute; must be relative", p)
	}

	for _, segment := range strings.Split(p, "/") {
		switch segment {
		case "":
			return fmt.Errorf("path %q contains empty segment", p)
		case "..":
			return fmt.Errorf("path %q contains '..' segment", p)
		}
	}
	return nil
}
|
|
||||||
|
|
||||||
// RelFilePath represents a relative file path within a manifest.
|
// RelFilePath represents a relative file path within a manifest.
|
||||||
type RelFilePath string
|
type RelFilePath string
|
||||||
|
|
||||||
@ -131,10 +97,6 @@ func (b *Builder) AddFile(
|
|||||||
reader io.Reader,
|
reader io.Reader,
|
||||||
progress chan<- FileHashProgress,
|
progress chan<- FileHashProgress,
|
||||||
) (FileSize, error) {
|
) (FileSize, error) {
|
||||||
if err := ValidatePath(string(path)); err != nil {
|
|
||||||
return 0, err
|
|
||||||
}
|
|
||||||
|
|
||||||
// Create hash writer
|
// Create hash writer
|
||||||
h := sha256.New()
|
h := sha256.New()
|
||||||
|
|
||||||
@ -157,11 +119,6 @@ func (b *Builder) AddFile(
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Verify actual bytes read matches declared size
|
|
||||||
if totalRead != size {
|
|
||||||
return totalRead, fmt.Errorf("size mismatch for %q: declared %d bytes but read %d bytes", path, size, totalRead)
|
|
||||||
}
|
|
||||||
|
|
||||||
// Encode hash as multihash (SHA2-256)
|
// Encode hash as multihash (SHA2-256)
|
||||||
mh, err := multihash.Encode(h.Sum(nil), multihash.SHA2_256)
|
mh, err := multihash.Encode(h.Sum(nil), multihash.SHA2_256)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
@ -207,8 +164,8 @@ func (b *Builder) FileCount() int {
|
|||||||
// This is useful when the hash is already known (e.g., from an existing manifest).
|
// This is useful when the hash is already known (e.g., from an existing manifest).
|
||||||
// Returns an error if path is empty, size is negative, or hash is nil/empty.
|
// Returns an error if path is empty, size is negative, or hash is nil/empty.
|
||||||
func (b *Builder) AddFileWithHash(path RelFilePath, size FileSize, mtime ModTime, hash Multihash) error {
|
func (b *Builder) AddFileWithHash(path RelFilePath, size FileSize, mtime ModTime, hash Multihash) error {
|
||||||
if err := ValidatePath(string(path)); err != nil {
|
if path == "" {
|
||||||
return err
|
return errors.New("path cannot be empty")
|
||||||
}
|
}
|
||||||
if size < 0 {
|
if size < 0 {
|
||||||
return errors.New("size cannot be negative")
|
return errors.New("size cannot be negative")
|
||||||
|
|||||||
@ -3,9 +3,4 @@ package mfer
|
|||||||
const (
|
const (
|
||||||
Version = "0.1.0"
|
Version = "0.1.0"
|
||||||
ReleaseDate = "2025-12-17"
|
ReleaseDate = "2025-12-17"
|
||||||
|
|
||||||
// MaxDecompressedSize is the maximum allowed size of decompressed manifest
|
|
||||||
// data (256 MB). This prevents decompression bombs from consuming excessive
|
|
||||||
// memory.
|
|
||||||
MaxDecompressedSize int64 = 256 * 1024 * 1024
|
|
||||||
)
|
)
|
||||||
|
|||||||
@ -76,20 +76,10 @@ func (m *manifest) deserializeInner() error {
|
|||||||
}
|
}
|
||||||
defer zr.Close()
|
defer zr.Close()
|
||||||
|
|
||||||
// Limit decompressed size to prevent decompression bombs.
|
dat, err := io.ReadAll(zr)
|
||||||
// Use declared size + 1 byte to detect overflow, capped at MaxDecompressedSize.
|
|
||||||
maxSize := MaxDecompressedSize
|
|
||||||
if m.pbOuter.Size > 0 && m.pbOuter.Size < int64(maxSize) {
|
|
||||||
maxSize = int64(m.pbOuter.Size) + 1
|
|
||||||
}
|
|
||||||
limitedReader := io.LimitReader(zr, maxSize)
|
|
||||||
dat, err := io.ReadAll(limitedReader)
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
if int64(len(dat)) >= MaxDecompressedSize {
|
|
||||||
return fmt.Errorf("decompressed data exceeds maximum allowed size of %d bytes", MaxDecompressedSize)
|
|
||||||
}
|
|
||||||
|
|
||||||
isize := len(dat)
|
isize := len(dat)
|
||||||
if int64(isize) != m.pbOuter.Size {
|
if int64(isize) != m.pbOuter.Size {
|
||||||
|
|||||||
@ -46,9 +46,6 @@ message MFFileOuter {
|
|||||||
|
|
||||||
message MFFilePath {
|
message MFFilePath {
|
||||||
// required attributes:
|
// required attributes:
|
||||||
// Path invariants: must be valid UTF-8, use forward slashes only,
|
|
||||||
// be relative (no leading /), contain no ".." segments, and no
|
|
||||||
// empty segments (no "//").
|
|
||||||
string path = 1;
|
string path = 1;
|
||||||
int64 size = 2;
|
int64 size = 2;
|
||||||
|
|
||||||
|
|||||||
Loading…
Reference in New Issue
Block a user