1
0
forked from sneak/mfer
mfer/mfer/scanner.go
sneak 778999a285 Add GPG signing support for manifest generation
- Add --sign-key flag and MFER_SIGN_KEY env var to gen and freshen commands
- Sign inner message multihash with GPG detached signature
- Include signer fingerprint and public key in outer wrapper
- Add comprehensive tests with temporary GPG keyring
- Increase test timeout to 10s for GPG key generation
2025-12-18 02:12:54 -08:00

428 lines
11 KiB
Go

package mfer
import (
"context"
"io"
"io/fs"
"path"
"path/filepath"
"strings"
"sync"
"time"
"github.com/dustin/go-humanize"
"github.com/spf13/afero"
"sneak.berlin/go/mfer/internal/log"
)
// Phase 1: Enumeration
// ---------------------
// Walking directories and calling stat() on files to collect metadata.
// Builds the list of files to be scanned. Relatively fast (metadata only).

// EnumerateStatus contains progress information for the enumeration phase.
// One value is offered on the progress channel each time a file is added to
// the scanner; both fields are running totals at that moment. Updates are
// sent without blocking, so a slow receiver may miss intermediate values.
type EnumerateStatus struct {
	FilesFound FileCount // Number of files discovered so far
	BytesFound FileSize  // Total size of discovered files (from stat)
}
// Phase 2: Scan (ToManifest)
// --------------------------
// Reading file contents and computing hashes for manifest generation.
// This is the expensive phase that reads all file data.

// ScanStatus contains progress information for the scan phase.
// Updates are sent without blocking, so a slow receiver may miss
// intermediate values.
type ScanStatus struct {
	TotalFiles   FileCount     // Total number of files to scan
	ScannedFiles FileCount     // Number of files fully scanned so far
	TotalBytes   FileSize      // Total bytes to read (sum of all enumerated file sizes)
	ScannedBytes FileSize      // Bytes read so far, including the file currently being read
	BytesPerSec  float64       // Average throughput since the scan started (bytes read / elapsed seconds)
	ETA          time.Duration // Estimated time to completion; 0 in the final update
}
// ScannerOptions configures scanner behavior.
// Passing a nil *ScannerOptions to NewScannerWithOptions is equivalent to
// passing the zero value.
type ScannerOptions struct {
	// IncludeDotfiles includes files and directories starting with a dot.
	// When false (the default) they are skipped during directory walks.
	IncludeDotfiles bool
	// FollowSymLinks resolves symlinks instead of skipping them. Symlinks
	// that cannot be resolved or that point to a directory are skipped
	// either way.
	FollowSymLinks bool
	// Fs is the filesystem to use; defaults to OsFs if nil.
	Fs afero.Fs
	// SigningOptions holds the GPG signing options (nil = no signing).
	SigningOptions *SigningOptions
}
// FileEntry represents a file that has been enumerated.
// Entries are created during enumeration from stat metadata only; file
// contents are not read until ToManifest runs.
type FileEntry struct {
	// Path is the relative path (used in manifest), without a leading slash.
	Path RelFilePath
	// AbsPath is the absolute path (used for reading file content). For a
	// followed symlink this is the resolved target, while Path keeps the
	// link's own location.
	AbsPath AbsFilePath
	// Size is the file size in bytes as reported at enumeration time.
	Size FileSize
	// Mtime is the last modification time.
	Mtime ModTime
	// Ctime is the creation time (platform-dependent).
	// NOTE(review): nothing in this file populates Ctime, so it stays the
	// zero time.Time unless it is set elsewhere — confirm before relying on it.
	Ctime time.Time
}
// Scanner accumulates files and generates manifests from them.
// Files are added by the Enumerate* methods and consumed by ToManifest.
type Scanner struct {
	// mu guards files and totalBytes.
	mu sync.RWMutex
	// files holds the enumerated entries in the order they were discovered.
	files []*FileEntry
	// totalBytes is the cached sum of all file sizes.
	totalBytes FileSize
	// options is set once by NewScannerWithOptions and is never nil afterwards.
	options *ScannerOptions
	// fs is the filesystem used for stat and open calls; set at construction.
	fs afero.Fs
}
// NewScanner returns a Scanner that uses the default options: dotfiles are
// excluded, symlinks are skipped, the OS filesystem is used and no signing
// is configured.
func NewScanner() *Scanner {
	var defaults *ScannerOptions // nil selects the defaults
	return NewScannerWithOptions(defaults)
}
// NewScannerWithOptions builds a Scanner from opts.
// A nil opts is replaced by the zero-value ScannerOptions, and a nil opts.Fs
// falls back to the operating system filesystem.
func NewScannerWithOptions(opts *ScannerOptions) *Scanner {
	if opts == nil {
		opts = new(ScannerOptions)
	}
	sc := &Scanner{
		files:   make([]*FileEntry, 0),
		options: opts,
		fs:      opts.Fs,
	}
	if sc.fs == nil {
		// No filesystem supplied: operate on the real one.
		sc.fs = afero.NewOsFs()
	}
	return sc
}
// EnumerateFile stats a single file and records it in the scanner.
// The entry's manifest path is the bare file name; its containing directory
// becomes the base used to rebuild the absolute path. No progress is reported.
// Any error from resolving the absolute path or from stat is returned as-is.
func (s *Scanner) EnumerateFile(filePath string) error {
	absolute, err := filepath.Abs(filePath)
	if err != nil {
		return err
	}
	fi, err := s.fs.Stat(absolute)
	if err != nil {
		return err
	}
	// Split into directory and file name so the name alone is the relative path.
	parent, name := filepath.Dir(absolute), filepath.Base(absolute)
	return s.enumerateFileWithInfo(name, parent, fi, nil)
}
// EnumeratePath walks the directory tree rooted at inputPath and records
// every file it finds.
// When progress is non-nil it receives a status update per discovered file
// and is closed before the method returns, on success or on error.
func (s *Scanner) EnumeratePath(inputPath string, progress chan<- EnumerateStatus) error {
	defer func() {
		if progress != nil {
			close(progress)
		}
	}()
	root, err := filepath.Abs(inputPath)
	if err != nil {
		return err
	}
	// Re-root the filesystem at the input path and make it read-only, so the
	// walk sees paths relative to root.
	rooted := afero.NewBasePathFs(s.fs, root)
	return s.enumerateFS(afero.NewReadOnlyFs(rooted), root, progress)
}
// EnumeratePaths walks each of inputPaths in order and records every file
// found beneath them. The walk stops at the first error, which is returned.
// When progress is non-nil it receives a status update per discovered file
// and is closed before the method returns.
func (s *Scanner) EnumeratePaths(progress chan<- EnumerateStatus, inputPaths ...string) error {
	defer func() {
		if progress != nil {
			close(progress)
		}
	}()
	for i := range inputPaths {
		root, err := filepath.Abs(inputPaths[i])
		if err != nil {
			return err
		}
		// Each input gets its own read-only view rooted at that directory.
		view := afero.NewReadOnlyFs(afero.NewBasePathFs(s.fs, root))
		err = s.enumerateFS(view, root, progress)
		if err != nil {
			return err
		}
	}
	return nil
}
// EnumerateFS walks the given afero filesystem from its root and records
// every file found.
// basePath is joined with each walked path to form the absolute path later
// used for reading the file.
// When progress is non-nil it receives a status update per discovered file
// and is closed before the method returns.
func (s *Scanner) EnumerateFS(afs afero.Fs, basePath string, progress chan<- EnumerateStatus) error {
	defer func() {
		if progress != nil {
			close(progress)
		}
	}()
	return s.enumerateFS(afs, basePath, progress)
}
// enumerateFS walks afs from "/" and records each visited entry.
// Unlike the exported wrappers it leaves the progress channel open, so it
// can be called several times with the same channel.
// Hidden paths are skipped unless IncludeDotfiles is set; a hidden directory
// is pruned as a whole. A walk error aborts the walk and is returned.
func (s *Scanner) enumerateFS(afs afero.Fs, basePath string, progress chan<- EnumerateStatus) error {
	visit := func(name string, fi fs.FileInfo, walkErr error) error {
		switch {
		case walkErr != nil:
			return walkErr
		case s.options.IncludeDotfiles || !IsHiddenPath(name):
			return s.enumerateFileWithInfo(name, basePath, fi, progress)
		case fi.IsDir():
			// Hidden directory: do not descend into it.
			return filepath.SkipDir
		default:
			// Hidden file: ignore it and keep walking.
			return nil
		}
	}
	return afero.Walk(afs, "/", visit)
}
// enumerateFileWithInfo adds a file with pre-existing fs.FileInfo.
//
// filePath is the path as seen by the walker (a single leading slash is
// stripped to form the manifest path) and basePath is the directory it is
// relative to. Only regular files are recorded:
//
//   - directories are ignored (manifests contain only files);
//   - symlinks are ignored unless FollowSymLinks is set, in which case the
//     link is resolved and the target's metadata is used; broken links and
//     links to directories are ignored;
//   - anything else that is not a regular file (socket, named pipe, device
//     node, ...) is ignored, because it cannot be read and hashed like a file.
//
// After the entry is appended, a status update with the running totals is
// offered on progress (nil progress is allowed). The function always
// returns nil; skipped entries are not reported as errors.
func (s *Scanner) enumerateFileWithInfo(filePath string, basePath string, info fs.FileInfo, progress chan<- EnumerateStatus) error {
	if info.IsDir() {
		// Manifests contain only files, directories are implied
		return nil
	}

	// Clean the path - remove leading slash if present
	cleanPath := strings.TrimPrefix(filePath, "/")

	// Compute absolute path for file reading
	absPath := filepath.Join(basePath, cleanPath)

	// Handle symlinks
	if info.Mode()&fs.ModeSymlink != 0 {
		if !s.options.FollowSymLinks {
			// Skip symlinks when not following them
			return nil
		}
		// Resolve symlink to get real file info.
		// NOTE(review): EvalSymlinks consults the OS filesystem directly,
		// not s.fs — confirm this is intended for non-OS filesystems.
		realPath, err := filepath.EvalSymlinks(absPath)
		if err != nil {
			// Skip broken symlinks
			return nil
		}
		realInfo, err := s.fs.Stat(realPath)
		if err != nil {
			// Target vanished or is unreadable: treat like a broken link
			return nil
		}
		// Skip if symlink points to a directory
		if realInfo.IsDir() {
			return nil
		}
		// Use resolved path for reading, but keep original path in manifest
		absPath = realPath
		info = realInfo
	}

	// Sockets, named pipes and device nodes are not directories, but opening
	// or reading them later would fail or block, so only regular files are
	// recorded.
	if !info.Mode().IsRegular() {
		return nil
	}

	entry := &FileEntry{
		Path:    RelFilePath(cleanPath),
		AbsPath: AbsFilePath(absPath),
		Size:    FileSize(info.Size()),
		Mtime:   ModTime(info.ModTime()),
		// Note: Ctime not available from fs.FileInfo on all platforms
		// Will need platform-specific code to extract it
	}

	// Take the totals while still holding the lock so the reported pair is
	// consistent with this append.
	s.mu.Lock()
	s.files = append(s.files, entry)
	s.totalBytes += entry.Size
	filesFound := FileCount(len(s.files))
	bytesFound := s.totalBytes
	s.mu.Unlock()

	sendEnumerateStatus(progress, EnumerateStatus{
		FilesFound: filesFound,
		BytesFound: bytesFound,
	})
	return nil
}
// Files returns a snapshot of the enumerated files.
// The returned slice is a fresh copy, so appending to or reordering it does
// not affect the scanner; the *FileEntry values themselves are shared.
func (s *Scanner) Files() []*FileEntry {
	s.mu.RLock()
	defer s.mu.RUnlock()
	snapshot := make([]*FileEntry, 0, len(s.files))
	return append(snapshot, s.files...)
}
// FileCount reports how many files have been enumerated so far.
func (s *Scanner) FileCount() FileCount {
	s.mu.RLock()
	n := len(s.files)
	s.mu.RUnlock()
	return FileCount(n)
}
// TotalBytes reports the combined size of all enumerated files, using the
// running total maintained during enumeration.
func (s *Scanner) TotalBytes() FileSize {
	s.mu.RLock()
	total := s.totalBytes
	s.mu.RUnlock()
	return total
}
// ToManifest reads all file contents, computes hashes, and generates a manifest.
//
// The file list is snapshotted under the read lock, then each file is opened
// through the scanner's filesystem and handed to a manifest builder. ctx is
// checked before each file; once it is done, ctx.Err() is returned and no
// manifest is written. The first error from opening a file or from the
// builder aborts the scan and is returned.
//
// If progress is non-nil, status updates are offered at most once per second
// while file data is being read, plus one final update (ETA 0) after the
// last file. Sends never block: an update is dropped if the channel is not
// ready to receive. The progress channel is closed when the method returns.
// The manifest is written to the provided io.Writer.
func (s *Scanner) ToManifest(ctx context.Context, w io.Writer, progress chan<- ScanStatus) error {
	if progress != nil {
		defer close(progress)
	}

	// Work on a snapshot so the lock is not held while reading file data.
	s.mu.RLock()
	files := make([]*FileEntry, len(s.files))
	copy(files, s.files)
	totalFiles := FileCount(len(files))
	var totalBytes FileSize
	for _, f := range files {
		totalBytes += f.Size
	}
	s.mu.RUnlock()

	builder := NewBuilder()
	if s.options.SigningOptions != nil {
		builder.SetSigningOptions(s.options.SigningOptions)
	}

	var scannedFiles FileCount
	var scannedBytes FileSize
	lastProgressTime := time.Now()
	startTime := time.Now()

	for _, entry := range files {
		// Check for cancellation
		if err := ctx.Err(); err != nil {
			return err
		}

		// Open file
		f, err := s.fs.Open(string(entry.AbsPath))
		if err != nil {
			return err
		}

		// Create progress channel for this file. The goroutine below turns
		// per-file byte counts into overall ScanStatus updates; it is always
		// joined via wg before the loop touches the shared counters again.
		var fileProgress chan FileHashProgress
		var wg sync.WaitGroup
		if progress != nil {
			fileProgress = make(chan FileHashProgress, 1)
			wg.Add(1)
			go func(baseScannedBytes FileSize) {
				defer wg.Done()
				for p := range fileProgress {
					// Send progress at most once per second
					now := time.Now()
					if now.Sub(lastProgressTime) < time.Second {
						continue
					}
					elapsed := now.Sub(startTime).Seconds()
					currentBytes := baseScannedBytes + p.BytesRead
					var rate float64
					var eta time.Duration
					if elapsed > 0 && currentBytes > 0 {
						rate = float64(currentBytes) / elapsed
						// A file may have grown since it was enumerated, in
						// which case more bytes were read than expected.
						// Only estimate while bytes remain, so the ETA never
						// goes negative or wraps around.
						if rate > 0 && currentBytes < totalBytes {
							remainingBytes := totalBytes - currentBytes
							eta = time.Duration(float64(remainingBytes)/rate) * time.Second
						}
					}
					sendScanStatus(progress, ScanStatus{
						TotalFiles:   totalFiles,
						ScannedFiles: scannedFiles,
						TotalBytes:   totalBytes,
						ScannedBytes: currentBytes,
						BytesPerSec:  rate,
						ETA:          eta,
					})
					lastProgressTime = now
				}
			}(scannedBytes)
		}

		// Add to manifest with progress channel
		bytesRead, err := builder.AddFile(
			entry.Path,
			entry.Size,
			entry.Mtime,
			f,
			fileProgress,
		)
		// The handle was only read from, so a Close error is not actionable.
		_ = f.Close()

		// Close channel and wait for goroutine to finish
		if fileProgress != nil {
			close(fileProgress)
			wg.Wait()
		}

		if err != nil {
			return err
		}

		log.Verbosef("+ %s (%s)", entry.Path, humanize.IBytes(uint64(bytesRead)))
		scannedFiles++
		scannedBytes += bytesRead
	}

	// Send final progress (ETA is 0 at completion)
	if progress != nil {
		elapsed := time.Since(startTime).Seconds()
		var rate float64
		if elapsed > 0 {
			rate = float64(scannedBytes) / elapsed
		}
		sendScanStatus(progress, ScanStatus{
			TotalFiles:   totalFiles,
			ScannedFiles: scannedFiles,
			TotalBytes:   totalBytes,
			ScannedBytes: scannedBytes,
			BytesPerSec:  rate,
			ETA:          0,
		})
	}

	// Build and write manifest
	return builder.Build(w)
}
// IsHiddenPath returns true if the path or any of its parent directories
// start with a dot (hidden files/directories).
// The path should use forward slashes.
//
// The path is cleaned first, so "./foo" is treated as "foo". The special
// elements "." (current directory, which is also what an empty path cleans
// to) and ".." (parent directory) are references rather than names and are
// never considered hidden on their own: IsHiddenPath(".") and
// IsHiddenPath("../foo") are false, while IsHiddenPath("../.foo") is true.
func IsHiddenPath(p string) bool {
	for _, elem := range strings.Split(path.Clean(p), "/") {
		if elem == "." || elem == ".." {
			// Directory references, not dotfiles.
			continue
		}
		if strings.HasPrefix(elem, ".") {
			return true
		}
	}
	return false
}
// sendEnumerateStatus offers status on ch without ever blocking.
// A nil channel is ignored, and if the channel cannot accept the value
// right now the update is simply discarded.
func sendEnumerateStatus(ch chan<- EnumerateStatus, status EnumerateStatus) {
	if ch != nil {
		select {
		case ch <- status:
			// delivered
		default:
			// Channel full, drop this update
		}
	}
}
// sendScanStatus offers status on ch without ever blocking.
// A nil channel is ignored, and if the channel cannot accept the value
// right now the update is simply discarded.
func sendScanStatus(ch chan<- ScanStatus, status ScanStatus) {
	if ch != nil {
		select {
		case ch <- status:
			// delivered
		default:
			// Channel full, drop this update
		}
	}
}