Documents: - cli: NO_COLOR, RunOptions fields, CLIApp, VersionString - checker: Result fields, Status constants, CheckStatus fields - scanner: EnumerateStatus, ScanStatus, Options, FileEntry fields - log: Level alias, DisableStyling, Init, Info/Debug functions, verbosity helpers, GetLogger, GetLevel, WithError - mfer: ManifestScanOptions, New, NewFromPaths, NewFromFS, MAGIC
374 lines
9.6 KiB
Go
374 lines
9.6 KiB
Go
package scanner
|
|
|
|
import (
|
|
"context"
|
|
"io"
|
|
"io/fs"
|
|
"path"
|
|
"path/filepath"
|
|
"strings"
|
|
"sync"
|
|
"time"
|
|
|
|
"github.com/spf13/afero"
|
|
"sneak.berlin/go/mfer/mfer"
|
|
)
|
|
|
|
// Phase 1: Enumeration
|
|
// ---------------------
|
|
// Walking directories and calling stat() on files to collect metadata.
|
|
// Builds the list of files to be scanned. Relatively fast (metadata only).
|
|
|
|
// EnumerateStatus is a progress snapshot reported during the enumeration
// phase.
type EnumerateStatus struct {
	// FilesFound is the number of files discovered so far.
	FilesFound int64

	// BytesFound is the combined size of the discovered files, taken from
	// their stat metadata.
	BytesFound int64
}
|
|
|
|
// Phase 2: Scan (ToManifest)
|
|
// --------------------------
|
|
// Reading file contents and computing hashes for manifest generation.
|
|
// This is the expensive phase that reads all file data.
|
|
|
|
// ScanStatus is a progress snapshot reported during the scan phase.
type ScanStatus struct {
	// TotalFiles is the number of files that will be scanned.
	TotalFiles int64

	// ScannedFiles is the number of files fully scanned so far.
	ScannedFiles int64

	// TotalBytes is the number of bytes to read: the sum of all file sizes.
	TotalBytes int64

	// ScannedBytes is the number of bytes read so far.
	ScannedBytes int64

	// BytesPerSec is the current throughput in bytes per second.
	BytesPerSec float64
}
|
|
|
|
// Options configures scanner behavior.
|
|
type Options struct {
|
|
IgnoreDotfiles bool // Skip files and directories starting with a dot
|
|
FollowSymLinks bool // Resolve symlinks instead of skipping them
|
|
Fs afero.Fs // Filesystem to use, defaults to OsFs if nil
|
|
}
|
|
|
|
// FileEntry describes one file that has been enumerated.
type FileEntry struct {
	// Path is the relative path; this is what appears in the manifest.
	Path string

	// AbsPath is the absolute path used to read the file's content.
	AbsPath string

	// Size is the file size in bytes.
	Size int64

	// Mtime is the last modification time.
	Mtime time.Time

	// Ctime is the creation time (platform-dependent).
	// NOTE(review): the enumeration code in this file does not set it, so
	// it stays at the zero time unless populated elsewhere.
	Ctime time.Time
}
|
|
|
|
// Scanner accumulates files and generates manifests from them.
|
|
type Scanner struct {
|
|
mu sync.RWMutex
|
|
files []*FileEntry
|
|
options *Options
|
|
fs afero.Fs
|
|
}
|
|
|
|
// New creates a new Scanner with default options.
|
|
func New() *Scanner {
|
|
return NewWithOptions(nil)
|
|
}
|
|
|
|
// NewWithOptions creates a new Scanner with the given options.
|
|
func NewWithOptions(opts *Options) *Scanner {
|
|
if opts == nil {
|
|
opts = &Options{}
|
|
}
|
|
fs := opts.Fs
|
|
if fs == nil {
|
|
fs = afero.NewOsFs()
|
|
}
|
|
return &Scanner{
|
|
files: make([]*FileEntry, 0),
|
|
options: opts,
|
|
fs: fs,
|
|
}
|
|
}
|
|
|
|
// EnumerateFile adds a single file to the scanner, calling stat() to get metadata.
|
|
func (s *Scanner) EnumerateFile(filePath string) error {
|
|
abs, err := filepath.Abs(filePath)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
info, err := s.fs.Stat(abs)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
// For single files, use the filename as the relative path
|
|
basePath := filepath.Dir(abs)
|
|
return s.enumerateFileWithInfo(filepath.Base(abs), basePath, info, nil)
|
|
}
|
|
|
|
// EnumeratePath walks a directory path and adds all files to the scanner.
|
|
// If progress is non-nil, status updates are sent as files are discovered.
|
|
// The progress channel is closed when the method returns.
|
|
func (s *Scanner) EnumeratePath(inputPath string, progress chan<- EnumerateStatus) error {
|
|
if progress != nil {
|
|
defer close(progress)
|
|
}
|
|
abs, err := filepath.Abs(inputPath)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
afs := afero.NewReadOnlyFs(afero.NewBasePathFs(s.fs, abs))
|
|
return s.enumerateFS(afs, abs, progress)
|
|
}
|
|
|
|
// EnumeratePaths walks multiple directory paths and adds all files to the scanner.
|
|
// If progress is non-nil, status updates are sent as files are discovered.
|
|
// The progress channel is closed when the method returns.
|
|
func (s *Scanner) EnumeratePaths(progress chan<- EnumerateStatus, inputPaths ...string) error {
|
|
if progress != nil {
|
|
defer close(progress)
|
|
}
|
|
for _, p := range inputPaths {
|
|
abs, err := filepath.Abs(p)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
afs := afero.NewReadOnlyFs(afero.NewBasePathFs(s.fs, abs))
|
|
if err := s.enumerateFS(afs, abs, progress); err != nil {
|
|
return err
|
|
}
|
|
}
|
|
return nil
|
|
}
|
|
|
|
// EnumerateFS walks an afero filesystem and adds all files to the scanner.
|
|
// If progress is non-nil, status updates are sent as files are discovered.
|
|
// The progress channel is closed when the method returns.
|
|
// basePath is used to compute absolute paths for file reading.
|
|
func (s *Scanner) EnumerateFS(afs afero.Fs, basePath string, progress chan<- EnumerateStatus) error {
|
|
if progress != nil {
|
|
defer close(progress)
|
|
}
|
|
return s.enumerateFS(afs, basePath, progress)
|
|
}
|
|
|
|
// enumerateFS is the internal implementation that doesn't close the progress channel.
|
|
func (s *Scanner) enumerateFS(afs afero.Fs, basePath string, progress chan<- EnumerateStatus) error {
|
|
return afero.Walk(afs, "/", func(p string, info fs.FileInfo, err error) error {
|
|
if err != nil {
|
|
return err
|
|
}
|
|
if s.options.IgnoreDotfiles && pathIsHidden(p) {
|
|
if info.IsDir() {
|
|
return filepath.SkipDir
|
|
}
|
|
return nil
|
|
}
|
|
return s.enumerateFileWithInfo(p, basePath, info, progress)
|
|
})
|
|
}
|
|
|
|
// enumerateFileWithInfo adds a file with pre-existing fs.FileInfo.
|
|
func (s *Scanner) enumerateFileWithInfo(filePath string, basePath string, info fs.FileInfo, progress chan<- EnumerateStatus) error {
|
|
if info.IsDir() {
|
|
// Manifests contain only files, directories are implied
|
|
return nil
|
|
}
|
|
|
|
// Clean the path - remove leading slash if present
|
|
cleanPath := filePath
|
|
if len(cleanPath) > 0 && cleanPath[0] == '/' {
|
|
cleanPath = cleanPath[1:]
|
|
}
|
|
|
|
// Compute absolute path for file reading
|
|
absPath := filepath.Join(basePath, cleanPath)
|
|
|
|
entry := &FileEntry{
|
|
Path: cleanPath,
|
|
AbsPath: absPath,
|
|
Size: info.Size(),
|
|
Mtime: info.ModTime(),
|
|
// Note: Ctime not available from fs.FileInfo on all platforms
|
|
// Will need platform-specific code to extract it
|
|
}
|
|
|
|
s.mu.Lock()
|
|
s.files = append(s.files, entry)
|
|
filesFound := int64(len(s.files))
|
|
var bytesFound int64
|
|
for _, f := range s.files {
|
|
bytesFound += f.Size
|
|
}
|
|
s.mu.Unlock()
|
|
|
|
sendEnumerateStatus(progress, EnumerateStatus{
|
|
FilesFound: filesFound,
|
|
BytesFound: bytesFound,
|
|
})
|
|
|
|
return nil
|
|
}
|
|
|
|
// Files returns a copy of all files added to the scanner.
|
|
func (s *Scanner) Files() []*FileEntry {
|
|
s.mu.RLock()
|
|
defer s.mu.RUnlock()
|
|
out := make([]*FileEntry, len(s.files))
|
|
copy(out, s.files)
|
|
return out
|
|
}
|
|
|
|
// FileCount returns the number of files in the scanner.
|
|
func (s *Scanner) FileCount() int64 {
|
|
s.mu.RLock()
|
|
defer s.mu.RUnlock()
|
|
return int64(len(s.files))
|
|
}
|
|
|
|
// TotalBytes returns the total size of all files in the scanner.
|
|
func (s *Scanner) TotalBytes() int64 {
|
|
s.mu.RLock()
|
|
defer s.mu.RUnlock()
|
|
var total int64
|
|
for _, f := range s.files {
|
|
total += f.Size
|
|
}
|
|
return total
|
|
}
|
|
|
|
// ToManifest reads all file contents, computes hashes, and generates a manifest.
|
|
// If progress is non-nil, status updates are sent approximately once per second.
|
|
// The progress channel is closed when the method returns.
|
|
// The manifest is written to the provided io.Writer.
|
|
func (s *Scanner) ToManifest(ctx context.Context, w io.Writer, progress chan<- ScanStatus) error {
|
|
if progress != nil {
|
|
defer close(progress)
|
|
}
|
|
|
|
s.mu.RLock()
|
|
files := make([]*FileEntry, len(s.files))
|
|
copy(files, s.files)
|
|
totalFiles := int64(len(files))
|
|
var totalBytes int64
|
|
for _, f := range files {
|
|
totalBytes += f.Size
|
|
}
|
|
s.mu.RUnlock()
|
|
|
|
builder := mfer.NewBuilder()
|
|
|
|
var scannedFiles int64
|
|
var scannedBytes int64
|
|
lastProgressTime := time.Now()
|
|
startTime := time.Now()
|
|
|
|
for _, entry := range files {
|
|
// Check for cancellation
|
|
select {
|
|
case <-ctx.Done():
|
|
return ctx.Err()
|
|
default:
|
|
}
|
|
|
|
// Open file
|
|
f, err := s.fs.Open(entry.AbsPath)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
// Add to manifest with progress callback
|
|
bytesRead, err := builder.AddFile(
|
|
entry.Path,
|
|
entry.Size,
|
|
entry.Mtime,
|
|
f,
|
|
func(fileBytes int64) {
|
|
// Send progress at most once per second
|
|
now := time.Now()
|
|
if progress != nil && now.Sub(lastProgressTime) >= time.Second {
|
|
elapsed := now.Sub(startTime).Seconds()
|
|
currentBytes := scannedBytes + fileBytes
|
|
var rate float64
|
|
if elapsed > 0 {
|
|
rate = float64(currentBytes) / elapsed
|
|
}
|
|
sendScanStatus(progress, ScanStatus{
|
|
TotalFiles: totalFiles,
|
|
ScannedFiles: scannedFiles,
|
|
TotalBytes: totalBytes,
|
|
ScannedBytes: currentBytes,
|
|
BytesPerSec: rate,
|
|
})
|
|
lastProgressTime = now
|
|
}
|
|
},
|
|
)
|
|
f.Close()
|
|
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
scannedFiles++
|
|
scannedBytes += bytesRead
|
|
}
|
|
|
|
// Send final progress
|
|
if progress != nil {
|
|
elapsed := time.Since(startTime).Seconds()
|
|
var rate float64
|
|
if elapsed > 0 {
|
|
rate = float64(scannedBytes) / elapsed
|
|
}
|
|
sendScanStatus(progress, ScanStatus{
|
|
TotalFiles: totalFiles,
|
|
ScannedFiles: scannedFiles,
|
|
TotalBytes: totalBytes,
|
|
ScannedBytes: scannedBytes,
|
|
BytesPerSec: rate,
|
|
})
|
|
}
|
|
|
|
// Build and write manifest
|
|
return builder.Build(w)
|
|
}
|
|
|
|
// pathIsHidden reports whether any element of p, after cleaning, starts with
// a dot: the final name or any directory leading to it. Elements are
// separated by forward slashes. A path that cleans to "." (including the
// empty string) or that begins with ".." is therefore reported as hidden.
func pathIsHidden(p string) bool {
	for _, elem := range strings.Split(path.Clean(p), "/") {
		if strings.HasPrefix(elem, ".") {
			return true
		}
	}
	return false
}
|
|
|
|
// sendEnumerateStatus sends a status update without blocking.
|
|
// If the channel is full, the update is dropped.
|
|
func sendEnumerateStatus(ch chan<- EnumerateStatus, status EnumerateStatus) {
|
|
if ch == nil {
|
|
return
|
|
}
|
|
select {
|
|
case ch <- status:
|
|
default:
|
|
// Channel full, drop this update
|
|
}
|
|
}
|
|
|
|
// sendScanStatus sends a status update without blocking.
|
|
// If the channel is full, the update is dropped.
|
|
func sendScanStatus(ch chan<- ScanStatus, status ScanStatus) {
|
|
if ch == nil {
|
|
return
|
|
}
|
|
select {
|
|
case ch <- status:
|
|
default:
|
|
// Channel full, drop this update
|
|
}
|
|
}
|