From 5ab092098b039049cd0f6402af555b3351b1637f Mon Sep 17 00:00:00 2001
From: sneak
Date: Sun, 8 Feb 2026 09:25:58 -0800
Subject: [PATCH] progress

---
 internal/checker/checker.go | 281 +++++++++++++++++++++++++++
 internal/cli/check.go       |  28 ++-
 internal/scanner/scanner.go | 373 ++++++++++++++++++++++++++++++++++++
 mfer/checker.go             |  15 ++
 mfer/gpg.go                 |  59 ++++++
 5 files changed, 748 insertions(+), 8 deletions(-)
 create mode 100644 internal/checker/checker.go
 create mode 100644 internal/scanner/scanner.go

diff --git a/internal/checker/checker.go b/internal/checker/checker.go
new file mode 100644
index 0000000..3790c14
--- /dev/null
+++ b/internal/checker/checker.go
@@ -0,0 +1,281 @@
+package checker
+
+import (
+	"bytes"
+	"context"
+	"crypto/sha256"
+	"errors"
+	"io"
+	"os"
+	"path/filepath"
+
+	"github.com/multiformats/go-multihash"
+	"github.com/spf13/afero"
+	"sneak.berlin/go/mfer/mfer"
+)
+
+// Result is the outcome reported for one file: its path, a Status, and a message.
+type Result struct {
+	Path    string // Relative path from manifest
+	Status  Status // Verification result status
+	Message string // Human-readable description of the result (left empty for StatusOK by checkFile)
+}
+
+// Status is the verification outcome for a file. Its String method returns an upper-case label per value.
+type Status int
+
+const (
+	StatusOK           Status = iota // File matches manifest (size and hash verified)
+	StatusMissing                    // File not found on disk
+	StatusSizeMismatch               // File size differs from manifest
+	StatusHashMismatch               // File hash differs from manifest
+	StatusExtra                      // File exists on disk but not in manifest
+	StatusError                      // Error occurred during verification
+)
+
+func (s Status) String() string {
+	switch s {
+	case StatusOK:
+		return "OK"
+	case StatusMissing:
+		return "MISSING"
+	case StatusSizeMismatch:
+		return "SIZE_MISMATCH"
+	case StatusHashMismatch:
+		return "HASH_MISMATCH"
+	case StatusExtra:
+		return "EXTRA"
+	case StatusError:
+		return "ERROR"
+	default: // any value outside the constants declared above
+		return "UNKNOWN"
+	}
+}
+
+// CheckStatus contains progress information for the check operation.
+type CheckStatus struct {
+	TotalFiles   int64   // Total number of files in manifest
+	CheckedFiles int64   // Number of files checked so far
+	TotalBytes   int64   // Total bytes to verify (sum of all file sizes)
+	CheckedBytes int64   // Bytes verified so far
+	BytesPerSec  float64 // Current throughput rate; NOTE(review): Check in this file never populates it
+	Failures     int64   // Number of verification failures encountered
+}
+
+// Checker verifies files on an afero filesystem against the entries of a loaded manifest.
+type Checker struct {
+	basePath string
+	files    []*mfer.MFFilePath
+	fs       afero.Fs
+	// manifestPaths is a set of paths in the manifest for quick lookup
+	manifestPaths map[string]struct{}
+}
+
+// NewChecker loads the manifest at manifestPath through fs and returns a Checker for it.
+// basePath is made absolute with filepath.Abs; manifest paths are resolved relative to it.
+// If fs is nil, the real filesystem (afero.NewOsFs) is used.
+func NewChecker(manifestPath string, basePath string, fs afero.Fs) (*Checker, error) {
+	if fs == nil {
+		fs = afero.NewOsFs()
+	}
+
+	m, err := mfer.NewManifestFromFile(fs, manifestPath)
+	if err != nil {
+		return nil, err
+	}
+
+	abs, err := filepath.Abs(basePath)
+	if err != nil {
+		return nil, err
+	}
+
+	files := m.Files()
+	manifestPaths := make(map[string]struct{}, len(files))
+	for _, f := range files {
+		manifestPaths[f.Path] = struct{}{}
+	}
+
+	return &Checker{
+		basePath:      abs,
+		files:         files,
+		fs:            fs,
+		manifestPaths: manifestPaths,
+	}, nil
+}
+
+// FileCount returns the number of files in the manifest.
+func (c *Checker) FileCount() int64 {
+	return int64(len(c.files))
+}
+
+// TotalBytes returns the total size of all files in the manifest, summed on every call.
+func (c *Checker) TotalBytes() int64 {
+	var total int64
+	for _, f := range c.files {
+		total += f.Size
+	}
+	return total
+}
+
+// Check verifies all files against the manifest.
+// Results are sent to the results channel as files are checked (a blocking send).
+// A progress update is offered after every file; it is dropped if the receiver is not ready.
+// Both channels, when non-nil, are closed when the method returns.
+func (c *Checker) Check(ctx context.Context, results chan<- Result, progress chan<- CheckStatus) error {
+	if results != nil {
+		defer close(results)
+	}
+	if progress != nil {
+		defer close(progress)
+	}
+
+	totalFiles := int64(len(c.files))
+	totalBytes := c.TotalBytes()
+
+	var checkedFiles int64
+	var checkedBytes int64
+	var failures int64
+
+	for _, entry := range c.files {
+		select {
+		case <-ctx.Done():
+			return ctx.Err()
+		default:
+		}
+
+		result := c.checkFile(entry, &checkedBytes)
+		if result.Status != StatusOK {
+			failures++
+		}
+		checkedFiles++
+
+		if results != nil {
+			results <- result
+		}
+
+		// Send progress (simplified - every file for now)
+		if progress != nil {
+			sendCheckStatus(progress, CheckStatus{
+				TotalFiles:   totalFiles,
+				CheckedFiles: checkedFiles,
+				TotalBytes:   totalBytes,
+				CheckedBytes: checkedBytes,
+				Failures:     failures,
+			})
+		}
+	}
+
+	return nil
+}
+
+// checkFile verifies a single manifest entry: existence, then size, then SHA-256
+// multihash. Bytes examined are added to *checkedBytes (the on-disk size when the
+// size differs, otherwise the number of bytes hashed).
+func (c *Checker) checkFile(entry *mfer.MFFilePath, checkedBytes *int64) Result {
+	absPath := filepath.Join(c.basePath, entry.Path)
+
+	// Check if file exists
+	info, err := c.fs.Stat(absPath)
+	if err != nil {
+		// Match the os.ErrNotExist sentinel; a freshly built errors.New value can never match.
+		if errors.Is(err, afero.ErrFileNotFound) || errors.Is(err, os.ErrNotExist) {
+			return Result{Path: entry.Path, Status: StatusMissing, Message: "file not found"}
+		}
+		// Best-effort fallback for filesystems with their own "not found" errors; the Exists error is deliberately ignored.
+		exists, _ := afero.Exists(c.fs, absPath)
+		if !exists {
+			return Result{Path: entry.Path, Status: StatusMissing, Message: "file not found"}
+		}
+		return Result{Path: entry.Path, Status: StatusError, Message: err.Error()}
+	}
+
+	// Check size
+	if info.Size() != entry.Size {
+		*checkedBytes += info.Size()
+		return Result{Path: entry.Path, Status: StatusSizeMismatch, Message: "size mismatch"}
+	}
+
+	// Open and hash file
+	f, err := c.fs.Open(absPath)
+	if err != nil {
+		return Result{Path: entry.Path, Status: StatusError, Message: err.Error()}
+	}
+	defer f.Close()
+
+	h := sha256.New()
+	n, err := io.Copy(h, f)
+	if err != nil {
+		return Result{Path: entry.Path, Status: StatusError, Message: err.Error()}
+	}
+	*checkedBytes += n
+
+	// Encode as multihash and compare
+	computed, err := multihash.Encode(h.Sum(nil), multihash.SHA2_256)
+	if err != nil {
+		return Result{Path: entry.Path, Status: StatusError, Message: err.Error()}
+	}
+
+	// Check against all hashes in manifest (at least one must match)
+	for _, hash := range entry.Hashes {
+		if bytes.Equal(computed, hash.MultiHash) {
+			return Result{Path: entry.Path, Status: StatusOK}
+		}
+	}
+
+	return Result{Path: entry.Path, Status: StatusHashMismatch, Message: "hash mismatch"}
+}
+
+// FindExtraFiles walks the filesystem and reports files not in the manifest.
+// Results are sent to the results channel. The channel is closed when done.
+func (c *Checker) FindExtraFiles(ctx context.Context, results chan<- Result) error {
+	if results != nil {
+		defer close(results)
+	}
+
+	return afero.Walk(c.fs, c.basePath, func(path string, info os.FileInfo, err error) error {
+		if err != nil {
+			return err
+		}
+
+		select {
+		case <-ctx.Done():
+			return ctx.Err()
+		default:
+		}
+
+		// Skip directories
+		if info.IsDir() {
+			return nil
+		}
+
+		// Get relative path
+		relPath, err := filepath.Rel(c.basePath, path)
+		if err != nil {
+			return err
+		}
+
+		// Check if path is in manifest (NOTE(review): relPath uses OS separators; confirm manifest paths do too)
+		if _, exists := c.manifestPaths[relPath]; !exists {
+			if results != nil {
+				results <- Result{
+					Path:    relPath,
+					Status:  StatusExtra,
+					Message: "not in manifest",
+				}
+			}
+		}
+
+		return nil
+	})
+}
+
+// sendCheckStatus sends a status update without blocking.
+func sendCheckStatus(ch chan<- CheckStatus, status CheckStatus) {
+	if ch == nil {
+		return
+	}
+	select {
+	case ch <- status:
+	default: // receiver not ready: the update is dropped
+	}
+}
diff --git a/internal/cli/check.go b/internal/cli/check.go
index 6514dbd..a2d0cdf 100644
--- a/internal/cli/check.go
+++ b/internal/cli/check.go
@@ -1,6 +1,7 @@
 package cli
 
 import (
+	"encoding/hex"
 	"fmt"
 	"path/filepath"
 	"strings"
@@ -72,19 +73,30 @@ func (mfa *CLIApp) checkManifestOperation(ctx *cli.Context) error {
 	// Check signature requirement
 	requiredSigner := ctx.String("require-signature")
 	if requiredSigner != "" {
+		// Validate fingerprint format: must be exactly 40 hex characters
+		if len(requiredSigner) != 40 {
+			return fmt.Errorf("invalid fingerprint: must be exactly 40 hex characters, got %d", len(requiredSigner))
+		}
+		if _, err := hex.DecodeString(requiredSigner); err != nil {
+			return fmt.Errorf("invalid fingerprint: must be valid hex: %w", err)
+		}
+
 		if !chk.IsSigned() {
 			return fmt.Errorf("manifest is not signed, but signature from %s is required", requiredSigner)
 		}
-		signer := chk.Signer()
-		if signer == nil {
-			return fmt.Errorf("manifest signature has no signer fingerprint")
+
+		// Extract fingerprint from the embedded public key (not from the signer field)
+		// This validates the key is importable and gets its actual fingerprint
+		embeddedFP, err := chk.ExtractEmbeddedSigningKeyFP()
+		if err != nil {
+			return fmt.Errorf("failed to extract fingerprint from embedded signing key: %w", err)
 		}
-		// Compare signer - the required key ID might be a suffix of the full fingerprint
-		signerStr := string(signer)
-		if !strings.EqualFold(signerStr, requiredSigner) && !strings.HasSuffix(strings.ToUpper(signerStr), strings.ToUpper(requiredSigner)) {
-			return fmt.Errorf("manifest signed by %s, but %s is required", signerStr, requiredSigner)
+
+		// Compare fingerprints - must be exact match (case-insensitive); suffix/short key IDs are no longer accepted
+		if !strings.EqualFold(embeddedFP, requiredSigner) {
+			return fmt.Errorf("embedded signing key fingerprint %s does not match required %s", embeddedFP, requiredSigner)
 		}
-		log.Infof("manifest signature verified (signer: %s)", signerStr)
+		log.Infof("manifest signature verified (signer: %s)", embeddedFP)
 	}
 
 	log.Infof("manifest contains %d files, %s", chk.FileCount(), humanize.IBytes(uint64(chk.TotalBytes())))
diff --git a/internal/scanner/scanner.go b/internal/scanner/scanner.go
new file mode 100644
index 0000000..252e16a
--- /dev/null
+++ b/internal/scanner/scanner.go
@@ -0,0 +1,373 @@
+package scanner
+
+import (
+	"context"
+	"io"
+	"io/fs"
+	"path"
+	"path/filepath"
+	"strings"
+	"sync"
+	"time"
+
+	"github.com/spf13/afero"
+	"sneak.berlin/go/mfer/mfer"
+)
+
+// Phase 1: Enumeration
+// ---------------------
+// Walking directories and calling stat() on files to collect metadata.
+// Builds the list of files to be scanned. Relatively fast (metadata only).
+
+// EnumerateStatus contains progress information for the enumeration phase.
+type EnumerateStatus struct {
+	FilesFound int64 // Number of files discovered so far
+	BytesFound int64 // Total size of discovered files (from stat)
+}
+
+// Phase 2: Scan (ToManifest)
+// --------------------------
+// Reading file contents and computing hashes for manifest generation.
+// This is the expensive phase that reads all file data.
+
+// ScanStatus contains progress information for the scan phase.
+type ScanStatus struct {
+	TotalFiles   int64   // Total number of files to scan
+	ScannedFiles int64   // Number of files fully scanned so far (the file in progress is not counted)
+	TotalBytes   int64   // Total bytes to read (sum of all file sizes)
+	ScannedBytes int64   // Bytes read so far, including the file in progress
+	BytesPerSec  float64 // Average throughput since the scan started
+}
+
+// Options configures scanner behavior.
+type Options struct {
+	IgnoreDotfiles bool     // Skip files and directories starting with a dot
+	FollowSymLinks bool     // Resolve symlinks instead of skipping them; NOTE(review): not read anywhere in this file
+	Fs             afero.Fs // Filesystem to use, defaults to OsFs if nil
+}
+
+// FileEntry represents a file that has been enumerated.
+type FileEntry struct {
+	Path    string    // Relative path (used in manifest)
+	AbsPath string    // Absolute path (used for reading file content)
+	Size    int64     // File size in bytes
+	Mtime   time.Time // Last modification time
+	Ctime   time.Time // Creation time (platform-dependent); never populated by this file
+}
+
+// Scanner accumulates files and generates manifests from them.
+type Scanner struct {
+	mu         sync.RWMutex
+	files      []*FileEntry
+	bytesFound int64 // running sum of the Size of every entry in files; guarded by mu
+	options    *Options
+	fs         afero.Fs
+}
+
+// New creates a new Scanner with default options.
+func New() *Scanner {
+	return NewWithOptions(nil)
+}
+
+// NewWithOptions creates a new Scanner with the given options; nil means defaults.
+func NewWithOptions(opts *Options) *Scanner {
+	if opts == nil {
+		opts = &Options{}
+	}
+	fs := opts.Fs
+	if fs == nil {
+		fs = afero.NewOsFs()
+	}
+	return &Scanner{
+		files:   make([]*FileEntry, 0),
+		options: opts,
+		fs:      fs,
+	}
+}
+
+// EnumerateFile adds a single file to the scanner, calling stat() to get metadata.
+func (s *Scanner) EnumerateFile(filePath string) error {
+	abs, err := filepath.Abs(filePath)
+	if err != nil {
+		return err
+	}
+	info, err := s.fs.Stat(abs)
+	if err != nil {
+		return err
+	}
+	// For single files, use the filename as the relative path
+	basePath := filepath.Dir(abs)
+	return s.enumerateFileWithInfo(filepath.Base(abs), basePath, info, nil)
+}
+
+// EnumeratePath walks a directory path and adds all files to the scanner.
+// If progress is non-nil, status updates are sent as files are discovered.
+// The progress channel is closed when the method returns.
+func (s *Scanner) EnumeratePath(inputPath string, progress chan<- EnumerateStatus) error {
+	if progress != nil {
+		defer close(progress)
+	}
+	abs, err := filepath.Abs(inputPath)
+	if err != nil {
+		return err
+	}
+	afs := afero.NewReadOnlyFs(afero.NewBasePathFs(s.fs, abs))
+	return s.enumerateFS(afs, abs, progress)
+}
+
+// EnumeratePaths walks multiple directory paths and adds all files to the scanner.
+// If progress is non-nil, status updates are sent as files are discovered.
+// The progress channel is closed when the method returns.
+func (s *Scanner) EnumeratePaths(progress chan<- EnumerateStatus, inputPaths ...string) error {
+	if progress != nil {
+		defer close(progress)
+	}
+	for _, p := range inputPaths {
+		abs, err := filepath.Abs(p)
+		if err != nil {
+			return err
+		}
+		afs := afero.NewReadOnlyFs(afero.NewBasePathFs(s.fs, abs))
+		if err := s.enumerateFS(afs, abs, progress); err != nil {
+			return err
+		}
+	}
+	return nil
+}
+
+// EnumerateFS walks an afero filesystem and adds all files to the scanner.
+// If progress is non-nil, status updates are sent as files are discovered.
+// The progress channel is closed when the method returns.
+// basePath is used to compute absolute paths for file reading.
+func (s *Scanner) EnumerateFS(afs afero.Fs, basePath string, progress chan<- EnumerateStatus) error {
+	if progress != nil {
+		defer close(progress)
+	}
+	return s.enumerateFS(afs, basePath, progress)
+}
+
+// enumerateFS is the internal implementation that doesn't close the progress channel.
+func (s *Scanner) enumerateFS(afs afero.Fs, basePath string, progress chan<- EnumerateStatus) error {
+	return afero.Walk(afs, "/", func(p string, info fs.FileInfo, err error) error {
+		if err != nil {
+			return err
+		}
+		if s.options.IgnoreDotfiles && pathIsHidden(p) {
+			if info.IsDir() {
+				return filepath.SkipDir
+			}
+			return nil
+		}
+		return s.enumerateFileWithInfo(p, basePath, info, progress)
+	})
+}
+
+// enumerateFileWithInfo records one file (directories are ignored) and offers a progress update.
+func (s *Scanner) enumerateFileWithInfo(filePath string, basePath string, info fs.FileInfo, progress chan<- EnumerateStatus) error {
+	if info.IsDir() {
+		// Manifests contain only files, directories are implied
+		return nil
+	}
+
+	// Clean the path - remove leading slash if present
+	cleanPath := filePath
+	if len(cleanPath) > 0 && cleanPath[0] == '/' {
+		cleanPath = cleanPath[1:]
+	}
+
+	// Compute absolute path for file reading
+	absPath := filepath.Join(basePath, cleanPath)
+
+	entry := &FileEntry{
+		Path:    cleanPath,
+		AbsPath: absPath,
+		Size:    info.Size(),
+		Mtime:   info.ModTime(),
+		// Note: Ctime not available from fs.FileInfo on all platforms
+		// Will need platform-specific code to extract it
+	}
+
+	s.mu.Lock()
+	s.files = append(s.files, entry)
+	filesFound := int64(len(s.files))
+	// Keep a running total so each added file costs O(1), not a re-sum of all entries.
+	s.bytesFound += entry.Size
+	bytesFound := s.bytesFound
+	s.mu.Unlock()
+
+	sendEnumerateStatus(progress, EnumerateStatus{
+		FilesFound: filesFound,
+		BytesFound: bytesFound,
+	})
+
+	return nil
+}
+
+// Files returns a copy of the scanner's file list (the entries themselves are shared pointers).
+func (s *Scanner) Files() []*FileEntry {
+	s.mu.RLock()
+	defer s.mu.RUnlock()
+	out := make([]*FileEntry, len(s.files))
+	copy(out, s.files)
+	return out
+}
+
+// FileCount returns the number of files in the scanner.
+func (s *Scanner) FileCount() int64 {
+	s.mu.RLock()
+	defer s.mu.RUnlock()
+	return int64(len(s.files))
+}
+
+// TotalBytes returns the total size of all files in the scanner, summed from the entries on each call.
+func (s *Scanner) TotalBytes() int64 {
+	s.mu.RLock()
+	defer s.mu.RUnlock()
+	var total int64
+	for _, f := range s.files {
+		total += f.Size
+	}
+	return total
+}
+
+// ToManifest reads all file contents, computes hashes, and generates a manifest.
+// If progress is non-nil, status updates are sent approximately once per second.
+// The progress channel is closed when the method returns.
+// The manifest is written to the provided io.Writer.
+func (s *Scanner) ToManifest(ctx context.Context, w io.Writer, progress chan<- ScanStatus) error {
+	if progress != nil {
+		defer close(progress)
+	}
+
+	s.mu.RLock()
+	files := make([]*FileEntry, len(s.files))
+	copy(files, s.files)
+	totalFiles := int64(len(files))
+	var totalBytes int64
+	for _, f := range files {
+		totalBytes += f.Size
+	}
+	s.mu.RUnlock()
+
+	builder := mfer.NewBuilder()
+
+	var scannedFiles int64
+	var scannedBytes int64
+	lastProgressTime := time.Now()
+	startTime := time.Now()
+
+	for _, entry := range files {
+		// Check for cancellation (once per file, before it is opened)
+		select {
+		case <-ctx.Done():
+			return ctx.Err()
+		default:
+		}
+
+		// Open file; it is closed explicitly after AddFile below rather than with defer
+		f, err := s.fs.Open(entry.AbsPath)
+		if err != nil {
+			return err
+		}
+
+		// Add to manifest with progress callback (fileBytes is presumably the bytes read so far for this file — confirm in mfer.Builder)
+		bytesRead, err := builder.AddFile(
+			entry.Path,
+			entry.Size,
+			entry.Mtime,
+			f,
+			func(fileBytes int64) {
+				// Send progress at most once per second
+				now := time.Now()
+				if progress != nil && now.Sub(lastProgressTime) >= time.Second {
+					elapsed := now.Sub(startTime).Seconds()
+					currentBytes := scannedBytes + fileBytes
+					var rate float64
+					if elapsed > 0 {
+						rate = float64(currentBytes) / elapsed
+					}
+					sendScanStatus(progress, ScanStatus{
+						TotalFiles:   totalFiles,
+						ScannedFiles: scannedFiles,
+						TotalBytes:   totalBytes,
+						ScannedBytes: currentBytes,
+						BytesPerSec:  rate,
+					})
+					lastProgressTime = now
+				}
+			},
+		)
+		f.Close()
+
+		if err != nil {
+			return err
+		}
+
+		scannedFiles++
+		scannedBytes += bytesRead
+	}
+
+	// Send final progress (non-blocking like the others, so it may be dropped if the receiver is not ready)
+	if progress != nil {
+		elapsed := time.Since(startTime).Seconds()
+		var rate float64
+		if elapsed > 0 {
+			rate = float64(scannedBytes) / elapsed
+		}
+		sendScanStatus(progress, ScanStatus{
+			TotalFiles:   totalFiles,
+			ScannedFiles: scannedFiles,
+			TotalBytes:   totalBytes,
+			ScannedBytes: scannedBytes,
+			BytesPerSec:  rate,
+		})
+	}
+
+	// Build and write manifest
+	return builder.Build(w)
+}
+
+// pathIsHidden returns true if the path or any of its parent directories
+// start with a dot (hidden files/directories).
+//
+// The path is normalised with path.Clean before inspection, so the empty
+// string (which cleans to ".") and paths that begin with ".." also count as
+// hidden. Only forward slashes are treated as separators.
+func pathIsHidden(p string) bool {
+	// Normalise first so doubled slashes and interior "." elements do not
+	// show up as components of their own.
+	elems := strings.Split(path.Clean(p), "/")
+	for _, elem := range elems {
+		if strings.HasPrefix(elem, ".") {
+			// The path itself or one of its ancestors is dot-prefixed.
+			return true
+		}
+	}
+	return false
+}
+
+// sendEnumerateStatus offers a status update to ch without ever blocking.
+// A nil channel is ignored; if nobody is ready to receive, the update is discarded.
+func sendEnumerateStatus(ch chan<- EnumerateStatus, status EnumerateStatus) {
+	if ch != nil {
+		select {
+		case ch <- status:
+			// delivered
+		default:
+			// Channel full, drop this update
+		}
+	}
+}
+
+// sendScanStatus offers a status update to ch without ever blocking.
+// A nil channel is ignored; if nobody is ready to receive, the update is discarded.
+func sendScanStatus(ch chan<- ScanStatus, status ScanStatus) {
+	if ch != nil {
+		select {
+		case ch <- status:
+			// delivered
+		default:
+			// Channel full, drop this update
+		}
+	}
+}
diff --git a/mfer/checker.go b/mfer/checker.go
index 13f0ac2..2e47d3a 100644
--- a/mfer/checker.go
+++ b/mfer/checker.go
@@ -135,6 +135,21 @@ func (c *Checker) Signer() []byte {
 	return c.signer
 }
 
+// SigningPubKey returns the signing public key if the manifest is signed, nil otherwise.
+func (c *Checker) SigningPubKey() []byte {
+	return c.signingPubKey
+}
+
+// ExtractEmbeddedSigningKeyFP imports the manifest's embedded public key into a
+// temporary keyring and extracts its fingerprint. This validates the key and
+// returns its actual fingerprint from the key material itself.
+func (c *Checker) ExtractEmbeddedSigningKeyFP() (string, error) {
+	if len(c.signingPubKey) == 0 {
+		return "", errors.New("manifest has no signing public key")
+	}
+	return gpgExtractPubKeyFingerprint(c.signingPubKey)
+}
+
 // Check verifies all files against the manifest.
 // Results are sent to the results channel as files are checked.
 // Progress updates are sent to the progress channel approximately once per second.
diff --git a/mfer/gpg.go b/mfer/gpg.go
index 94b5bca..c587b2e 100644
--- a/mfer/gpg.go
+++ b/mfer/gpg.go
@@ -91,6 +91,84 @@ func gpgGetKeyFingerprint(keyID GPGKeyID) ([]byte, error) {
 	return nil, fmt.Errorf("fingerprint not found for key: %s", keyID)
 }
 
+// gpgExtractPubKeyFingerprint imports a public key into a temporary keyring
+// and extracts its fingerprint. This verifies the key is importable and returns
+// the fingerprint gpg derives from the key material itself.
+//
+// pubKey must contain exactly one primary key: blobs bundling several keys are
+// rejected, because the first fingerprint listed need not belong to the key
+// that produced the manifest signature.
+func gpgExtractPubKeyFingerprint(pubKey []byte) (string, error) {
+	// Create temporary directory for GPG operations
+	tmpDir, err := os.MkdirTemp("", "mfer-gpg-fingerprint-*")
+	if err != nil {
+		return "", fmt.Errorf("failed to create temp dir: %w", err)
+	}
+	defer os.RemoveAll(tmpDir)
+
+	// Set restrictive permissions
+	if err := os.Chmod(tmpDir, 0o700); err != nil {
+		return "", fmt.Errorf("failed to set temp dir permissions: %w", err)
+	}
+
+	// Write public key to temp file
+	pubKeyFile := filepath.Join(tmpDir, "pubkey.asc")
+	if err := os.WriteFile(pubKeyFile, pubKey, 0o600); err != nil {
+		return "", fmt.Errorf("failed to write public key: %w", err)
+	}
+
+	// Import the public key into the temporary keyring
+	importCmd := exec.Command("gpg",
+		"--homedir", tmpDir,
+		"--import",
+		pubKeyFile,
+	)
+	var importStderr bytes.Buffer
+	importCmd.Stderr = &importStderr
+	if err := importCmd.Run(); err != nil {
+		return "", fmt.Errorf("failed to import public key: %w: %s", err, importStderr.String())
+	}
+
+	// List keys to get fingerprint
+	listCmd := exec.Command("gpg",
+		"--homedir", tmpDir,
+		"--with-colons",
+		"--fingerprint",
+	)
+	var listStdout, listStderr bytes.Buffer
+	listCmd.Stdout = &listStdout
+	listCmd.Stderr = &listStderr
+	if err := listCmd.Run(); err != nil {
+		return "", fmt.Errorf("failed to list keys: %w: %s", err, listStderr.String())
+	}
+
+	// Parse the colon-delimited listing. Only the "fpr" record that directly
+	// follows a "pub" record is a primary-key fingerprint; the "fpr" records of
+	// subkeys are ignored.
+	var primaryFPs []string
+	afterPub := false
+	for _, line := range strings.Split(listStdout.String(), "\n") {
+		fields := strings.Split(line, ":")
+		switch {
+		case fields[0] == "pub":
+			afterPub = true
+		case fields[0] == "fpr" && afterPub && len(fields) >= 10:
+			primaryFPs = append(primaryFPs, fields[9])
+			afterPub = false
+		}
+	}
+
+	switch len(primaryFPs) {
+	case 0:
+		return "", fmt.Errorf("fingerprint not found in imported key")
+	case 1:
+		return primaryFPs[0], nil
+	default:
+		// Refuse to guess which key matters when several were imported.
+		return "", fmt.Errorf("embedded public key contains %d primary keys, expected exactly one", len(primaryFPs))
+	}
+}
+
 // gpgVerify verifies a detached signature against data using the provided public key.
 // It creates a temporary keyring to import the public key for verification.
 func gpgVerify(data, signature, pubKey []byte) error {