package checker import ( "bytes" "context" "crypto/sha256" "errors" "io" "os" "path/filepath" "github.com/multiformats/go-multihash" "github.com/spf13/afero" "sneak.berlin/go/mfer/mfer" ) // Result represents the outcome of checking a single file. type Result struct { Path string // Relative path from manifest Status Status // Verification result status Message string // Human-readable description of the result } // Status represents the verification status of a file. type Status int const ( StatusOK Status = iota // File matches manifest (size and hash verified) StatusMissing // File not found on disk StatusSizeMismatch // File size differs from manifest StatusHashMismatch // File hash differs from manifest StatusExtra // File exists on disk but not in manifest StatusError // Error occurred during verification ) func (s Status) String() string { switch s { case StatusOK: return "OK" case StatusMissing: return "MISSING" case StatusSizeMismatch: return "SIZE_MISMATCH" case StatusHashMismatch: return "HASH_MISMATCH" case StatusExtra: return "EXTRA" case StatusError: return "ERROR" default: return "UNKNOWN" } } // CheckStatus contains progress information for the check operation. type CheckStatus struct { TotalFiles int64 // Total number of files in manifest CheckedFiles int64 // Number of files checked so far TotalBytes int64 // Total bytes to verify (sum of all file sizes) CheckedBytes int64 // Bytes verified so far BytesPerSec float64 // Current throughput rate Failures int64 // Number of verification failures encountered } // Checker verifies files against a manifest. type Checker struct { basePath string files []*mfer.MFFilePath fs afero.Fs // manifestPaths is a set of paths in the manifest for quick lookup manifestPaths map[string]struct{} } // NewChecker creates a new Checker for the given manifest, base path, and filesystem. // The basePath is the directory relative to which manifest paths are resolved. // If fs is nil, the real filesystem (OsFs) is used. func NewChecker(manifestPath string, basePath string, fs afero.Fs) (*Checker, error) { if fs == nil { fs = afero.NewOsFs() } m, err := mfer.NewManifestFromFile(fs, manifestPath) if err != nil { return nil, err } abs, err := filepath.Abs(basePath) if err != nil { return nil, err } files := m.Files() manifestPaths := make(map[string]struct{}, len(files)) for _, f := range files { manifestPaths[f.Path] = struct{}{} } return &Checker{ basePath: abs, files: files, fs: fs, manifestPaths: manifestPaths, }, nil } // FileCount returns the number of files in the manifest. func (c *Checker) FileCount() int64 { return int64(len(c.files)) } // TotalBytes returns the total size of all files in the manifest. func (c *Checker) TotalBytes() int64 { var total int64 for _, f := range c.files { total += f.Size } return total } // Check verifies all files against the manifest. // Results are sent to the results channel as files are checked. // Progress updates are sent to the progress channel approximately once per second. // Both channels are closed when the method returns. func (c *Checker) Check(ctx context.Context, results chan<- Result, progress chan<- CheckStatus) error { if results != nil { defer close(results) } if progress != nil { defer close(progress) } totalFiles := int64(len(c.files)) totalBytes := c.TotalBytes() var checkedFiles int64 var checkedBytes int64 var failures int64 for _, entry := range c.files { select { case <-ctx.Done(): return ctx.Err() default: } result := c.checkFile(entry, &checkedBytes) if result.Status != StatusOK { failures++ } checkedFiles++ if results != nil { results <- result } // Send progress (simplified - every file for now) if progress != nil { sendCheckStatus(progress, CheckStatus{ TotalFiles: totalFiles, CheckedFiles: checkedFiles, TotalBytes: totalBytes, CheckedBytes: checkedBytes, Failures: failures, }) } } return nil } func (c *Checker) checkFile(entry *mfer.MFFilePath, checkedBytes *int64) Result { absPath := filepath.Join(c.basePath, entry.Path) // Check if file exists info, err := c.fs.Stat(absPath) if err != nil { if errors.Is(err, afero.ErrFileNotFound) || errors.Is(err, errors.New("file does not exist")) { return Result{Path: entry.Path, Status: StatusMissing, Message: "file not found"} } // Check for "file does not exist" style errors exists, _ := afero.Exists(c.fs, absPath) if !exists { return Result{Path: entry.Path, Status: StatusMissing, Message: "file not found"} } return Result{Path: entry.Path, Status: StatusError, Message: err.Error()} } // Check size if info.Size() != entry.Size { *checkedBytes += info.Size() return Result{ Path: entry.Path, Status: StatusSizeMismatch, Message: "size mismatch", } } // Open and hash file f, err := c.fs.Open(absPath) if err != nil { return Result{Path: entry.Path, Status: StatusError, Message: err.Error()} } defer f.Close() h := sha256.New() n, err := io.Copy(h, f) if err != nil { return Result{Path: entry.Path, Status: StatusError, Message: err.Error()} } *checkedBytes += n // Encode as multihash and compare computed, err := multihash.Encode(h.Sum(nil), multihash.SHA2_256) if err != nil { return Result{Path: entry.Path, Status: StatusError, Message: err.Error()} } // Check against all hashes in manifest (at least one must match) for _, hash := range entry.Hashes { if bytes.Equal(computed, hash.MultiHash) { return Result{Path: entry.Path, Status: StatusOK} } } return Result{Path: entry.Path, Status: StatusHashMismatch, Message: "hash mismatch"} } // FindExtraFiles walks the filesystem and reports files not in the manifest. // Results are sent to the results channel. The channel is closed when done. func (c *Checker) FindExtraFiles(ctx context.Context, results chan<- Result) error { if results != nil { defer close(results) } return afero.Walk(c.fs, c.basePath, func(path string, info os.FileInfo, err error) error { if err != nil { return err } select { case <-ctx.Done(): return ctx.Err() default: } // Skip directories if info.IsDir() { return nil } // Get relative path relPath, err := filepath.Rel(c.basePath, path) if err != nil { return err } // Check if path is in manifest if _, exists := c.manifestPaths[relPath]; !exists { if results != nil { results <- Result{ Path: relPath, Status: StatusExtra, Message: "not in manifest", } } } return nil }) } // sendCheckStatus sends a status update without blocking. func sendCheckStatus(ch chan<- CheckStatus, status CheckStatus) { if ch == nil { return } select { case ch <- status: default: } }