mfer/internal/checker/checker.go
sneak dc115c5ba2 Add custom types for type safety throughout codebase
- Add FileCount, FileSize, RelFilePath, AbsFilePath, ModTime, Multihash types
- Add UnixSeconds and UnixNanos types for timestamp handling
- Add URL types (ManifestURL, FileURL, BaseURL) with safe path joining
- Consolidate scanner package into mfer package
- Update checker to use custom types in Result and CheckStatus
- Add ModTime.Timestamp() method for protobuf conversion
- Update all tests to use proper custom types
2025-12-18 01:01:18 -08:00

302 lines
7.7 KiB
Go

package checker
import (
"bytes"
"context"
"crypto/sha256"
"errors"
"io"
"os"
"path/filepath"
"time"
"github.com/multiformats/go-multihash"
"github.com/spf13/afero"
"sneak.berlin/go/mfer/mfer"
)
// Result represents the outcome of checking a single file.
type Result struct {
Path mfer.RelFilePath // Relative path from manifest
Status Status // Verification result status
Message string // Human-readable description of the result
}
// Status represents the verification status of a file.
type Status int
const (
StatusOK Status = iota // File matches manifest (size and hash verified)
StatusMissing // File not found on disk
StatusSizeMismatch // File size differs from manifest
StatusHashMismatch // File hash differs from manifest
StatusExtra // File exists on disk but not in manifest
StatusError // Error occurred during verification
)
func (s Status) String() string {
switch s {
case StatusOK:
return "OK"
case StatusMissing:
return "MISSING"
case StatusSizeMismatch:
return "SIZE_MISMATCH"
case StatusHashMismatch:
return "HASH_MISMATCH"
case StatusExtra:
return "EXTRA"
case StatusError:
return "ERROR"
default:
return "UNKNOWN"
}
}
// CheckStatus contains progress information for the check operation.
type CheckStatus struct {
TotalFiles mfer.FileCount // Total number of files in manifest
CheckedFiles mfer.FileCount // Number of files checked so far
TotalBytes mfer.FileSize // Total bytes to verify (sum of all file sizes)
CheckedBytes mfer.FileSize // Bytes verified so far
BytesPerSec float64 // Current throughput rate
ETA time.Duration // Estimated time to completion
Failures mfer.FileCount // Number of verification failures encountered
}
// Checker verifies files against a manifest.
type Checker struct {
basePath mfer.AbsFilePath
files []*mfer.MFFilePath
fs afero.Fs
// manifestPaths is a set of paths in the manifest for quick lookup
manifestPaths map[mfer.RelFilePath]struct{}
}
// NewChecker creates a new Checker for the given manifest, base path, and filesystem.
// The basePath is the directory relative to which manifest paths are resolved.
// If fs is nil, the real filesystem (OsFs) is used.
func NewChecker(manifestPath string, basePath string, fs afero.Fs) (*Checker, error) {
if fs == nil {
fs = afero.NewOsFs()
}
m, err := mfer.NewManifestFromFile(fs, manifestPath)
if err != nil {
return nil, err
}
abs, err := filepath.Abs(basePath)
if err != nil {
return nil, err
}
files := m.Files()
manifestPaths := make(map[mfer.RelFilePath]struct{}, len(files))
for _, f := range files {
manifestPaths[mfer.RelFilePath(f.Path)] = struct{}{}
}
return &Checker{
basePath: mfer.AbsFilePath(abs),
files: files,
fs: fs,
manifestPaths: manifestPaths,
}, nil
}
// FileCount returns the number of files in the manifest.
func (c *Checker) FileCount() mfer.FileCount {
return mfer.FileCount(len(c.files))
}
// TotalBytes returns the total size of all files in the manifest.
func (c *Checker) TotalBytes() mfer.FileSize {
var total mfer.FileSize
for _, f := range c.files {
total += mfer.FileSize(f.Size)
}
return total
}
// Check verifies all files against the manifest.
// Results are sent to the results channel as files are checked.
// Progress updates are sent to the progress channel approximately once per second.
// Both channels are closed when the method returns.
func (c *Checker) Check(ctx context.Context, results chan<- Result, progress chan<- CheckStatus) error {
if results != nil {
defer close(results)
}
if progress != nil {
defer close(progress)
}
totalFiles := mfer.FileCount(len(c.files))
totalBytes := c.TotalBytes()
var checkedFiles mfer.FileCount
var checkedBytes mfer.FileSize
var failures mfer.FileCount
startTime := time.Now()
for _, entry := range c.files {
select {
case <-ctx.Done():
return ctx.Err()
default:
}
result := c.checkFile(entry, &checkedBytes)
if result.Status != StatusOK {
failures++
}
checkedFiles++
if results != nil {
results <- result
}
// Send progress with rate and ETA calculation
if progress != nil {
elapsed := time.Since(startTime)
var bytesPerSec float64
var eta time.Duration
if elapsed > 0 && checkedBytes > 0 {
bytesPerSec = float64(checkedBytes) / elapsed.Seconds()
remainingBytes := totalBytes - checkedBytes
if bytesPerSec > 0 {
eta = time.Duration(float64(remainingBytes)/bytesPerSec) * time.Second
}
}
sendCheckStatus(progress, CheckStatus{
TotalFiles: totalFiles,
CheckedFiles: checkedFiles,
TotalBytes: totalBytes,
CheckedBytes: checkedBytes,
BytesPerSec: bytesPerSec,
ETA: eta,
Failures: failures,
})
}
}
return nil
}
func (c *Checker) checkFile(entry *mfer.MFFilePath, checkedBytes *mfer.FileSize) Result {
absPath := filepath.Join(string(c.basePath), entry.Path)
relPath := mfer.RelFilePath(entry.Path)
// Check if file exists
info, err := c.fs.Stat(absPath)
if err != nil {
if errors.Is(err, afero.ErrFileNotFound) || errors.Is(err, errors.New("file does not exist")) {
return Result{Path: relPath, Status: StatusMissing, Message: "file not found"}
}
// Check for "file does not exist" style errors
exists, _ := afero.Exists(c.fs, absPath)
if !exists {
return Result{Path: relPath, Status: StatusMissing, Message: "file not found"}
}
return Result{Path: relPath, Status: StatusError, Message: err.Error()}
}
// Check size
if info.Size() != entry.Size {
*checkedBytes += mfer.FileSize(info.Size())
return Result{
Path: relPath,
Status: StatusSizeMismatch,
Message: "size mismatch",
}
}
// Open and hash file
f, err := c.fs.Open(absPath)
if err != nil {
return Result{Path: relPath, Status: StatusError, Message: err.Error()}
}
defer func() { _ = f.Close() }()
h := sha256.New()
n, err := io.Copy(h, f)
if err != nil {
return Result{Path: relPath, Status: StatusError, Message: err.Error()}
}
*checkedBytes += mfer.FileSize(n)
// Encode as multihash and compare
computed, err := multihash.Encode(h.Sum(nil), multihash.SHA2_256)
if err != nil {
return Result{Path: relPath, Status: StatusError, Message: err.Error()}
}
// Check against all hashes in manifest (at least one must match)
for _, hash := range entry.Hashes {
if bytes.Equal(computed, hash.MultiHash) {
return Result{Path: relPath, Status: StatusOK}
}
}
return Result{Path: relPath, Status: StatusHashMismatch, Message: "hash mismatch"}
}
// FindExtraFiles walks the filesystem and reports files not in the manifest.
// Results are sent to the results channel. The channel is closed when done.
func (c *Checker) FindExtraFiles(ctx context.Context, results chan<- Result) error {
if results != nil {
defer close(results)
}
return afero.Walk(c.fs, string(c.basePath), func(path string, info os.FileInfo, err error) error {
if err != nil {
return err
}
select {
case <-ctx.Done():
return ctx.Err()
default:
}
// Skip directories
if info.IsDir() {
return nil
}
// Get relative path
rel, err := filepath.Rel(string(c.basePath), path)
if err != nil {
return err
}
relPath := mfer.RelFilePath(rel)
// Check if path is in manifest
if _, exists := c.manifestPaths[relPath]; !exists {
if results != nil {
results <- Result{
Path: relPath,
Status: StatusExtra,
Message: "not in manifest",
}
}
}
return nil
})
}
// sendCheckStatus sends a status update without blocking.
func sendCheckStatus(ch chan<- CheckStatus, status CheckStatus) {
if ch == nil {
return
}
select {
case ch <- status:
default:
}
}