Move checker package into mfer package

Consolidate checker functionality into the mfer package alongside
scanner, removing the need for a separate internal/checker package.
This commit is contained in:
2025-12-18 01:28:35 -08:00
parent dc115c5ba2
commit e25e309581
3 changed files with 62 additions and 64 deletions

View File

@@ -1,301 +0,0 @@
package checker
import (
"bytes"
"context"
"crypto/sha256"
"errors"
"io"
"os"
"path/filepath"
"time"
"github.com/multiformats/go-multihash"
"github.com/spf13/afero"
"sneak.berlin/go/mfer/mfer"
)
// Result represents the outcome of checking a single file.
// Check produces one Result per manifest entry; FindExtraFiles produces one
// per on-disk file that is absent from the manifest.
type Result struct {
Path mfer.RelFilePath // Path of the file relative to the checker's base path
Status Status // Verification result status
Message string // Human-readable description of the result; left empty for StatusOK
}
// Status represents the verification status of a file.
type Status int

// Verification outcomes. The values are assigned consecutively from zero in
// the order listed here.
const (
	StatusOK           Status = iota // File matches manifest (size and hash verified)
	StatusMissing                    // File not found on disk
	StatusSizeMismatch               // File size differs from manifest
	StatusHashMismatch               // File hash differs from manifest
	StatusExtra                      // File exists on disk but not in manifest
	StatusError                      // Error occurred during verification
)

// String returns the upper-case label for s, or "UNKNOWN" when s is not one
// of the declared Status constants.
func (s Status) String() string {
	// Indexed by constant so each label stays next to the value it describes.
	labels := [...]string{
		StatusOK:           "OK",
		StatusMissing:      "MISSING",
		StatusSizeMismatch: "SIZE_MISMATCH",
		StatusHashMismatch: "HASH_MISMATCH",
		StatusExtra:        "EXTRA",
		StatusError:        "ERROR",
	}
	if s < 0 || int(s) >= len(labels) {
		return "UNKNOWN"
	}
	return labels[s]
}
// CheckStatus contains progress information for the check operation.
// Check builds a fresh value after each file and offers it on the progress
// channel.
type CheckStatus struct {
TotalFiles mfer.FileCount // Total number of files in manifest
CheckedFiles mfer.FileCount // Number of files checked so far, whatever their outcome
TotalBytes mfer.FileSize // Total bytes to verify (sum of all manifest file sizes)
CheckedBytes mfer.FileSize // Bytes accounted for so far; missing or unreadable files add nothing
BytesPerSec float64 // Average throughput since the check started; zero until some bytes are counted
ETA time.Duration // Estimated time to completion in whole seconds; zero when no rate is available
Failures mfer.FileCount // Number of files whose result was anything other than StatusOK
}
// Checker verifies files against a manifest.
// Construct one with NewChecker; the fields are filled in there.
type Checker struct {
basePath mfer.AbsFilePath // absolute directory that manifest paths are joined onto
files []*mfer.MFFilePath // manifest entries, in the order the manifest returned them
fs afero.Fs // filesystem used to load the manifest and to stat, open, and walk files
// manifestPaths is a set of paths in the manifest for quick lookup
// (consulted by FindExtraFiles).
manifestPaths map[mfer.RelFilePath]struct{}
}
// NewChecker loads the manifest stored at manifestPath and returns a Checker
// that resolves the manifest's relative paths against basePath.
//
// basePath is converted to an absolute path with filepath.Abs. When fs is nil
// the real operating-system filesystem (afero.NewOsFs) is used, both for
// reading the manifest and for later checks.
//
// An error is returned, unwrapped, if the manifest cannot be loaded or if
// basePath cannot be made absolute.
func NewChecker(manifestPath string, basePath string, fs afero.Fs) (*Checker, error) {
	if fs == nil {
		fs = afero.NewOsFs()
	}

	manifest, err := mfer.NewManifestFromFile(fs, manifestPath)
	if err != nil {
		return nil, err
	}

	absBase, err := filepath.Abs(basePath)
	if err != nil {
		return nil, err
	}

	chk := &Checker{
		basePath: mfer.AbsFilePath(absBase),
		files:    manifest.Files(),
		fs:       fs,
	}

	// Index every manifest path once so membership tests are a map lookup.
	chk.manifestPaths = make(map[mfer.RelFilePath]struct{}, len(chk.files))
	for i := range chk.files {
		chk.manifestPaths[mfer.RelFilePath(chk.files[i].Path)] = struct{}{}
	}

	return chk, nil
}
// FileCount reports how many file entries the loaded manifest contains.
func (c *Checker) FileCount() mfer.FileCount {
	entries := len(c.files)
	return mfer.FileCount(entries)
}
// TotalBytes sums the Size recorded in the manifest for every file entry.
// It reflects the manifest only; nothing on disk is consulted.
func (c *Checker) TotalBytes() mfer.FileSize {
	sum := mfer.FileSize(0)
	for i := range c.files {
		sum += mfer.FileSize(c.files[i].Size)
	}
	return sum
}
// Check verifies every file listed in the manifest, in manifest order.
//
// One Result per file is sent to results as soon as that file has been
// checked. After each file a CheckStatus snapshot is offered to progress
// without blocking, so an update is dropped whenever the receiver is not
// ready; there is no timer, the update rate simply follows the file rate.
// Either channel may be nil, in which case it is ignored. Every non-nil
// channel is closed when Check returns.
//
// Check returns ctx.Err() if ctx is cancelled before all files have been
// processed, including while it is waiting to hand a Result to a receiver
// that has stopped reading. Otherwise it returns nil: per-file problems are
// reported through Results, not through the returned error.
func (c *Checker) Check(ctx context.Context, results chan<- Result, progress chan<- CheckStatus) error {
	if results != nil {
		defer close(results)
	}
	if progress != nil {
		defer close(progress)
	}

	totalFiles := mfer.FileCount(len(c.files))
	totalBytes := c.TotalBytes()

	var (
		checkedFiles mfer.FileCount
		checkedBytes mfer.FileSize
		failures     mfer.FileCount
	)

	startTime := time.Now()

	for _, entry := range c.files {
		// Stop promptly between files once the caller has cancelled.
		select {
		case <-ctx.Done():
			return ctx.Err()
		default:
		}

		result := c.checkFile(entry, &checkedBytes)
		if result.Status != StatusOK {
			failures++
		}
		checkedFiles++

		if results != nil {
			// Watch ctx while sending so a consumer that has gone away
			// cannot leave this goroutine blocked forever.
			select {
			case results <- result:
			case <-ctx.Done():
				return ctx.Err()
			}
		}

		if progress == nil {
			continue
		}

		// Rate is the average since the start; ETA is derived from it.
		elapsed := time.Since(startTime)
		var bytesPerSec float64
		var eta time.Duration
		if elapsed > 0 && checkedBytes > 0 {
			bytesPerSec = float64(checkedBytes) / elapsed.Seconds()

			// checkedBytes can exceed totalBytes because size-mismatched
			// files are counted at their on-disk size, so clamp the
			// remainder at zero instead of letting it go negative or wrap.
			var remainingBytes mfer.FileSize
			if checkedBytes < totalBytes {
				remainingBytes = totalBytes - checkedBytes
			}
			if bytesPerSec > 0 {
				eta = time.Duration(float64(remainingBytes)/bytesPerSec) * time.Second
			}
		}

		sendCheckStatus(progress, CheckStatus{
			TotalFiles:   totalFiles,
			CheckedFiles: checkedFiles,
			TotalBytes:   totalBytes,
			CheckedBytes: checkedBytes,
			BytesPerSec:  bytesPerSec,
			ETA:          eta,
			Failures:     failures,
		})
	}

	return nil
}
// checkFile verifies one manifest entry against the file on disk and returns
// the outcome as a Result.
//
// The checks run in order: existence, size, then SHA-256 content hash. The
// digest is encoded as a multihash and the file passes if it equals any hash
// listed for the entry.
//
// checkedBytes is advanced by the on-disk size when the size differs from the
// manifest, or by the number of bytes hashed once the file has been read
// completely. It is left untouched when the file is missing or cannot be
// opened or read.
//
// Only a "does not exist" Stat error is reported as StatusMissing. Any other
// Stat failure is reported as StatusError with the underlying message, so
// such problems are not mistaken for a missing file.
func (c *Checker) checkFile(entry *mfer.MFFilePath, checkedBytes *mfer.FileSize) Result {
	absPath := filepath.Join(string(c.basePath), entry.Path)
	relPath := mfer.RelFilePath(entry.Path)

	// Check if file exists
	info, err := c.fs.Stat(absPath)
	if err != nil {
		if errors.Is(err, os.ErrNotExist) || errors.Is(err, afero.ErrFileNotFound) {
			return Result{Path: relPath, Status: StatusMissing, Message: "file not found"}
		}
		return Result{Path: relPath, Status: StatusError, Message: err.Error()}
	}

	// Check size first: a mismatch is conclusive and avoids reading the file.
	if info.Size() != entry.Size {
		*checkedBytes += mfer.FileSize(info.Size())
		return Result{
			Path:    relPath,
			Status:  StatusSizeMismatch,
			Message: "size mismatch",
		}
	}

	// Open and hash file
	f, err := c.fs.Open(absPath)
	if err != nil {
		return Result{Path: relPath, Status: StatusError, Message: err.Error()}
	}
	// The handle is only read from, so a Close error carries no information
	// that affects the verification result.
	defer func() { _ = f.Close() }()

	h := sha256.New()
	n, err := io.Copy(h, f)
	if err != nil {
		return Result{Path: relPath, Status: StatusError, Message: err.Error()}
	}
	*checkedBytes += mfer.FileSize(n)

	// Encode as multihash and compare
	computed, err := multihash.Encode(h.Sum(nil), multihash.SHA2_256)
	if err != nil {
		return Result{Path: relPath, Status: StatusError, Message: err.Error()}
	}

	// Check against all hashes in manifest (at least one must match)
	for _, hash := range entry.Hashes {
		if bytes.Equal(computed, hash.MultiHash) {
			return Result{Path: relPath, Status: StatusOK}
		}
	}

	return Result{Path: relPath, Status: StatusHashMismatch, Message: "hash mismatch"}
}
// FindExtraFiles walks the tree rooted at the checker's base path and reports
// every non-directory entry whose relative path is not in the manifest.
//
// Each such file is sent to results as a Result with StatusExtra. results may
// be nil, in which case the walk still runs but nothing is reported. A
// non-nil results channel is closed when the method returns.
//
// The walk stops and its error is returned as soon as the walker reports an
// error for any entry, a relative path cannot be computed, or ctx is
// cancelled. Cancellation is also honoured while waiting to send a Result, so
// a receiver that has stopped reading cannot block the walk forever.
func (c *Checker) FindExtraFiles(ctx context.Context, results chan<- Result) error {
	if results != nil {
		defer close(results)
	}

	root := string(c.basePath)

	return afero.Walk(c.fs, root, func(path string, info os.FileInfo, err error) error {
		if err != nil {
			return err
		}

		select {
		case <-ctx.Done():
			return ctx.Err()
		default:
		}

		// Skip directories
		if info.IsDir() {
			return nil
		}

		// Get relative path
		// NOTE(review): rel uses the OS path separator; this presumes manifest
		// paths are stored in the same form. Confirm for non-Unix platforms.
		rel, err := filepath.Rel(root, path)
		if err != nil {
			return err
		}
		relPath := mfer.RelFilePath(rel)

		// Files listed in the manifest are expected; nothing to report.
		if _, listed := c.manifestPaths[relPath]; listed {
			return nil
		}
		if results == nil {
			return nil
		}

		extra := Result{
			Path:    relPath,
			Status:  StatusExtra,
			Message: "not in manifest",
		}
		select {
		case results <- extra:
			return nil
		case <-ctx.Done():
			return ctx.Err()
		}
	})
}
// sendCheckStatus offers status on ch without ever blocking. The update is
// silently discarded when ch is nil or when no receiver or buffer slot is
// immediately available.
func sendCheckStatus(ch chan<- CheckStatus, status CheckStatus) {
	if ch != nil {
		select {
		case ch <- status:
			// Delivered.
		default:
			// Receiver not ready; drop this update.
		}
	}
}

View File

@@ -1,405 +0,0 @@
package checker
import (
"bytes"
"context"
"testing"
"time"
"github.com/spf13/afero"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
"sneak.berlin/go/mfer/mfer"
)
// TestStatusString checks the string form of every declared Status and of an
// out-of-range value.
func TestStatusString(t *testing.T) {
	type testCase struct {
		status   Status
		expected string
	}

	cases := []testCase{
		{status: StatusOK, expected: "OK"},
		{status: StatusMissing, expected: "MISSING"},
		{status: StatusSizeMismatch, expected: "SIZE_MISMATCH"},
		{status: StatusHashMismatch, expected: "HASH_MISMATCH"},
		{status: StatusExtra, expected: "EXTRA"},
		{status: StatusError, expected: "ERROR"},
		{status: Status(99), expected: "UNKNOWN"},
	}

	for i := range cases {
		tc := cases[i]
		t.Run(tc.expected, func(t *testing.T) {
			assert.Equal(t, tc.expected, tc.status.String())
		})
	}
}
// createTestManifest builds a manifest describing files (name -> content) and
// writes it to manifestPath on fs. Each entry is added with its content
// length as the size and the current time as the modification time. Entries
// are added in map-iteration order. Any failure aborts the calling test.
func createTestManifest(t *testing.T, fs afero.Fs, manifestPath string, files map[string][]byte) {
	t.Helper()

	b := mfer.NewBuilder()
	for name, data := range files {
		_, err := b.AddFile(
			mfer.RelFilePath(name),
			mfer.FileSize(len(data)),
			mfer.ModTime(time.Now()),
			bytes.NewReader(data),
			nil,
		)
		require.NoError(t, err)
	}

	var encoded bytes.Buffer
	buildErr := b.Build(&encoded)
	require.NoError(t, buildErr)

	writeErr := afero.WriteFile(fs, manifestPath, encoded.Bytes(), 0644)
	require.NoError(t, writeErr)
}
// createFilesOnDisk writes each entry of files (name -> content) beneath
// basePath on fs. Only basePath itself is created explicitly; this helper
// does not create intermediate directories for nested names. Any failure
// aborts the calling test.
func createFilesOnDisk(t *testing.T, fs afero.Fs, basePath string, files map[string][]byte) {
	t.Helper()

	for name, data := range files {
		target := basePath + "/" + name

		mkdirErr := fs.MkdirAll(basePath, 0755)
		require.NoError(t, mkdirErr)

		writeErr := afero.WriteFile(fs, target, data, 0644)
		require.NoError(t, writeErr)
	}
}
// TestNewChecker covers construction from a valid manifest, a manifest path
// that does not exist, and a file that is not a manifest.
func TestNewChecker(t *testing.T) {
	t.Run("valid manifest", func(t *testing.T) {
		memFs := afero.NewMemMapFs()
		contents := map[string][]byte{
			"file1.txt": []byte("hello"),
			"file2.txt": []byte("world"),
		}
		createTestManifest(t, memFs, "/manifest.mf", contents)

		chk, err := NewChecker("/manifest.mf", "/", memFs)

		require.NoError(t, err)
		assert.NotNil(t, chk)
		assert.Equal(t, mfer.FileCount(2), chk.FileCount())
	})

	t.Run("missing manifest", func(t *testing.T) {
		memFs := afero.NewMemMapFs()

		_, err := NewChecker("/nonexistent.mf", "/", memFs)

		assert.Error(t, err)
	})

	t.Run("invalid manifest", func(t *testing.T) {
		memFs := afero.NewMemMapFs()
		writeErr := afero.WriteFile(memFs, "/bad.mf", []byte("not a manifest"), 0644)
		require.NoError(t, writeErr)

		_, err := NewChecker("/bad.mf", "/", memFs)

		assert.Error(t, err)
	})
}
// TestCheckerFileCountAndTotalBytes verifies that FileCount and TotalBytes
// reflect the entries recorded in the manifest.
func TestCheckerFileCountAndTotalBytes(t *testing.T) {
	memFs := afero.NewMemMapFs()
	contents := map[string][]byte{
		"small.txt":  []byte("hi"),
		"medium.txt": []byte("hello world"),
		"large.txt":  bytes.Repeat([]byte("x"), 1000),
	}
	createTestManifest(t, memFs, "/manifest.mf", contents)

	chk, err := NewChecker("/manifest.mf", "/", memFs)
	require.NoError(t, err)

	gotCount := chk.FileCount()
	assert.Equal(t, mfer.FileCount(3), gotCount)

	gotBytes := chk.TotalBytes()
	assert.Equal(t, mfer.FileSize(2+11+1000), gotBytes)
}
// TestCheckAllFilesOK verifies that every file is reported as StatusOK when
// the files on disk match the manifest exactly.
func TestCheckAllFilesOK(t *testing.T) {
	memFs := afero.NewMemMapFs()
	contents := map[string][]byte{
		"file1.txt": []byte("content one"),
		"file2.txt": []byte("content two"),
	}
	createTestManifest(t, memFs, "/manifest.mf", contents)
	createFilesOnDisk(t, memFs, "/data", contents)

	chk, err := NewChecker("/manifest.mf", "/data", memFs)
	require.NoError(t, err)

	// The buffer holds every result, so Check can finish before we read.
	resultCh := make(chan Result, 10)
	err = chk.Check(context.Background(), resultCh, nil)
	require.NoError(t, err)

	var collected []Result
	for res := range resultCh {
		collected = append(collected, res)
	}

	assert.Len(t, collected, 2)
	for i := range collected {
		assert.Equal(t, StatusOK, collected[i].Status, "file %s should be OK", collected[i].Path)
	}
}
// TestCheckMissingFile verifies that a manifest entry with no file on disk is
// reported as StatusMissing while the file that is present still passes.
func TestCheckMissingFile(t *testing.T) {
	memFs := afero.NewMemMapFs()
	manifestContents := map[string][]byte{
		"exists.txt":  []byte("I exist"),
		"missing.txt": []byte("I don't exist on disk"),
	}
	createTestManifest(t, memFs, "/manifest.mf", manifestContents)

	// Only one of the two manifest files is written to disk.
	onDisk := map[string][]byte{
		"exists.txt": []byte("I exist"),
	}
	createFilesOnDisk(t, memFs, "/data", onDisk)

	chk, err := NewChecker("/manifest.mf", "/data", memFs)
	require.NoError(t, err)

	resultCh := make(chan Result, 10)
	err = chk.Check(context.Background(), resultCh, nil)
	require.NoError(t, err)

	numOK, numMissing := 0, 0
	for res := range resultCh {
		if res.Status == StatusOK {
			numOK++
		} else if res.Status == StatusMissing {
			numMissing++
			assert.Equal(t, mfer.RelFilePath("missing.txt"), res.Path)
		}
	}

	assert.Equal(t, 1, numOK)
	assert.Equal(t, 1, numMissing)
}
// TestCheckSizeMismatch verifies that a file whose on-disk length differs
// from the manifest is reported as StatusSizeMismatch.
func TestCheckSizeMismatch(t *testing.T) {
	memFs := afero.NewMemMapFs()
	createTestManifest(t, memFs, "/manifest.mf", map[string][]byte{
		"file.txt": []byte("original content"),
	})

	// The on-disk copy is shorter than the manifest says it should be.
	createFilesOnDisk(t, memFs, "/data", map[string][]byte{
		"file.txt": []byte("short"),
	})

	chk, err := NewChecker("/manifest.mf", "/data", memFs)
	require.NoError(t, err)

	resultCh := make(chan Result, 10)
	err = chk.Check(context.Background(), resultCh, nil)
	require.NoError(t, err)

	first := <-resultCh
	assert.Equal(t, StatusSizeMismatch, first.Status)
	assert.Equal(t, mfer.RelFilePath("file.txt"), first.Path)
}
// TestCheckHashMismatch verifies that a file with the right length but
// different bytes is reported as StatusHashMismatch.
func TestCheckHashMismatch(t *testing.T) {
	memFs := afero.NewMemMapFs()

	recorded := []byte("original content")
	createTestManifest(t, memFs, "/manifest.mf", map[string][]byte{
		"file.txt": recorded,
	})

	// Same length (16 bytes) as the recorded content, so the size check
	// passes and only the hash comparison can catch the difference.
	tampered := []byte("different contnt")
	require.Equal(t, len(recorded), len(tampered), "test requires same length")
	createFilesOnDisk(t, memFs, "/data", map[string][]byte{
		"file.txt": tampered,
	})

	chk, err := NewChecker("/manifest.mf", "/data", memFs)
	require.NoError(t, err)

	resultCh := make(chan Result, 10)
	err = chk.Check(context.Background(), resultCh, nil)
	require.NoError(t, err)

	first := <-resultCh
	assert.Equal(t, StatusHashMismatch, first.Status)
	assert.Equal(t, mfer.RelFilePath("file.txt"), first.Path)
}
// TestCheckWithProgress verifies that Check emits progress updates and that
// the last one accounts for every file and byte with no failures.
func TestCheckWithProgress(t *testing.T) {
	fs := afero.NewMemMapFs()
	files := map[string][]byte{
		"file1.txt": bytes.Repeat([]byte("a"), 100),
		"file2.txt": bytes.Repeat([]byte("b"), 200),
	}
	createTestManifest(t, fs, "/manifest.mf", files)
	createFilesOnDisk(t, fs, "/data", files)

	chk, err := NewChecker("/manifest.mf", "/data", fs)
	require.NoError(t, err)

	// Both buffers exceed the file count, so no send blocks and no progress
	// update is dropped.
	results := make(chan Result, 10)
	progress := make(chan CheckStatus, 10)
	err = chk.Check(context.Background(), results, progress)
	require.NoError(t, err)

	// Drain results
	for range results {
	}

	// Check progress was sent
	var progressUpdates []CheckStatus
	for p := range progress {
		progressUpdates = append(progressUpdates, p)
	}
	// require, not assert: indexing the last element below would panic on an
	// empty slice and hide the real failure.
	require.NotEmpty(t, progressUpdates)

	// Final progress should show all files checked
	final := progressUpdates[len(progressUpdates)-1]
	assert.Equal(t, mfer.FileCount(2), final.TotalFiles)
	assert.Equal(t, mfer.FileCount(2), final.CheckedFiles)
	assert.Equal(t, mfer.FileSize(300), final.TotalBytes)
	assert.Equal(t, mfer.FileSize(300), final.CheckedBytes)
	assert.Equal(t, mfer.FileCount(0), final.Failures)
}
// TestCheckContextCancellation verifies that Check returns context.Canceled
// when its context has already been cancelled.
func TestCheckContextCancellation(t *testing.T) {
	fs := afero.NewMemMapFs()

	// Create many files so the check has real work it must abandon. The name
	// combines two letters so all 100 names are distinct; a single letter
	// taken from i%26 would collapse them into 26 map keys.
	files := make(map[string][]byte, 100)
	for i := 0; i < 100; i++ {
		name := string(rune('a'+i%26)) + string(rune('a'+i/26)) + ".txt"
		files[name] = bytes.Repeat([]byte("x"), 1000)
	}
	createTestManifest(t, fs, "/manifest.mf", files)
	createFilesOnDisk(t, fs, "/data", files)

	chk, err := NewChecker("/manifest.mf", "/data", fs)
	require.NoError(t, err)

	ctx, cancel := context.WithCancel(context.Background())
	cancel() // Cancel immediately

	results := make(chan Result, 200)
	err = chk.Check(ctx, results, nil)
	assert.ErrorIs(t, err, context.Canceled)
}
// TestFindExtraFiles verifies that a file present on disk but absent from the
// manifest is reported exactly once with StatusExtra.
func TestFindExtraFiles(t *testing.T) {
	fs := afero.NewMemMapFs()

	// Manifest only contains file1
	manifestFiles := map[string][]byte{
		"file1.txt": []byte("in manifest"),
	}
	createTestManifest(t, fs, "/manifest.mf", manifestFiles)

	// Disk has file1 and file2
	createFilesOnDisk(t, fs, "/data", map[string][]byte{
		"file1.txt": []byte("in manifest"),
		"file2.txt": []byte("extra file"),
	})

	chk, err := NewChecker("/manifest.mf", "/data", fs)
	require.NoError(t, err)

	results := make(chan Result, 10)
	err = chk.FindExtraFiles(context.Background(), results)
	require.NoError(t, err)

	var extras []Result
	for r := range results {
		extras = append(extras, r)
	}

	// require, not assert: extras[0] below would panic on an empty slice and
	// hide the real failure.
	require.Len(t, extras, 1)
	assert.Equal(t, mfer.RelFilePath("file2.txt"), extras[0].Path)
	assert.Equal(t, StatusExtra, extras[0].Status)
	assert.Equal(t, "not in manifest", extras[0].Message)
}
// TestFindExtraFilesContextCancellation verifies that FindExtraFiles returns
// context.Canceled when its context has already been cancelled.
func TestFindExtraFilesContextCancellation(t *testing.T) {
	memFs := afero.NewMemMapFs()
	contents := map[string][]byte{"file.txt": []byte("data")}
	createTestManifest(t, memFs, "/manifest.mf", contents)
	createFilesOnDisk(t, memFs, "/data", contents)

	chk, err := NewChecker("/manifest.mf", "/data", memFs)
	require.NoError(t, err)

	// Cancel before the walk starts.
	cancelled, stop := context.WithCancel(context.Background())
	stop()

	resultCh := make(chan Result, 10)
	walkErr := chk.FindExtraFiles(cancelled, resultCh)
	assert.ErrorIs(t, walkErr, context.Canceled)
}
// TestCheckNilChannels verifies that Check accepts nil results and progress
// channels without panicking and completes successfully.
func TestCheckNilChannels(t *testing.T) {
	memFs := afero.NewMemMapFs()
	contents := map[string][]byte{"file.txt": []byte("data")}
	createTestManifest(t, memFs, "/manifest.mf", contents)
	createFilesOnDisk(t, memFs, "/data", contents)

	chk, err := NewChecker("/manifest.mf", "/data", memFs)
	require.NoError(t, err)

	// Both channels are nil on purpose.
	checkErr := chk.Check(context.Background(), nil, nil)
	assert.NoError(t, checkErr)
}
// TestFindExtraFilesNilChannel verifies that FindExtraFiles accepts a nil
// results channel without panicking and completes successfully.
func TestFindExtraFilesNilChannel(t *testing.T) {
	memFs := afero.NewMemMapFs()
	contents := map[string][]byte{"file.txt": []byte("data")}
	createTestManifest(t, memFs, "/manifest.mf", contents)
	createFilesOnDisk(t, memFs, "/data", contents)

	chk, err := NewChecker("/manifest.mf", "/data", memFs)
	require.NoError(t, err)

	// The results channel is nil on purpose.
	walkErr := chk.FindExtraFiles(context.Background(), nil)
	assert.NoError(t, walkErr)
}
// TestCheckSubdirectories verifies that files nested several directories deep
// are located and verified as StatusOK.
func TestCheckSubdirectories(t *testing.T) {
	memFs := afero.NewMemMapFs()
	contents := map[string][]byte{
		"dir1/file1.txt":          []byte("content1"),
		"dir1/dir2/file2.txt":     []byte("content2"),
		"dir1/dir2/dir3/deep.txt": []byte("deep content"),
	}
	createTestManifest(t, memFs, "/manifest.mf", contents)

	// Write the files by hand so the deepest directory is created explicitly.
	for name, data := range contents {
		mkdirErr := memFs.MkdirAll("/data/dir1/dir2/dir3", 0755)
		require.NoError(t, mkdirErr)

		target := "/data/" + name
		writeErr := afero.WriteFile(memFs, target, data, 0644)
		require.NoError(t, writeErr)
	}

	chk, err := NewChecker("/manifest.mf", "/data", memFs)
	require.NoError(t, err)

	resultCh := make(chan Result, 10)
	err = chk.Check(context.Background(), resultCh, nil)
	require.NoError(t, err)

	seen := 0
	for res := range resultCh {
		assert.Equal(t, StatusOK, res.Status, "file %s should be OK", res.Path)
		seen++
	}
	assert.Equal(t, 3, seen)
}
// TestCheckEmptyManifest verifies that a manifest with no entries reports
// zero files and zero bytes, and that Check produces no results.
func TestCheckEmptyManifest(t *testing.T) {
	memFs := afero.NewMemMapFs()

	// A manifest built from an empty file set.
	noFiles := map[string][]byte{}
	createTestManifest(t, memFs, "/manifest.mf", noFiles)

	chk, err := NewChecker("/manifest.mf", "/data", memFs)
	require.NoError(t, err)

	assert.Equal(t, mfer.FileCount(0), chk.FileCount())
	assert.Equal(t, mfer.FileSize(0), chk.TotalBytes())

	resultCh := make(chan Result, 10)
	err = chk.Check(context.Background(), resultCh, nil)
	require.NoError(t, err)

	received := 0
	for range resultCh {
		received++
	}
	assert.Equal(t, 0, received)
}

View File

@@ -8,8 +8,8 @@ import (
"github.com/dustin/go-humanize"
"github.com/spf13/afero"
"github.com/urfave/cli/v2"
"sneak.berlin/go/mfer/internal/checker"
"sneak.berlin/go/mfer/internal/log"
"sneak.berlin/go/mfer/mfer"
)
// findManifest looks for a manifest file in the given directory.
@@ -63,7 +63,7 @@ func (mfa *CLIApp) checkManifestOperation(ctx *cli.Context) error {
log.Infof("checking manifest %s with base %s", manifestPath, basePath)
// Create checker
chk, err := checker.NewChecker(manifestPath, basePath, mfa.Fs)
chk, err := mfer.NewChecker(manifestPath, basePath, mfa.Fs)
if err != nil {
return fmt.Errorf("failed to load manifest: %w", err)
}
@@ -71,12 +71,12 @@ func (mfa *CLIApp) checkManifestOperation(ctx *cli.Context) error {
log.Infof("manifest contains %d files, %s", chk.FileCount(), humanize.IBytes(uint64(chk.TotalBytes())))
// Set up results channel
results := make(chan checker.Result, 1)
results := make(chan mfer.Result, 1)
// Set up progress channel
var progress chan checker.CheckStatus
var progress chan mfer.CheckStatus
if showProgress {
progress = make(chan checker.CheckStatus, 1)
progress = make(chan mfer.CheckStatus, 1)
go func() {
for status := range progress {
if status.ETA > 0 {
@@ -103,7 +103,7 @@ func (mfa *CLIApp) checkManifestOperation(ctx *cli.Context) error {
done := make(chan struct{})
go func() {
for result := range results {
if result.Status != checker.StatusOK {
if result.Status != mfer.StatusOK {
failures++
log.Infof("%s: %s (%s)", result.Status, result.Path, result.Message)
} else {
@@ -124,7 +124,7 @@ func (mfa *CLIApp) checkManifestOperation(ctx *cli.Context) error {
// Check for extra files if requested
if ctx.Bool("no-extra-files") {
extraResults := make(chan checker.Result, 1)
extraResults := make(chan mfer.Result, 1)
extraDone := make(chan struct{})
go func() {
for result := range extraResults {