396 lines
9.6 KiB
Go
396 lines
9.6 KiB
Go
package cli
|
|
|
|
import (
|
|
"crypto/sha256"
|
|
"fmt"
|
|
"io"
|
|
"io/fs"
|
|
"path/filepath"
|
|
"time"
|
|
|
|
"github.com/multiformats/go-multihash"
|
|
"github.com/spf13/afero"
|
|
"github.com/urfave/cli/v2"
|
|
"sneak.berlin/go/mfer/internal/log"
|
|
"sneak.berlin/go/mfer/mfer"
|
|
)
|
|
|
|
// FreshenStatus is a snapshot of how far a freshen run has progressed.
type FreshenStatus struct {
	// Phase names the stage currently running: "scan" or "hash".
	Phase string
	// TotalFiles is the number of files to process in the current phase.
	TotalFiles int64
	// CurrentFiles is the number of files processed so far.
	CurrentFiles int64
	// TotalBytes is the number of bytes to hash (hash phase only).
	TotalBytes int64
	// CurrentBytes is the number of bytes hashed so far.
	CurrentBytes int64
	// BytesPerSec is the current throughput rate.
	BytesPerSec float64
	// ETA is the estimated time remaining until completion.
	ETA time.Duration
}
|
|
|
|
// freshenEntry tracks a file's status during freshen
|
|
type freshenEntry struct {
|
|
path string
|
|
size int64
|
|
mtime time.Time
|
|
needsHash bool // true if new or changed
|
|
existing *mfer.MFFilePath // existing manifest entry if unchanged
|
|
}
|
|
|
|
func (mfa *CLIApp) freshenManifestOperation(ctx *cli.Context) error {
|
|
log.Debug("freshenManifestOperation()")
|
|
|
|
basePath := ctx.String("base")
|
|
showProgress := ctx.Bool("progress")
|
|
ignoreDotfiles := ctx.Bool("IgnoreDotfiles")
|
|
followSymlinks := ctx.Bool("FollowSymLinks")
|
|
|
|
// Find manifest file
|
|
var manifestPath string
|
|
var err error
|
|
|
|
if ctx.Args().Len() > 0 {
|
|
arg := ctx.Args().Get(0)
|
|
info, statErr := mfa.Fs.Stat(arg)
|
|
if statErr == nil && info.IsDir() {
|
|
manifestPath, err = findManifest(mfa.Fs, arg)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
} else {
|
|
manifestPath = arg
|
|
}
|
|
} else {
|
|
manifestPath, err = findManifest(mfa.Fs, ".")
|
|
if err != nil {
|
|
return err
|
|
}
|
|
}
|
|
|
|
log.Infof("loading manifest from %s", manifestPath)
|
|
|
|
// Load existing manifest
|
|
manifest, err := mfer.NewManifestFromFile(mfa.Fs, manifestPath)
|
|
if err != nil {
|
|
return fmt.Errorf("failed to load manifest: %w", err)
|
|
}
|
|
|
|
existingFiles := manifest.Files()
|
|
log.Infof("manifest contains %d files", len(existingFiles))
|
|
|
|
// Build map of existing entries by path
|
|
existingByPath := make(map[string]*mfer.MFFilePath, len(existingFiles))
|
|
for _, f := range existingFiles {
|
|
existingByPath[f.Path] = f
|
|
}
|
|
|
|
// Phase 1: Scan filesystem
|
|
log.Infof("scanning filesystem...")
|
|
startScan := time.Now()
|
|
|
|
var entries []*freshenEntry
|
|
var scanCount int64
|
|
var removed, changed, added, unchanged int64
|
|
|
|
absBase, err := filepath.Abs(basePath)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
err = afero.Walk(mfa.Fs, absBase, func(path string, info fs.FileInfo, walkErr error) error {
|
|
if walkErr != nil {
|
|
return walkErr
|
|
}
|
|
|
|
// Get relative path
|
|
relPath, err := filepath.Rel(absBase, path)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
// Skip the manifest file itself
|
|
if relPath == filepath.Base(manifestPath) || relPath == "."+filepath.Base(manifestPath) {
|
|
return nil
|
|
}
|
|
|
|
// Handle dotfiles
|
|
if ignoreDotfiles && pathIsHidden(relPath) {
|
|
if info.IsDir() {
|
|
return filepath.SkipDir
|
|
}
|
|
return nil
|
|
}
|
|
|
|
// Skip directories
|
|
if info.IsDir() {
|
|
return nil
|
|
}
|
|
|
|
// Handle symlinks
|
|
if info.Mode()&fs.ModeSymlink != 0 {
|
|
if !followSymlinks {
|
|
return nil
|
|
}
|
|
realPath, err := filepath.EvalSymlinks(path)
|
|
if err != nil {
|
|
return nil // Skip broken symlinks
|
|
}
|
|
realInfo, err := mfa.Fs.Stat(realPath)
|
|
if err != nil || realInfo.IsDir() {
|
|
return nil
|
|
}
|
|
info = realInfo
|
|
}
|
|
|
|
scanCount++
|
|
|
|
// Check against existing manifest
|
|
existing, inManifest := existingByPath[relPath]
|
|
if inManifest {
|
|
// Check if changed (size or mtime)
|
|
existingMtime := time.Unix(existing.Mtime.Seconds, int64(existing.Mtime.Nanos))
|
|
if existing.Size != info.Size() || !existingMtime.Equal(info.ModTime()) {
|
|
changed++
|
|
log.Debugf("M %s", relPath)
|
|
entries = append(entries, &freshenEntry{
|
|
path: relPath,
|
|
size: info.Size(),
|
|
mtime: info.ModTime(),
|
|
needsHash: true,
|
|
})
|
|
} else {
|
|
unchanged++
|
|
entries = append(entries, &freshenEntry{
|
|
path: relPath,
|
|
size: info.Size(),
|
|
mtime: info.ModTime(),
|
|
needsHash: false,
|
|
existing: existing,
|
|
})
|
|
}
|
|
// Mark as seen
|
|
delete(existingByPath, relPath)
|
|
} else {
|
|
added++
|
|
log.Debugf("A %s", relPath)
|
|
entries = append(entries, &freshenEntry{
|
|
path: relPath,
|
|
size: info.Size(),
|
|
mtime: info.ModTime(),
|
|
needsHash: true,
|
|
})
|
|
}
|
|
|
|
// Report scan progress
|
|
if showProgress && scanCount%100 == 0 {
|
|
log.Progressf("Scanning: %d files found", scanCount)
|
|
}
|
|
|
|
return nil
|
|
})
|
|
|
|
if showProgress {
|
|
log.ProgressDone()
|
|
}
|
|
|
|
if err != nil {
|
|
return fmt.Errorf("failed to scan filesystem: %w", err)
|
|
}
|
|
|
|
// Remaining entries in existingByPath are removed files
|
|
removed = int64(len(existingByPath))
|
|
for path := range existingByPath {
|
|
log.Debugf("D %s", path)
|
|
}
|
|
|
|
scanDuration := time.Since(startScan)
|
|
log.Infof("scan complete in %s: %d unchanged, %d changed, %d added, %d removed",
|
|
scanDuration.Round(time.Millisecond), unchanged, changed, added, removed)
|
|
|
|
// Calculate total bytes to hash
|
|
var totalHashBytes int64
|
|
var filesToHash int64
|
|
for _, e := range entries {
|
|
if e.needsHash {
|
|
totalHashBytes += e.size
|
|
filesToHash++
|
|
}
|
|
}
|
|
|
|
// Phase 2: Hash changed and new files
|
|
if filesToHash > 0 {
|
|
log.Infof("hashing %d files (%.1f MB)...", filesToHash, float64(totalHashBytes)/1e6)
|
|
}
|
|
|
|
startHash := time.Now()
|
|
var hashedFiles int64
|
|
var hashedBytes int64
|
|
|
|
builder := mfer.NewBuilder()
|
|
|
|
for _, e := range entries {
|
|
select {
|
|
case <-ctx.Done():
|
|
return ctx.Err()
|
|
default:
|
|
}
|
|
|
|
if e.needsHash {
|
|
// Need to read and hash the file
|
|
absPath := filepath.Join(absBase, e.path)
|
|
f, err := mfa.Fs.Open(absPath)
|
|
if err != nil {
|
|
return fmt.Errorf("failed to open %s: %w", e.path, err)
|
|
}
|
|
|
|
hash, bytesRead, err := hashFile(f, e.size, func(n int64) {
|
|
if showProgress {
|
|
currentBytes := hashedBytes + n
|
|
elapsed := time.Since(startHash)
|
|
var rate float64
|
|
var eta time.Duration
|
|
if elapsed > 0 && currentBytes > 0 {
|
|
rate = float64(currentBytes) / elapsed.Seconds()
|
|
remaining := totalHashBytes - currentBytes
|
|
if rate > 0 {
|
|
eta = time.Duration(float64(remaining)/rate) * time.Second
|
|
}
|
|
}
|
|
if eta > 0 {
|
|
log.Progressf("Hashing: %d/%d files, %.1f MB/s, ETA %s",
|
|
hashedFiles, filesToHash, rate/1e6, eta.Round(time.Second))
|
|
} else {
|
|
log.Progressf("Hashing: %d/%d files, %.1f MB/s",
|
|
hashedFiles, filesToHash, rate/1e6)
|
|
}
|
|
}
|
|
})
|
|
_ = f.Close()
|
|
|
|
if err != nil {
|
|
return fmt.Errorf("failed to hash %s: %w", e.path, err)
|
|
}
|
|
|
|
hashedBytes += bytesRead
|
|
hashedFiles++
|
|
|
|
// Add to builder with computed hash
|
|
addFileToBuilder(builder, e.path, e.size, e.mtime, hash)
|
|
} else {
|
|
// Use existing entry
|
|
addExistingToBuilder(builder, e.existing)
|
|
}
|
|
}
|
|
|
|
if showProgress && filesToHash > 0 {
|
|
log.ProgressDone()
|
|
}
|
|
|
|
// Write updated manifest
|
|
tmpPath := manifestPath + ".tmp"
|
|
outFile, err := mfa.Fs.Create(tmpPath)
|
|
if err != nil {
|
|
return fmt.Errorf("failed to create temp file: %w", err)
|
|
}
|
|
|
|
err = builder.Build(outFile)
|
|
_ = outFile.Close()
|
|
if err != nil {
|
|
_ = mfa.Fs.Remove(tmpPath)
|
|
return fmt.Errorf("failed to write manifest: %w", err)
|
|
}
|
|
|
|
// Rename temp to final
|
|
if err := mfa.Fs.Rename(tmpPath, manifestPath); err != nil {
|
|
_ = mfa.Fs.Remove(tmpPath)
|
|
return fmt.Errorf("failed to rename manifest: %w", err)
|
|
}
|
|
|
|
// Print summary
|
|
if !ctx.Bool("quiet") {
|
|
totalDuration := time.Since(mfa.startupTime)
|
|
var hashRate float64
|
|
if hashedBytes > 0 {
|
|
hashDuration := time.Since(startHash)
|
|
hashRate = float64(hashedBytes) / hashDuration.Seconds() / 1e6
|
|
}
|
|
log.Infof("freshen complete: %d unchanged, %d changed, %d added, %d removed",
|
|
unchanged, changed, added, removed)
|
|
if filesToHash > 0 {
|
|
log.Infof("hashed %.1f MB in %.1fs (%.1f MB/s)",
|
|
float64(hashedBytes)/1e6, totalDuration.Seconds(), hashRate)
|
|
}
|
|
log.Infof("wrote %d files to %s", len(entries), manifestPath)
|
|
}
|
|
|
|
return nil
|
|
}
|
|
|
|
// hashFile reads a file and computes its SHA256 multihash.
|
|
// Progress callback is called with bytes read so far.
|
|
func hashFile(r io.Reader, size int64, progress func(int64)) ([]byte, int64, error) {
|
|
h := sha256.New()
|
|
buf := make([]byte, 64*1024)
|
|
var total int64
|
|
|
|
for {
|
|
n, err := r.Read(buf)
|
|
if n > 0 {
|
|
h.Write(buf[:n])
|
|
total += int64(n)
|
|
if progress != nil {
|
|
progress(total)
|
|
}
|
|
}
|
|
if err == io.EOF {
|
|
break
|
|
}
|
|
if err != nil {
|
|
return nil, total, err
|
|
}
|
|
}
|
|
|
|
mh, err := multihash.Encode(h.Sum(nil), multihash.SHA2_256)
|
|
if err != nil {
|
|
return nil, total, err
|
|
}
|
|
|
|
return mh, total, nil
|
|
}
|
|
|
|
// addFileToBuilder adds a new file entry to the builder
|
|
func addFileToBuilder(b *mfer.Builder, path string, size int64, mtime time.Time, hash []byte) {
|
|
// Use the builder's internal method indirectly by creating an entry
|
|
// Since Builder.AddFile reads from a reader, we need to use a different approach
|
|
// We'll access the builder's files directly through a custom method
|
|
b.AddFileWithHash(path, size, mtime, hash)
|
|
}
|
|
|
|
// addExistingToBuilder adds an existing manifest entry to the builder
|
|
func addExistingToBuilder(b *mfer.Builder, entry *mfer.MFFilePath) {
|
|
mtime := time.Unix(entry.Mtime.Seconds, int64(entry.Mtime.Nanos))
|
|
if len(entry.Hashes) > 0 {
|
|
b.AddFileWithHash(entry.Path, entry.Size, mtime, entry.Hashes[0].MultiHash)
|
|
}
|
|
}
|
|
|
|
// pathIsHidden reports whether any component of p begins with a dot.
//
// The path is examined from its last component upwards using filepath.Base
// and filepath.Dir. The empty path, "." and "/" are never hidden, so a walk
// root expressed as "." is not mistaken for a dotfile. A ".." component
// begins with a dot and therefore counts as hidden.
//
// An earlier version first ran p through filepath.SplitList, which splits on
// the OS path-list separator (':' or ';') rather than on path components.
// That made "." and "./a" report as hidden, as well as names such as
// "foo:.bar" on Unix.
func pathIsHidden(p string) bool {
	for p != "" && p != "." && p != "/" {
		base := filepath.Base(p)
		if len(base) > 0 && base[0] == '.' {
			return true
		}
		parent := filepath.Dir(p)
		if parent == p {
			// Reached a root that Dir cannot shorten further.
			break
		}
		p = parent
	}
	return false
}
|