package cli

import (
	"crypto/sha256"
	"fmt"
	"io"
	"io/fs"
	"path/filepath"
	"time"

	"github.com/multiformats/go-multihash"
	"github.com/spf13/afero"
	"github.com/urfave/cli/v2"
	"sneak.berlin/go/mfer/internal/log"
	"sneak.berlin/go/mfer/mfer"
)

// FreshenStatus contains progress information for the freshen operation.
type FreshenStatus struct {
	Phase        string        // "scan" or "hash"
	TotalFiles   int64         // Total files to process in current phase
	CurrentFiles int64         // Files processed so far
	TotalBytes   int64         // Total bytes to hash (hash phase only)
	CurrentBytes int64         // Bytes hashed so far
	BytesPerSec  float64       // Current throughput rate
	ETA          time.Duration // Estimated time to completion
}

// freshenEntry tracks a file's status during freshen.
type freshenEntry struct {
	path      string           // path relative to the scan base
	size      int64            // size reported by the filesystem at scan time
	mtime     time.Time        // modification time reported at scan time
	needsHash bool             // true if new or changed
	existing  *mfer.MFFilePath // existing manifest entry if unchanged
}

// freshenManifestOperation updates an existing manifest in place so that it
// matches the files currently under the base directory.
//
// The manifest is located from the first positional argument (a manifest
// file, or a directory searched with findManifest); with no argument the
// current directory is searched. The command reads the "base", "progress",
// "IncludeDotfiles", "FollowSymLinks" and "quiet" flags from ctx.
//
// It runs in two phases:
//
//  1. Scan: walk the base directory and classify each file as unchanged,
//     changed (size or mtime differs from the manifest) or added. Manifest
//     entries that are never seen during the walk are counted as removed.
//  2. Hash: re-hash only the changed and added files; unchanged files reuse
//     the entry already present in the manifest.
//
// The result is written to "<manifest>.tmp" and then renamed over the original
// manifest. Any error from scanning, hashing, writing or renaming is returned;
// cancellation of ctx during the hash phase returns ctx.Err().
func (mfa *CLIApp) freshenManifestOperation(ctx *cli.Context) error {
	log.Debug("freshenManifestOperation()")

	basePath := ctx.String("base")
	showProgress := ctx.Bool("progress")
	includeDotfiles := ctx.Bool("IncludeDotfiles")
	followSymlinks := ctx.Bool("FollowSymLinks")

	// Find manifest file
	var manifestPath string
	var err error
	if ctx.Args().Len() > 0 {
		arg := ctx.Args().Get(0)
		info, statErr := mfa.Fs.Stat(arg)
		if statErr == nil && info.IsDir() {
			manifestPath, err = findManifest(mfa.Fs, arg)
			if err != nil {
				return err
			}
		} else {
			// Not a directory (or not stat-able): treat the argument as the
			// manifest path itself and let the load below report any problem.
			manifestPath = arg
		}
	} else {
		manifestPath, err = findManifest(mfa.Fs, ".")
		if err != nil {
			return err
		}
	}

	log.Infof("loading manifest from %s", manifestPath)

	// Load existing manifest
	manifest, err := mfer.NewManifestFromFile(mfa.Fs, manifestPath)
	if err != nil {
		return fmt.Errorf("failed to load manifest: %w", err)
	}

	existingFiles := manifest.Files()
	log.Infof("manifest contains %d files", len(existingFiles))

	// Build map of existing entries by path. Entries are deleted from this
	// map as they are seen on disk, so whatever remains after the walk is the
	// set of removed files.
	existingByPath := make(map[string]*mfer.MFFilePath, len(existingFiles))
	for _, f := range existingFiles {
		existingByPath[f.Path] = f
	}

	// Phase 1: Scan filesystem
	log.Infof("scanning filesystem...")
	startScan := time.Now()

	var entries []*freshenEntry
	var scanCount int64
	var removed, changed, added, unchanged int64

	absBase, err := filepath.Abs(basePath)
	if err != nil {
		return err
	}

	manifestBase := filepath.Base(manifestPath)

	err = afero.Walk(mfa.Fs, absBase, func(path string, info fs.FileInfo, walkErr error) error {
		if walkErr != nil {
			return walkErr
		}

		// Get relative path
		relPath, err := filepath.Rel(absBase, path)
		if err != nil {
			return err
		}

		// Skip the manifest file itself (plain or dot-prefixed name at the
		// top level of the scan).
		if relPath == manifestBase || relPath == "."+manifestBase {
			return nil
		}

		// Handle dotfiles. The walk root has relPath "." and must never be
		// treated as hidden: returning SkipDir for it would abandon the whole
		// scan and report every manifest entry as removed.
		if !includeDotfiles && relPath != "." && pathIsHidden(relPath) {
			if info.IsDir() {
				return filepath.SkipDir
			}
			return nil
		}

		// Skip directories
		if info.IsDir() {
			return nil
		}

		// Handle symlinks
		if info.Mode()&fs.ModeSymlink != 0 {
			if !followSymlinks {
				return nil
			}
			realPath, err := filepath.EvalSymlinks(path)
			if err != nil {
				return nil // Skip broken symlinks
			}
			realInfo, err := mfa.Fs.Stat(realPath)
			if err != nil || realInfo.IsDir() {
				return nil
			}
			// Record the target's size and mtime rather than the link's.
			info = realInfo
		}

		scanCount++

		// Check against existing manifest
		existing, inManifest := existingByPath[relPath]
		if inManifest {
			// Check if changed (size or mtime)
			existingMtime := time.Unix(existing.Mtime.Seconds, int64(existing.Mtime.Nanos))
			if existing.Size != info.Size() || !existingMtime.Equal(info.ModTime()) {
				changed++
				log.Debugf("M %s", relPath)
				entries = append(entries, &freshenEntry{
					path:      relPath,
					size:      info.Size(),
					mtime:     info.ModTime(),
					needsHash: true,
				})
			} else {
				unchanged++
				entries = append(entries, &freshenEntry{
					path:      relPath,
					size:      info.Size(),
					mtime:     info.ModTime(),
					needsHash: false,
					existing:  existing,
				})
			}
			// Mark as seen
			delete(existingByPath, relPath)
		} else {
			added++
			log.Debugf("A %s", relPath)
			entries = append(entries, &freshenEntry{
				path:      relPath,
				size:      info.Size(),
				mtime:     info.ModTime(),
				needsHash: true,
			})
		}

		// Report scan progress
		if showProgress && scanCount%100 == 0 {
			log.Progressf("Scanning: %d files found", scanCount)
		}

		return nil
	})

	if showProgress {
		log.ProgressDone()
	}

	if err != nil {
		return fmt.Errorf("failed to scan filesystem: %w", err)
	}

	// Remaining entries in existingByPath are removed files
	removed = int64(len(existingByPath))
	for path := range existingByPath {
		log.Debugf("D %s", path)
	}

	scanDuration := time.Since(startScan)
	log.Infof("scan complete in %s: %d unchanged, %d changed, %d added, %d removed",
		scanDuration.Round(time.Millisecond), unchanged, changed, added, removed)

	// Calculate total bytes to hash
	var totalHashBytes int64
	var filesToHash int64
	for _, e := range entries {
		if e.needsHash {
			totalHashBytes += e.size
			filesToHash++
		}
	}

	// Phase 2: Hash changed and new files
	if filesToHash > 0 {
		log.Infof("hashing %d files (%.1f MB)...", filesToHash, float64(totalHashBytes)/1e6)
	}

	startHash := time.Now()
	var hashedFiles int64
	var hashedBytes int64

	builder := mfer.NewBuilder()

	for _, e := range entries {
		// Allow the operation to be cancelled between files.
		select {
		case <-ctx.Done():
			return ctx.Err()
		default:
		}

		if e.needsHash {
			// Need to read and hash the file
			absPath := filepath.Join(absBase, e.path)
			f, err := mfa.Fs.Open(absPath)
			if err != nil {
				return fmt.Errorf("failed to open %s: %w", e.path, err)
			}

			hash, bytesRead, err := hashFile(f, e.size, func(n int64) {
				if showProgress {
					// n is the byte count within the current file; add the
					// bytes of all previously completed files.
					currentBytes := hashedBytes + n
					elapsed := time.Since(startHash)
					var rate float64
					var eta time.Duration
					if elapsed > 0 && currentBytes > 0 {
						rate = float64(currentBytes) / elapsed.Seconds()
						remaining := totalHashBytes - currentBytes
						if rate > 0 {
							eta = time.Duration(float64(remaining)/rate) * time.Second
						}
					}
					if eta > 0 {
						log.Progressf("Hashing: %d/%d files, %.1f MB/s, ETA %s",
							hashedFiles, filesToHash, rate/1e6, eta.Round(time.Second))
					} else {
						log.Progressf("Hashing: %d/%d files, %.1f MB/s",
							hashedFiles, filesToHash, rate/1e6)
					}
				}
			})
			// The file was only read, so a Close error cannot lose data.
			_ = f.Close()

			if err != nil {
				return fmt.Errorf("failed to hash %s: %w", e.path, err)
			}

			hashedBytes += bytesRead
			hashedFiles++

			// Add to builder with computed hash. Record the number of bytes
			// actually hashed so the stored size always matches the stored
			// hash, even if the file changed between the scan and the read.
			addFileToBuilder(builder, e.path, bytesRead, e.mtime, hash)
		} else {
			// Use existing entry
			addExistingToBuilder(builder, e.existing)
		}
	}

	// Capture the hash-phase duration here so the summary's elapsed time and
	// throughput describe the same interval.
	hashDuration := time.Since(startHash)

	if showProgress && filesToHash > 0 {
		log.ProgressDone()
	}

	// Write updated manifest to a temp file first so a failure cannot leave a
	// half-written manifest at the final path.
	tmpPath := manifestPath + ".tmp"
	outFile, err := mfa.Fs.Create(tmpPath)
	if err != nil {
		return fmt.Errorf("failed to create temp file: %w", err)
	}

	buildErr := builder.Build(outFile)
	// Close may report a deferred write failure; it must succeed before the
	// temp file is allowed to replace the existing manifest.
	closeErr := outFile.Close()
	if buildErr != nil {
		_ = mfa.Fs.Remove(tmpPath)
		return fmt.Errorf("failed to write manifest: %w", buildErr)
	}
	if closeErr != nil {
		_ = mfa.Fs.Remove(tmpPath)
		return fmt.Errorf("failed to write manifest: %w", closeErr)
	}

	// Rename temp to final
	if err := mfa.Fs.Rename(tmpPath, manifestPath); err != nil {
		_ = mfa.Fs.Remove(tmpPath)
		return fmt.Errorf("failed to rename manifest: %w", err)
	}

	// Print summary
	if !ctx.Bool("quiet") {
		var hashRate float64
		if hashedBytes > 0 {
			hashRate = float64(hashedBytes) / hashDuration.Seconds() / 1e6
		}
		log.Infof("freshen complete: %d unchanged, %d changed, %d added, %d removed",
			unchanged, changed, added, removed)
		if filesToHash > 0 {
			log.Infof("hashed %.1f MB in %.1fs (%.1f MB/s)",
				float64(hashedBytes)/1e6, hashDuration.Seconds(), hashRate)
		}
		log.Infof("wrote %d files to %s", len(entries), manifestPath)
	}

	return nil
}

// hashFile reads a file and computes its SHA256 multihash.
// Progress callback is called with bytes read so far.
func hashFile(r io.Reader, size int64, progress func(int64)) ([]byte, int64, error) { h := sha256.New() buf := make([]byte, 64*1024) var total int64 for { n, err := r.Read(buf) if n > 0 { h.Write(buf[:n]) total += int64(n) if progress != nil { progress(total) } } if err == io.EOF { break } if err != nil { return nil, total, err } } mh, err := multihash.Encode(h.Sum(nil), multihash.SHA2_256) if err != nil { return nil, total, err } return mh, total, nil } // addFileToBuilder adds a new file entry to the builder func addFileToBuilder(b *mfer.Builder, path string, size int64, mtime time.Time, hash []byte) { // Use the builder's internal method indirectly by creating an entry // Since Builder.AddFile reads from a reader, we need to use a different approach // We'll access the builder's files directly through a custom method b.AddFileWithHash(path, size, mtime, hash) } // addExistingToBuilder adds an existing manifest entry to the builder func addExistingToBuilder(b *mfer.Builder, entry *mfer.MFFilePath) { mtime := time.Unix(entry.Mtime.Seconds, int64(entry.Mtime.Nanos)) if len(entry.Hashes) > 0 { b.AddFileWithHash(entry.Path, entry.Size, mtime, entry.Hashes[0].MultiHash) } } // pathIsHidden checks if a path contains hidden components func pathIsHidden(p string) bool { for _, part := range filepath.SplitList(p) { if len(part) > 0 && part[0] == '.' { return true } } // Also check each path component for p != "" && p != "." && p != "/" { base := filepath.Base(p) if len(base) > 0 && base[0] == '.' { return true } parent := filepath.Dir(p) if parent == p { break } p = parent } return false }