package cli import ( "crypto/sha256" "fmt" "io" "io/fs" "path/filepath" "time" "github.com/dustin/go-humanize" "github.com/multiformats/go-multihash" "github.com/spf13/afero" "github.com/urfave/cli/v2" "sneak.berlin/go/mfer/internal/log" "sneak.berlin/go/mfer/mfer" ) // FreshenStatus contains progress information for the freshen operation. type FreshenStatus struct { Phase string // "scan" or "hash" TotalFiles int64 // Total files to process in current phase CurrentFiles int64 // Files processed so far TotalBytes int64 // Total bytes to hash (hash phase only) CurrentBytes int64 // Bytes hashed so far BytesPerSec float64 // Current throughput rate ETA time.Duration // Estimated time to completion } // freshenEntry tracks a file's status during freshen type freshenEntry struct { path string size int64 mtime time.Time needsHash bool // true if new or changed existing *mfer.MFFilePath // existing manifest entry if unchanged } func (mfa *CLIApp) freshenManifestOperation(ctx *cli.Context) error { log.Debug("freshenManifestOperation()") basePath := ctx.String("base") showProgress := ctx.Bool("progress") includeDotfiles := ctx.Bool("IncludeDotfiles") followSymlinks := ctx.Bool("FollowSymLinks") // Find manifest file var manifestPath string var err error if ctx.Args().Len() > 0 { arg := ctx.Args().Get(0) info, statErr := mfa.Fs.Stat(arg) if statErr == nil && info.IsDir() { manifestPath, err = findManifest(mfa.Fs, arg) if err != nil { return err } } else { manifestPath = arg } } else { manifestPath, err = findManifest(mfa.Fs, ".") if err != nil { return err } } log.Infof("loading manifest from %s", manifestPath) // Load existing manifest manifest, err := mfer.NewManifestFromFile(mfa.Fs, manifestPath) if err != nil { return fmt.Errorf("failed to load manifest: %w", err) } existingFiles := manifest.Files() log.Infof("manifest contains %d files", len(existingFiles)) // Build map of existing entries by path existingByPath := make(map[string]*mfer.MFFilePath, len(existingFiles)) for _, f := range existingFiles { existingByPath[f.Path] = f } // Phase 1: Scan filesystem log.Infof("scanning filesystem...") startScan := time.Now() var entries []*freshenEntry var scanCount int64 var removed, changed, added, unchanged int64 absBase, err := filepath.Abs(basePath) if err != nil { return err } err = afero.Walk(mfa.Fs, absBase, func(path string, info fs.FileInfo, walkErr error) error { if walkErr != nil { return walkErr } // Get relative path relPath, err := filepath.Rel(absBase, path) if err != nil { return err } // Skip the manifest file itself if relPath == filepath.Base(manifestPath) || relPath == "."+filepath.Base(manifestPath) { return nil } // Handle dotfiles if !includeDotfiles && mfer.IsHiddenPath(filepath.ToSlash(relPath)) { if info.IsDir() { return filepath.SkipDir } return nil } // Skip directories if info.IsDir() { return nil } // Handle symlinks if info.Mode()&fs.ModeSymlink != 0 { if !followSymlinks { return nil } realPath, err := filepath.EvalSymlinks(path) if err != nil { return nil // Skip broken symlinks } realInfo, err := mfa.Fs.Stat(realPath) if err != nil || realInfo.IsDir() { return nil } info = realInfo } scanCount++ // Check against existing manifest existing, inManifest := existingByPath[relPath] if inManifest { // Check if changed (size or mtime) existingMtime := time.Unix(existing.Mtime.Seconds, int64(existing.Mtime.Nanos)) if existing.Size != info.Size() || !existingMtime.Equal(info.ModTime()) { changed++ log.Verbosef("M %s", relPath) entries = append(entries, &freshenEntry{ path: relPath, size: info.Size(), mtime: info.ModTime(), needsHash: true, }) } else { unchanged++ entries = append(entries, &freshenEntry{ path: relPath, size: info.Size(), mtime: info.ModTime(), needsHash: false, existing: existing, }) } // Mark as seen delete(existingByPath, relPath) } else { added++ log.Verbosef("A %s", relPath) entries = append(entries, &freshenEntry{ path: relPath, size: info.Size(), mtime: info.ModTime(), needsHash: true, }) } // Report scan progress if showProgress && scanCount%100 == 0 { log.Progressf("Scanning: %d files found", scanCount) } return nil }) if showProgress { log.ProgressDone() } if err != nil { return fmt.Errorf("failed to scan filesystem: %w", err) } // Remaining entries in existingByPath are removed files removed = int64(len(existingByPath)) for path := range existingByPath { log.Verbosef("D %s", path) } scanDuration := time.Since(startScan) log.Infof("scan complete in %s: %d unchanged, %d changed, %d added, %d removed", scanDuration.Round(time.Millisecond), unchanged, changed, added, removed) // Calculate total bytes to hash var totalHashBytes int64 var filesToHash int64 for _, e := range entries { if e.needsHash { totalHashBytes += e.size filesToHash++ } } // Phase 2: Hash changed and new files if filesToHash > 0 { log.Infof("hashing %d files (%s)...", filesToHash, humanize.IBytes(uint64(totalHashBytes))) } startHash := time.Now() var hashedFiles int64 var hashedBytes int64 builder := mfer.NewBuilder() for _, e := range entries { select { case <-ctx.Done(): return ctx.Err() default: } if e.needsHash { // Need to read and hash the file absPath := filepath.Join(absBase, e.path) f, err := mfa.Fs.Open(absPath) if err != nil { return fmt.Errorf("failed to open %s: %w", e.path, err) } hash, bytesRead, err := hashFile(f, e.size, func(n int64) { if showProgress { currentBytes := hashedBytes + n elapsed := time.Since(startHash) var rate float64 var eta time.Duration if elapsed > 0 && currentBytes > 0 { rate = float64(currentBytes) / elapsed.Seconds() remaining := totalHashBytes - currentBytes if rate > 0 { eta = time.Duration(float64(remaining)/rate) * time.Second } } if eta > 0 { log.Progressf("Hashing: %d/%d files, %s/s, ETA %s", hashedFiles, filesToHash, humanize.IBytes(uint64(rate)), eta.Round(time.Second)) } else { log.Progressf("Hashing: %d/%d files, %s/s", hashedFiles, filesToHash, humanize.IBytes(uint64(rate))) } } }) _ = f.Close() if err != nil { return fmt.Errorf("failed to hash %s: %w", e.path, err) } hashedBytes += bytesRead hashedFiles++ // Add to builder with computed hash if err := addFileToBuilder(builder, e.path, e.size, e.mtime, hash); err != nil { return fmt.Errorf("failed to add %s: %w", e.path, err) } } else { // Use existing entry if err := addExistingToBuilder(builder, e.existing); err != nil { return fmt.Errorf("failed to add %s: %w", e.path, err) } } } if showProgress && filesToHash > 0 { log.ProgressDone() } // Print summary log.Infof("freshen complete: %d unchanged, %d changed, %d added, %d removed", unchanged, changed, added, removed) // Skip writing if nothing changed if changed == 0 && added == 0 && removed == 0 { log.Infof("manifest unchanged, skipping write") return nil } // Write updated manifest atomically (write to temp, then rename) tmpPath := manifestPath + ".tmp" outFile, err := mfa.Fs.Create(tmpPath) if err != nil { return fmt.Errorf("failed to create temp file: %w", err) } err = builder.Build(outFile) _ = outFile.Close() if err != nil { _ = mfa.Fs.Remove(tmpPath) return fmt.Errorf("failed to write manifest: %w", err) } // Rename temp to final if err := mfa.Fs.Rename(tmpPath, manifestPath); err != nil { _ = mfa.Fs.Remove(tmpPath) return fmt.Errorf("failed to rename manifest: %w", err) } totalDuration := time.Since(mfa.startupTime) if hashedBytes > 0 { hashDuration := time.Since(startHash) hashRate := float64(hashedBytes) / hashDuration.Seconds() log.Infof("hashed %s in %.1fs (%s/s)", humanize.IBytes(uint64(hashedBytes)), totalDuration.Seconds(), humanize.IBytes(uint64(hashRate))) } log.Infof("wrote %d files to %s", len(entries), manifestPath) return nil } // hashFile reads a file and computes its SHA256 multihash. // Progress callback is called with bytes read so far. func hashFile(r io.Reader, size int64, progress func(int64)) ([]byte, int64, error) { h := sha256.New() buf := make([]byte, 64*1024) var total int64 for { n, err := r.Read(buf) if n > 0 { h.Write(buf[:n]) total += int64(n) if progress != nil { progress(total) } } if err == io.EOF { break } if err != nil { return nil, total, err } } mh, err := multihash.Encode(h.Sum(nil), multihash.SHA2_256) if err != nil { return nil, total, err } return mh, total, nil } // addFileToBuilder adds a new file entry to the builder func addFileToBuilder(b *mfer.Builder, path string, size int64, mtime time.Time, hash []byte) error { return b.AddFileWithHash(mfer.RelFilePath(path), mfer.FileSize(size), mfer.ModTime(mtime), hash) } // addExistingToBuilder adds an existing manifest entry to the builder func addExistingToBuilder(b *mfer.Builder, entry *mfer.MFFilePath) error { mtime := time.Unix(entry.Mtime.Seconds, int64(entry.Mtime.Nanos)) if len(entry.Hashes) == 0 { return nil } return b.AddFileWithHash(mfer.RelFilePath(entry.Path), mfer.FileSize(entry.Size), mfer.ModTime(mtime), entry.Hashes[0].MultiHash) }