attrsum/attrsum.go
sneak d848c5e51b Remove dead code in symlink handling
filepath.Walk uses Lstat, so symlinks are reported with ModeSymlink set,
never ModeDir. The info.IsDir() check was always false, making the
filepath.SkipDir branch unreachable dead code.
2026-02-02 13:15:39 -08:00

540 lines
12 KiB
Go

package main
import (
"bufio"
"bytes"
"crypto/sha256"
"errors"
"fmt"
"io"
"log"
"os"
"path/filepath"
"strings"
"sync/atomic"
"time"
"github.com/bmatcuk/doublestar/v4"
base58 "github.com/mr-tron/base58/base58"
"github.com/multiformats/go-multihash"
"github.com/pkg/xattr"
"github.com/schollz/progressbar/v3"
"github.com/spf13/cobra"
)
// Names of the extended attributes in which attrsum keeps its per-file metadata.
const (
// checksumKey is the xattr that holds the file's stored checksum
// (the value produced by fileMultihash).
checksumKey = "berlin.sneak.app.attrsum.checksum"
// sumTimeKey is the xattr that holds the UTC timestamp, formatted as
// time.RFC3339Nano, associated with the stored checksum.
sumTimeKey = "berlin.sneak.app.attrsum.sumtime"
)
// Global options, bound to the root command's persistent flags in main.
var (
// verbose enables per-file output (--verbose / -v); it is ignored when quiet is set.
verbose bool
// quiet suppresses the progress bar, the summary line and per-file output (--quiet / -q).
quiet bool
// excludePatterns holds the --exclude patterns; shouldExclude matches them
// against paths relative to the walk root.
excludePatterns []string
// excludeDotfiles, when set, excludes any path that has a component
// starting with '.' (--exclude-dotfiles).
excludeDotfiles bool
)
// Stats tracks operation statistics for summary reporting.
//
// The counters are updated through sync/atomic by the Process* functions and
// are printed by Print once an operation finishes.
type Stats struct {
// FilesProcessed counts files on which the operation completed successfully.
FilesProcessed int64
// FilesSkipped counts files that required no work.
FilesSkipped int64
// FilesFailed counts files on which the operation failed.
FilesFailed int64
// BytesProcessed is the sum of the sizes (os.FileInfo.Size) of processed files.
BytesProcessed int64
// StartTime is when the operation began; Duration is measured from it.
StartTime time.Time
}
// Duration reports the wall-clock time elapsed since s.StartTime.
func (s *Stats) Duration() time.Duration {
	elapsed := time.Since(s.StartTime)
	return elapsed
}
// Print writes a one-line summary of the counters to stderr, labelled with
// operation. Nothing is written when the global quiet flag is set.
func (s *Stats) Print(operation string) {
	if !quiet {
		// Human-readable size first, then the elapsed time rounded to the millisecond.
		size := formatBytes(s.BytesProcessed)
		elapsed := s.Duration().Round(time.Millisecond)
		fmt.Fprintf(
			os.Stderr,
			"\n%s complete: %d files processed, %d skipped, %d failed, %s bytes in %s\n",
			operation, s.FilesProcessed, s.FilesSkipped, s.FilesFailed, size, elapsed,
		)
	}
}
// formatBytes renders a byte count using binary (1024-based) units.
// Values below 1024 are printed as plain bytes ("512 B"); larger values are
// scaled to the largest fitting unit with one decimal place ("1.5 KiB").
func formatBytes(b int64) string {
	const unit = 1024
	const prefixes = "KMGTPE"

	if b < unit {
		return fmt.Sprintf("%d B", b)
	}

	// Find the largest power of 1024 that does not exceed b.
	divisor := int64(unit)
	idx := 0
	remaining := b / unit
	for remaining >= unit {
		divisor *= unit
		idx++
		remaining /= unit
	}

	scaled := float64(b) / float64(divisor)
	return fmt.Sprintf("%.1f %ciB", scaled, prefixes[idx])
}
// main builds the attrsum command tree, registers the global flags and runs
// the command selected on the command line. Any error returned by the command
// is logged and terminates the process via log.Fatal.
func main() {
	root := &cobra.Command{
		Use:   "attrsum",
		Short: "Compute and verify file checksums via xattrs",
		// Errors are reported once, by log.Fatal below, without the usage text.
		SilenceUsage:  true,
		SilenceErrors: true,
	}

	flags := root.PersistentFlags()
	flags.BoolVarP(&verbose, "verbose", "v", false, "enable verbose output")
	flags.BoolVarP(&quiet, "quiet", "q", false, "suppress all output except errors")
	flags.StringArrayVar(&excludePatterns, "exclude", nil, "exclude files/directories matching pattern (rsync-style, repeatable)")
	flags.BoolVar(&excludeDotfiles, "exclude-dotfiles", false, "exclude any file or directory whose name starts with '.'")

	root.AddCommand(newSumCmd(), newCheckCmd(), newClearCmd())

	if err := root.Execute(); err != nil {
		log.Fatal(err)
	}
}
// expandPaths returns args with every "-" entry replaced, in place, by the
// paths read from standard input (one per line). Lines are trimmed of
// surrounding whitespace and blank lines are dropped. All other arguments are
// passed through unchanged. A read error on stdin is returned wrapped.
func expandPaths(args []string) ([]string, error) {
	var out []string
	for _, a := range args {
		if a != "-" {
			out = append(out, a)
			continue
		}

		sc := bufio.NewScanner(os.Stdin)
		for sc.Scan() {
			if trimmed := strings.TrimSpace(sc.Text()); trimmed != "" {
				out = append(out, trimmed)
			}
		}
		if err := sc.Err(); err != nil {
			return nil, fmt.Errorf("reading stdin: %w", err)
		}
	}
	return out, nil
}
///////////////////////////////////////////////////////////////////////////////
// Sum commands
///////////////////////////////////////////////////////////////////////////////
// newSumCmd builds the "sum" command with its "add" and "update"
// subcommands. Both subcommands expand their arguments with expandPaths,
// process every resulting path in order, stop at the first error, and print a
// summary only when all paths were processed successfully.
func newSumCmd() *cobra.Command {
	// runner produces the RunE handler shared by both subcommands; label is
	// the operation name shown in the summary and process handles one path.
	runner := func(label string, process func(string, *Stats) error) func(*cobra.Command, []string) error {
		return func(_ *cobra.Command, args []string) error {
			targets, err := expandPaths(args)
			if err != nil {
				return err
			}
			stats := &Stats{StartTime: time.Now()}
			for _, target := range targets {
				if err := process(target, stats); err != nil {
					return err
				}
			}
			stats.Print(label)
			return nil
		}
	}

	sum := &cobra.Command{
		Use:   "sum",
		Short: "Checksum maintenance operations",
	}
	sum.AddCommand(
		&cobra.Command{
			Use:   "add <path>... (use - to read paths from stdin)",
			Short: "Write checksums for files missing them",
			Args:  cobra.MinimumNArgs(1),
			RunE:  runner("sum add", ProcessSumAdd),
		},
		&cobra.Command{
			Use:   "update <path>... (use - to read paths from stdin)",
			Short: "Recalculate checksum when file newer than stored sumtime",
			Args:  cobra.MinimumNArgs(1),
			RunE:  runner("sum update", ProcessSumUpdate),
		},
	)
	return sum
}
// ProcessSumAdd walks dir and writes a checksum and sumtime for every regular
// file that does not yet carry the checksum xattr. Files that already have one
// are counted as skipped; a file whose checksum cannot be written is counted
// as failed and its error aborts the walk.
func ProcessSumAdd(dir string, stats *Stats) error {
	addMissing := func(path string, info os.FileInfo, st *Stats) error {
		if !hasXattr(path, checksumKey) {
			err := writeChecksumAndTime(path, info, st)
			if err != nil {
				atomic.AddInt64(&st.FilesFailed, 1)
			}
			return err
		}
		atomic.AddInt64(&st.FilesSkipped, 1)
		return nil
	}
	return walkAndProcess(dir, stats, "Adding checksums", addMissing)
}
// ProcessSumUpdate walks dir and rewrites the checksum and sumtime of every
// regular file whose stored sumtime is missing or unreadable, or whose
// modification time is later than the stored sumtime. Other files are counted
// as skipped. A file whose checksum cannot be written is counted as failed and
// its error aborts the walk.
func ProcessSumUpdate(dir string, stats *Stats) error {
	refresh := func(path string, info os.FileInfo, st *Stats) error {
		stored, err := readSumTime(path)
		upToDate := err == nil && !info.ModTime().After(stored)
		if upToDate {
			atomic.AddInt64(&st.FilesSkipped, 1)
			return nil
		}
		if err := writeChecksumAndTime(path, info, st); err != nil {
			atomic.AddInt64(&st.FilesFailed, 1)
			return err
		}
		return nil
	}
	return walkAndProcess(dir, stats, "Updating checksums", refresh)
}
// writeChecksumAndTime hashes the file at path and stores the result in the
// checksum xattr, then stores a UTC RFC 3339 (nanosecond) timestamp in the
// sumtime xattr. On success it adds one file and info.Size() bytes to stats.
// In verbose (non-quiet) mode each written value is echoed to stdout.
//
// It returns the hashing error unchanged, or a wrapped error when either
// xattr cannot be set; stats is not touched on failure.
func writeChecksumAndTime(path string, info os.FileInfo, stats *Stats) error {
	// Take the timestamp before reading the file. If the file is modified
	// while it is being hashed, its mtime will then be later than the stored
	// sumtime, so a subsequent "sum update" re-hashes it instead of trusting
	// a checksum computed from a partially changed file.
	ts := time.Now().UTC().Format(time.RFC3339Nano)

	hash, err := fileMultihash(path)
	if err != nil {
		return err
	}

	if err := xattr.Set(path, checksumKey, hash); err != nil {
		return fmt.Errorf("set checksum attr: %w", err)
	}
	if verbose && !quiet {
		fmt.Printf("%s %s written\n", path, hash)
	}

	if err := xattr.Set(path, sumTimeKey, []byte(ts)); err != nil {
		return fmt.Errorf("set sumtime attr: %w", err)
	}
	if verbose && !quiet {
		fmt.Printf("%s %s written\n", path, ts)
	}

	atomic.AddInt64(&stats.FilesProcessed, 1)
	atomic.AddInt64(&stats.BytesProcessed, info.Size())
	return nil
}
// readSumTime reads the sumtime xattr of path and parses it as an
// RFC 3339 (nanosecond) timestamp. It returns the zero time and the xattr
// error when the attribute cannot be read, or the parse error when the stored
// value is malformed.
func readSumTime(path string) (time.Time, error) {
	raw, err := xattr.Get(path, sumTimeKey)
	if err == nil {
		return time.Parse(time.RFC3339Nano, string(raw))
	}
	return time.Time{}, err
}
///////////////////////////////////////////////////////////////////////////////
// Clear command
///////////////////////////////////////////////////////////////////////////////
// newClearCmd builds the "clear" command, which removes the attrsum xattrs
// from every given path. Processing stops at the first error; the summary is
// printed only when all paths were processed successfully.
func newClearCmd() *cobra.Command {
	run := func(_ *cobra.Command, args []string) error {
		targets, err := expandPaths(args)
		if err != nil {
			return err
		}
		stats := &Stats{StartTime: time.Now()}
		for _, target := range targets {
			if err := ProcessClear(target, stats); err != nil {
				return err
			}
		}
		stats.Print("clear")
		return nil
	}

	clearCmd := &cobra.Command{
		Use:   "clear <path>... (use - to read paths from stdin)",
		Short: "Remove checksum xattrs from tree",
		Args:  cobra.MinimumNArgs(1),
		RunE:  run,
	}
	return clearCmd
}
// ProcessClear walks dir and removes the checksum and sumtime xattrs from
// every regular file. A file from which at least one attribute was removed is
// counted as processed (with its size added to BytesProcessed); a file that
// carried neither attribute is counted as skipped. Any error other than
// "attribute not present" counts the file as failed and aborts the walk.
func ProcessClear(dir string, stats *Stats) error {
	return walkAndProcess(dir, stats, "Clearing checksums", func(p string, info os.FileInfo, s *Stats) error {
		cleared := false
		for _, k := range []string{checksumKey, sumTimeKey} {
			// Read the value first so verbose mode can show what was removed.
			v, err := xattr.Get(p, k)
			if err != nil {
				if errors.Is(err, xattr.ENOATTR) {
					continue
				}
				atomic.AddInt64(&s.FilesFailed, 1)
				return err
			}
			if err := xattr.Remove(p, k); err != nil {
				atomic.AddInt64(&s.FilesFailed, 1)
				return err
			}
			// Report only after the attribute is actually gone, so a failed
			// removal is never announced as "removed".
			if verbose && !quiet {
				fmt.Printf("%s %s removed\n", p, string(v))
			}
			cleared = true
		}
		if cleared {
			atomic.AddInt64(&s.FilesProcessed, 1)
			atomic.AddInt64(&s.BytesProcessed, info.Size())
		} else {
			atomic.AddInt64(&s.FilesSkipped, 1)
		}
		return nil
	})
}
///////////////////////////////////////////////////////////////////////////////
// Check command
///////////////////////////////////////////////////////////////////////////////
// newCheckCmd builds the "check" command, which verifies stored checksums for
// every given path. Without --continue the first failing path prints the
// summary and returns its error immediately. With --continue all paths are
// processed and the error of the last failing path (if any) is returned after
// the summary.
func newCheckCmd() *cobra.Command {
	var keepGoing bool

	run := func(_ *cobra.Command, args []string) error {
		targets, err := expandPaths(args)
		if err != nil {
			return err
		}
		stats := &Stats{StartTime: time.Now()}
		var lastErr error
		for _, target := range targets {
			err := ProcessCheck(target, keepGoing, stats)
			if err == nil {
				continue
			}
			if !keepGoing {
				stats.Print("check")
				return err
			}
			lastErr = err
		}
		stats.Print("check")
		return lastErr
	}

	check := &cobra.Command{
		Use:   "check <path>... (use - to read paths from stdin)",
		Short: "Verify stored checksums",
		Args:  cobra.MinimumNArgs(1),
		RunE:  run,
	}
	check.Flags().BoolVar(&keepGoing, "continue", false, "continue after errors and report each file")
	return check
}
// ProcessCheck walks dir and verifies every regular file against the checksum
// stored in its xattr.
//
// A file with no stored checksum, or whose recomputed checksum differs from
// the stored one, is a verification failure: it is counted in FilesFailed and,
// in verbose mode, reported with status ERROR. When cont is false the walk
// stops at the first verification failure; when cont is true the walk
// continues and the failure is reported once the walk ends. In both cases the
// returned error is the "verification failed" error.
//
// Errors reading the xattr (other than "attribute not present") or hashing
// the file also count the file as failed and abort the walk regardless of
// cont; they are returned as-is. Files that verify correctly are counted in
// FilesProcessed and BytesProcessed.
func ProcessCheck(dir string, cont bool, stats *Stats) error {
	fail := errors.New("verification failed")
	bad := false

	err := walkAndProcess(dir, stats, "Verifying checksums", func(p string, info os.FileInfo, s *Stats) error {
		exp, err := xattr.Get(p, checksumKey)
		if err != nil {
			// Whatever the reason, this file could not be verified; count it
			// so the summary agrees with the hashing-failure path below.
			atomic.AddInt64(&s.FilesFailed, 1)
			if !errors.Is(err, xattr.ENOATTR) {
				return err
			}
			bad = true
			if verbose && !quiet {
				fmt.Printf("%s <none> ERROR\n", p)
			}
			if cont {
				return nil
			}
			return fail
		}

		act, err := fileMultihash(p)
		if err != nil {
			atomic.AddInt64(&s.FilesFailed, 1)
			return err
		}

		ok := bytes.Equal(exp, act)
		if !ok {
			bad = true
			atomic.AddInt64(&s.FilesFailed, 1)
		} else {
			atomic.AddInt64(&s.FilesProcessed, 1)
			atomic.AddInt64(&s.BytesProcessed, info.Size())
		}
		if verbose && !quiet {
			status := "OK"
			if !ok {
				status = "ERROR"
			}
			fmt.Printf("%s %s %s\n", p, act, status)
		}
		if !ok && !cont {
			return fail
		}
		return nil
	})
	if err != nil {
		if errors.Is(err, fail) {
			return fail
		}
		return err
	}
	// With cont set, failures do not stop the walk; surface them here.
	if bad {
		return fail
	}
	return nil
}
///////////////////////////////////////////////////////////////////////////////
// Helpers
///////////////////////////////////////////////////////////////////////////////
// countFiles returns the number of regular files under root that
// walkAndProcess would hand to its callback: symlinks, excluded entries
// (excluded directories are not descended into) and non-regular files are not
// counted. Errors met during the walk are ignored, so the result is a
// best-effort total intended for the progress bar.
func countFiles(root string) int64 {
	root = filepath.Clean(root)
	var total int64

	visit := func(path string, info os.FileInfo, walkErr error) error {
		switch {
		case walkErr != nil:
			// Unreadable entries simply do not contribute to the count.
			return nil
		case info.Mode()&os.ModeSymlink != 0:
			// filepath.Walk uses Lstat, so symlinks are reported as
			// ModeSymlink, never as directories, and are not followed.
			return nil
		}

		rel, _ := filepath.Rel(root, path)
		excluded := shouldExclude(rel, info)
		switch {
		case excluded && info.IsDir():
			return filepath.SkipDir
		case excluded, info.IsDir():
			return nil
		case info.Mode().IsRegular():
			total++
		}
		return nil
	}

	_ = filepath.Walk(root, visit)
	return total
}
// walkAndProcess walks the tree rooted at root and calls fn for every regular
// file that is not a symlink and not excluded by shouldExclude. Excluded
// directories are skipped entirely. Unless quiet is set, a progress bar
// labelled with description is drawn on stderr, sized by a preliminary
// countFiles pass and advanced once per file handed to fn (whether or not fn
// succeeded).
//
// The walk stops at the first error reported by filepath.Walk or returned by
// fn, and that error is returned.
func walkAndProcess(root string, stats *Stats, description string, fn func(string, os.FileInfo, *Stats) error) error {
	root = filepath.Clean(root)

	// A first pass over the tree gives the progress bar its total.
	total := countFiles(root)

	var bar *progressbar.ProgressBar
	if !quiet {
		theme := progressbar.Theme{
			Saucer:        "=",
			SaucerHead:    ">",
			SaucerPadding: " ",
			BarStart:      "[",
			BarEnd:        "]",
		}
		bar = progressbar.NewOptions64(
			total,
			progressbar.OptionSetDescription(description),
			progressbar.OptionSetWriter(os.Stderr),
			progressbar.OptionShowCount(),
			progressbar.OptionShowIts(),
			progressbar.OptionSetItsString("files"),
			progressbar.OptionThrottle(250*time.Millisecond),
			progressbar.OptionShowElapsedTimeOnFinish(),
			progressbar.OptionSetPredictTime(true),
			progressbar.OptionFullWidth(),
			progressbar.OptionSetTheme(theme),
		)
	}

	// logSkip notes a skipped entry, in verbose (non-quiet) mode only.
	logSkip := func(kind, path string) {
		if verbose && !quiet {
			log.Printf("skip %s %s", kind, path)
		}
	}

	visit := func(path string, info os.FileInfo, walkErr error) error {
		if walkErr != nil {
			return walkErr
		}
		// filepath.Walk uses Lstat, so symlinks are reported as ModeSymlink,
		// never as directories, and are not followed.
		if info.Mode()&os.ModeSymlink != 0 {
			logSkip("symlink", path)
			return nil
		}

		rel, _ := filepath.Rel(root, path)
		excluded := shouldExclude(rel, info)
		switch {
		case excluded && info.IsDir():
			return filepath.SkipDir
		case excluded, info.IsDir():
			return nil
		case !info.Mode().IsRegular():
			logSkip("non-regular", path)
			return nil
		}

		result := fn(path, info, stats)
		if bar != nil {
			bar.Add(1)
		}
		return result
	}

	walkErr := filepath.Walk(root, visit)
	if bar != nil {
		bar.Finish()
	}
	return walkErr
}
// shouldExclude reports whether the entry at rel (a path relative to the walk
// root) must be left out. The root itself ("." or "") is never excluded. An
// entry is excluded when excludeDotfiles is set and any component of rel
// starts with '.', or when rel matches one of excludePatterns. Pattern errors
// are ignored and treated as "no match". The info argument is currently
// unused.
func shouldExclude(rel string, info os.FileInfo) bool {
	switch rel {
	case ".", "":
		return false
	}

	if excludeDotfiles {
		components := strings.Split(rel, string(os.PathSeparator))
		for i := range components {
			if strings.HasPrefix(components[i], ".") {
				return true
			}
		}
	}

	for i := range excludePatterns {
		matched, _ := doublestar.PathMatch(excludePatterns[i], rel)
		if matched {
			return true
		}
	}
	return false
}
// hasXattr reports whether the extended attribute key can be read from path.
// Any read error, including the attribute being absent, yields false.
func hasXattr(path, key string) bool {
	if _, err := xattr.Get(path, key); err != nil {
		return false
	}
	return true
}
// fileMultihash streams the file at path through SHA-256, wraps the digest as
// a SHA2-256 multihash and returns its base58 text encoding as bytes. Errors
// from opening or reading the file, or from multihash encoding, are returned
// unchanged.
func fileMultihash(path string) ([]byte, error) {
	file, err := os.Open(path)
	if err != nil {
		return nil, err
	}
	defer file.Close()

	hasher := sha256.New()
	if _, copyErr := io.Copy(hasher, file); copyErr != nil {
		return nil, copyErr
	}

	digest := hasher.Sum(nil)
	encoded, err := multihash.Encode(digest, multihash.SHA2_256)
	if err != nil {
		return nil, err
	}

	text := base58.Encode(encoded)
	return []byte(text), nil
}