xsum/main.go

480 lines
9.4 KiB
Go

//3456789112345676892123456789312345678941234567895123456789612345678971234567898
package main
import "crypto/sha256"
import "fmt"
import "github.com/jessevdk/go-flags"
import "github.com/mr-tron/base58"
import "github.com/multiformats/go-multihash"
import "github.com/pkg/xattr"
import "github.com/sirupsen/logrus"
import "os"
import "io"
import "flag"
import "time"
import "strconv"
// Build metadata reported in the debug-level startup banner printed by xsum().
// NOTE(review): presumably injected at link time (e.g. -ldflags "-X ...");
// nothing in the visible code assigns these — confirm against the build scripts.
var (
	Version   string
	Buildtime string
	Builduser string
	Buildarch string
)

// log is the process-wide logger. xsum() creates it before anything else logs.
var log *logrus.Logger

// namespacePrefix is joined with a dot and an attribute suffix ("mtime",
// "size", "multihash") to form every xattr name this tool reads or writes.
const namespacePrefix = "berlin.sneak.xsum"
//FIXME(sneak) make this parallelize to NUM_CPUS when processing multiple
//args
//FIXME(sneak) add a -r (recursive) flag for directories
//FIXME(sneak) make checking support reading hash algo type from multihash
//instead of assuming sha256
// main runs the tool and hands the status code returned by xsum to the
// operating system as the process exit status.
func main() {
	status := xsum()
	os.Exit(status)
}
// xsum is the real program entry point: it sets up logging, parses the
// command line, and dispatches to the handler for the requested mode.
//
// The first positional argument selects the mode ("update", "check",
// "check-and-update", or its alias "cron"); every remaining argument is a
// path to process. At least one path is required.
//
// It returns 0 on success and -1 on a usage error or when the selected
// handler reports a failure. Handler failures are logged here at debug level
// only, so they are visible from this function only when -v is given.
func xsum() int {
	log = logrus.New()
	log.SetLevel(logrus.ErrorLevel)
	log.SetReportCaller(false)

	var opts struct {
		// Verbose is a plain on/off switch: -v / --verbose raises the log
		// level to debug and enables caller reporting.
		Verbose bool `short:"v" long:"verbose" description:"Show verbose debug information"`
	}

	args, err := flags.Parse(&opts)
	if err != nil {
		usage()
		return -1
	}

	if opts.Verbose {
		log.SetReportCaller(true)
		log.SetLevel(logrus.DebugLevel)
	}

	log.Debugf("xsum version %s (%s) built %s by %s", Version, Buildarch, Buildtime, Builduser)

	// A mode plus at least one path is mandatory.
	if len(args) < 2 {
		usage()
		return -1
	}

	// Pick the handler for the requested mode; "cron" shares the
	// check-and-update handler.
	var handler func([]string) error
	switch args[0] {
	case "cron", "check-and-update":
		handler = xsfCheckAndUpdate
	case "check":
		handler = xsfCheck
	case "update":
		handler = xsfUpdate
	default:
		usage()
		return -1
	}

	if failure := handler(args[1:]); failure != nil {
		log.Debug(failure)
		return -1
	}
	return 0
}
// usage writes the one-line command synopsis to stderr, using os.Args[0] as
// the program name, and then calls flag.PrintDefaults.
func usage() {
	const synopsis = "usage: %s [-v] <update|check|check-and-update|cron> <path> [path2] [...]\n"
	program := os.Args[0]
	fmt.Fprintf(os.Stderr, synopsis, program)
	// NOTE(review): options are parsed with go-flags in xsum(), and no visible
	// code registers anything with the standard flag package, so this call
	// likely prints nothing — confirm before relying on it.
	flag.PrintDefaults()
}
// xsfCheck verifies each path in order and prints one tab-separated result
// line per file to stdout.
//
// It stops at the first path whose Check returns an error, prints an ERROR
// line for it, and returns that error. It returns nil once every path has
// been processed without error.
//
// Note: Check returns nil without hashing when a file has missing or stale
// xattrs, so such files are printed as "OK" with an empty hash. Likewise the
// ERROR line shows empty expected/actual values when the failure was not a
// hash mismatch (for example, the file could not be opened).
func xsfCheck(paths []string) error {
	log.Debugf("check")
	for i := range paths {
		file := newXsf(paths[i])
		if failure := file.Check(); failure != nil {
			fmt.Printf("%s\tERROR (expected=%s actual=%s)\n", file.path, file.xmultihash, file.multihash)
			return failure
		}
		fmt.Printf("%s\tOK (hash=%s)\n", file.path, file.multihash)
	}
	return nil
}
// showError writes e to stderr as a single line prefixed with "error: ".
func showError(e error) {
	line := fmt.Sprintf("error: %s\n", e)
	fmt.Fprint(os.Stderr, line)
}
// xsfUpdate refreshes the stored xattrs for each path in order.
//
// It stops at the first path whose Update fails, logs the failure at error
// level, and returns it. The returned error names the path and wraps the
// underlying cause, so callers can inspect it with errors.Is / errors.As.
// It returns nil once every path has been processed without error.
func xsfUpdate(paths []string) error {
	log.Debugf("update")
	for _, path := range paths {
		x := newXsf(path)
		if err := x.Update(); err != nil {
			// No trailing newline: error strings should not carry their own
			// line terminator, the logger adds framing itself.
			failure := fmt.Errorf("%s\tERROR (error=%w)", x.path, err)
			log.Error(failure)
			return failure
		}
	}
	return nil
}
// xsfCheckAndUpdate verifies every path first and, only when verification
// reports no error, goes on to update the xattrs of every path.
//
// A verification error is logged at error level and returned unchanged, and
// no update is attempted. Otherwise the result of xsfUpdate is returned.
func xsfCheckAndUpdate(paths []string) error {
	log.Debugf("check-and-update")
	// xsfCheck prints its own per-file result lines; this function only has
	// to propagate the failure and skip the update phase.
	if failure := xsfCheck(paths); failure != nil {
		log.Error(failure)
		return failure
	}
	return xsfUpdate(paths)
}
// HashFile streams fp through SHA-256, encodes the digest as a "sha2-256"
// multihash, and returns that multihash in base58.
//
// Reading starts at fp's current offset. On a read or encoding error it
// returns an empty string together with that error.
func HashFile(fp *os.File) (string, error) {
	hasher := sha256.New()
	_, copyErr := io.Copy(hasher, fp)
	if copyErr != nil {
		return "", copyErr
	}

	digest := hasher.Sum(nil)
	encoded, encodeErr := multihash.EncodeName(digest, "sha2-256")
	if encodeErr != nil {
		return "", encodeErr
	}

	return base58.Encode(encoded), nil
}
// stringInSlice reports whether a is equal to any element of list.
// A nil or empty list never contains a.
func stringInSlice(a string, list []string) bool {
	for i := 0; i < len(list); i++ {
		if list[i] == a {
			return true
		}
	}
	return false
}
/////////////////////////////////////////////////////////////////////////////////
// type xsf
/////////////////////////////////////////////////////////////////////////////////
// xsf holds the state for one file being checked or updated: values observed
// from the filesystem alongside the values stored in the file's xattrs (the
// fields prefixed with "x").
type xsf struct {
// fi is not referenced by any code visible in this file.
fi *os.FileInfo
// fp is the open handle set by Check/Update; stat, hashing and all xattr
// calls go through it.
fp *os.File
// multihash is the hash computed from the file contents by hash().
multihash string
// xmultihash is the hash read from the multihash xattr.
xmultihash string
// mtime is the file's modification time from stat(), formatted as RFC3339 in UTC.
mtime string
// xmtime is the modification time read from the mtime xattr.
xmtime string
// path is the filesystem path given to newXsf.
path string
// size is the file size in bytes from stat().
size uint64
// xsize is the size parsed from the size xattr.
xsize uint64
}
/////////////////////////////////////////////////////////////////////////////////
// constructor
/////////////////////////////////////////////////////////////////////////////////
// newXsf returns a new xsf for path. Only the path is recorded; the file is
// not opened or inspected until Check or Update is called.
func newXsf(path string) *xsf {
	return &xsf{path: path}
}
//FIXME calling .List() three times might be slow, memoize if necessary
// hasMtimeXattr reports whether the open file carries the
// "<namespacePrefix>.mtime" xattr. A failure to list the file's xattrs is
// treated as the attribute being absent.
func (x *xsf) hasMtimeXattr() bool {
	names, err := xattr.FList(x.fp)
	if err != nil {
		return false
	}
	return stringInSlice(namespacePrefix+".mtime", names)
}
// readMtimeXattr loads the "<namespacePrefix>.mtime" xattr from the open file
// into x.xmtime. On a read error it returns the error and leaves x.xmtime
// untouched.
func (x *xsf) readMtimeXattr() error {
	log.Infof("reading mtime xattr")
	raw, err := xattr.FGet(x.fp, namespacePrefix+".mtime")
	if err != nil {
		return err
	}
	x.xmtime = string(raw)
	return nil
}
// hasMultihashXattr reports whether the open file carries the
// "<namespacePrefix>.multihash" xattr. A failure to list the file's xattrs is
// treated as the attribute being absent.
func (x *xsf) hasMultihashXattr() bool {
	names, err := xattr.FList(x.fp)
	if err != nil {
		return false
	}
	return stringInSlice(namespacePrefix+".multihash", names)
}
// readMultihashXattr loads the "<namespacePrefix>.multihash" xattr from the
// open file into x.xmultihash. On a read error it returns the error and
// leaves x.xmultihash untouched.
func (x *xsf) readMultihashXattr() error {
	log.Infof("reading multihash xattr")
	raw, err := xattr.FGet(x.fp, namespacePrefix+".multihash")
	if err != nil {
		return err
	}
	x.xmultihash = string(raw)
	return nil
}
// hasSizeXattr reports whether the open file carries the
// "<namespacePrefix>.size" xattr. A failure to list the file's xattrs is
// treated as the attribute being absent.
func (x *xsf) hasSizeXattr() bool {
	names, err := xattr.FList(x.fp)
	if err != nil {
		return false
	}
	return stringInSlice(namespacePrefix+".size", names)
}
// readSizeXattr loads the "<namespacePrefix>.size" xattr from the open file,
// parses it as an unsigned base-10 integer, and stores it in x.xsize.
//
// It returns the read error if the attribute cannot be fetched, or the parse
// error if the stored value is not a valid unsigned 64-bit decimal number.
// In both cases x.xsize is left untouched.
func (x *xsf) readSizeXattr() error {
	log.Infof("reading size xattr")
	raw, err := xattr.FGet(x.fp, namespacePrefix+".size")
	if err != nil {
		return err
	}
	// Parse as unsigned: the value is written from a uint64, and a signed
	// parse followed by a cast would silently turn a corrupt negative value
	// into a huge size instead of reporting it.
	size, err := strconv.ParseUint(string(raw), 10, 64)
	if err != nil {
		return err
	}
	x.xsize = size
	return nil
}
// writeXattrs stores the current mtime, size and multihash of x on the open
// file as xattrs named "<namespacePrefix>.<suffix>", in that order.
//
// It stops at the first attribute that fails to write and returns that
// error; attributes written before the failure stay in place.
func (x *xsf) writeXattrs() error {
	log.Infof("writing xattrs")

	// The size is stored as its decimal string representation.
	attrs := []struct{ suffix, value string }{
		{"mtime", x.mtime},
		{"size", strconv.FormatUint(x.size, 10)},
		{"multihash", x.multihash},
	}

	for _, attr := range attrs {
		name := namespacePrefix + "." + attr.suffix
		log.Infof("writing xattr %s=%s", name, attr.value)
		if err := xattr.FSet(x.fp, name, []byte(attr.value)); err != nil {
			return err
		}
	}
	return nil
}
// stat populates x.size and x.mtime from the open file's metadata.
//
// The modification time is converted to UTC and stored as an RFC3339 string,
// which is the form it is later compared against the mtime xattr in. It
// returns the error from the underlying Stat call, if any, without modifying x.
func (x *xsf) stat() error {
	info, err := x.fp.Stat()
	if err != nil {
		return err
	}

	x.size = uint64(info.Size())
	log.Debugf("size: %d", x.size)

	x.mtime = info.ModTime().UTC().Format(time.RFC3339)
	log.Debugf("modtime: %s", x.mtime)
	return nil
}
// hash computes the multihash of the open file via HashFile and stores it in
// x.multihash. If hashing fails, x.multihash is set to whatever HashFile
// returned alongside the error (an empty string) and the error is returned.
func (x *xsf) hash() error {
	log.Debugf("hashing...")
	digest, err := HashFile(x.fp)
	// Assign before checking err so the field is overwritten on failure too.
	x.multihash = digest
	if err != nil {
		return err
	}
	log.Debugf("hash: %s", x.multihash)
	return nil
}
// Check verifies the file at x.path against the hash stored in its xattrs.
//
// The file is opened read-only and closed again before Check returns. The
// file is only hashed when all three xattrs are present and the stored size
// and mtime still match the filesystem; otherwise there is nothing
// trustworthy to compare against and Check returns nil without hashing
// (x.multihash stays empty in that case).
//
// It returns an error if the file cannot be opened, stat'ed, read or hashed,
// or if the computed hash differs from the stored one (file corruption).
func (x *xsf) Check() error {
	fp, err := os.Open(x.path)
	if err != nil {
		return err
	}
	// Only register the close once the open is known to have succeeded.
	defer fp.Close()
	x.fp = fp

	if err = x.stat(); err != nil {
		log.Errorf("error stat(): %s", err)
		return err
	}

	if x.missingXattrs() {
		log.Infof("can't check file %s, does not have appropriate xattrs", x.path)
		return nil
	}

	// A changed size or mtime means the stored hash describes older contents.
	if x.needsUpdate() {
		log.Infof("can't check file %s, needs update (xattrs not current)", x.path)
		return nil
	}

	if err = x.readMultihashXattr(); err != nil {
		log.Errorf("error reading file hash: %s", err)
		return err
	}
	predictedHash := x.xmultihash

	// Finally hash the file contents.
	if err = x.hash(); err != nil {
		log.Errorf("error hashing file: %s", err)
		return err
	}
	actualHash := x.multihash

	if predictedHash != actualHash {
		return fmt.Errorf("file corruption detected: expected=%s actual=%s", predictedHash, actualHash)
	}
	log.Infof("file OK hash=%s", actualHash)
	return nil
}
// missingXattrs reports whether the open file lacks any of the three xattrs
// this tool maintains. The attributes are probed in the order mtime,
// multihash, size, and probing stops at the first one found missing.
func (x *xsf) missingXattrs() bool {
	switch {
	case !x.hasMtimeXattr():
		log.Debugf("file needs update, missing mtime xattr")
		return true
	case !x.hasMultihashXattr():
		log.Debugf("file needs update, missing multihash xattr")
		return true
	case !x.hasSizeXattr():
		log.Debugf("file needs update, missing size xattr")
		return true
	}
	return false
}
// needsUpdate reports whether the xattrs stored on the open file are absent
// or out of date relative to the filesystem.
//
// It expects stat() to have been called already so that x.size and x.mtime
// hold the current filesystem values. It returns true when any of the three
// xattrs is missing, when the size or mtime xattr cannot be read, or when
// the stored size or mtime differs from the current one. As a side effect
// it fills x.xsize and x.xmtime from the xattrs it manages to read.
func (x *xsf) needsUpdate() bool {
	log.Debugf("checking if file needs update")

	// Without all three xattrs there is nothing to compare against.
	if x.missingXattrs() {
		log.Debugf("file is missing xattrs")
		return true
	}

	// A size mismatch means the contents changed since the last update.
	if err := x.readSizeXattr(); err != nil {
		log.Debugf("unable to read file size attribute")
		return true
	}
	if x.size != x.xsize {
		log.Debugf("file needs update, size is %d, xattr size is %d", x.size, x.xsize)
		return true
	}

	// Same for the modification time.
	if err := x.readMtimeXattr(); err != nil {
		log.Debugf("unable to read file mtime attribute")
		return true
	}
	if x.mtime != x.xmtime {
		log.Debugf("file needs update, mtime is %s, xattr mtime is %s", x.mtime, x.xmtime)
		return true
	}

	return false
}
// Update brings the xattrs of the file at x.path up to date.
//
// The file is opened and closed again before Update returns. If the stored
// xattrs are present and already match the current size and mtime, the file
// is left alone. Otherwise it is hashed and the mtime, size and multihash
// xattrs are rewritten.
//
// It returns an error if the file cannot be opened, stat'ed or hashed, or if
// writing any xattr fails.
func (x *xsf) Update() error {
	log.Debugf("updating file (path: %s)", x.path)

	fp, err := os.Open(x.path)
	if err != nil {
		return err
	}
	// Only register the close once the open is known to have succeeded.
	defer fp.Close()
	x.fp = fp

	if err = x.stat(); err != nil {
		return err
	}

	// needsUpdate() must run after stat() so that x.size and x.mtime are
	// populated for the comparison.
	if !x.needsUpdate() {
		log.Debugf("skipping update on already hashed file %s", x.path)
		return nil
	}

	if err = x.hash(); err != nil {
		return err
	}
	return x.writeXattrs()
}