diff --git a/internal/bork/error.go b/internal/bork/error.go new file mode 100644 index 0000000..83a99ae --- /dev/null +++ b/internal/bork/error.go @@ -0,0 +1,13 @@ +package bork + +import ( + "errors" + "fmt" +) + +var ErrMissingMagic = errors.New("missing magic bytes in file") +var ErrFileTruncated = errors.New("file/stream is truncated abnormally") + +func Newf(format string, args ...interface{}) error { + return errors.New(fmt.Sprintf(format, args...)) +} diff --git a/internal/log/log.go b/internal/log/log.go index df399f2..b52409b 100644 --- a/internal/log/log.go +++ b/internal/log/log.go @@ -1,6 +1,9 @@ package log import ( + "fmt" + "runtime" + "github.com/apex/log" acli "github.com/apex/log/handlers/cli" "github.com/davecgh/go-spew/spew" @@ -25,13 +28,25 @@ func Init() { log.SetLevel(log.InfoLevel) } +func Debugf(format string, args ...interface{}) { + DebugReal(fmt.Sprintf(format, args...), 2) +} + func Debug(arg string) { - log.Debug(arg) + DebugReal(arg, 2) +} + +func DebugReal(arg string, cs int) { + _, callerFile, callerLine, ok := runtime.Caller(cs) + if !ok { + return + } + tag := fmt.Sprintf("%s:%d: ", callerFile, callerLine) + log.Debug(tag + arg) } func Dump(args ...interface{}) { - str := spew.Sdump(args...) - Debug(str) + DebugReal(spew.Sdump(args...), 2) } func EnableDebugLogging() { diff --git a/mfer/deserialize.go b/mfer/deserialize.go index ecafc7e..b9ce966 100644 --- a/mfer/deserialize.go +++ b/mfer/deserialize.go @@ -1,12 +1,81 @@ package mfer -import "google.golang.org/protobuf/proto" +import ( + "bytes" + "compress/gzip" + "errors" + "io" -func NewFromProto(input []byte) (*manifest, error) { + "git.eeqj.de/sneak/mfer/internal/bork" + "git.eeqj.de/sneak/mfer/internal/log" + "google.golang.org/protobuf/proto" +) + +func (m *manifest) validateProto() error { + if m.pbOuter.Version != MFFileOuter_VERSION_ONE { + return errors.New("unknown version") + } + if m.pbOuter.CompressionType != MFFileOuter_COMPRESSION_GZIP { + return errors.New("unknown compression type") + } + + bb := bytes.NewBuffer(m.pbOuter.InnerMessage) + + gzr, err := gzip.NewReader(bb) + if err != nil { + return err + } + + dat, err := io.ReadAll(gzr) + defer gzr.Close() + + if err != nil { + return err + } + + isize := len(dat) + if int64(isize) != m.pbOuter.Size { + log.Debugf("truncated data, got %d expected %d", isize, m.pbOuter.Size) + return bork.ErrFileTruncated + } + + log.Dump(dat) + log.Dump(m.pbOuter.Sha256) + + return nil + +} + +func NewFromProto(input io.Reader) (*manifest, error) { m := New() - err := proto.Unmarshal(input, m.pbOuter) + dat, err := io.ReadAll(input) if err != nil { return nil, err } + ml := len([]byte(MAGIC)) + bb := bytes.NewBuffer(dat) + got := dat[0:ml] + log.Dump("got:") + log.Dump(got) + expected := []byte(MAGIC) + log.Dump("expected:") + log.Dump(expected) + if !bytes.Equal(got, expected) { + return nil, errors.New("invalid file format") + } + bb = bytes.NewBuffer(dat[ml:]) + dat = bb.Bytes() + log.Dump(dat) + + m.pbOuter = new(MFFileOuter) + err = proto.Unmarshal(dat, m.pbOuter) + + if err != nil { + return nil, err + } + ve := m.validateProto() + if ve != nil { + return nil, ve + } return m, nil } diff --git a/mfer/example_test.go b/mfer/example_test.go index 678cd23..5d2be5e 100644 --- a/mfer/example_test.go +++ b/mfer/example_test.go @@ -4,17 +4,40 @@ import ( "bytes" "testing" - "github.com/davecgh/go-spew/spew" + "git.eeqj.de/sneak/mfer/internal/log" "github.com/stretchr/testify/assert" ) func TestAPIExample(t *testing.T) { + + // read from filesystem m, err := NewFromFS(&ManifestScanOptions{ IgnoreDotfiles: true, - }, af) - assert.NotNil(t, err) + }, big) + assert.Nil(t, err) + assert.NotNil(t, m) + + // scan for files m.Scan() + + // serialize var buf bytes.Buffer m.WriteTo(&buf) - spew.Dump(buf.Bytes()) + + // show serialized + log.Dump(buf.Bytes()) + + // do it again + var buf2 bytes.Buffer + m.WriteTo(&buf2) + + // should be same! + assert.True(t, bytes.Equal(buf.Bytes(), buf2.Bytes())) + + // deserialize + m2, err := NewFromProto(&buf) + assert.Nil(t, err) + assert.NotNil(t, m2) + + log.Dump(m2) } diff --git a/mfer/mfer_test.go b/mfer/mfer_test.go index 2b01455..5db6e70 100644 --- a/mfer/mfer_test.go +++ b/mfer/mfer_test.go @@ -2,6 +2,7 @@ package mfer import ( "bytes" + "fmt" "testing" "git.eeqj.de/sneak/mfer/internal/log" @@ -15,19 +16,25 @@ var ( ) var ( - mf afero.Fs = afero.NewMemMapFs() - af *afero.Afero = &afero.Afero{Fs: mf} + af *afero.Afero = &afero.Afero{Fs: afero.NewMemMapFs()} + big *afero.Afero = &afero.Afero{Fs: afero.NewMemMapFs()} ) func init() { + log.EnableDebugLogging() + // create test files and directories af.MkdirAll("/a/b/c", 0o755) af.MkdirAll("/.hidden", 0o755) - afero.WriteFile(af, "/a/b/c/hello.txt", []byte("hello world\n\n\n\n"), 0o755) - afero.WriteFile(af, "/a/b/c/hello2.txt", []byte("hello world\n\n\n\n"), 0o755) - afero.WriteFile(af, "/.hidden/hello.txt", []byte("hello world\n"), 0o755) - afero.WriteFile(af, "/.hidden/hello2.txt", []byte("hello world\n"), 0o755) - log.EnableDebugLogging() + af.WriteFile("/a/b/c/hello.txt", []byte("hello world\n\n\n\n"), 0o755) + af.WriteFile("/a/b/c/hello2.txt", []byte("hello world\n\n\n\n"), 0o755) + af.WriteFile("/.hidden/hello.txt", []byte("hello world\n"), 0o755) + af.WriteFile("/.hidden/hello2.txt", []byte("hello world\n"), 0o755) + + big.MkdirAll("/home/user/Library", 0o755) + for i, _ := range [25]int{} { + big.WriteFile(fmt.Sprintf("/home/user/Library/hello%d.txt", i), []byte("hello world\n"), 0o755) + } } func TestPathHiddenFunc(t *testing.T) { diff --git a/mfer/serialize.go b/mfer/serialize.go index a3964c5..82dbff4 100644 --- a/mfer/serialize.go +++ b/mfer/serialize.go @@ -13,6 +13,8 @@ import ( //go:generate protoc --go_out=. --go_opt=paths=source_relative mf.proto +const MAGIC string = "ZNAVSRFG" + func newTimestampFromTime(t time.Time) *Timestamp { out := &Timestamp{ Seconds: t.Unix(), @@ -24,8 +26,6 @@ func newTimestampFromTime(t time.Time) *Timestamp { func (m *manifest) generate() error { log.Debug("generate()") - const MAGIC string = "ZNAVSRFG" - if m.pbInner == nil { e := m.generateInner() if e != nil {