diff --git a/.gitignore b/.gitignore index 4a757fe..73722d8 100644 --- a/.gitignore +++ b/.gitignore @@ -2,3 +2,4 @@ feta output/ feta.sqlite .lintsetup +out diff --git a/README.md b/README.md index ae85347..8843119 100644 --- a/README.md +++ b/README.md @@ -2,9 +2,20 @@ archives the fediverse +# todo + +* scan toots for mentions and feed to locator +* put toots in a separate db file +* test with a real database +* save instances to store more often +* verify instances load properly on startup +* do some simple in-memory dedupe for toot storage +* make some templates using pongo2 and a simple website +* update APIs + # status -[![CircleCI](https://circleci.com/gh/sneak/feta.svg?style=svg)](https://circleci.com/gh/sneak/feta) +[![Build Status](https://drone.datavi.be/api/badges/sneak/feta/status.svg)](https://drone.datavi.be/sneak/feta) # ethics statement diff --git a/database/dbmodel.go b/database/imconnector.go similarity index 67% rename from database/dbmodel.go rename to database/imconnector.go index e2a29d0..19f0a0b 100644 --- a/database/dbmodel.go +++ b/database/imconnector.go @@ -1,40 +1,12 @@ package database import ( - "time" - "git.eeqj.de/sneak/feta/instance" - "github.com/google/uuid" - "github.com/jinzhu/gorm" "github.com/rs/zerolog/log" _ "github.com/jinzhu/gorm/dialects/sqlite" ) -type APInstance struct { - gorm.Model - UUID uuid.UUID `gorm:"type:uuid;primary_key;"` - ErrorCount uint - SuccessCount uint - HighestID uint - Hostname string `gorm:"type:varchar(100);unique_index"` - Identified bool - Fetching bool - Disabled bool - Implementation string - NextFetch time.Time - NodeInfoURL string - ServerVersionString string - ServerImplementationString string - FSMState string -} - -// NB that when you add a model below you must add it to this list! -func (m *Manager) doMigrations() { - log.Info().Msg("doing database migrations if required") - m.db.AutoMigrate(&APInstance{}) -} - func (m *Manager) SaveInstance(i *instance.Instance) error { i.Lock() defer i.Unlock() @@ -53,7 +25,7 @@ func (m *Manager) SaveInstance(i *instance.Instance) error { HighestID: i.HighestID, Hostname: i.Hostname, Identified: i.Identified, - Implementation: string(i.Implementation), + Implementation: i.Implementation, NextFetch: i.NextFetch, NodeInfoURL: i.NodeInfoURL, ServerImplementationString: i.ServerImplementationString, @@ -92,7 +64,29 @@ func (m *Manager) SaveInstance(i *instance.Instance) error { func (m *Manager) ListInstances() ([]*instance.Instance, error) { output := make([]*instance.Instance, 0) - // FIXME have this produce a list of Instance + + var results []APInstance + m.db.Find(&results) + + for _, i := range results { + newinst := instance.New(func(x *instance.Instance) { + x.UUID = i.UUID + x.Disabled = i.Disabled + x.ErrorCount = i.ErrorCount + x.InitialFSMState = i.FSMState + x.Fetching = i.Fetching + x.HighestID = i.HighestID + x.Hostname = i.Hostname + x.Identified = i.Identified + x.Implementation = i.Implementation + x.NextFetch = i.NextFetch + x.NodeInfoURL = i.NodeInfoURL + x.ServerImplementationString = i.ServerImplementationString + x.ServerVersionString = i.ServerVersionString + x.SuccessCount = i.SuccessCount + }) + output = append(output, newinst) + } return output, nil } diff --git a/database/model.go b/database/model.go new file mode 100644 index 0000000..b6404af --- /dev/null +++ b/database/model.go @@ -0,0 +1,49 @@ +package database + +import ( + "time" + + "github.com/google/uuid" + "github.com/jinzhu/gorm" + "github.com/rs/zerolog/log" + + _ "github.com/jinzhu/gorm/dialects/sqlite" +) + +type StoredToot struct { + gorm.Model + UUID uuid.UUID `gorm:"type:uuid;primary_key;"` + //Original string `sql:"type:text"` + Original []byte + Hash string `gorm:"unique_index"` + ServerCreated time.Time + Acct string + Content []byte + URL string + Hostname string +} + +type APInstance struct { + gorm.Model + UUID uuid.UUID `gorm:"type:uuid;primary_key;"` + ErrorCount uint + SuccessCount uint + HighestID uint + Hostname string `gorm:"type:varchar(100);unique_index"` + Identified bool + Fetching bool + Disabled bool + Implementation string + NextFetch time.Time + NodeInfoURL string + ServerVersionString string + ServerImplementationString string + FSMState string +} + +// NB that when you add a model below you must add it to this list! +func (m *Manager) doMigrations() { + log.Info().Msg("doing database migrations if required") + m.db.AutoMigrate(&APInstance{}) + m.db.AutoMigrate(&StoredToot{}) +} diff --git a/database/storageconnector.go b/database/storageconnector.go new file mode 100644 index 0000000..2e16e5c --- /dev/null +++ b/database/storageconnector.go @@ -0,0 +1,47 @@ +package database + +import ( + "fmt" + "strings" + + "git.eeqj.de/sneak/feta/toot" + + "github.com/google/uuid" + _ "github.com/jinzhu/gorm/dialects/sqlite" +) + +func (m *Manager) TootExists(t *toot.Toot) bool { + var try StoredToot + if m.db.Where("Hash = ?", t.GetHash()).First(&try).RecordNotFound() { + return false + } else { + return true + } +} + +func (m *Manager) StoreToot(t *toot.Toot) error { + + nt := new(StoredToot) + nt.UUID = uuid.New() + nt.ServerCreated = t.Parsed.CreatedAt + nt.Original = t.Original + // FIXME normalize this, check for @ and append hostname if none + nt.Acct = fmt.Sprintf("%s@%s", t.Parsed.Account.Acct, strings.ToLower(t.FromHost)) + nt.URL = t.Parsed.URL + nt.Content = t.Parsed.Content + nt.Hostname = strings.ToLower(t.FromHost) + nt.Hash = t.GetHash() + r := m.db.Create(&nt) + //panic(fmt.Sprintf("%+v", t)) + return r.Error +} + +func (m *Manager) StoreToots(tc []*toot.Toot) error { + for _, item := range tc { + err := m.StoreToot(item) + if err != nil { + return err + } + } + return nil +} diff --git a/ingester/ingester.go b/ingester/ingester.go index 9f22e14..aa5b6f3 100644 --- a/ingester/ingester.go +++ b/ingester/ingester.go @@ -1,9 +1,12 @@ package ingester -import "time" -import "github.com/rs/zerolog/log" -import "git.eeqj.de/sneak/feta/toot" -import "git.eeqj.de/sneak/feta/storage" +import ( + "time" + + "git.eeqj.de/sneak/feta/storage" + "git.eeqj.de/sneak/feta/toot" + "github.com/rs/zerolog/log" +) // TootIngester is the data structure for the ingester process that is // responsible for storing the discovered toots @@ -15,7 +18,7 @@ type TootIngester struct { type seenTootMemo struct { lastSeen time.Time - tootHash toot.Hash + tootHash string } // NewTootIngester returns a fresh TootIngester for your use @@ -55,5 +58,5 @@ func (ti *TootIngester) storeToot(t *toot.Toot) { if ti.storageBackend == nil { panic("no storage backend") } - ti.storageBackend.StoreToot(*t) + ti.storageBackend.StoreToot(t) } diff --git a/instance/instance.go b/instance/instance.go index 71f116b..6247ae8 100644 --- a/instance/instance.go +++ b/instance/instance.go @@ -11,7 +11,6 @@ import ( "time" "git.eeqj.de/sneak/feta/jsonapis" - "git.eeqj.de/sneak/feta/storage" "git.eeqj.de/sneak/feta/toot" "github.com/google/uuid" "github.com/looplab/fsm" @@ -26,14 +25,6 @@ const instanceHTTPTimeout = time.Second * 120 const instanceSpiderInterval = time.Second * 120 const instanceErrorInterval = time.Second * 60 * 30 -type instanceImplementation int - -const ( - implUnknown instanceImplementation = iota - implMastodon - implPleroma -) - // Instance stores all the information we know about an instance type Instance struct { Disabled bool @@ -42,17 +33,17 @@ type Instance struct { Fetching bool HighestID uint Hostname string - UUID uuid.UUID Identified bool - Implementation instanceImplementation + Implementation string + InitialFSMState string NextFetch time.Time NodeInfoURL string ServerImplementationString string ServerVersionString string SuccessCount uint + UUID uuid.UUID fetchingLock sync.Mutex fsmLock sync.Mutex - storageBackend *storage.TootStorageBackend structLock sync.Mutex tootDestination chan *toot.Toot } @@ -63,9 +54,14 @@ func New(options ...func(i *Instance)) *Instance { i := new(Instance) i.UUID = uuid.New() i.setNextFetchAfter(1 * time.Second) + i.InitialFSMState = "STATUS_UNKNOWN" + + for _, opt := range options { + opt(i) + } i.FSM = fsm.NewFSM( - "STATUS_UNKNOWN", + i.InitialFSMState, fsm.Events{ {Name: "BEGIN_NODEINFO_URL_FETCH", Src: []string{"STATUS_UNKNOWN"}, Dst: "FETCHING_NODEINFO_URL"}, {Name: "GOT_NODEINFO_URL", Src: []string{"FETCHING_NODEINFO_URL"}, Dst: "PRE_NODEINFO_FETCH"}, @@ -82,10 +78,6 @@ func New(options ...func(i *Instance)) *Instance { "enter_state": func(e *fsm.Event) { i.fsmEnterState(e) }, }, ) - - for _, opt := range options { - opt(i) - } return i } @@ -195,7 +187,7 @@ func (i *Instance) Tick() { func (i *Instance) nodeIdentified() bool { i.Lock() defer i.Unlock() - if i.Implementation > implUnknown { + if i.Implementation != "" { return true } return false @@ -379,7 +371,7 @@ func (i *Instance) fetchNodeInfo() error { Str("software", ni.Software.Name). Msg("detected server software") i.Identified = true - i.Implementation = implPleroma + i.Implementation = "pleroma" i.Unlock() i.registerSuccess() i.Event("GOT_NODEINFO") @@ -390,7 +382,7 @@ func (i *Instance) fetchNodeInfo() error { Str("software", ni.Software.Name). Msg("detected server software") i.Identified = true - i.Implementation = implMastodon + i.Implementation = "mastodon" i.Unlock() i.registerSuccess() i.Event("GOT_NODEINFO") diff --git a/manager/manager.go b/manager/manager.go index 922b3f6..d3f61dd 100644 --- a/manager/manager.go +++ b/manager/manager.go @@ -50,9 +50,15 @@ func (im *InstanceManager) RestoreFromDB() { } im.lock() defer im.unlock() + count := 0 for _, x := range newil { + x.SetTootDestination(im.tootDestination) im.instances[x.Hostname] = x + count = count + 1 } + log.Info(). + Int("count", count). + Msg("restored instances from database") } func (im *InstanceManager) SaveToDB() { diff --git a/process/feta.go b/process/feta.go index 8464661..92497c5 100644 --- a/process/feta.go +++ b/process/feta.go @@ -52,6 +52,8 @@ func (f *Feta) configure() { viper.AutomaticEnv() viper.SetDefault("Debug", false) + viper.SetDefault("TootsToDisk", false) + viper.SetDefault("TootsToDB", true) viper.SetDefault("HostDiscoveryParallelism", 5) viper.SetDefault("FSStorageLocation", os.ExpandEnv("$HOME/Library/ApplicationSupport/feta/tootarchive.d")) viper.SetDefault("DBStorageLocation", os.ExpandEnv("$HOME/Library/ApplicationSupport/feta/feta.state.db")) @@ -138,8 +140,14 @@ func (f *Feta) runForever() int { panic("can't find home directory") } - diskBackend := storage.NewTootFSStorage(home + "/.local/feta") - f.ingester.SetStorageBackend(diskBackend) + if viper.GetBool("TootsToDB") { + f.ingester.SetStorageBackend(f.dbm) + } else if viper.GetBool("TootsToDisk") { + diskBackend := storage.NewTootFSStorage(viper.GetString("FSStorageLocation")) + f.ingester.SetStorageBackend(diskBackend) + } else { + log.Info().Msg("toots will not be saved to disk") + } f.api = new(Server) f.api.SetFeta(f) // api needs to get to us to access data diff --git a/storage/tootstore.go b/storage/fs.go similarity index 78% rename from storage/tootstore.go rename to storage/fs.go index b488555..2045477 100644 --- a/storage/tootstore.go +++ b/storage/fs.go @@ -11,18 +11,6 @@ import ( "git.eeqj.de/sneak/feta/toot" ) -// TootStorageBackend is the interface to which storage backends must -// conform for storing toots -type TootStorageBackend interface { - TootExists(t toot.Toot) bool - StoreToot(t toot.Toot) error - StoreToots(tc []*toot.Toot) error -} - -type TootDBStorage struct { - db string -} - // TootFSStorage is a TootStorageBackend that writes to the local // filesystem. type TootFSStorage struct { @@ -41,7 +29,7 @@ func NewTootFSStorage(root string) *TootFSStorage { func (ts *TootFSStorage) StoreToots(tc []*toot.Toot) error { var returnErrors []string for _, item := range tc { - err := ts.StoreToot(*item) + err := ts.StoreToot(item) if err != nil { returnErrors = append(returnErrors, err.Error()) continue @@ -56,7 +44,7 @@ func (ts *TootFSStorage) StoreToots(tc []*toot.Toot) error { // TootExists checks to see if we have already written a toot to disk or // not. Note that the ingester de-dupes with a table in memory so that this // will only really get used on app restarts -func (ts *TootFSStorage) TootExists(t toot.Toot) bool { +func (ts *TootFSStorage) TootExists(t *toot.Toot) bool { path := t.DiskStoragePath() full := ts.root + "/" + path _, err := os.Stat(full) @@ -67,7 +55,7 @@ func (ts *TootFSStorage) TootExists(t toot.Toot) bool { } // StoreToot writes a single toot to disk -func (ts *TootFSStorage) StoreToot(t toot.Toot) error { +func (ts *TootFSStorage) StoreToot(t *toot.Toot) error { path := t.DiskStoragePath() full := ts.root + "/" + path dir := filepath.Dir(full) @@ -82,7 +70,7 @@ func (ts *TootFSStorage) StoreToot(t toot.Toot) error { // toots in ram forever until the computer fills up and catches fire and explodes type TootMemoryStorage struct { sync.Mutex - toots map[toot.Hash]toot.Toot + toots map[string]*toot.Toot //maxSize uint // FIXME support eviction } @@ -90,12 +78,12 @@ type TootMemoryStorage struct { // ram forever func NewTootMemoryStorage() *TootMemoryStorage { ts := new(TootMemoryStorage) - ts.toots = make(map[toot.Hash]toot.Toot) + ts.toots = make(map[string]*toot.Toot) return ts } // StoreToot saves a single toot into an in-memory hashtable -func (ts *TootMemoryStorage) StoreToot(t toot.Toot) { +func (ts *TootMemoryStorage) StoreToot(t *toot.Toot) { if ts.TootExists(t) { return } @@ -106,7 +94,7 @@ func (ts *TootMemoryStorage) StoreToot(t toot.Toot) { } // TootExists checks to see if we have a toot in memory already -func (ts *TootMemoryStorage) TootExists(t toot.Toot) bool { +func (ts *TootMemoryStorage) TootExists(t *toot.Toot) bool { ts.Lock() defer ts.Unlock() if _, ok := ts.toots[t.Hash]; ok { //this syntax is so gross diff --git a/storage/interface.go b/storage/interface.go new file mode 100644 index 0000000..01eaaa8 --- /dev/null +++ b/storage/interface.go @@ -0,0 +1,13 @@ +package storage + +import ( + "git.eeqj.de/sneak/feta/toot" +) + +// TootStorageBackend is the interface to which storage backends must +// conform for storing toots +type TootStorageBackend interface { + TootExists(t *toot.Toot) bool + StoreToot(t *toot.Toot) error + StoreToots(tc []*toot.Toot) error +} diff --git a/toot/toot.go b/toot/toot.go index 4d0936e..d9b6211 100644 --- a/toot/toot.go +++ b/toot/toot.go @@ -1,27 +1,30 @@ package toot -import "fmt" -import "encoding/json" -import "errors" -import "strings" -import "git.eeqj.de/sneak/feta/jsonapis" +import ( + "encoding/json" + "errors" + "fmt" + "strings" -//import "github.com/davecgh/go-spew/spew" -import "github.com/rs/zerolog/log" + "git.eeqj.de/sneak/feta/jsonapis" + "github.com/rs/zerolog/log" -//import "encoding/hex" -import mh "github.com/multiformats/go-multihash" -import mhopts "github.com/multiformats/go-multihash/opts" + //import "github.com/davecgh/go-spew/spew" + + //import "encoding/hex" + mh "github.com/multiformats/go-multihash" + + mhopts "github.com/multiformats/go-multihash/opts" +) // Hash is a type for storing a string-based base58 multihash of a // toot's identity -type Hash string // Toot is an object we use internally for storing a discovered toot type Toot struct { Original []byte Parsed *jsonapis.APISerializedToot - Hash Hash + Hash string FromHost string } @@ -111,7 +114,14 @@ func (t *Toot) identityHashInput() string { ) } +func (t *Toot) GetHash() string { + if t.Hash == "" { + t.calcHash() + } + return t.Hash +} + func (t *Toot) calcHash() { hi := t.identityHashInput() - t.Hash = Hash(t.multiHash([]byte(hi))) + t.Hash = string(t.multiHash([]byte(hi))) }