now actually does something

master
Jeffrey Paul 3 years ago
parent b3f672b84a
commit 2ecd833726
  1. 1
      .gitignore
  2. 13
      README.md
  3. 54
      database/imconnector.go
  4. 49
      database/model.go
  5. 47
      database/storageconnector.go
  6. 15
      ingester/ingester.go
  7. 32
      instance/instance.go
  8. 6
      manager/manager.go
  9. 12
      process/feta.go
  10. 26
      storage/fs.go
  11. 13
      storage/interface.go
  12. 36
      toot/toot.go

1
.gitignore vendored

@ -2,3 +2,4 @@ feta
output/
feta.sqlite
.lintsetup
out

@ -2,9 +2,20 @@
archives the fediverse
# todo
* scan toots for mentions and feed to locator
* put toots in a separate db file
* test with a real database
* save instances to store more often
* verify instances load properly on startup
* do some simple in-memory dedupe for toot storage
* make some templates using pongo2 and a simple website
* update APIs
# status
[![CircleCI](https://circleci.com/gh/sneak/feta.svg?style=svg)](https://circleci.com/gh/sneak/feta)
[![Build Status](https://drone.datavi.be/api/badges/sneak/feta/status.svg)](https://drone.datavi.be/sneak/feta)
# ethics statement

@ -1,40 +1,12 @@
package database
import (
"time"
"git.eeqj.de/sneak/feta/instance"
"github.com/google/uuid"
"github.com/jinzhu/gorm"
"github.com/rs/zerolog/log"
_ "github.com/jinzhu/gorm/dialects/sqlite"
)
type APInstance struct {
gorm.Model
UUID uuid.UUID `gorm:"type:uuid;primary_key;"`
ErrorCount uint
SuccessCount uint
HighestID uint
Hostname string `gorm:"type:varchar(100);unique_index"`
Identified bool
Fetching bool
Disabled bool
Implementation string
NextFetch time.Time
NodeInfoURL string
ServerVersionString string
ServerImplementationString string
FSMState string
}
// NB that when you add a model below you must add it to this list!
func (m *Manager) doMigrations() {
log.Info().Msg("doing database migrations if required")
m.db.AutoMigrate(&APInstance{})
}
func (m *Manager) SaveInstance(i *instance.Instance) error {
i.Lock()
defer i.Unlock()
@ -53,7 +25,7 @@ func (m *Manager) SaveInstance(i *instance.Instance) error {
HighestID: i.HighestID,
Hostname: i.Hostname,
Identified: i.Identified,
Implementation: string(i.Implementation),
Implementation: i.Implementation,
NextFetch: i.NextFetch,
NodeInfoURL: i.NodeInfoURL,
ServerImplementationString: i.ServerImplementationString,
@ -92,7 +64,29 @@ func (m *Manager) SaveInstance(i *instance.Instance) error {
// ListInstances loads every APInstance row from the database and converts
// each one into an *instance.Instance via instance.New, copying the
// persisted fields (including the saved FSM state, which becomes the
// instance's InitialFSMState).
//
// It returns a non-nil, possibly empty slice on success. If the database
// query fails the error is returned and the slice is nil.
func (m *Manager) ListInstances() ([]*instance.Instance, error) {
	var results []APInstance
	// Surface query failures instead of silently reporting "no instances".
	if err := m.db.Find(&results).Error; err != nil {
		return nil, err
	}

	output := make([]*instance.Instance, 0, len(results))
	for idx := range results {
		// Take the row by index so the (large) struct is not copied and the
		// option closure refers to exactly this row.
		row := &results[idx]
		newinst := instance.New(func(x *instance.Instance) {
			x.UUID = row.UUID
			x.Disabled = row.Disabled
			x.ErrorCount = row.ErrorCount
			x.InitialFSMState = row.FSMState
			x.Fetching = row.Fetching
			x.HighestID = row.HighestID
			x.Hostname = row.Hostname
			x.Identified = row.Identified
			x.Implementation = row.Implementation
			x.NextFetch = row.NextFetch
			x.NodeInfoURL = row.NodeInfoURL
			x.ServerImplementationString = row.ServerImplementationString
			x.ServerVersionString = row.ServerVersionString
			x.SuccessCount = row.SuccessCount
		})
		output = append(output, newinst)
	}
	return output, nil
}

@ -0,0 +1,49 @@
package database
import (
"time"
"github.com/google/uuid"
"github.com/jinzhu/gorm"
"github.com/rs/zerolog/log"
_ "github.com/jinzhu/gorm/dialects/sqlite"
)
// StoredToot is the database model for a single archived toot. Rows are
// created by Manager.StoreToot and looked up by Hash in Manager.TootExists.
type StoredToot struct {
// gorm.Model embeds gorm's standard bookkeeping columns.
gorm.Model
// UUID is set to a freshly generated uuid.New() value when the toot is
// stored.
UUID uuid.UUID `gorm:"type:uuid;primary_key;"`
//Original string `sql:"type:text"`
// Original is copied from toot.Toot.Original.
Original []byte
// Hash is the value of toot.Toot.GetHash(); the unique index is what
// TootExists queries against.
Hash string `gorm:"unique_index"`
// ServerCreated is copied from the parsed toot's CreatedAt.
ServerCreated time.Time
// Acct is the account identifier built from the parsed toot's
// Account.Acct and the hostname the toot was fetched from.
Acct string
// Content is copied from the parsed toot's Content.
Content []byte
// URL is copied from the parsed toot's URL.
URL string
// Hostname is the lowercased toot.Toot.FromHost.
Hostname string
}
// APInstance is the database model holding the persisted state of one
// instance. Manager.ListInstances copies these fields back into an
// instance.Instance when instances are loaded from the database.
type APInstance struct {
// gorm.Model embeds gorm's standard bookkeeping columns.
gorm.Model
UUID uuid.UUID `gorm:"type:uuid;primary_key;"`
ErrorCount uint
SuccessCount uint
HighestID uint
// Hostname carries a unique index, so there is at most one row per
// hostname.
Hostname string `gorm:"type:varchar(100);unique_index"`
Identified bool
Fetching bool
Disabled bool
// Implementation is stored as a plain string.
Implementation string
NextFetch time.Time
NodeInfoURL string
ServerVersionString string
ServerImplementationString string
// FSMState is restored into instance.Instance.InitialFSMState by
// ListInstances.
FSMState string
}
// doMigrations runs gorm's AutoMigrate for every model this package
// persists.
//
// NB: when you add a new model to this package you must also add an
// AutoMigrate call for it here.
//
// The function has no error result, so a failed migration is logged rather
// than silently discarded.
func (m *Manager) doMigrations() {
	log.Info().Msg("doing database migrations if required")
	if err := m.db.AutoMigrate(&APInstance{}).Error; err != nil {
		log.Error().Err(err).Msg("unable to migrate APInstance table")
	}
	if err := m.db.AutoMigrate(&StoredToot{}).Error; err != nil {
		log.Error().Err(err).Msg("unable to migrate StoredToot table")
	}
}

@ -0,0 +1,47 @@
package database
import (
"fmt"
"strings"
"git.eeqj.de/sneak/feta/toot"
"github.com/google/uuid"
_ "github.com/jinzhu/gorm/dialects/sqlite"
)
// TootExists reports whether a StoredToot row with the same hash as t is
// already present. It returns false only when the lookup reports "record
// not found"; any other outcome (including other query errors) yields true.
func (m *Manager) TootExists(t *toot.Toot) bool {
	var existing StoredToot
	lookup := m.db.Where("Hash = ?", t.GetHash()).First(&existing)
	return !lookup.RecordNotFound()
}
// StoreToot persists t as a new StoredToot row with a freshly generated
// UUID and returns any error reported by the database insert.
//
// The source hostname is lowercased before it is stored. The account is
// stored fully qualified: an Acct that already carries a domain (contains
// "@") is kept as-is, otherwise "@<hostname>" is appended.
func (m *Manager) StoreToot(t *toot.Toot) error {
	host := strings.ToLower(t.FromHost)

	nt := &StoredToot{
		UUID:          uuid.New(),
		ServerCreated: t.Parsed.CreatedAt,
		Original:      t.Original,
		Acct:          qualifyAcct(t.Parsed.Account.Acct, host),
		URL:           t.Parsed.URL,
		Content:       t.Parsed.Content,
		Hostname:      host,
		Hash:          t.GetHash(),
	}

	// nt is already a pointer; pass it directly rather than a **StoredToot.
	return m.db.Create(nt).Error
}

// qualifyAcct returns acct unchanged when it already contains an "@"
// (i.e. it names a remote account with its own domain) and otherwise
// appends "@host" so that local accounts are stored fully qualified.
func qualifyAcct(acct, host string) string {
	if strings.Contains(acct, "@") {
		return acct
	}
	return fmt.Sprintf("%s@%s", acct, host)
}
// StoreToots stores each toot in tc, in order, by calling StoreToot. It
// stops at the first failure and returns that error; toots after the
// failing one are not attempted. It returns nil when every toot was stored.
func (m *Manager) StoreToots(tc []*toot.Toot) error {
	for idx := range tc {
		if err := m.StoreToot(tc[idx]); err != nil {
			return err
		}
	}
	return nil
}

@ -1,9 +1,12 @@
package ingester
import "time"
import "github.com/rs/zerolog/log"
import "git.eeqj.de/sneak/feta/toot"
import "git.eeqj.de/sneak/feta/storage"
import (
"time"
"git.eeqj.de/sneak/feta/storage"
"git.eeqj.de/sneak/feta/toot"
"github.com/rs/zerolog/log"
)
// TootIngester is the data structure for the ingester process that is
// responsible for storing the discovered toots
@ -15,7 +18,7 @@ type TootIngester struct {
type seenTootMemo struct {
lastSeen time.Time
tootHash toot.Hash
tootHash string
}
// NewTootIngester returns a fresh TootIngester for your use
@ -55,5 +58,5 @@ func (ti *TootIngester) storeToot(t *toot.Toot) {
if ti.storageBackend == nil {
panic("no storage backend")
}
ti.storageBackend.StoreToot(*t)
ti.storageBackend.StoreToot(t)
}

@ -11,7 +11,6 @@ import (
"time"
"git.eeqj.de/sneak/feta/jsonapis"
"git.eeqj.de/sneak/feta/storage"
"git.eeqj.de/sneak/feta/toot"
"github.com/google/uuid"
"github.com/looplab/fsm"
@ -26,14 +25,6 @@ const instanceHTTPTimeout = time.Second * 120
const instanceSpiderInterval = time.Second * 120
const instanceErrorInterval = time.Second * 60 * 30
type instanceImplementation int
const (
implUnknown instanceImplementation = iota
implMastodon
implPleroma
)
// Instance stores all the information we know about an instance
type Instance struct {
Disabled bool
@ -42,17 +33,17 @@ type Instance struct {
Fetching bool
HighestID uint
Hostname string
UUID uuid.UUID
Identified bool
Implementation instanceImplementation
Implementation string
InitialFSMState string
NextFetch time.Time
NodeInfoURL string
ServerImplementationString string
ServerVersionString string
SuccessCount uint
UUID uuid.UUID
fetchingLock sync.Mutex
fsmLock sync.Mutex
storageBackend *storage.TootStorageBackend
structLock sync.Mutex
tootDestination chan *toot.Toot
}
@ -63,9 +54,14 @@ func New(options ...func(i *Instance)) *Instance {
i := new(Instance)
i.UUID = uuid.New()
i.setNextFetchAfter(1 * time.Second)
i.InitialFSMState = "STATUS_UNKNOWN"
for _, opt := range options {
opt(i)
}
i.FSM = fsm.NewFSM(
"STATUS_UNKNOWN",
i.InitialFSMState,
fsm.Events{
{Name: "BEGIN_NODEINFO_URL_FETCH", Src: []string{"STATUS_UNKNOWN"}, Dst: "FETCHING_NODEINFO_URL"},
{Name: "GOT_NODEINFO_URL", Src: []string{"FETCHING_NODEINFO_URL"}, Dst: "PRE_NODEINFO_FETCH"},
@ -82,10 +78,6 @@ func New(options ...func(i *Instance)) *Instance {
"enter_state": func(e *fsm.Event) { i.fsmEnterState(e) },
},
)
for _, opt := range options {
opt(i)
}
return i
}
@ -195,7 +187,7 @@ func (i *Instance) Tick() {
func (i *Instance) nodeIdentified() bool {
i.Lock()
defer i.Unlock()
if i.Implementation > implUnknown {
if i.Implementation != "" {
return true
}
return false
@ -379,7 +371,7 @@ func (i *Instance) fetchNodeInfo() error {
Str("software", ni.Software.Name).
Msg("detected server software")
i.Identified = true
i.Implementation = implPleroma
i.Implementation = "pleroma"
i.Unlock()
i.registerSuccess()
i.Event("GOT_NODEINFO")
@ -390,7 +382,7 @@ func (i *Instance) fetchNodeInfo() error {
Str("software", ni.Software.Name).
Msg("detected server software")
i.Identified = true
i.Implementation = implMastodon
i.Implementation = "mastodon"
i.Unlock()
i.registerSuccess()
i.Event("GOT_NODEINFO")

@ -50,9 +50,15 @@ func (im *InstanceManager) RestoreFromDB() {
}
im.lock()
defer im.unlock()
count := 0
for _, x := range newil {
x.SetTootDestination(im.tootDestination)
im.instances[x.Hostname] = x
count = count + 1
}
log.Info().
Int("count", count).
Msg("restored instances from database")
}
func (im *InstanceManager) SaveToDB() {

@ -52,6 +52,8 @@ func (f *Feta) configure() {
viper.AutomaticEnv()
viper.SetDefault("Debug", false)
viper.SetDefault("TootsToDisk", false)
viper.SetDefault("TootsToDB", true)
viper.SetDefault("HostDiscoveryParallelism", 5)
viper.SetDefault("FSStorageLocation", os.ExpandEnv("$HOME/Library/ApplicationSupport/feta/tootarchive.d"))
viper.SetDefault("DBStorageLocation", os.ExpandEnv("$HOME/Library/ApplicationSupport/feta/feta.state.db"))
@ -138,8 +140,14 @@ func (f *Feta) runForever() int {
panic("can't find home directory")
}
diskBackend := storage.NewTootFSStorage(home + "/.local/feta")
f.ingester.SetStorageBackend(diskBackend)
if viper.GetBool("TootsToDB") {
f.ingester.SetStorageBackend(f.dbm)
} else if viper.GetBool("TootsToDisk") {
diskBackend := storage.NewTootFSStorage(viper.GetString("FSStorageLocation"))
f.ingester.SetStorageBackend(diskBackend)
} else {
log.Info().Msg("toots will not be saved to disk")
}
f.api = new(Server)
f.api.SetFeta(f) // api needs to get to us to access data

@ -11,18 +11,6 @@ import (
"git.eeqj.de/sneak/feta/toot"
)
// TootStorageBackend is the interface to which storage backends must
// conform for storing toots
type TootStorageBackend interface {
TootExists(t toot.Toot) bool
StoreToot(t toot.Toot) error
StoreToots(tc []*toot.Toot) error
}
type TootDBStorage struct {
db string
}
// TootFSStorage is a TootStorageBackend that writes to the local
// filesystem.
type TootFSStorage struct {
@ -41,7 +29,7 @@ func NewTootFSStorage(root string) *TootFSStorage {
func (ts *TootFSStorage) StoreToots(tc []*toot.Toot) error {
var returnErrors []string
for _, item := range tc {
err := ts.StoreToot(*item)
err := ts.StoreToot(item)
if err != nil {
returnErrors = append(returnErrors, err.Error())
continue
@ -56,7 +44,7 @@ func (ts *TootFSStorage) StoreToots(tc []*toot.Toot) error {
// TootExists checks to see if we have already written a toot to disk or
// not. Note that the ingester de-dupes with a table in memory so that this
// will only really get used on app restarts
func (ts *TootFSStorage) TootExists(t toot.Toot) bool {
func (ts *TootFSStorage) TootExists(t *toot.Toot) bool {
path := t.DiskStoragePath()
full := ts.root + "/" + path
_, err := os.Stat(full)
@ -67,7 +55,7 @@ func (ts *TootFSStorage) TootExists(t toot.Toot) bool {
}
// StoreToot writes a single toot to disk
func (ts *TootFSStorage) StoreToot(t toot.Toot) error {
func (ts *TootFSStorage) StoreToot(t *toot.Toot) error {
path := t.DiskStoragePath()
full := ts.root + "/" + path
dir := filepath.Dir(full)
@ -82,7 +70,7 @@ func (ts *TootFSStorage) StoreToot(t toot.Toot) error {
// toots in ram forever until the computer fills up and catches fire and explodes
type TootMemoryStorage struct {
sync.Mutex
toots map[toot.Hash]toot.Toot
toots map[string]*toot.Toot
//maxSize uint // FIXME support eviction
}
@ -90,12 +78,12 @@ type TootMemoryStorage struct {
// ram forever
func NewTootMemoryStorage() *TootMemoryStorage {
ts := new(TootMemoryStorage)
ts.toots = make(map[toot.Hash]toot.Toot)
ts.toots = make(map[string]*toot.Toot)
return ts
}
// StoreToot saves a single toot into an in-memory hashtable
func (ts *TootMemoryStorage) StoreToot(t toot.Toot) {
func (ts *TootMemoryStorage) StoreToot(t *toot.Toot) {
if ts.TootExists(t) {
return
}
@ -106,7 +94,7 @@ func (ts *TootMemoryStorage) StoreToot(t toot.Toot) {
}
// TootExists checks to see if we have a toot in memory already
func (ts *TootMemoryStorage) TootExists(t toot.Toot) bool {
func (ts *TootMemoryStorage) TootExists(t *toot.Toot) bool {
ts.Lock()
defer ts.Unlock()
if _, ok := ts.toots[t.Hash]; ok { //this syntax is so gross

@ -0,0 +1,13 @@
package storage
import (
"git.eeqj.de/sneak/feta/toot"
)
// TootStorageBackend is the interface to which storage backends must
// conform for storing toots. All methods take toots by pointer.
type TootStorageBackend interface {
// TootExists reports whether the backend already holds the given toot.
TootExists(t *toot.Toot) bool
// StoreToot stores a single toot and returns an error on failure.
StoreToot(t *toot.Toot) error
// StoreToots stores a batch of toots and returns an error on failure.
StoreToots(tc []*toot.Toot) error
}

@ -1,27 +1,30 @@
package toot
import "fmt"
import "encoding/json"
import "errors"
import "strings"
import "git.eeqj.de/sneak/feta/jsonapis"
import (
"encoding/json"
"errors"
"fmt"
"strings"
//import "github.com/davecgh/go-spew/spew"
import "github.com/rs/zerolog/log"
"git.eeqj.de/sneak/feta/jsonapis"
"github.com/rs/zerolog/log"
//import "encoding/hex"
import mh "github.com/multiformats/go-multihash"
import mhopts "github.com/multiformats/go-multihash/opts"
//import "github.com/davecgh/go-spew/spew"
//import "encoding/hex"
mh "github.com/multiformats/go-multihash"
mhopts "github.com/multiformats/go-multihash/opts"
)
// Hash is a type for storing a string-based base58 multihash of a
// toot's identity
type Hash string
// Toot is an object we use internally for storing a discovered toot
type Toot struct {
Original []byte
Parsed *jsonapis.APISerializedToot
Hash Hash
Hash string
FromHost string
}
@ -111,7 +114,14 @@ func (t *Toot) identityHashInput() string {
)
}
// GetHash returns the toot's hash. If t.Hash is still empty it is first
// computed with calcHash, which stores the result in t.Hash, so later
// calls return the stored value.
func (t *Toot) GetHash() string {
	if t.Hash != "" {
		return t.Hash
	}
	t.calcHash()
	return t.Hash
}
func (t *Toot) calcHash() {
hi := t.identityHashInput()
t.Hash = Hash(t.multiHash([]byte(hi)))
t.Hash = string(t.multiHash([]byte(hi)))
}

Loading…
Cancel
Save