now actually does something
continuous-integration/drone/push Build is failing Details

This commit is contained in:
Jeffrey Paul 2020-03-27 19:57:58 -07:00
parent b3f672b84a
commit 2ecd833726
12 changed files with 213 additions and 91 deletions

1
.gitignore vendored
View File

@ -2,3 +2,4 @@ feta
output/ output/
feta.sqlite feta.sqlite
.lintsetup .lintsetup
out

View File

@ -2,9 +2,20 @@
archives the fediverse archives the fediverse
# todo
* scan toots for mentions and feed to locator
* put toots in a separate db file
* test with a real database
* save instances to store more often
* verify instances load properly on startup
* do some simple in-memory dedupe for toot storage
* make some templates using pongo2 and a simple website
* update APIs
# status # status
[![CircleCI](https://circleci.com/gh/sneak/feta.svg?style=svg)](https://circleci.com/gh/sneak/feta) [![Build Status](https://drone.datavi.be/api/badges/sneak/feta/status.svg)](https://drone.datavi.be/sneak/feta)
# ethics statement # ethics statement

View File

@ -1,40 +1,12 @@
package database package database
import ( import (
"time"
"git.eeqj.de/sneak/feta/instance" "git.eeqj.de/sneak/feta/instance"
"github.com/google/uuid"
"github.com/jinzhu/gorm"
"github.com/rs/zerolog/log" "github.com/rs/zerolog/log"
_ "github.com/jinzhu/gorm/dialects/sqlite" _ "github.com/jinzhu/gorm/dialects/sqlite"
) )
type APInstance struct {
gorm.Model
UUID uuid.UUID `gorm:"type:uuid;primary_key;"`
ErrorCount uint
SuccessCount uint
HighestID uint
Hostname string `gorm:"type:varchar(100);unique_index"`
Identified bool
Fetching bool
Disabled bool
Implementation string
NextFetch time.Time
NodeInfoURL string
ServerVersionString string
ServerImplementationString string
FSMState string
}
// NB that when you add a model below you must add it to this list!
func (m *Manager) doMigrations() {
log.Info().Msg("doing database migrations if required")
m.db.AutoMigrate(&APInstance{})
}
func (m *Manager) SaveInstance(i *instance.Instance) error { func (m *Manager) SaveInstance(i *instance.Instance) error {
i.Lock() i.Lock()
defer i.Unlock() defer i.Unlock()
@ -53,7 +25,7 @@ func (m *Manager) SaveInstance(i *instance.Instance) error {
HighestID: i.HighestID, HighestID: i.HighestID,
Hostname: i.Hostname, Hostname: i.Hostname,
Identified: i.Identified, Identified: i.Identified,
Implementation: string(i.Implementation), Implementation: i.Implementation,
NextFetch: i.NextFetch, NextFetch: i.NextFetch,
NodeInfoURL: i.NodeInfoURL, NodeInfoURL: i.NodeInfoURL,
ServerImplementationString: i.ServerImplementationString, ServerImplementationString: i.ServerImplementationString,
@ -92,7 +64,29 @@ func (m *Manager) SaveInstance(i *instance.Instance) error {
func (m *Manager) ListInstances() ([]*instance.Instance, error) { func (m *Manager) ListInstances() ([]*instance.Instance, error) {
output := make([]*instance.Instance, 0) output := make([]*instance.Instance, 0)
// FIXME have this produce a list of Instance
var results []APInstance
m.db.Find(&results)
for _, i := range results {
newinst := instance.New(func(x *instance.Instance) {
x.UUID = i.UUID
x.Disabled = i.Disabled
x.ErrorCount = i.ErrorCount
x.InitialFSMState = i.FSMState
x.Fetching = i.Fetching
x.HighestID = i.HighestID
x.Hostname = i.Hostname
x.Identified = i.Identified
x.Implementation = i.Implementation
x.NextFetch = i.NextFetch
x.NodeInfoURL = i.NodeInfoURL
x.ServerImplementationString = i.ServerImplementationString
x.ServerVersionString = i.ServerVersionString
x.SuccessCount = i.SuccessCount
})
output = append(output, newinst)
}
return output, nil return output, nil
} }

49
database/model.go Normal file
View File

@ -0,0 +1,49 @@
package database
import (
"time"
"github.com/google/uuid"
"github.com/jinzhu/gorm"
"github.com/rs/zerolog/log"
_ "github.com/jinzhu/gorm/dialects/sqlite"
)
// StoredToot is the gorm model for a single archived toot as it is
// persisted in the database. It is registered with AutoMigrate in
// doMigrations and populated by Manager.StoreToot.
type StoredToot struct {
// gorm.Model is embedded alongside the explicit UUID key below.
gorm.Model
// UUID is generated with uuid.New() when the toot is stored.
UUID uuid.UUID `gorm:"type:uuid;primary_key;"`
//Original string `sql:"type:text"`
// Original is copied from toot.Toot.Original.
Original []byte
// Hash is the toot's identity hash (toot.Toot.GetHash); the unique index
// means the database rejects a second row with the same hash.
Hash string `gorm:"unique_index"`
// ServerCreated is copied from the parsed toot's CreatedAt field.
ServerCreated time.Time
// Acct identifies the authoring account in user@host form.
Acct string
// Content is copied from the parsed toot's Content field.
Content []byte
// URL is copied from the parsed toot's URL field.
URL string
// Hostname is the lowercased host the toot was fetched from.
Hostname string
}
// APInstance is the gorm model used to persist what is known about one
// instance. Manager.SaveInstance copies these fields out of an
// instance.Instance and Manager.ListInstances copies them back in.
type APInstance struct {
// gorm.Model is embedded alongside the explicit UUID key below.
gorm.Model
UUID uuid.UUID `gorm:"type:uuid;primary_key;"`
ErrorCount uint
SuccessCount uint
HighestID uint
// Hostname carries a unique index, so there is at most one row per host.
Hostname string `gorm:"type:varchar(100);unique_index"`
Identified bool
Fetching bool
Disabled bool
// Implementation is stored as a plain string; the instance package sets
// it to values such as "pleroma" or "mastodon".
Implementation string
NextFetch time.Time
NodeInfoURL string
ServerVersionString string
ServerImplementationString string
// FSMState is handed back to the instance as its InitialFSMState when
// instances are loaded in ListInstances.
FSMState string
}
// doMigrations asks gorm to create or update the tables backing every
// model in this package. A failed migration is logged instead of being
// silently discarded; the remaining models are still attempted.
//
// NB that when you add a model above you must add it to this list!
func (m *Manager) doMigrations() {
	log.Info().Msg("doing database migrations if required")

	models := []interface{}{
		&APInstance{},
		&StoredToot{},
	}
	for _, model := range models {
		// AutoMigrate reports failure via the Error field of the returned
		// *gorm.DB rather than as a return value.
		if err := m.db.AutoMigrate(model).Error; err != nil {
			log.Error().Err(err).Msg("database migration failed")
		}
	}
}

View File

@ -0,0 +1,47 @@
package database
import (
"fmt"
"strings"
"git.eeqj.de/sneak/feta/toot"
"github.com/google/uuid"
_ "github.com/jinzhu/gorm/dialects/sqlite"
)
// TootExists reports whether a toot with the same identity hash as t has
// already been stored in the database.
//
// It returns true only when the lookup actually found a row. Any failure,
// whether "record not found" or a real database error, yields false, so a
// broken query can no longer make an unsaved toot look like it is already
// archived.
func (m *Manager) TootExists(t *toot.Toot) bool {
	var found StoredToot
	// First leaves Error nil only when a matching row was loaded.
	return m.db.Where("Hash = ?", t.GetHash()).First(&found).Error == nil
}
// normalizeAcct returns acct in user@host form: an acct that already names
// a domain keeps it (lowercased), otherwise the lowercased host is appended.
func normalizeAcct(acct, host string) string {
	if at := strings.LastIndex(acct, "@"); at >= 0 {
		return acct[:at+1] + strings.ToLower(acct[at+1:])
	}
	return fmt.Sprintf("%s@%s", acct, strings.ToLower(host))
}

// StoreToot writes a single toot to the database as a StoredToot row.
//
// The row gets a fresh UUID. The account is stored fully qualified: the
// fetching host is appended only when the toot's acct does not already
// contain a domain, so accounts from other hosts are not given a second
// "@host" suffix.
//
// It returns an error if t has no parsed content, or whatever error the
// database reports for the insert.
func (m *Manager) StoreToot(t *toot.Toot) error {
	if t == nil || t.Parsed == nil {
		return fmt.Errorf("cannot store a toot without parsed content")
	}

	host := strings.ToLower(t.FromHost)
	nt := &StoredToot{
		UUID:          uuid.New(),
		ServerCreated: t.Parsed.CreatedAt,
		Original:      t.Original,
		Acct:          normalizeAcct(t.Parsed.Account.Acct, host),
		URL:           t.Parsed.URL,
		Content:       t.Parsed.Content,
		Hostname:      host,
		Hash:          t.GetHash(),
	}

	// nt is already a pointer; hand it to gorm directly.
	return m.db.Create(nt).Error
}
// StoreToots stores every toot in tc, in order, via StoreToot. It stops at
// the first failure and returns that error; toots stored before the failure
// are left in place. It returns nil when all toots were stored.
func (m *Manager) StoreToots(tc []*toot.Toot) error {
	for idx := range tc {
		if err := m.StoreToot(tc[idx]); err != nil {
			return err
		}
	}
	return nil
}

View File

@ -1,9 +1,12 @@
package ingester package ingester
import "time" import (
import "github.com/rs/zerolog/log" "time"
import "git.eeqj.de/sneak/feta/toot"
import "git.eeqj.de/sneak/feta/storage" "git.eeqj.de/sneak/feta/storage"
"git.eeqj.de/sneak/feta/toot"
"github.com/rs/zerolog/log"
)
// TootIngester is the data structure for the ingester process that is // TootIngester is the data structure for the ingester process that is
// responsible for storing the discovered toots // responsible for storing the discovered toots
@ -15,7 +18,7 @@ type TootIngester struct {
type seenTootMemo struct { type seenTootMemo struct {
lastSeen time.Time lastSeen time.Time
tootHash toot.Hash tootHash string
} }
// NewTootIngester returns a fresh TootIngester for your use // NewTootIngester returns a fresh TootIngester for your use
@ -55,5 +58,5 @@ func (ti *TootIngester) storeToot(t *toot.Toot) {
if ti.storageBackend == nil { if ti.storageBackend == nil {
panic("no storage backend") panic("no storage backend")
} }
ti.storageBackend.StoreToot(*t) ti.storageBackend.StoreToot(t)
} }

View File

@ -11,7 +11,6 @@ import (
"time" "time"
"git.eeqj.de/sneak/feta/jsonapis" "git.eeqj.de/sneak/feta/jsonapis"
"git.eeqj.de/sneak/feta/storage"
"git.eeqj.de/sneak/feta/toot" "git.eeqj.de/sneak/feta/toot"
"github.com/google/uuid" "github.com/google/uuid"
"github.com/looplab/fsm" "github.com/looplab/fsm"
@ -26,14 +25,6 @@ const instanceHTTPTimeout = time.Second * 120
const instanceSpiderInterval = time.Second * 120 const instanceSpiderInterval = time.Second * 120
const instanceErrorInterval = time.Second * 60 * 30 const instanceErrorInterval = time.Second * 60 * 30
type instanceImplementation int
const (
implUnknown instanceImplementation = iota
implMastodon
implPleroma
)
// Instance stores all the information we know about an instance // Instance stores all the information we know about an instance
type Instance struct { type Instance struct {
Disabled bool Disabled bool
@ -42,17 +33,17 @@ type Instance struct {
Fetching bool Fetching bool
HighestID uint HighestID uint
Hostname string Hostname string
UUID uuid.UUID
Identified bool Identified bool
Implementation instanceImplementation Implementation string
InitialFSMState string
NextFetch time.Time NextFetch time.Time
NodeInfoURL string NodeInfoURL string
ServerImplementationString string ServerImplementationString string
ServerVersionString string ServerVersionString string
SuccessCount uint SuccessCount uint
UUID uuid.UUID
fetchingLock sync.Mutex fetchingLock sync.Mutex
fsmLock sync.Mutex fsmLock sync.Mutex
storageBackend *storage.TootStorageBackend
structLock sync.Mutex structLock sync.Mutex
tootDestination chan *toot.Toot tootDestination chan *toot.Toot
} }
@ -63,9 +54,14 @@ func New(options ...func(i *Instance)) *Instance {
i := new(Instance) i := new(Instance)
i.UUID = uuid.New() i.UUID = uuid.New()
i.setNextFetchAfter(1 * time.Second) i.setNextFetchAfter(1 * time.Second)
i.InitialFSMState = "STATUS_UNKNOWN"
for _, opt := range options {
opt(i)
}
i.FSM = fsm.NewFSM( i.FSM = fsm.NewFSM(
"STATUS_UNKNOWN", i.InitialFSMState,
fsm.Events{ fsm.Events{
{Name: "BEGIN_NODEINFO_URL_FETCH", Src: []string{"STATUS_UNKNOWN"}, Dst: "FETCHING_NODEINFO_URL"}, {Name: "BEGIN_NODEINFO_URL_FETCH", Src: []string{"STATUS_UNKNOWN"}, Dst: "FETCHING_NODEINFO_URL"},
{Name: "GOT_NODEINFO_URL", Src: []string{"FETCHING_NODEINFO_URL"}, Dst: "PRE_NODEINFO_FETCH"}, {Name: "GOT_NODEINFO_URL", Src: []string{"FETCHING_NODEINFO_URL"}, Dst: "PRE_NODEINFO_FETCH"},
@ -82,10 +78,6 @@ func New(options ...func(i *Instance)) *Instance {
"enter_state": func(e *fsm.Event) { i.fsmEnterState(e) }, "enter_state": func(e *fsm.Event) { i.fsmEnterState(e) },
}, },
) )
for _, opt := range options {
opt(i)
}
return i return i
} }
@ -195,7 +187,7 @@ func (i *Instance) Tick() {
func (i *Instance) nodeIdentified() bool { func (i *Instance) nodeIdentified() bool {
i.Lock() i.Lock()
defer i.Unlock() defer i.Unlock()
if i.Implementation > implUnknown { if i.Implementation != "" {
return true return true
} }
return false return false
@ -379,7 +371,7 @@ func (i *Instance) fetchNodeInfo() error {
Str("software", ni.Software.Name). Str("software", ni.Software.Name).
Msg("detected server software") Msg("detected server software")
i.Identified = true i.Identified = true
i.Implementation = implPleroma i.Implementation = "pleroma"
i.Unlock() i.Unlock()
i.registerSuccess() i.registerSuccess()
i.Event("GOT_NODEINFO") i.Event("GOT_NODEINFO")
@ -390,7 +382,7 @@ func (i *Instance) fetchNodeInfo() error {
Str("software", ni.Software.Name). Str("software", ni.Software.Name).
Msg("detected server software") Msg("detected server software")
i.Identified = true i.Identified = true
i.Implementation = implMastodon i.Implementation = "mastodon"
i.Unlock() i.Unlock()
i.registerSuccess() i.registerSuccess()
i.Event("GOT_NODEINFO") i.Event("GOT_NODEINFO")

View File

@ -50,9 +50,15 @@ func (im *InstanceManager) RestoreFromDB() {
} }
im.lock() im.lock()
defer im.unlock() defer im.unlock()
count := 0
for _, x := range newil { for _, x := range newil {
x.SetTootDestination(im.tootDestination)
im.instances[x.Hostname] = x im.instances[x.Hostname] = x
count = count + 1
} }
log.Info().
Int("count", count).
Msg("restored instances from database")
} }
func (im *InstanceManager) SaveToDB() { func (im *InstanceManager) SaveToDB() {

View File

@ -52,6 +52,8 @@ func (f *Feta) configure() {
viper.AutomaticEnv() viper.AutomaticEnv()
viper.SetDefault("Debug", false) viper.SetDefault("Debug", false)
viper.SetDefault("TootsToDisk", false)
viper.SetDefault("TootsToDB", true)
viper.SetDefault("HostDiscoveryParallelism", 5) viper.SetDefault("HostDiscoveryParallelism", 5)
viper.SetDefault("FSStorageLocation", os.ExpandEnv("$HOME/Library/ApplicationSupport/feta/tootarchive.d")) viper.SetDefault("FSStorageLocation", os.ExpandEnv("$HOME/Library/ApplicationSupport/feta/tootarchive.d"))
viper.SetDefault("DBStorageLocation", os.ExpandEnv("$HOME/Library/ApplicationSupport/feta/feta.state.db")) viper.SetDefault("DBStorageLocation", os.ExpandEnv("$HOME/Library/ApplicationSupport/feta/feta.state.db"))
@ -138,8 +140,14 @@ func (f *Feta) runForever() int {
panic("can't find home directory") panic("can't find home directory")
} }
diskBackend := storage.NewTootFSStorage(home + "/.local/feta") if viper.GetBool("TootsToDB") {
f.ingester.SetStorageBackend(diskBackend) f.ingester.SetStorageBackend(f.dbm)
} else if viper.GetBool("TootsToDisk") {
diskBackend := storage.NewTootFSStorage(viper.GetString("FSStorageLocation"))
f.ingester.SetStorageBackend(diskBackend)
} else {
log.Info().Msg("toots will not be saved to disk")
}
f.api = new(Server) f.api = new(Server)
f.api.SetFeta(f) // api needs to get to us to access data f.api.SetFeta(f) // api needs to get to us to access data

View File

@ -11,18 +11,6 @@ import (
"git.eeqj.de/sneak/feta/toot" "git.eeqj.de/sneak/feta/toot"
) )
// TootStorageBackend is the interface to which storage backends must
// conform for storing toots
type TootStorageBackend interface {
TootExists(t toot.Toot) bool
StoreToot(t toot.Toot) error
StoreToots(tc []*toot.Toot) error
}
type TootDBStorage struct {
db string
}
// TootFSStorage is a TootStorageBackend that writes to the local // TootFSStorage is a TootStorageBackend that writes to the local
// filesystem. // filesystem.
type TootFSStorage struct { type TootFSStorage struct {
@ -41,7 +29,7 @@ func NewTootFSStorage(root string) *TootFSStorage {
func (ts *TootFSStorage) StoreToots(tc []*toot.Toot) error { func (ts *TootFSStorage) StoreToots(tc []*toot.Toot) error {
var returnErrors []string var returnErrors []string
for _, item := range tc { for _, item := range tc {
err := ts.StoreToot(*item) err := ts.StoreToot(item)
if err != nil { if err != nil {
returnErrors = append(returnErrors, err.Error()) returnErrors = append(returnErrors, err.Error())
continue continue
@ -56,7 +44,7 @@ func (ts *TootFSStorage) StoreToots(tc []*toot.Toot) error {
// TootExists checks to see if we have already written a toot to disk or // TootExists checks to see if we have already written a toot to disk or
// not. Note that the ingester de-dupes with a table in memory so that this // not. Note that the ingester de-dupes with a table in memory so that this
// will only really get used on app restarts // will only really get used on app restarts
func (ts *TootFSStorage) TootExists(t toot.Toot) bool { func (ts *TootFSStorage) TootExists(t *toot.Toot) bool {
path := t.DiskStoragePath() path := t.DiskStoragePath()
full := ts.root + "/" + path full := ts.root + "/" + path
_, err := os.Stat(full) _, err := os.Stat(full)
@ -67,7 +55,7 @@ func (ts *TootFSStorage) TootExists(t toot.Toot) bool {
} }
// StoreToot writes a single toot to disk // StoreToot writes a single toot to disk
func (ts *TootFSStorage) StoreToot(t toot.Toot) error { func (ts *TootFSStorage) StoreToot(t *toot.Toot) error {
path := t.DiskStoragePath() path := t.DiskStoragePath()
full := ts.root + "/" + path full := ts.root + "/" + path
dir := filepath.Dir(full) dir := filepath.Dir(full)
@ -82,7 +70,7 @@ func (ts *TootFSStorage) StoreToot(t toot.Toot) error {
// toots in ram forever until the computer fills up and catches fire and explodes // toots in ram forever until the computer fills up and catches fire and explodes
type TootMemoryStorage struct { type TootMemoryStorage struct {
sync.Mutex sync.Mutex
toots map[toot.Hash]toot.Toot toots map[string]*toot.Toot
//maxSize uint // FIXME support eviction //maxSize uint // FIXME support eviction
} }
@ -90,12 +78,12 @@ type TootMemoryStorage struct {
// ram forever // ram forever
func NewTootMemoryStorage() *TootMemoryStorage { func NewTootMemoryStorage() *TootMemoryStorage {
ts := new(TootMemoryStorage) ts := new(TootMemoryStorage)
ts.toots = make(map[toot.Hash]toot.Toot) ts.toots = make(map[string]*toot.Toot)
return ts return ts
} }
// StoreToot saves a single toot into an in-memory hashtable // StoreToot saves a single toot into an in-memory hashtable
func (ts *TootMemoryStorage) StoreToot(t toot.Toot) { func (ts *TootMemoryStorage) StoreToot(t *toot.Toot) {
if ts.TootExists(t) { if ts.TootExists(t) {
return return
} }
@ -106,7 +94,7 @@ func (ts *TootMemoryStorage) StoreToot(t toot.Toot) {
} }
// TootExists checks to see if we have a toot in memory already // TootExists checks to see if we have a toot in memory already
func (ts *TootMemoryStorage) TootExists(t toot.Toot) bool { func (ts *TootMemoryStorage) TootExists(t *toot.Toot) bool {
ts.Lock() ts.Lock()
defer ts.Unlock() defer ts.Unlock()
if _, ok := ts.toots[t.Hash]; ok { //this syntax is so gross if _, ok := ts.toots[t.Hash]; ok { //this syntax is so gross

13
storage/interface.go Normal file
View File

@ -0,0 +1,13 @@
package storage
import (
"git.eeqj.de/sneak/feta/toot"
)
// TootStorageBackend is the interface to which storage backends must
// conform for storing toots
type TootStorageBackend interface {
// TootExists reports whether the backend already holds t.
TootExists(t *toot.Toot) bool
// StoreToot saves a single toot, returning an error on failure.
StoreToot(t *toot.Toot) error
// StoreToots saves a batch of toots, returning an error on failure.
StoreToots(tc []*toot.Toot) error
}

View File

@ -1,27 +1,30 @@
package toot package toot
import "fmt" import (
import "encoding/json" "encoding/json"
import "errors" "errors"
import "strings" "fmt"
import "git.eeqj.de/sneak/feta/jsonapis" "strings"
//import "github.com/davecgh/go-spew/spew" "git.eeqj.de/sneak/feta/jsonapis"
import "github.com/rs/zerolog/log" "github.com/rs/zerolog/log"
//import "encoding/hex" //import "github.com/davecgh/go-spew/spew"
import mh "github.com/multiformats/go-multihash"
import mhopts "github.com/multiformats/go-multihash/opts" //import "encoding/hex"
mh "github.com/multiformats/go-multihash"
mhopts "github.com/multiformats/go-multihash/opts"
)
// Hash is a type for storing a string-based base58 multihash of a // Hash is a type for storing a string-based base58 multihash of a
// toot's identity // toot's identity
type Hash string
// Toot is an object we use internally for storing a discovered toot // Toot is an object we use internally for storing a discovered toot
type Toot struct { type Toot struct {
Original []byte Original []byte
Parsed *jsonapis.APISerializedToot Parsed *jsonapis.APISerializedToot
Hash Hash Hash string
FromHost string FromHost string
} }
@ -111,7 +114,14 @@ func (t *Toot) identityHashInput() string {
) )
} }
// GetHash returns the toot's identity hash. The hash is computed on first
// use via calcHash and cached in t.Hash, so later calls return the stored
// value.
func (t *Toot) GetHash() string {
	if t.Hash != "" {
		return t.Hash
	}
	t.calcHash()
	return t.Hash
}
func (t *Toot) calcHash() { func (t *Toot) calcHash() {
hi := t.identityHashInput() hi := t.identityHashInput()
t.Hash = Hash(t.multiHash([]byte(hi))) t.Hash = string(t.multiHash([]byte(hi)))
} }