This commit is contained in:
parent
b3f672b84a
commit
2ecd833726
1
.gitignore
vendored
1
.gitignore
vendored
@ -2,3 +2,4 @@ feta
|
||||
output/
|
||||
feta.sqlite
|
||||
.lintsetup
|
||||
out
|
||||
|
13
README.md
13
README.md
@ -2,9 +2,20 @@
|
||||
|
||||
archives the fediverse
|
||||
|
||||
# todo
|
||||
|
||||
* scan toots for mentions and feed to locator
|
||||
* put toots in a separate db file
|
||||
* test with a real database
|
||||
* save instances to store more often
|
||||
* verify instances load properly on startup
|
||||
* do some simple in-memory dedupe for toot storage
|
||||
* make some templates using pongo2 and a simple website
|
||||
* update APIs
|
||||
|
||||
# status
|
||||
|
||||
[![CircleCI](https://circleci.com/gh/sneak/feta.svg?style=svg)](https://circleci.com/gh/sneak/feta)
|
||||
[![Build Status](https://drone.datavi.be/api/badges/sneak/feta/status.svg)](https://drone.datavi.be/sneak/feta)
|
||||
|
||||
# ethics statement
|
||||
|
||||
|
@ -1,40 +1,12 @@
|
||||
package database
|
||||
|
||||
import (
|
||||
"time"
|
||||
|
||||
"git.eeqj.de/sneak/feta/instance"
|
||||
"github.com/google/uuid"
|
||||
"github.com/jinzhu/gorm"
|
||||
"github.com/rs/zerolog/log"
|
||||
|
||||
_ "github.com/jinzhu/gorm/dialects/sqlite"
|
||||
)
|
||||
|
||||
type APInstance struct {
|
||||
gorm.Model
|
||||
UUID uuid.UUID `gorm:"type:uuid;primary_key;"`
|
||||
ErrorCount uint
|
||||
SuccessCount uint
|
||||
HighestID uint
|
||||
Hostname string `gorm:"type:varchar(100);unique_index"`
|
||||
Identified bool
|
||||
Fetching bool
|
||||
Disabled bool
|
||||
Implementation string
|
||||
NextFetch time.Time
|
||||
NodeInfoURL string
|
||||
ServerVersionString string
|
||||
ServerImplementationString string
|
||||
FSMState string
|
||||
}
|
||||
|
||||
// NB that when you add a model below you must add it to this list!
|
||||
func (m *Manager) doMigrations() {
|
||||
log.Info().Msg("doing database migrations if required")
|
||||
m.db.AutoMigrate(&APInstance{})
|
||||
}
|
||||
|
||||
func (m *Manager) SaveInstance(i *instance.Instance) error {
|
||||
i.Lock()
|
||||
defer i.Unlock()
|
||||
@ -53,7 +25,7 @@ func (m *Manager) SaveInstance(i *instance.Instance) error {
|
||||
HighestID: i.HighestID,
|
||||
Hostname: i.Hostname,
|
||||
Identified: i.Identified,
|
||||
Implementation: string(i.Implementation),
|
||||
Implementation: i.Implementation,
|
||||
NextFetch: i.NextFetch,
|
||||
NodeInfoURL: i.NodeInfoURL,
|
||||
ServerImplementationString: i.ServerImplementationString,
|
||||
@ -92,7 +64,29 @@ func (m *Manager) SaveInstance(i *instance.Instance) error {
|
||||
|
||||
func (m *Manager) ListInstances() ([]*instance.Instance, error) {
|
||||
output := make([]*instance.Instance, 0)
|
||||
// FIXME have this produce a list of Instance
|
||||
|
||||
var results []APInstance
|
||||
m.db.Find(&results)
|
||||
|
||||
for _, i := range results {
|
||||
newinst := instance.New(func(x *instance.Instance) {
|
||||
x.UUID = i.UUID
|
||||
x.Disabled = i.Disabled
|
||||
x.ErrorCount = i.ErrorCount
|
||||
x.InitialFSMState = i.FSMState
|
||||
x.Fetching = i.Fetching
|
||||
x.HighestID = i.HighestID
|
||||
x.Hostname = i.Hostname
|
||||
x.Identified = i.Identified
|
||||
x.Implementation = i.Implementation
|
||||
x.NextFetch = i.NextFetch
|
||||
x.NodeInfoURL = i.NodeInfoURL
|
||||
x.ServerImplementationString = i.ServerImplementationString
|
||||
x.ServerVersionString = i.ServerVersionString
|
||||
x.SuccessCount = i.SuccessCount
|
||||
})
|
||||
output = append(output, newinst)
|
||||
}
|
||||
|
||||
return output, nil
|
||||
}
|
49
database/model.go
Normal file
49
database/model.go
Normal file
@ -0,0 +1,49 @@
|
||||
package database
|
||||
|
||||
import (
|
||||
"time"
|
||||
|
||||
"github.com/google/uuid"
|
||||
"github.com/jinzhu/gorm"
|
||||
"github.com/rs/zerolog/log"
|
||||
|
||||
_ "github.com/jinzhu/gorm/dialects/sqlite"
|
||||
)
|
||||
|
||||
type StoredToot struct {
|
||||
gorm.Model
|
||||
UUID uuid.UUID `gorm:"type:uuid;primary_key;"`
|
||||
//Original string `sql:"type:text"`
|
||||
Original []byte
|
||||
Hash string `gorm:"unique_index"`
|
||||
ServerCreated time.Time
|
||||
Acct string
|
||||
Content []byte
|
||||
URL string
|
||||
Hostname string
|
||||
}
|
||||
|
||||
type APInstance struct {
|
||||
gorm.Model
|
||||
UUID uuid.UUID `gorm:"type:uuid;primary_key;"`
|
||||
ErrorCount uint
|
||||
SuccessCount uint
|
||||
HighestID uint
|
||||
Hostname string `gorm:"type:varchar(100);unique_index"`
|
||||
Identified bool
|
||||
Fetching bool
|
||||
Disabled bool
|
||||
Implementation string
|
||||
NextFetch time.Time
|
||||
NodeInfoURL string
|
||||
ServerVersionString string
|
||||
ServerImplementationString string
|
||||
FSMState string
|
||||
}
|
||||
|
||||
// NB that when you add a model below you must add it to this list!
|
||||
func (m *Manager) doMigrations() {
|
||||
log.Info().Msg("doing database migrations if required")
|
||||
m.db.AutoMigrate(&APInstance{})
|
||||
m.db.AutoMigrate(&StoredToot{})
|
||||
}
|
47
database/storageconnector.go
Normal file
47
database/storageconnector.go
Normal file
@ -0,0 +1,47 @@
|
||||
package database
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"strings"
|
||||
|
||||
"git.eeqj.de/sneak/feta/toot"
|
||||
|
||||
"github.com/google/uuid"
|
||||
_ "github.com/jinzhu/gorm/dialects/sqlite"
|
||||
)
|
||||
|
||||
func (m *Manager) TootExists(t *toot.Toot) bool {
|
||||
var try StoredToot
|
||||
if m.db.Where("Hash = ?", t.GetHash()).First(&try).RecordNotFound() {
|
||||
return false
|
||||
} else {
|
||||
return true
|
||||
}
|
||||
}
|
||||
|
||||
func (m *Manager) StoreToot(t *toot.Toot) error {
|
||||
|
||||
nt := new(StoredToot)
|
||||
nt.UUID = uuid.New()
|
||||
nt.ServerCreated = t.Parsed.CreatedAt
|
||||
nt.Original = t.Original
|
||||
// FIXME normalize this, check for @ and append hostname if none
|
||||
nt.Acct = fmt.Sprintf("%s@%s", t.Parsed.Account.Acct, strings.ToLower(t.FromHost))
|
||||
nt.URL = t.Parsed.URL
|
||||
nt.Content = t.Parsed.Content
|
||||
nt.Hostname = strings.ToLower(t.FromHost)
|
||||
nt.Hash = t.GetHash()
|
||||
r := m.db.Create(&nt)
|
||||
//panic(fmt.Sprintf("%+v", t))
|
||||
return r.Error
|
||||
}
|
||||
|
||||
func (m *Manager) StoreToots(tc []*toot.Toot) error {
|
||||
for _, item := range tc {
|
||||
err := m.StoreToot(item)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
@ -1,9 +1,12 @@
|
||||
package ingester
|
||||
|
||||
import "time"
|
||||
import "github.com/rs/zerolog/log"
|
||||
import "git.eeqj.de/sneak/feta/toot"
|
||||
import "git.eeqj.de/sneak/feta/storage"
|
||||
import (
|
||||
"time"
|
||||
|
||||
"git.eeqj.de/sneak/feta/storage"
|
||||
"git.eeqj.de/sneak/feta/toot"
|
||||
"github.com/rs/zerolog/log"
|
||||
)
|
||||
|
||||
// TootIngester is the data structure for the ingester process that is
|
||||
// responsible for storing the discovered toots
|
||||
@ -15,7 +18,7 @@ type TootIngester struct {
|
||||
|
||||
type seenTootMemo struct {
|
||||
lastSeen time.Time
|
||||
tootHash toot.Hash
|
||||
tootHash string
|
||||
}
|
||||
|
||||
// NewTootIngester returns a fresh TootIngester for your use
|
||||
@ -55,5 +58,5 @@ func (ti *TootIngester) storeToot(t *toot.Toot) {
|
||||
if ti.storageBackend == nil {
|
||||
panic("no storage backend")
|
||||
}
|
||||
ti.storageBackend.StoreToot(*t)
|
||||
ti.storageBackend.StoreToot(t)
|
||||
}
|
||||
|
@ -11,7 +11,6 @@ import (
|
||||
"time"
|
||||
|
||||
"git.eeqj.de/sneak/feta/jsonapis"
|
||||
"git.eeqj.de/sneak/feta/storage"
|
||||
"git.eeqj.de/sneak/feta/toot"
|
||||
"github.com/google/uuid"
|
||||
"github.com/looplab/fsm"
|
||||
@ -26,14 +25,6 @@ const instanceHTTPTimeout = time.Second * 120
|
||||
const instanceSpiderInterval = time.Second * 120
|
||||
const instanceErrorInterval = time.Second * 60 * 30
|
||||
|
||||
type instanceImplementation int
|
||||
|
||||
const (
|
||||
implUnknown instanceImplementation = iota
|
||||
implMastodon
|
||||
implPleroma
|
||||
)
|
||||
|
||||
// Instance stores all the information we know about an instance
|
||||
type Instance struct {
|
||||
Disabled bool
|
||||
@ -42,17 +33,17 @@ type Instance struct {
|
||||
Fetching bool
|
||||
HighestID uint
|
||||
Hostname string
|
||||
UUID uuid.UUID
|
||||
Identified bool
|
||||
Implementation instanceImplementation
|
||||
Implementation string
|
||||
InitialFSMState string
|
||||
NextFetch time.Time
|
||||
NodeInfoURL string
|
||||
ServerImplementationString string
|
||||
ServerVersionString string
|
||||
SuccessCount uint
|
||||
UUID uuid.UUID
|
||||
fetchingLock sync.Mutex
|
||||
fsmLock sync.Mutex
|
||||
storageBackend *storage.TootStorageBackend
|
||||
structLock sync.Mutex
|
||||
tootDestination chan *toot.Toot
|
||||
}
|
||||
@ -63,9 +54,14 @@ func New(options ...func(i *Instance)) *Instance {
|
||||
i := new(Instance)
|
||||
i.UUID = uuid.New()
|
||||
i.setNextFetchAfter(1 * time.Second)
|
||||
i.InitialFSMState = "STATUS_UNKNOWN"
|
||||
|
||||
for _, opt := range options {
|
||||
opt(i)
|
||||
}
|
||||
|
||||
i.FSM = fsm.NewFSM(
|
||||
"STATUS_UNKNOWN",
|
||||
i.InitialFSMState,
|
||||
fsm.Events{
|
||||
{Name: "BEGIN_NODEINFO_URL_FETCH", Src: []string{"STATUS_UNKNOWN"}, Dst: "FETCHING_NODEINFO_URL"},
|
||||
{Name: "GOT_NODEINFO_URL", Src: []string{"FETCHING_NODEINFO_URL"}, Dst: "PRE_NODEINFO_FETCH"},
|
||||
@ -82,10 +78,6 @@ func New(options ...func(i *Instance)) *Instance {
|
||||
"enter_state": func(e *fsm.Event) { i.fsmEnterState(e) },
|
||||
},
|
||||
)
|
||||
|
||||
for _, opt := range options {
|
||||
opt(i)
|
||||
}
|
||||
return i
|
||||
}
|
||||
|
||||
@ -195,7 +187,7 @@ func (i *Instance) Tick() {
|
||||
func (i *Instance) nodeIdentified() bool {
|
||||
i.Lock()
|
||||
defer i.Unlock()
|
||||
if i.Implementation > implUnknown {
|
||||
if i.Implementation != "" {
|
||||
return true
|
||||
}
|
||||
return false
|
||||
@ -379,7 +371,7 @@ func (i *Instance) fetchNodeInfo() error {
|
||||
Str("software", ni.Software.Name).
|
||||
Msg("detected server software")
|
||||
i.Identified = true
|
||||
i.Implementation = implPleroma
|
||||
i.Implementation = "pleroma"
|
||||
i.Unlock()
|
||||
i.registerSuccess()
|
||||
i.Event("GOT_NODEINFO")
|
||||
@ -390,7 +382,7 @@ func (i *Instance) fetchNodeInfo() error {
|
||||
Str("software", ni.Software.Name).
|
||||
Msg("detected server software")
|
||||
i.Identified = true
|
||||
i.Implementation = implMastodon
|
||||
i.Implementation = "mastodon"
|
||||
i.Unlock()
|
||||
i.registerSuccess()
|
||||
i.Event("GOT_NODEINFO")
|
||||
|
@ -50,9 +50,15 @@ func (im *InstanceManager) RestoreFromDB() {
|
||||
}
|
||||
im.lock()
|
||||
defer im.unlock()
|
||||
count := 0
|
||||
for _, x := range newil {
|
||||
x.SetTootDestination(im.tootDestination)
|
||||
im.instances[x.Hostname] = x
|
||||
count = count + 1
|
||||
}
|
||||
log.Info().
|
||||
Int("count", count).
|
||||
Msg("restored instances from database")
|
||||
}
|
||||
|
||||
func (im *InstanceManager) SaveToDB() {
|
||||
|
@ -52,6 +52,8 @@ func (f *Feta) configure() {
|
||||
viper.AutomaticEnv()
|
||||
|
||||
viper.SetDefault("Debug", false)
|
||||
viper.SetDefault("TootsToDisk", false)
|
||||
viper.SetDefault("TootsToDB", true)
|
||||
viper.SetDefault("HostDiscoveryParallelism", 5)
|
||||
viper.SetDefault("FSStorageLocation", os.ExpandEnv("$HOME/Library/ApplicationSupport/feta/tootarchive.d"))
|
||||
viper.SetDefault("DBStorageLocation", os.ExpandEnv("$HOME/Library/ApplicationSupport/feta/feta.state.db"))
|
||||
@ -138,8 +140,14 @@ func (f *Feta) runForever() int {
|
||||
panic("can't find home directory")
|
||||
}
|
||||
|
||||
diskBackend := storage.NewTootFSStorage(home + "/.local/feta")
|
||||
f.ingester.SetStorageBackend(diskBackend)
|
||||
if viper.GetBool("TootsToDB") {
|
||||
f.ingester.SetStorageBackend(f.dbm)
|
||||
} else if viper.GetBool("TootsToDisk") {
|
||||
diskBackend := storage.NewTootFSStorage(viper.GetString("FSStorageLocation"))
|
||||
f.ingester.SetStorageBackend(diskBackend)
|
||||
} else {
|
||||
log.Info().Msg("toots will not be saved to disk")
|
||||
}
|
||||
|
||||
f.api = new(Server)
|
||||
f.api.SetFeta(f) // api needs to get to us to access data
|
||||
|
@ -11,18 +11,6 @@ import (
|
||||
"git.eeqj.de/sneak/feta/toot"
|
||||
)
|
||||
|
||||
// TootStorageBackend is the interface to which storage backends must
|
||||
// conform for storing toots
|
||||
type TootStorageBackend interface {
|
||||
TootExists(t toot.Toot) bool
|
||||
StoreToot(t toot.Toot) error
|
||||
StoreToots(tc []*toot.Toot) error
|
||||
}
|
||||
|
||||
type TootDBStorage struct {
|
||||
db string
|
||||
}
|
||||
|
||||
// TootFSStorage is a TootStorageBackend that writes to the local
|
||||
// filesystem.
|
||||
type TootFSStorage struct {
|
||||
@ -41,7 +29,7 @@ func NewTootFSStorage(root string) *TootFSStorage {
|
||||
func (ts *TootFSStorage) StoreToots(tc []*toot.Toot) error {
|
||||
var returnErrors []string
|
||||
for _, item := range tc {
|
||||
err := ts.StoreToot(*item)
|
||||
err := ts.StoreToot(item)
|
||||
if err != nil {
|
||||
returnErrors = append(returnErrors, err.Error())
|
||||
continue
|
||||
@ -56,7 +44,7 @@ func (ts *TootFSStorage) StoreToots(tc []*toot.Toot) error {
|
||||
// TootExists checks to see if we have already written a toot to disk or
|
||||
// not. Note that the ingester de-dupes with a table in memory so that this
|
||||
// will only really get used on app restarts
|
||||
func (ts *TootFSStorage) TootExists(t toot.Toot) bool {
|
||||
func (ts *TootFSStorage) TootExists(t *toot.Toot) bool {
|
||||
path := t.DiskStoragePath()
|
||||
full := ts.root + "/" + path
|
||||
_, err := os.Stat(full)
|
||||
@ -67,7 +55,7 @@ func (ts *TootFSStorage) TootExists(t toot.Toot) bool {
|
||||
}
|
||||
|
||||
// StoreToot writes a single toot to disk
|
||||
func (ts *TootFSStorage) StoreToot(t toot.Toot) error {
|
||||
func (ts *TootFSStorage) StoreToot(t *toot.Toot) error {
|
||||
path := t.DiskStoragePath()
|
||||
full := ts.root + "/" + path
|
||||
dir := filepath.Dir(full)
|
||||
@ -82,7 +70,7 @@ func (ts *TootFSStorage) StoreToot(t toot.Toot) error {
|
||||
// toots in ram forever until the computer fills up and catches fire and explodes
|
||||
type TootMemoryStorage struct {
|
||||
sync.Mutex
|
||||
toots map[toot.Hash]toot.Toot
|
||||
toots map[string]*toot.Toot
|
||||
//maxSize uint // FIXME support eviction
|
||||
}
|
||||
|
||||
@ -90,12 +78,12 @@ type TootMemoryStorage struct {
|
||||
// ram forever
|
||||
func NewTootMemoryStorage() *TootMemoryStorage {
|
||||
ts := new(TootMemoryStorage)
|
||||
ts.toots = make(map[toot.Hash]toot.Toot)
|
||||
ts.toots = make(map[string]*toot.Toot)
|
||||
return ts
|
||||
}
|
||||
|
||||
// StoreToot saves a single toot into an in-memory hashtable
|
||||
func (ts *TootMemoryStorage) StoreToot(t toot.Toot) {
|
||||
func (ts *TootMemoryStorage) StoreToot(t *toot.Toot) {
|
||||
if ts.TootExists(t) {
|
||||
return
|
||||
}
|
||||
@ -106,7 +94,7 @@ func (ts *TootMemoryStorage) StoreToot(t toot.Toot) {
|
||||
}
|
||||
|
||||
// TootExists checks to see if we have a toot in memory already
|
||||
func (ts *TootMemoryStorage) TootExists(t toot.Toot) bool {
|
||||
func (ts *TootMemoryStorage) TootExists(t *toot.Toot) bool {
|
||||
ts.Lock()
|
||||
defer ts.Unlock()
|
||||
if _, ok := ts.toots[t.Hash]; ok { //this syntax is so gross
|
13
storage/interface.go
Normal file
13
storage/interface.go
Normal file
@ -0,0 +1,13 @@
|
||||
package storage
|
||||
|
||||
import (
|
||||
"git.eeqj.de/sneak/feta/toot"
|
||||
)
|
||||
|
||||
// TootStorageBackend is the interface to which storage backends must
|
||||
// conform for storing toots
|
||||
type TootStorageBackend interface {
|
||||
TootExists(t *toot.Toot) bool
|
||||
StoreToot(t *toot.Toot) error
|
||||
StoreToots(tc []*toot.Toot) error
|
||||
}
|
36
toot/toot.go
36
toot/toot.go
@ -1,27 +1,30 @@
|
||||
package toot
|
||||
|
||||
import "fmt"
|
||||
import "encoding/json"
|
||||
import "errors"
|
||||
import "strings"
|
||||
import "git.eeqj.de/sneak/feta/jsonapis"
|
||||
import (
|
||||
"encoding/json"
|
||||
"errors"
|
||||
"fmt"
|
||||
"strings"
|
||||
|
||||
//import "github.com/davecgh/go-spew/spew"
|
||||
import "github.com/rs/zerolog/log"
|
||||
"git.eeqj.de/sneak/feta/jsonapis"
|
||||
"github.com/rs/zerolog/log"
|
||||
|
||||
//import "encoding/hex"
|
||||
import mh "github.com/multiformats/go-multihash"
|
||||
import mhopts "github.com/multiformats/go-multihash/opts"
|
||||
//import "github.com/davecgh/go-spew/spew"
|
||||
|
||||
//import "encoding/hex"
|
||||
mh "github.com/multiformats/go-multihash"
|
||||
|
||||
mhopts "github.com/multiformats/go-multihash/opts"
|
||||
)
|
||||
|
||||
// Hash is a type for storing a string-based base58 multihash of a
|
||||
// toot's identity
|
||||
type Hash string
|
||||
|
||||
// Toot is an object we use internally for storing a discovered toot
|
||||
type Toot struct {
|
||||
Original []byte
|
||||
Parsed *jsonapis.APISerializedToot
|
||||
Hash Hash
|
||||
Hash string
|
||||
FromHost string
|
||||
}
|
||||
|
||||
@ -111,7 +114,14 @@ func (t *Toot) identityHashInput() string {
|
||||
)
|
||||
}
|
||||
|
||||
func (t *Toot) GetHash() string {
|
||||
if t.Hash == "" {
|
||||
t.calcHash()
|
||||
}
|
||||
return t.Hash
|
||||
}
|
||||
|
||||
func (t *Toot) calcHash() {
|
||||
hi := t.identityHashInput()
|
||||
t.Hash = Hash(t.multiHash([]byte(hi)))
|
||||
t.Hash = string(t.multiHash([]byte(hi)))
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user