now passes linting

This commit is contained in:
Jeffrey Paul 2019-12-19 05:20:23 -08:00
parent d2bd99801d
commit 5144a957e5
8 changed files with 91 additions and 23 deletions

View File

@ -42,7 +42,7 @@ build: ./$(FN)
touch .lintsetup touch .lintsetup
lint: fmt .lintsetup lint: fmt .lintsetup
fgt golint fgt golint ./...
go-get: go-get:
go get -v go get -v

View File

@ -5,7 +5,11 @@ import "os"
import "github.com/sneak/feta" import "github.com/sneak/feta"
// these are filled in at link-time by the build scripts // these are filled in at link-time by the build scripts
// Version is the git version of the app
var Version string var Version string
// Buildarch contains the architecture it is compiled for
var Buildarch string var Buildarch string
func main() { func main() {

View File

@ -5,30 +5,52 @@ import "github.com/rs/zerolog/log"
import "github.com/sneak/feta/toot" import "github.com/sneak/feta/toot"
import "github.com/sneak/feta/storage" import "github.com/sneak/feta/storage"
// TootIngester is the data structure for the ingester process that is
// responsible for storing the discovered toots
type TootIngester struct { type TootIngester struct {
inbound chan *toot.Toot inbound chan *toot.Toot
recentlySeen []*seenTootMemo recentlySeen []*seenTootMemo
storageBackend *storage.TootStorageBackend storageBackend storage.TootStorageBackend
} }
type seenTootMemo struct { type seenTootMemo struct {
lastSeen time.Time lastSeen time.Time
tootHash toot.TootHash tootHash toot.Hash
} }
// NewTootIngester returns a fresh TootIngester for your use
func NewTootIngester() *TootIngester { func NewTootIngester() *TootIngester {
ti := new(TootIngester) ti := new(TootIngester)
ti.inbound = make(chan *toot.Toot, 1) ti.inbound = make(chan *toot.Toot, 10000)
return ti return ti
} }
// SetStorageBackend sets the backend the ingester uses for persisting
// toots somewhere/somehow. It accepts any type conforming to
// storage.TootStorageBackend and simply replaces the ingester's current
// backend; the assignment is done without any locking.
// NOTE(review): presumably this should be called before Ingest so that a
// backend is in place when the first toot arrives -- confirm with callers.
func (ti *TootIngester) SetStorageBackend(be storage.TootStorageBackend) {
ti.storageBackend = be
}
// GetDeliveryChannel returns a channel that receives pointers to toots
// which the ingester will dedupe and store
func (ti *TootIngester) GetDeliveryChannel() chan *toot.Toot { func (ti *TootIngester) GetDeliveryChannel() chan *toot.Toot {
return ti.inbound return ti.inbound
} }
// Ingest is the main entrypoint for the TootIngester goroutine
func (ti *TootIngester) Ingest() { func (ti *TootIngester) Ingest() {
log.Info().Msg("TootIngester starting") log.Info().Msg("TootIngester starting")
go ti.readFromInboundChannel()
}
func (ti *TootIngester) readFromInboundChannel() {
for { for {
time.Sleep(1 * time.Second) // FIXME do something nt := <-ti.inbound
go ti.storeToot(nt)
} }
} }
// storeToot persists a single toot through the configured storage backend.
// readFromInboundChannel runs it in its own goroutine for every received
// toot, so there is no caller to hand an error back to: failures are logged
// here instead of being dropped silently. A nil toot or a missing backend
// is logged and skipped rather than being allowed to panic the process.
func (ti *TootIngester) storeToot(t *toot.Toot) {
	// FIXME first check for dupes in recentlySeen
	if t == nil {
		log.Error().Msg("ingester received nil toot, ignoring")
		return
	}
	if ti.storageBackend == nil {
		log.Error().
			Str("fromHost", t.FromHost).
			Msg("no storage backend set, dropping toot")
		return
	}
	if err := ti.storageBackend.StoreToot(*t); err != nil {
		log.Error().
			Err(err).
			Str("fromHost", t.FromHost).
			Msg("unable to store toot")
	}
}

View File

@ -441,7 +441,7 @@ func (i *instance) fetchRecentToots() error {
log.Info(). log.Info().
Str("hostname", i.hostname). Str("hostname", i.hostname).
Int("tootCount", len(*tc)). Int("tootCount", len(tc)).
Msgf("got and parsed toots") Msgf("got and parsed toots")
i.registerSuccess() i.registerSuccess()
i.Event("TOOTS_FETCHED") i.Event("TOOTS_FETCHED")

View File

@ -4,6 +4,9 @@ import "time"
// thank fuck for https://mholt.github.io/json-to-go/ otherwise // thank fuck for https://mholt.github.io/json-to-go/ otherwise
// this would have been a giant pain in the dick // this would have been a giant pain in the dick
// MastodonIndexResponse is the json api shape from the mastodon instance
// indexer
type MastodonIndexResponse struct { type MastodonIndexResponse struct {
Instances []struct { Instances []struct {
ID string `json:"_id"` ID string `json:"_id"`
@ -48,6 +51,8 @@ type MastodonIndexResponse struct {
} `json:"instances"` } `json:"instances"`
} }
// PleromaIndexResponse is the json api shape from the pleroma instance
// indexer
type PleromaIndexResponse []struct { type PleromaIndexResponse []struct {
Domain string `json:"domain"` Domain string `json:"domain"`
Title string `json:"title"` Title string `json:"title"`
@ -62,6 +67,7 @@ type PleromaIndexResponse []struct {
TextLimit int `json:"text_limit"` TextLimit int `json:"text_limit"`
} }
// NodeInfoVersionTwoSchema is the json format of nodeinfo 2.0
type NodeInfoVersionTwoSchema struct { type NodeInfoVersionTwoSchema struct {
Version string `json:"version"` Version string `json:"version"`
Software struct { Software struct {
@ -80,6 +86,7 @@ type NodeInfoVersionTwoSchema struct {
OpenRegistrations bool `json:"openRegistrations"` OpenRegistrations bool `json:"openRegistrations"`
} }
// NodeInfoWellKnownResponse is the json format of the nodeinfo schema
type NodeInfoWellKnownResponse struct { type NodeInfoWellKnownResponse struct {
Links []struct { Links []struct {
Rel string `json:"rel"` Rel string `json:"rel"`
@ -87,6 +94,10 @@ type NodeInfoWellKnownResponse struct {
} `json:"links"` } `json:"links"`
} }
// APISerializedToot is a partial shape of the json serialized form of a
// toot from the mastodon api (also used by pleroma). We save the original
// json from the server though so this is just a minimal subset that we need
// to deserialize for purposes of this indexer app.
type APISerializedToot struct { type APISerializedToot struct {
Account struct { Account struct {
Acct string `json:"acct"` Acct string `json:"acct"`

View File

@ -1,5 +1,10 @@
package seeds package seeds
// SeedInstances is a list of instance hostnames used to seed the indexer.
// So far the list consists of instances that have been banned/defederated
// by others. Seeding them explicitly is important because they will likely
// not appear in common mentions/public indices, and we want to always be
// able to get their toots for archiving.
var SeedInstances = [...]string{ var SeedInstances = [...]string{
"splat.soy", "splat.soy",
"veenus.art", "veenus.art",

View File

@ -8,22 +8,29 @@ import "sync"
import "github.com/sneak/feta/toot" import "github.com/sneak/feta/toot"
// TootStorageBackend is the interface to which storage backends must
// conform for storing toots
type TootStorageBackend interface { type TootStorageBackend interface {
TootExists(t toot.Toot) bool TootExists(t toot.Toot) bool
StoreToot(t toot.Toot) error StoreToot(t toot.Toot) error
StoreToots(tc []*toot.Toot) error StoreToots(tc []*toot.Toot) error
} }
// TootFSStorage is a TootStorageBackend that writes to the local
// filesystem.
type TootFSStorage struct { type TootFSStorage struct {
root string root string
} }
// NewTootFSStorage returns a *TootFSStorage for writing toots to the
// local filesystem
func NewTootFSStorage(root string) *TootFSStorage { func NewTootFSStorage(root string) *TootFSStorage {
ts := new(TootFSStorage) ts := new(TootFSStorage)
ts.root = root ts.root = root
return ts return ts
} }
// StoreToots writes a slice of pointers to toots to disk
func (ts *TootFSStorage) StoreToots(tc []*toot.Toot) error { func (ts *TootFSStorage) StoreToots(tc []*toot.Toot) error {
var returnErrors []string var returnErrors []string
for _, item := range tc { for _, item := range tc {
@ -39,6 +46,9 @@ func (ts *TootFSStorage) StoreToots(tc []*toot.Toot) error {
return errors.New(strings.Join(returnErrors, "; ")) return errors.New(strings.Join(returnErrors, "; "))
} }
// TootExists checks to see if we have already written a toot to disk or
// not. Note that the ingester de-dupes with a table in memory so that this
// will only really get used on app restarts
func (ts *TootFSStorage) TootExists(t toot.Toot) bool { func (ts *TootFSStorage) TootExists(t toot.Toot) bool {
path := t.DiskStoragePath() path := t.DiskStoragePath()
full := ts.root + "/" + path full := ts.root + "/" + path
@ -49,39 +59,45 @@ func (ts *TootFSStorage) TootExists(t toot.Toot) bool {
return true return true
} }
// StoreToot writes a single toot to disk
func (ts *TootFSStorage) StoreToot(t toot.Toot) error { func (ts *TootFSStorage) StoreToot(t toot.Toot) error {
path := t.DiskStoragePath() path := t.DiskStoragePath()
full := ts.root + "/" + path full := ts.root + "/" + path
return ioutil.WriteFile(full, t.Original, 0644) return ioutil.WriteFile(full, t.Original, 0644)
} }
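// TootMemoryStorage is meant to be a TootStorageBackend that just stores all
// ingested toots in ram forever until the computer fills up and catches fire
// and explodes.
// FIXME: its StoreToot does not return an error, so as written it does not
// match the TootStorageBackend interface signature.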
type TootMemoryStorage struct { type TootMemoryStorage struct {
sync.Mutex sync.Mutex
toots map[toot.TootHash]toot.Toot toots map[toot.Hash]toot.Toot
//maxSize uint // FIXME support eviction //maxSize uint // FIXME support eviction
} }
// NewTootMemoryStorage returns a *TootMemoryStorage for storing toots in
// ram forever
func NewTootMemoryStorage() *TootMemoryStorage { func NewTootMemoryStorage() *TootMemoryStorage {
ts := new(TootMemoryStorage) ts := new(TootMemoryStorage)
ts.toots = make(map[toot.TootHash]toot.Toot) ts.toots = make(map[toot.Hash]toot.Toot)
return ts return ts
} }
// StoreToot saves a single toot into an in-memory hashtable
func (ts *TootMemoryStorage) StoreToot(t toot.Toot) { func (ts *TootMemoryStorage) StoreToot(t toot.Toot) {
th := t.Hash if ts.TootExists(t) {
if ts.TootExists(th) {
return return
} }
ts.Lock() ts.Lock()
defer ts.Unlock() defer ts.Unlock()
ts.toots[th] = t ts.toots[t.Hash] = t
return return
} }
func (ts *TootMemoryStorage) TootExists(th toot.TootHash) bool { // TootExists checks to see if we have a toot in memory already
func (ts *TootMemoryStorage) TootExists(t toot.Toot) bool {
ts.Lock() ts.Lock()
defer ts.Unlock() defer ts.Unlock()
if _, ok := ts.toots[th]; ok { //this syntax is so gross if _, ok := ts.toots[t.Hash]; ok { //this syntax is so gross
return true return true
} }
return false return false

View File

@ -5,30 +5,37 @@ import "encoding/json"
import "errors" import "errors"
import "strings" import "strings"
import "github.com/sneak/feta/jsonapis" import "github.com/sneak/feta/jsonapis"
import "github.com/davecgh/go-spew/spew"
//import "github.com/davecgh/go-spew/spew"
import "github.com/rs/zerolog/log" import "github.com/rs/zerolog/log"
//import "encoding/hex" //import "encoding/hex"
import mh "github.com/multiformats/go-multihash" import mh "github.com/multiformats/go-multihash"
import mhopts "github.com/multiformats/go-multihash/opts" import mhopts "github.com/multiformats/go-multihash/opts"
type TootHash string // Hash is a type for storing a string-based base58 multihash of a
// toot's identity
type Hash string
// Toot is an object we use internally for storing a discovered toot
type Toot struct { type Toot struct {
Original []byte Original []byte
Parsed *jsonapis.APISerializedToot Parsed *jsonapis.APISerializedToot
Hash TootHash Hash Hash
FromHost string FromHost string
} }
func NewTootCollectionFromMastodonAPIResponse(in []byte, hostname string) (*[]Toot, error) { // NewTootCollectionFromMastodonAPIResponse takes a byte slice from a
// Mastodon API response and returns a slice of pointers to the parsed
// toots
func NewTootCollectionFromMastodonAPIResponse(in []byte, hostname string) ([]*Toot, error) {
var rt []json.RawMessage var rt []json.RawMessage
err := json.Unmarshal(in, &rt) err := json.Unmarshal(in, &rt)
if err != nil { if err != nil {
return nil, errors.New("unable to parse api response") return nil, errors.New("unable to parse api response")
} }
var tc []Toot var tc []*Toot
// iterate over rawtoots from api // iterate over rawtoots from api
for _, item := range rt { for _, item := range rt {
@ -47,11 +54,9 @@ func NewTootCollectionFromMastodonAPIResponse(in []byte, hostname string) (*[]To
t.Original = o t.Original = o
t.FromHost = hostname t.FromHost = hostname
t.calcHash() t.calcHash()
tc = append(tc, *t) tc = append(tc, t)
} }
spew.Dump(tc) return tc, nil
panic("")
return &tc, nil
} }
func (t *Toot) String() string { func (t *Toot) String() string {
@ -76,6 +81,11 @@ func (t *Toot) multiHash(in []byte) string {
return h.B58String() return h.B58String()
} }
// DiskStoragePath is a helper function on a Toot that allows it to provide
// a storage path on disk. It's here because it's a pure function that just
// formats its own toot attributes into a string.
// FIXME: this should probably be moved into the FSStorage backend instead.
func (t *Toot) DiskStoragePath() string { func (t *Toot) DiskStoragePath() string {
// FIXME make this error if fields are missing // FIXME make this error if fields are missing
// '/YYYYMMDD/example.com/username/YYYY-MM-DD.HHMMSS.username@fromHost.multihash.json' // '/YYYYMMDD/example.com/username/YYYY-MM-DD.HHMMSS.username@fromHost.multihash.json'
@ -103,5 +113,5 @@ func (t *Toot) identityHashInput() string {
func (t *Toot) calcHash() { func (t *Toot) calcHash() {
hi := t.identityHashInput() hi := t.identityHashInput()
t.Hash = TootHash(t.multiHash([]byte(hi))) t.Hash = Hash(t.multiHash([]byte(hi)))
} }