now passes linting

This commit is contained in:
Jeffrey Paul 2019-12-19 05:20:23 -08:00
parent d2bd99801d
commit 5144a957e5
8 changed files with 91 additions and 23 deletions

View File

@ -42,7 +42,7 @@ build: ./$(FN)
touch .lintsetup
lint: fmt .lintsetup
fgt golint
fgt golint ./...
go-get:
go get -v

View File

@ -5,7 +5,11 @@ import "os"
import "github.com/sneak/feta"
// these are filled in at link-time by the build scripts
// Version is the git version of the app
var Version string
// Buildarch contains the architecture it is compiled for
var Buildarch string
func main() {

View File

@ -5,30 +5,52 @@ import "github.com/rs/zerolog/log"
import "github.com/sneak/feta/toot"
import "github.com/sneak/feta/storage"
// TootIngester is the data structure for the ingester process that is
// responsible for storing the discovered toots
type TootIngester struct {
inbound chan *toot.Toot
recentlySeen []*seenTootMemo
storageBackend *storage.TootStorageBackend
storageBackend storage.TootStorageBackend
}
type seenTootMemo struct {
lastSeen time.Time
tootHash toot.TootHash
tootHash toot.Hash
}
// NewTootIngester returns a fresh TootIngester for your use
func NewTootIngester() *TootIngester {
ti := new(TootIngester)
ti.inbound = make(chan *toot.Toot, 1)
ti.inbound = make(chan *toot.Toot, 10000)
return ti
}
// SetStorageBackend installs be as the backend that storeToot hands
// received toots to. It performs a plain field assignment with no locking,
// so it is presumably meant to be called before Ingest starts the reader
// goroutine (NOTE(review): confirm against the caller).
func (ti *TootIngester) SetStorageBackend(be storage.TootStorageBackend) {
ti.storageBackend = be
}
// GetDeliveryChannel returns the ingester's inbound channel. Producers send
// pointers to toots on it, and the ingester's reader goroutine receives
// them and passes them to the storage backend. De-duplication of incoming
// toots is planned but not implemented yet (see the FIXME in storeToot).
func (ti *TootIngester) GetDeliveryChannel() chan *toot.Toot {
return ti.inbound
}
// Ingest starts the TootIngester: it logs a startup message and launches
// readFromInboundChannel in its own goroutine, then returns immediately
// without waiting for that goroutine.
func (ti *TootIngester) Ingest() {
log.Info().Msg("TootIngester starting")
go ti.readFromInboundChannel()
}
// readFromInboundChannel drains the inbound channel, handing each received
// toot to storeToot, and returns once the channel has been closed.
//
// The channel receive already blocks while no toots are pending, so no
// sleep is needed between reads; a fixed one-second pause per toot would
// cap ingestion at one toot per second and back the buffer up onto the
// producers. Toots are stored one at a time on this goroutine so that a
// slow backend applies backpressure through the channel instead of piling
// up an unbounded number of goroutines.
func (ti *TootIngester) readFromInboundChannel() {
	for nt := range ti.inbound {
		if nt == nil {
			// storeToot dereferences the pointer, so a nil entry is skipped
			continue
		}
		ti.storeToot(nt)
	}
}
// storeToot persists a single toot through the configured storage backend.
// A nil toot or an unset backend is logged and skipped rather than
// dereferenced, and a failed store is logged instead of silently dropped.
func (ti *TootIngester) storeToot(t *toot.Toot) {
	// FIXME first check for dupes in recentlySeen
	if t == nil {
		log.Error().Msg("TootIngester received nil toot, skipping")
		return
	}
	if ti.storageBackend == nil {
		// NewTootIngester does not set a backend; SetStorageBackend must
		// have been called for anything to be stored.
		log.Error().Msg("TootIngester has no storage backend, dropping toot")
		return
	}
	if err := ti.storageBackend.StoreToot(*t); err != nil {
		log.Error().
			Err(err).
			Str("hostname", t.FromHost).
			Msg("unable to store toot")
	}
}

View File

@ -441,7 +441,7 @@ func (i *instance) fetchRecentToots() error {
log.Info().
Str("hostname", i.hostname).
Int("tootCount", len(*tc)).
Int("tootCount", len(tc)).
Msgf("got and parsed toots")
i.registerSuccess()
i.Event("TOOTS_FETCHED")

View File

@ -4,6 +4,9 @@ import "time"
// thank fuck for https://mholt.github.io/json-to-go/ otherwise
// this would have been a giant pain in the dick
// MastodonIndexResponse is the json api shape from the mastodon instance
// indexer
type MastodonIndexResponse struct {
Instances []struct {
ID string `json:"_id"`
@ -48,6 +51,8 @@ type MastodonIndexResponse struct {
} `json:"instances"`
}
// PleromaIndexResponse is the json api shape from the pleroma instance
// indexer
type PleromaIndexResponse []struct {
Domain string `json:"domain"`
Title string `json:"title"`
@ -62,6 +67,7 @@ type PleromaIndexResponse []struct {
TextLimit int `json:"text_limit"`
}
// NodeInfoVersionTwoSchema is the json format of nodeinfo 2.0
type NodeInfoVersionTwoSchema struct {
Version string `json:"version"`
Software struct {
@ -80,6 +86,7 @@ type NodeInfoVersionTwoSchema struct {
OpenRegistrations bool `json:"openRegistrations"`
}
// NodeInfoWellKnownResponse is the json format of the nodeinfo schema
type NodeInfoWellKnownResponse struct {
Links []struct {
Rel string `json:"rel"`
@ -87,6 +94,10 @@ type NodeInfoWellKnownResponse struct {
} `json:"links"`
}
// APISerializedToot is a partial shape of the json serialized form of a
// toot from the mastodon api (also used by pleroma). We save the original
// json from the server though so this is just a minimal subset that we need
// to deserialize for purposes of this indexer app.
type APISerializedToot struct {
Account struct {
Acct string `json:"acct"`

View File

@ -1,5 +1,10 @@
package seeds
// SeedInstances is a list of instance hostnames used to seed the indexer.
// This list so far is a bunch of instances that have been
// banned/defederated by others so it's important to seed them so that we
// can always get their toots for archiving; they will likely not appear in
// common mentions/public indices.
var SeedInstances = [...]string{
"splat.soy",
"veenus.art",

View File

@ -8,22 +8,29 @@ import "sync"
import "github.com/sneak/feta/toot"
// TootStorageBackend is the interface to which storage backends must
// conform for storing toots
type TootStorageBackend interface {
// TootExists reports whether the backend already holds the given toot.
TootExists(t toot.Toot) bool
// StoreToot persists a single toot and returns a non-nil error if that
// fails.
StoreToot(t toot.Toot) error
// StoreToots persists each toot in tc and returns a non-nil error if
// storing fails.
StoreToots(tc []*toot.Toot) error
}
// TootFSStorage is a TootStorageBackend implementation that persists toots
// as files on the local filesystem, underneath the path held in root.
type TootFSStorage struct {
	root string
}

// NewTootFSStorage builds a *TootFSStorage that writes toots beneath the
// given root path on the local filesystem.
func NewTootFSStorage(root string) *TootFSStorage {
	return &TootFSStorage{root: root}
}
// StoreToots writes a slice of pointers to toots to disk
func (ts *TootFSStorage) StoreToots(tc []*toot.Toot) error {
var returnErrors []string
for _, item := range tc {
@ -39,6 +46,9 @@ func (ts *TootFSStorage) StoreToots(tc []*toot.Toot) error {
return errors.New(strings.Join(returnErrors, "; "))
}
// TootExists checks to see if we have already written a toot to disk or
// not. Note that the ingester is meant to de-dupe with a table in memory
// (not implemented yet), so this will only really get used on app restarts.
func (ts *TootFSStorage) TootExists(t toot.Toot) bool {
path := t.DiskStoragePath()
full := ts.root + "/" + path
@ -49,39 +59,45 @@ func (ts *TootFSStorage) TootExists(t toot.Toot) bool {
return true
}
// StoreToot writes the toot's original bytes to a single file on disk. The
// file lives at the toot's DiskStoragePath beneath the storage root and is
// written with mode 0644; any error from the write is returned unchanged.
// NOTE(review): ioutil.WriteFile does not create missing parent
// directories, so nested storage paths presumably need to exist already.
func (ts *TootFSStorage) StoreToot(t toot.Toot) error {
	dest := ts.root + "/" + t.DiskStoragePath()
	return ioutil.WriteFile(dest, t.Original, 0644)
}
// TootMemoryStorage is a TootStorageBackend that just stores all ingested
// toots in ram forever until the computer fills up and catches fire and explodes
type TootMemoryStorage struct {
sync.Mutex
toots map[toot.TootHash]toot.Toot
toots map[toot.Hash]toot.Toot
//maxSize uint // FIXME support eviction
}
// NewTootMemoryStorage returns a *TootMemoryStorage for storing toots in
// ram forever
func NewTootMemoryStorage() *TootMemoryStorage {
ts := new(TootMemoryStorage)
ts.toots = make(map[toot.TootHash]toot.Toot)
ts.toots = make(map[toot.Hash]toot.Toot)
return ts
}
// StoreToot saves a single toot into an in-memory hashtable
func (ts *TootMemoryStorage) StoreToot(t toot.Toot) {
th := t.Hash
if ts.TootExists(th) {
if ts.TootExists(t) {
return
}
ts.Lock()
defer ts.Unlock()
ts.toots[th] = t
ts.toots[t.Hash] = t
return
}
func (ts *TootMemoryStorage) TootExists(th toot.TootHash) bool {
// TootExists checks to see if we have a toot in memory already
func (ts *TootMemoryStorage) TootExists(t toot.Toot) bool {
ts.Lock()
defer ts.Unlock()
if _, ok := ts.toots[th]; ok { //this syntax is so gross
if _, ok := ts.toots[t.Hash]; ok { //this syntax is so gross
return true
}
return false

View File

@ -5,30 +5,37 @@ import "encoding/json"
import "errors"
import "strings"
import "github.com/sneak/feta/jsonapis"
import "github.com/davecgh/go-spew/spew"
//import "github.com/davecgh/go-spew/spew"
import "github.com/rs/zerolog/log"
//import "encoding/hex"
import mh "github.com/multiformats/go-multihash"
import mhopts "github.com/multiformats/go-multihash/opts"
type TootHash string
// Hash is a type for storing a string-based base58 multihash of a
// toot's identity
type Hash string
// Toot is an object we use internally for storing a discovered toot
type Toot struct {
Original []byte
Parsed *jsonapis.APISerializedToot
Hash TootHash
Hash Hash
FromHost string
}
func NewTootCollectionFromMastodonAPIResponse(in []byte, hostname string) (*[]Toot, error) {
// NewTootCollectionFromMastodonAPIResponse takes a byte slice from a masto
// api response and provides you with a nice slice of pointers to parsed
// toots
func NewTootCollectionFromMastodonAPIResponse(in []byte, hostname string) ([]*Toot, error) {
var rt []json.RawMessage
err := json.Unmarshal(in, &rt)
if err != nil {
return nil, errors.New("unable to parse api response")
}
var tc []Toot
var tc []*Toot
// iterate over rawtoots from api
for _, item := range rt {
@ -47,11 +54,9 @@ func NewTootCollectionFromMastodonAPIResponse(in []byte, hostname string) (*[]To
t.Original = o
t.FromHost = hostname
t.calcHash()
tc = append(tc, *t)
tc = append(tc, t)
}
spew.Dump(tc)
panic("")
return &tc, nil
return tc, nil
}
func (t *Toot) String() string {
@ -76,6 +81,11 @@ func (t *Toot) multiHash(in []byte) string {
return h.B58String()
}
// DiskStoragePath is a helper function on a Toot that allows it to provide
// a storage path on disk. This should probably be moved into the FSStorage
// backend instead. FIXME
// It's here because it's a pure function that just formats its own toot attributes
// into a string.
func (t *Toot) DiskStoragePath() string {
// FIXME make this error if fields are missing
// '/YYYYMMDD/example.com/username/YYYY-MM-DD.HHMMSS.username@fromHost.multihash.json'
@ -103,5 +113,5 @@ func (t *Toot) identityHashInput() string {
func (t *Toot) calcHash() {
hi := t.identityHashInput()
t.Hash = TootHash(t.multiHash([]byte(hi)))
t.Hash = Hash(t.multiHash([]byte(hi)))
}