Merge pull request #1 from sneak/next
basic functionality: spiders and writes to disk. passes linting and builds.
This commit is contained in:
commit
bad43a1eba
14
Makefile
14
Makefile
@ -12,8 +12,6 @@ IMAGENAME := sneak/$(FN)
|
|||||||
UNAME_S := $(shell uname -s)
|
UNAME_S := $(shell uname -s)
|
||||||
|
|
||||||
GOLDFLAGS += -X main.Version=$(VERSION)
|
GOLDFLAGS += -X main.Version=$(VERSION)
|
||||||
GOLDFLAGS += -X main.Buildtime=$(BUILDTIME)
|
|
||||||
GOLDFLAGS += -X main.Builduser=$(BUILDUSER)@$(BUILDHOST)
|
|
||||||
GOLDFLAGS += -X main.Buildarch=$(BUILDARCH)
|
GOLDFLAGS += -X main.Buildarch=$(BUILDARCH)
|
||||||
|
|
||||||
# osx can't statically link apparently?!
|
# osx can't statically link apparently?!
|
||||||
@ -39,21 +37,21 @@ clean:
|
|||||||
build: ./$(FN)
|
build: ./$(FN)
|
||||||
|
|
||||||
.lintsetup:
|
.lintsetup:
|
||||||
go get -u golang.org/x/lint/golint
|
go get -v -u golang.org/x/lint/golint
|
||||||
go get -u github.com/GeertJohan/fgt
|
go get -u github.com/GeertJohan/fgt
|
||||||
touch .lintsetup
|
touch .lintsetup
|
||||||
|
|
||||||
lint: .lintsetup
|
lint: fmt .lintsetup
|
||||||
fgt golint
|
fgt golint ./...
|
||||||
|
|
||||||
go-get:
|
go-get:
|
||||||
go get -v
|
cd cmd/$(FN) && go get -v
|
||||||
|
|
||||||
./$(FN): *.go cmd/*/*.go go-get
|
./$(FN): */*.go cmd/*/*.go go-get
|
||||||
cd cmd/$(FN) && go build -o ../../$(FN) $(GOFLAGS) .
|
cd cmd/$(FN) && go build -o ../../$(FN) $(GOFLAGS) .
|
||||||
|
|
||||||
fmt:
|
fmt:
|
||||||
go fmt *.go
|
gofmt -s -w .
|
||||||
|
|
||||||
test: lint build-docker-image
|
test: lint build-docker-image
|
||||||
|
|
||||||
|
22
README.md
22
README.md
@ -6,7 +6,27 @@ archives the fediverse
|
|||||||
|
|
||||||
[![CircleCI](https://circleci.com/gh/sneak/feta.svg?style=svg)](https://circleci.com/gh/sneak/feta)
|
[![CircleCI](https://circleci.com/gh/sneak/feta.svg?style=svg)](https://circleci.com/gh/sneak/feta)
|
||||||
|
|
||||||
# author
|
# ethics statement
|
||||||
|
|
||||||
|
It seems that some splinter groups are not well acquainted with the norms of
|
||||||
|
publishing data on the web.
|
||||||
|
|
||||||
|
Publishing your toots/messages on a server without marking them private or
|
||||||
|
requiring authentication and thus making them available to the web is an act
|
||||||
|
of affirmative consent to allowing others to download those toots/messages
|
||||||
|
(usually by viewing them in a browser on your profile page). If you don't
|
||||||
|
want your toots downloaded by remote/unauthenticated users on the web, do
|
||||||
|
not publish them to the web.
|
||||||
|
|
||||||
|
If you publish them to the whole web (and your home instance serves them to
|
||||||
|
all comers), do not be surprised or feel violated when people download (and
|
||||||
|
optionally save) them, as your home instance permits them to.
|
||||||
|
|
||||||
|
We do not have a right to be forgotten, as we do not have a right to delete
|
||||||
|
legitimately-obtained files from the hard drives of other people.
|
||||||
|
|
||||||
|
# Author
|
||||||
|
|
||||||
Jeffrey Paul <[sneak@sneak.berlin](mailto:sneak@sneak.berlin)>
|
Jeffrey Paul <[sneak@sneak.berlin](mailto:sneak@sneak.berlin)>
|
||||||
|
|
||||||
|
[@sneak@sneak.berlin](https://s.sneak.berlin/@sneak)
|
||||||
|
@ -2,14 +2,16 @@ package main
|
|||||||
|
|
||||||
import "os"
|
import "os"
|
||||||
|
|
||||||
import "github.com/sneak/feta"
|
import "github.com/sneak/feta/process"
|
||||||
|
|
||||||
// these are filled in at link-time by the build scripts
|
// these are filled in at link-time by the build scripts
|
||||||
|
|
||||||
|
// Version is the git version of the app
|
||||||
var Version string
|
var Version string
|
||||||
var Buildtime string
|
|
||||||
var Builduser string
|
// Buildarch contains the architecture it is compiled for
|
||||||
var Buildarch string
|
var Buildarch string
|
||||||
|
|
||||||
func main() {
|
func main() {
|
||||||
os.Exit(feta.CLIEntry(Version, Buildtime, Buildarch, Builduser))
|
os.Exit(process.CLIEntry(Version, Buildarch))
|
||||||
}
|
}
|
||||||
|
19
config.go
Normal file
19
config.go
Normal file
@ -0,0 +1,19 @@
|
|||||||
|
package feta
|
||||||
|
|
||||||
|
import "time"
|
||||||
|
|
||||||
|
// FIXME this should use viper or something
|
||||||
|
|
||||||
|
// Config stores the configuration for the feta process
|
||||||
|
type Config struct {
|
||||||
|
LogReportInterval time.Duration
|
||||||
|
FSStorageLocation string
|
||||||
|
}
|
||||||
|
|
||||||
|
// GetConfig returns the config
|
||||||
|
func GetConfig() *Config {
|
||||||
|
c := new(Config)
|
||||||
|
c.LogReportInterval = time.Second * 10
|
||||||
|
c.FSStorageLocation = "/home/sneak/Library/ApplicationSupport/feta/tootarchive"
|
||||||
|
return c
|
||||||
|
}
|
@ -1,5 +1,6 @@
|
|||||||
package feta
|
package db
|
||||||
|
|
||||||
|
import "github.com/sneak/feta/process"
|
||||||
import "github.com/jinzhu/gorm"
|
import "github.com/jinzhu/gorm"
|
||||||
import _ "github.com/jinzhu/gorm/dialects/sqlite" // required for orm
|
import _ "github.com/jinzhu/gorm/dialects/sqlite" // required for orm
|
||||||
|
|
||||||
@ -9,6 +10,6 @@ type savedInstance struct {
|
|||||||
software string
|
software string
|
||||||
}
|
}
|
||||||
|
|
||||||
func (f *Process) databaseMigrations() {
|
func (f *process.Feta) databaseMigrations() {
|
||||||
f.db.AutoMigrate(&savedInstance{})
|
f.db.AutoMigrate(&savedInstance{})
|
||||||
}
|
}
|
124
feta.go
124
feta.go
@ -1,124 +0,0 @@
|
|||||||
package feta
|
|
||||||
|
|
||||||
import "os"
|
|
||||||
import "time"
|
|
||||||
|
|
||||||
import "github.com/jinzhu/gorm"
|
|
||||||
import _ "github.com/jinzhu/gorm/dialects/sqlite" // required for orm
|
|
||||||
|
|
||||||
import "github.com/rs/zerolog"
|
|
||||||
import "github.com/rs/zerolog/log"
|
|
||||||
import "github.com/mattn/go-isatty"
|
|
||||||
|
|
||||||
// InstanceHostname is a special type for holding the hostname of an
|
|
||||||
// instance (string)
|
|
||||||
type InstanceHostname string
|
|
||||||
|
|
||||||
// CLIEntry is the main entrypoint for the feta process from the cli
|
|
||||||
func CLIEntry(version string, buildtime string, buildarch string, builduser string) int {
|
|
||||||
f := new(Process)
|
|
||||||
f.version = version
|
|
||||||
f.buildtime = buildtime
|
|
||||||
f.buildarch = buildarch
|
|
||||||
f.builduser = builduser
|
|
||||||
f.setupLogging()
|
|
||||||
return f.runForever()
|
|
||||||
}
|
|
||||||
|
|
||||||
// Process is the main structure/process of this app
|
|
||||||
type Process struct {
|
|
||||||
version string
|
|
||||||
buildtime string
|
|
||||||
buildarch string
|
|
||||||
builduser string
|
|
||||||
locator *InstanceLocator
|
|
||||||
manager *InstanceManager
|
|
||||||
api *fetaAPIServer
|
|
||||||
db *gorm.DB
|
|
||||||
startup time.Time
|
|
||||||
}
|
|
||||||
|
|
||||||
func (f *Process) identify() {
|
|
||||||
log.Info().
|
|
||||||
Str("version", f.version).
|
|
||||||
Str("buildtime", f.buildtime).
|
|
||||||
Str("buildarch", f.buildarch).
|
|
||||||
Str("builduser", f.builduser).
|
|
||||||
Msg("starting")
|
|
||||||
}
|
|
||||||
|
|
||||||
func (f *Process) setupLogging() {
|
|
||||||
|
|
||||||
log.Logger = log.With().Caller().Logger()
|
|
||||||
|
|
||||||
tty := isatty.IsTerminal(os.Stdin.Fd()) || isatty.IsCygwinTerminal(os.Stdin.Fd())
|
|
||||||
|
|
||||||
if tty {
|
|
||||||
out := zerolog.NewConsoleWriter(
|
|
||||||
func(w *zerolog.ConsoleWriter) {
|
|
||||||
// Customize time format
|
|
||||||
w.TimeFormat = time.RFC3339
|
|
||||||
},
|
|
||||||
)
|
|
||||||
log.Logger = log.Output(out)
|
|
||||||
}
|
|
||||||
|
|
||||||
// always log in UTC
|
|
||||||
zerolog.TimestampFunc = func() time.Time {
|
|
||||||
return time.Now().UTC()
|
|
||||||
}
|
|
||||||
|
|
||||||
zerolog.SetGlobalLevel(zerolog.InfoLevel)
|
|
||||||
if os.Getenv("DEBUG") != "" {
|
|
||||||
zerolog.SetGlobalLevel(zerolog.DebugLevel)
|
|
||||||
}
|
|
||||||
|
|
||||||
f.identify()
|
|
||||||
}
|
|
||||||
|
|
||||||
func (f *Process) uptime() time.Duration {
|
|
||||||
return time.Since(f.startup)
|
|
||||||
}
|
|
||||||
|
|
||||||
func (f *Process) setupDatabase() {
|
|
||||||
var err error
|
|
||||||
f.db, err = gorm.Open("sqlite3", "feta.sqlite")
|
|
||||||
|
|
||||||
if err != nil {
|
|
||||||
panic(err)
|
|
||||||
}
|
|
||||||
|
|
||||||
f.databaseMigrations()
|
|
||||||
}
|
|
||||||
|
|
||||||
func (f *Process) runForever() int {
|
|
||||||
f.startup = time.Now()
|
|
||||||
|
|
||||||
f.setupDatabase()
|
|
||||||
|
|
||||||
newInstanceHostnameNotifications := make(chan InstanceHostname)
|
|
||||||
|
|
||||||
f.locator = newInstanceLocator()
|
|
||||||
f.manager = newInstanceManager()
|
|
||||||
f.api = new(fetaAPIServer)
|
|
||||||
f.api.setFeta(f) // api needs to get to us to access data
|
|
||||||
|
|
||||||
f.locator.addInstanceNotificationChannel(newInstanceHostnameNotifications)
|
|
||||||
f.manager.addInstanceNotificationChannel(newInstanceHostnameNotifications)
|
|
||||||
|
|
||||||
// locator goroutine:
|
|
||||||
go f.locator.locate()
|
|
||||||
|
|
||||||
// manager goroutine:
|
|
||||||
go f.manager.manage()
|
|
||||||
|
|
||||||
go f.api.serve()
|
|
||||||
|
|
||||||
// this goroutine (main) does nothing until we handle signals
|
|
||||||
// FIXME(sneak)
|
|
||||||
for {
|
|
||||||
time.Sleep(1 * time.Second)
|
|
||||||
}
|
|
||||||
|
|
||||||
return 0
|
|
||||||
}
|
|
59
ingester/ingester.go
Normal file
59
ingester/ingester.go
Normal file
@ -0,0 +1,59 @@
|
|||||||
|
package ingester
|
||||||
|
|
||||||
|
import "time"
|
||||||
|
import "github.com/rs/zerolog/log"
|
||||||
|
import "github.com/sneak/feta/toot"
|
||||||
|
import "github.com/sneak/feta/storage"
|
||||||
|
|
||||||
|
// TootIngester is the data structure for the ingester process that is
|
||||||
|
// responsible for storing the discovered toots
|
||||||
|
type TootIngester struct {
|
||||||
|
inbound chan *toot.Toot
|
||||||
|
recentlySeen []*seenTootMemo
|
||||||
|
storageBackend storage.TootStorageBackend
|
||||||
|
}
|
||||||
|
|
||||||
|
type seenTootMemo struct {
|
||||||
|
lastSeen time.Time
|
||||||
|
tootHash toot.Hash
|
||||||
|
}
|
||||||
|
|
||||||
|
// NewTootIngester returns a fresh TootIngester for your use
|
||||||
|
func NewTootIngester() *TootIngester {
|
||||||
|
ti := new(TootIngester)
|
||||||
|
ti.inbound = make(chan *toot.Toot, 10000)
|
||||||
|
return ti
|
||||||
|
}
|
||||||
|
|
||||||
|
// SetStorageBackend takes a type conforming to TootStorageBackend for
|
||||||
|
// persisting toots somewhere/somehow
|
||||||
|
func (ti *TootIngester) SetStorageBackend(be storage.TootStorageBackend) {
|
||||||
|
ti.storageBackend = be
|
||||||
|
}
|
||||||
|
|
||||||
|
// GetDeliveryChannel returns a channel that receives pointers to toots
|
||||||
|
// which the ingester will dedupe and store
|
||||||
|
func (ti *TootIngester) GetDeliveryChannel() chan *toot.Toot {
|
||||||
|
return ti.inbound
|
||||||
|
}
|
||||||
|
|
||||||
|
// Ingest is the main entrypoint for the TootIngester goroutine
|
||||||
|
func (ti *TootIngester) Ingest() {
|
||||||
|
log.Info().Msg("TootIngester starting")
|
||||||
|
go ti.readFromInboundChannel()
|
||||||
|
}
|
||||||
|
|
||||||
|
func (ti *TootIngester) readFromInboundChannel() {
|
||||||
|
for {
|
||||||
|
nt := <-ti.inbound
|
||||||
|
go ti.storeToot(nt)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func (ti *TootIngester) storeToot(t *toot.Toot) {
|
||||||
|
// FIXME first check for dupes in recentlySeen
|
||||||
|
if ti.storageBackend == nil {
|
||||||
|
panic("no storage backend")
|
||||||
|
}
|
||||||
|
ti.storageBackend.StoreToot(*t)
|
||||||
|
}
|
@ -1,4 +1,4 @@
|
|||||||
package feta
|
package instance
|
||||||
|
|
||||||
import "encoding/json"
|
import "encoding/json"
|
||||||
import "fmt"
|
import "fmt"
|
||||||
@ -12,46 +12,53 @@ import "errors"
|
|||||||
//import "github.com/gin-gonic/gin"
|
//import "github.com/gin-gonic/gin"
|
||||||
import "github.com/looplab/fsm"
|
import "github.com/looplab/fsm"
|
||||||
import "github.com/rs/zerolog/log"
|
import "github.com/rs/zerolog/log"
|
||||||
|
import "github.com/sneak/feta/storage"
|
||||||
|
import "github.com/sneak/feta/toot"
|
||||||
|
import "github.com/sneak/feta/jsonapis"
|
||||||
|
|
||||||
const nodeInfoSchemaVersionTwoName = "http://nodeinfo.diaspora.software/ns/schema/2.0"
|
const nodeInfoSchemaVersionTwoName = "http://nodeinfo.diaspora.software/ns/schema/2.0"
|
||||||
|
|
||||||
const instanceNodeinfoTimeout = time.Second * 50
|
const instanceNodeinfoTimeout = time.Second * 50
|
||||||
|
const instanceHTTPTimeout = time.Second * 120
|
||||||
const instanceHTTPTimeout = time.Second * 50
|
|
||||||
|
|
||||||
const instanceSpiderInterval = time.Second * 120
|
const instanceSpiderInterval = time.Second * 120
|
||||||
|
|
||||||
const instanceErrorInterval = time.Second * 60 * 30
|
const instanceErrorInterval = time.Second * 60 * 30
|
||||||
|
|
||||||
type instanceImplementation int
|
type instanceImplementation int
|
||||||
|
|
||||||
|
// Hostname is a special type for holding the hostname of an
|
||||||
|
// instance (string)
|
||||||
|
type Hostname string
|
||||||
|
|
||||||
const (
|
const (
|
||||||
implUnknown instanceImplementation = iota
|
implUnknown instanceImplementation = iota
|
||||||
implMastodon
|
implMastodon
|
||||||
implPleroma
|
implPleroma
|
||||||
)
|
)
|
||||||
|
|
||||||
type instance struct {
|
// Instance stores all the information we know about an instance
|
||||||
|
type Instance struct {
|
||||||
structLock sync.Mutex
|
structLock sync.Mutex
|
||||||
errorCount uint
|
tootDestination chan *toot.Toot
|
||||||
successCount uint
|
ErrorCount uint
|
||||||
|
SuccessCount uint
|
||||||
highestID int
|
highestID int
|
||||||
hostname string
|
Hostname string
|
||||||
identified bool
|
Identified bool
|
||||||
fetching bool
|
fetching bool
|
||||||
implementation instanceImplementation
|
implementation instanceImplementation
|
||||||
backend *instanceBackend
|
storageBackend *storage.TootStorageBackend
|
||||||
nextFetch time.Time
|
NextFetch time.Time
|
||||||
nodeInfoURL string
|
nodeInfoURL string
|
||||||
serverVersionString string
|
ServerVersionString string
|
||||||
serverImplementationString string
|
ServerImplementationString string
|
||||||
fetchingLock sync.Mutex
|
fetchingLock sync.Mutex
|
||||||
fsm *fsm.FSM
|
fsm *fsm.FSM
|
||||||
fsmLock sync.Mutex
|
fsmLock sync.Mutex
|
||||||
}
|
}
|
||||||
|
|
||||||
func newInstance(options ...func(i *instance)) *instance {
|
// New returns a new instance, argument is a function that operates on the
|
||||||
i := new(instance)
|
// new instance
|
||||||
|
func New(options ...func(i *Instance)) *Instance {
|
||||||
|
i := new(Instance)
|
||||||
i.setNextFetchAfter(1 * time.Second)
|
i.setNextFetchAfter(1 * time.Second)
|
||||||
|
|
||||||
i.fsm = fsm.NewFSM(
|
i.fsm = fsm.NewFSM(
|
||||||
@ -62,9 +69,11 @@ func newInstance(options ...func(i *instance)) *instance {
|
|||||||
{Name: "BEGIN_NODEINFO_FETCH", Src: []string{"PRE_NODEINFO_FETCH"}, Dst: "FETCHING_NODEINFO"},
|
{Name: "BEGIN_NODEINFO_FETCH", Src: []string{"PRE_NODEINFO_FETCH"}, Dst: "FETCHING_NODEINFO"},
|
||||||
{Name: "GOT_NODEINFO", Src: []string{"FETCHING_NODEINFO"}, Dst: "READY_FOR_TOOTFETCH"},
|
{Name: "GOT_NODEINFO", Src: []string{"FETCHING_NODEINFO"}, Dst: "READY_FOR_TOOTFETCH"},
|
||||||
{Name: "FETCH_TIME_REACHED", Src: []string{"READY_FOR_TOOTFETCH"}, Dst: "READY_AND_DUE_FETCH"},
|
{Name: "FETCH_TIME_REACHED", Src: []string{"READY_FOR_TOOTFETCH"}, Dst: "READY_AND_DUE_FETCH"},
|
||||||
|
{Name: "BEGIN_TOOT_FETCH", Src: []string{"READY_AND_DUE_FETCH"}, Dst: "FETCHING"},
|
||||||
{Name: "WEIRD_NODE_RESPONSE", Src: []string{"FETCHING_NODEINFO_URL", "PRE_NODEINFO_FETCH", "FETCHING_NODEINFO"}, Dst: "WEIRD_NODE"},
|
{Name: "WEIRD_NODE_RESPONSE", Src: []string{"FETCHING_NODEINFO_URL", "PRE_NODEINFO_FETCH", "FETCHING_NODEINFO"}, Dst: "WEIRD_NODE"},
|
||||||
{Name: "EARLY_FETCH_ERROR", Src: []string{"FETCHING_NODEINFO_URL", "PRE_NODEINFO_FETCH", "FETCHING_NODEINFO"}, Dst: "EARLY_ERROR"},
|
{Name: "EARLY_FETCH_ERROR", Src: []string{"FETCHING_NODEINFO_URL", "PRE_NODEINFO_FETCH", "FETCHING_NODEINFO"}, Dst: "EARLY_ERROR"},
|
||||||
{Name: "TOOT_FETCH_ERROR", Src: []string{"READY_FOR_TOOTFETCH"}, Dst: "TOOT_FETCH_ERROR"},
|
{Name: "TOOT_FETCH_ERROR", Src: []string{"FETCHING"}, Dst: "TOOT_FETCH_ERROR"},
|
||||||
|
{Name: "TOOTS_FETCHED", Src: []string{"FETCHING"}, Dst: "READY_FOR_TOOTFETCH"},
|
||||||
},
|
},
|
||||||
fsm.Callbacks{
|
fsm.Callbacks{
|
||||||
"enter_state": func(e *fsm.Event) { i.fsmEnterState(e) },
|
"enter_state": func(e *fsm.Event) { i.fsmEnterState(e) },
|
||||||
@ -77,65 +86,82 @@ func newInstance(options ...func(i *instance)) *instance {
|
|||||||
return i
|
return i
|
||||||
}
|
}
|
||||||
|
|
||||||
func (i *instance) Status() string {
|
// Status returns the instance's state in the FSM
|
||||||
|
func (i *Instance) Status() string {
|
||||||
i.fsmLock.Lock()
|
i.fsmLock.Lock()
|
||||||
defer i.fsmLock.Unlock()
|
defer i.fsmLock.Unlock()
|
||||||
return i.fsm.Current()
|
return i.fsm.Current()
|
||||||
}
|
}
|
||||||
|
|
||||||
func (i *instance) Event(eventname string) {
|
// SetTootDestination takes a channel from the manager that all toots
|
||||||
|
// fetched from this instance should be pushed into. The instance is not
|
||||||
|
// responsible for deduplication, it should shove all toots on every fetch
|
||||||
|
// into the channel.
|
||||||
|
func (i *Instance) SetTootDestination(d chan *toot.Toot) {
|
||||||
|
i.tootDestination = d
|
||||||
|
}
|
||||||
|
|
||||||
|
// Event is the method that alters the FSM
|
||||||
|
func (i *Instance) Event(eventname string) {
|
||||||
i.fsmLock.Lock()
|
i.fsmLock.Lock()
|
||||||
defer i.fsmLock.Unlock()
|
defer i.fsmLock.Unlock()
|
||||||
i.fsm.Event(eventname)
|
i.fsm.Event(eventname)
|
||||||
}
|
}
|
||||||
|
|
||||||
func (i *instance) fsmEnterState(e *fsm.Event) {
|
func (i *Instance) fsmEnterState(e *fsm.Event) {
|
||||||
log.Debug().
|
log.Debug().
|
||||||
Str("hostname", i.hostname).
|
Str("hostname", i.Hostname).
|
||||||
Str("state", e.Dst).
|
Str("state", e.Dst).
|
||||||
Msg("instance changed state")
|
Msg("instance changed state")
|
||||||
}
|
}
|
||||||
|
|
||||||
func (i *instance) Lock() {
|
// Lock locks the instance's mutex for reading/writing from the structure
|
||||||
|
func (i *Instance) Lock() {
|
||||||
i.structLock.Lock()
|
i.structLock.Lock()
|
||||||
}
|
}
|
||||||
|
|
||||||
func (i *instance) Unlock() {
|
// Unlock unlocks the instance's mutex for reading/writing from the structure
|
||||||
|
func (i *Instance) Unlock() {
|
||||||
i.structLock.Unlock()
|
i.structLock.Unlock()
|
||||||
}
|
}
|
||||||
|
|
||||||
func (i *instance) bumpFetch() {
|
func (i *Instance) bumpFetch() {
|
||||||
i.Lock()
|
i.Lock()
|
||||||
defer i.Unlock()
|
defer i.Unlock()
|
||||||
i.nextFetch = time.Now().Add(120 * time.Second)
|
i.NextFetch = time.Now().Add(120 * time.Second)
|
||||||
}
|
}
|
||||||
|
|
||||||
func (i *instance) setNextFetchAfter(d time.Duration) {
|
func (i *Instance) setNextFetchAfter(d time.Duration) {
|
||||||
i.Lock()
|
i.Lock()
|
||||||
defer i.Unlock()
|
defer i.Unlock()
|
||||||
i.nextFetch = time.Now().Add(d)
|
i.NextFetch = time.Now().Add(d)
|
||||||
}
|
}
|
||||||
|
|
||||||
func (i *instance) Fetch() {
|
// Fetch prepares an instance for fetching. Bad name, fix it.
|
||||||
|
// FIXME(sneak)
|
||||||
|
func (i *Instance) Fetch() {
|
||||||
i.fetchingLock.Lock()
|
i.fetchingLock.Lock()
|
||||||
defer i.fetchingLock.Unlock()
|
defer i.fetchingLock.Unlock()
|
||||||
|
|
||||||
i.setNextFetchAfter(instanceErrorInterval)
|
i.setNextFetchAfter(instanceErrorInterval)
|
||||||
|
|
||||||
err := i.detectNodeTypeIfNecessary()
|
err := i.DetectNodeTypeIfNecessary()
|
||||||
if err != nil {
|
if err != nil {
|
||||||
log.Debug().
|
log.Debug().
|
||||||
Str("hostname", i.hostname).
|
Str("hostname", i.Hostname).
|
||||||
Err(err).
|
Err(err).
|
||||||
Msg("unable to fetch instance metadata")
|
Msg("unable to fetch instance metadata")
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
i.setNextFetchAfter(instanceSpiderInterval)
|
i.setNextFetchAfter(instanceSpiderInterval)
|
||||||
log.Info().Msgf("i (%s) IS NOW READY FOR FETCH", i.hostname)
|
log.Info().
|
||||||
|
Str("hostname", i.Hostname).
|
||||||
|
Msg("instance now ready for fetch")
|
||||||
}
|
}
|
||||||
|
|
||||||
func (i *instance) dueForFetch() bool {
|
// FIXME rename this function
|
||||||
|
func (i *Instance) dueForFetch() bool {
|
||||||
// this just checks FSM state, the ticker must update it and do time
|
// this just checks FSM state, the ticker must update it and do time
|
||||||
// calcs
|
// calcs
|
||||||
if i.Status() == "READY_AND_DUE_FETCH" {
|
if i.Status() == "READY_AND_DUE_FETCH" {
|
||||||
@ -144,21 +170,26 @@ func (i *instance) dueForFetch() bool {
|
|||||||
return false
|
return false
|
||||||
}
|
}
|
||||||
|
|
||||||
func (i *instance) isNowPastFetchTime() bool {
|
func (i *Instance) isNowPastFetchTime() bool {
|
||||||
return time.Now().After(i.nextFetch)
|
return time.Now().After(i.NextFetch)
|
||||||
}
|
}
|
||||||
|
|
||||||
func (i *instance) Tick() {
|
// Tick is responsible for pushing idle instance records between states.
|
||||||
|
// The instances will transition between states when doing stuff (e.g.
|
||||||
|
// investigating, fetching, etc.) as well.
|
||||||
|
func (i *Instance) Tick() {
|
||||||
if i.Status() == "READY_FOR_TOOTFETCH" {
|
if i.Status() == "READY_FOR_TOOTFETCH" {
|
||||||
if i.isNowPastFetchTime() {
|
if i.isNowPastFetchTime() {
|
||||||
i.Event("FETCH_TIME_REACHED")
|
i.Event("FETCH_TIME_REACHED")
|
||||||
}
|
}
|
||||||
} else if i.Status() == "STATUS_UNKNOWN" {
|
} else if i.Status() == "STATUS_UNKNOWN" {
|
||||||
i.Fetch()
|
i.Fetch()
|
||||||
|
} else if i.Status() == "READY_AND_DUE_FETCH" {
|
||||||
|
i.fetchRecentToots()
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func (i *instance) nodeIdentified() bool {
|
func (i *Instance) nodeIdentified() bool {
|
||||||
i.Lock()
|
i.Lock()
|
||||||
defer i.Unlock()
|
defer i.Unlock()
|
||||||
if i.implementation > implUnknown {
|
if i.implementation > implUnknown {
|
||||||
@ -167,47 +198,50 @@ func (i *instance) nodeIdentified() bool {
|
|||||||
return false
|
return false
|
||||||
}
|
}
|
||||||
|
|
||||||
func (i *instance) detectNodeTypeIfNecessary() error {
|
// DetectNodeTypeIfNecessary does some network requests if the node is as
|
||||||
|
// yet unidentified. No-op otherwise.
|
||||||
|
func (i *Instance) DetectNodeTypeIfNecessary() error {
|
||||||
if !i.nodeIdentified() {
|
if !i.nodeIdentified() {
|
||||||
return i.fetchNodeInfo()
|
return i.fetchNodeInfo()
|
||||||
}
|
}
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func (i *instance) registerError() {
|
func (i *Instance) registerError() {
|
||||||
i.Lock()
|
i.Lock()
|
||||||
defer i.Unlock()
|
defer i.Unlock()
|
||||||
i.errorCount++
|
i.ErrorCount++
|
||||||
}
|
}
|
||||||
|
|
||||||
func (i *instance) registerSuccess() {
|
func (i *Instance) registerSuccess() {
|
||||||
i.Lock()
|
i.Lock()
|
||||||
defer i.Unlock()
|
defer i.Unlock()
|
||||||
i.successCount++
|
i.SuccessCount++
|
||||||
}
|
}
|
||||||
|
|
||||||
func (i *instance) Up() bool {
|
// Up returns true if the success count is >0
|
||||||
|
func (i *Instance) Up() bool {
|
||||||
i.Lock()
|
i.Lock()
|
||||||
defer i.Unlock()
|
defer i.Unlock()
|
||||||
return i.successCount > 0
|
return i.SuccessCount > 0
|
||||||
}
|
}
|
||||||
|
|
||||||
func (i *instance) fetchNodeInfoURL() error {
|
func (i *Instance) fetchNodeInfoURL() error {
|
||||||
url := fmt.Sprintf("https://%s/.well-known/nodeinfo", i.hostname)
|
url := fmt.Sprintf("https://%s/.well-known/nodeinfo", i.Hostname)
|
||||||
var c = &http.Client{
|
var c = &http.Client{
|
||||||
Timeout: instanceNodeinfoTimeout,
|
Timeout: instanceNodeinfoTimeout,
|
||||||
}
|
}
|
||||||
|
|
||||||
log.Debug().
|
log.Debug().
|
||||||
Str("url", url).
|
Str("url", url).
|
||||||
Str("hostname", i.hostname).
|
Str("hostname", i.Hostname).
|
||||||
Msg("fetching nodeinfo reference URL")
|
Msg("fetching nodeinfo reference URL")
|
||||||
|
|
||||||
i.Event("BEGIN_NODEINFO_URL_FETCH")
|
i.Event("BEGIN_NODEINFO_URL_FETCH")
|
||||||
resp, err := c.Get(url)
|
resp, err := c.Get(url)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
log.Debug().
|
log.Debug().
|
||||||
Str("hostname", i.hostname).
|
Str("hostname", i.Hostname).
|
||||||
Err(err).
|
Err(err).
|
||||||
Msg("unable to fetch nodeinfo, node is down?")
|
Msg("unable to fetch nodeinfo, node is down?")
|
||||||
i.registerError()
|
i.registerError()
|
||||||
@ -220,7 +254,7 @@ func (i *instance) fetchNodeInfoURL() error {
|
|||||||
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
log.Debug().
|
log.Debug().
|
||||||
Str("hostname", i.hostname).
|
Str("hostname", i.Hostname).
|
||||||
Err(err).
|
Err(err).
|
||||||
Msg("unable to read nodeinfo")
|
Msg("unable to read nodeinfo")
|
||||||
i.registerError()
|
i.registerError()
|
||||||
@ -228,11 +262,11 @@ func (i *instance) fetchNodeInfoURL() error {
|
|||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
nir := new(nodeInfoWellKnownResponse)
|
nir := new(jsonapis.NodeInfoWellKnownResponse)
|
||||||
err = json.Unmarshal(body, &nir)
|
err = json.Unmarshal(body, &nir)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
log.Debug().
|
log.Debug().
|
||||||
Str("hostname", i.hostname).
|
Str("hostname", i.Hostname).
|
||||||
Err(err).
|
Err(err).
|
||||||
Msg("unable to parse nodeinfo, node is weird")
|
Msg("unable to parse nodeinfo, node is weird")
|
||||||
i.registerError()
|
i.registerError()
|
||||||
@ -243,7 +277,7 @@ func (i *instance) fetchNodeInfoURL() error {
|
|||||||
for _, item := range nir.Links {
|
for _, item := range nir.Links {
|
||||||
if item.Rel == nodeInfoSchemaVersionTwoName {
|
if item.Rel == nodeInfoSchemaVersionTwoName {
|
||||||
log.Debug().
|
log.Debug().
|
||||||
Str("hostname", i.hostname).
|
Str("hostname", i.Hostname).
|
||||||
Str("nodeinfourl", item.Href).
|
Str("nodeinfourl", item.Href).
|
||||||
Msg("success fetching url for nodeinfo")
|
Msg("success fetching url for nodeinfo")
|
||||||
|
|
||||||
@ -255,21 +289,21 @@ func (i *instance) fetchNodeInfoURL() error {
|
|||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
log.Debug().
|
log.Debug().
|
||||||
Str("hostname", i.hostname).
|
Str("hostname", i.Hostname).
|
||||||
Str("item-rel", item.Rel).
|
Str("item-rel", item.Rel).
|
||||||
Str("item-href", item.Href).
|
Str("item-href", item.Href).
|
||||||
Msg("nodeinfo entry")
|
Msg("nodeinfo entry")
|
||||||
}
|
}
|
||||||
|
|
||||||
log.Error().
|
log.Error().
|
||||||
Str("hostname", i.hostname).
|
Str("hostname", i.Hostname).
|
||||||
Msg("incomplete nodeinfo")
|
Msg("incomplete nodeinfo")
|
||||||
i.registerError()
|
i.registerError()
|
||||||
i.Event("WEIRD_NODE_RESPONSE")
|
i.Event("WEIRD_NODE_RESPONSE")
|
||||||
return errors.New("incomplete nodeinfo")
|
return errors.New("incomplete nodeinfo")
|
||||||
}
|
}
|
||||||
|
|
||||||
func (i *instance) fetchNodeInfo() error {
|
func (i *Instance) fetchNodeInfo() error {
|
||||||
err := i.fetchNodeInfoURL()
|
err := i.fetchNodeInfoURL()
|
||||||
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
@ -291,7 +325,7 @@ func (i *instance) fetchNodeInfo() error {
|
|||||||
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
log.Debug().
|
log.Debug().
|
||||||
Str("hostname", i.hostname).
|
Str("hostname", i.Hostname).
|
||||||
Err(err).
|
Err(err).
|
||||||
Msgf("unable to fetch nodeinfo data")
|
Msgf("unable to fetch nodeinfo data")
|
||||||
i.registerError()
|
i.registerError()
|
||||||
@ -304,7 +338,7 @@ func (i *instance) fetchNodeInfo() error {
|
|||||||
|
|
||||||
if err != nil {
|
if err != nil {
|
||||||
log.Error().
|
log.Error().
|
||||||
Str("hostname", i.hostname).
|
Str("hostname", i.Hostname).
|
||||||
Err(err).
|
Err(err).
|
||||||
Msgf("unable to read nodeinfo data")
|
Msgf("unable to read nodeinfo data")
|
||||||
i.registerError()
|
i.registerError()
|
||||||
@ -312,11 +346,11 @@ func (i *instance) fetchNodeInfo() error {
|
|||||||
return err
|
return err
|
||||||
}
|
}
|
||||||
|
|
||||||
ni := new(nodeInfoVersionTwoSchema)
|
ni := new(jsonapis.NodeInfoVersionTwoSchema)
|
||||||
err = json.Unmarshal(body, &ni)
|
err = json.Unmarshal(body, &ni)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
log.Error().
|
log.Error().
|
||||||
Str("hostname", i.hostname).
|
Str("hostname", i.Hostname).
|
||||||
Err(err).
|
Err(err).
|
||||||
Msgf("unable to parse nodeinfo")
|
Msgf("unable to parse nodeinfo")
|
||||||
i.registerError()
|
i.registerError()
|
||||||
@ -327,21 +361,21 @@ func (i *instance) fetchNodeInfo() error {
|
|||||||
log.Debug().
|
log.Debug().
|
||||||
Str("serverVersion", ni.Software.Version).
|
Str("serverVersion", ni.Software.Version).
|
||||||
Str("software", ni.Software.Name).
|
Str("software", ni.Software.Name).
|
||||||
Str("hostname", i.hostname).
|
Str("hostname", i.Hostname).
|
||||||
Str("nodeInfoURL", i.nodeInfoURL).
|
Str("nodeInfoURL", i.nodeInfoURL).
|
||||||
Msg("received nodeinfo from instance")
|
Msg("received nodeinfo from instance")
|
||||||
|
|
||||||
i.Lock()
|
i.Lock()
|
||||||
i.serverVersionString = ni.Software.Version
|
i.ServerVersionString = ni.Software.Version
|
||||||
i.serverImplementationString = ni.Software.Name
|
i.ServerImplementationString = ni.Software.Name
|
||||||
ni.Software.Name = strings.ToLower(ni.Software.Name)
|
ni.Software.Name = strings.ToLower(ni.Software.Name)
|
||||||
|
|
||||||
if ni.Software.Name == "pleroma" {
|
if ni.Software.Name == "pleroma" {
|
||||||
log.Debug().
|
log.Debug().
|
||||||
Str("hostname", i.hostname).
|
Str("hostname", i.Hostname).
|
||||||
Str("software", ni.Software.Name).
|
Str("software", ni.Software.Name).
|
||||||
Msg("detected server software")
|
Msg("detected server software")
|
||||||
i.identified = true
|
i.Identified = true
|
||||||
i.implementation = implPleroma
|
i.implementation = implPleroma
|
||||||
i.Unlock()
|
i.Unlock()
|
||||||
i.registerSuccess()
|
i.registerSuccess()
|
||||||
@ -349,10 +383,10 @@ func (i *instance) fetchNodeInfo() error {
|
|||||||
return nil
|
return nil
|
||||||
} else if ni.Software.Name == "mastodon" {
|
} else if ni.Software.Name == "mastodon" {
|
||||||
log.Debug().
|
log.Debug().
|
||||||
Str("hostname", i.hostname).
|
Str("hostname", i.Hostname).
|
||||||
Str("software", ni.Software.Name).
|
Str("software", ni.Software.Name).
|
||||||
Msg("detected server software")
|
Msg("detected server software")
|
||||||
i.identified = true
|
i.Identified = true
|
||||||
i.implementation = implMastodon
|
i.implementation = implMastodon
|
||||||
i.Unlock()
|
i.Unlock()
|
||||||
i.registerSuccess()
|
i.registerSuccess()
|
||||||
@ -360,7 +394,7 @@ func (i *instance) fetchNodeInfo() error {
|
|||||||
return nil
|
return nil
|
||||||
} else {
|
} else {
|
||||||
log.Error().
|
log.Error().
|
||||||
Str("hostname", i.hostname).
|
Str("hostname", i.Hostname).
|
||||||
Str("software", ni.Software.Name).
|
Str("software", ni.Software.Name).
|
||||||
Msg("FIXME unknown server implementation")
|
Msg("FIXME unknown server implementation")
|
||||||
i.Unlock()
|
i.Unlock()
|
||||||
@ -370,34 +404,78 @@ func (i *instance) fetchNodeInfo() error {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
func (i *Instance) fetchRecentToots() error {
|
||||||
func (i *Instance) fetchRecentToots() ([]byte, error) {
|
// this would have been about a billion times shorter in python
|
||||||
i.Lock()
|
|
||||||
impl := i.impl
|
|
||||||
i.Unlock()
|
|
||||||
|
|
||||||
if impl == Mastodon {
|
// it turns out pleroma supports the mastodon api so we'll just use that
|
||||||
return i.fetchRecentTootsJsonFromMastodon()
|
// for everything for now
|
||||||
} else if impl == Pleroma {
|
url := fmt.Sprintf("https://%s/api/v1/timelines/public?limit=40&local=true",
|
||||||
return i.fetchRecentTootsJsonFromPleroma()
|
i.Hostname)
|
||||||
} else {
|
|
||||||
panic("unimplemented")
|
var c = &http.Client{
|
||||||
|
Timeout: instanceHTTPTimeout,
|
||||||
|
}
|
||||||
|
|
||||||
|
i.Event("BEGIN_TOOT_FETCH")
|
||||||
|
// we set the interval now to the error interval regardless here as a
|
||||||
|
// safety against bugs to avoid fetching too frequently by logic
|
||||||
|
// bug. if the fetch is successful, we will conditionally re-update the
|
||||||
|
// next fetch to now+successInterval.
|
||||||
|
i.setNextFetchAfter(instanceErrorInterval)
|
||||||
|
resp, err := c.Get(url)
|
||||||
|
|
||||||
|
if err != nil {
|
||||||
|
log.Debug().
|
||||||
|
Str("hostname", i.Hostname).
|
||||||
|
Err(err).
|
||||||
|
Msgf("unable to fetch recent toots")
|
||||||
|
i.registerError()
|
||||||
|
i.Event("TOOT_FETCH_ERROR")
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
defer resp.Body.Close()
|
||||||
|
body, err := ioutil.ReadAll(resp.Body)
|
||||||
|
|
||||||
|
if err != nil {
|
||||||
|
log.Debug().
|
||||||
|
Str("hostname", i.Hostname).
|
||||||
|
Err(err).
|
||||||
|
Msgf("unable to read recent toots from response")
|
||||||
|
i.registerError()
|
||||||
|
i.Event("TOOT_FETCH_ERROR")
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
tc, err := toot.NewTootCollectionFromMastodonAPIResponse(body, i.Hostname)
|
||||||
|
|
||||||
|
if err != nil {
|
||||||
|
log.Error().
|
||||||
|
Str("hostname", i.Hostname).
|
||||||
|
Err(err).
|
||||||
|
Msgf("unable to parse recent toot list")
|
||||||
|
i.registerError()
|
||||||
|
i.Event("TOOT_FETCH_ERROR")
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
|
log.Info().
|
||||||
|
Str("hostname", i.Hostname).
|
||||||
|
Int("tootCount", len(tc)).
|
||||||
|
Msgf("got and parsed toots")
|
||||||
|
i.registerSuccess()
|
||||||
|
i.Event("TOOTS_FETCHED")
|
||||||
|
i.setNextFetchAfter(instanceSpiderInterval)
|
||||||
|
|
||||||
|
// this should go fast as either the channel is buffered bigly or the
|
||||||
|
// ingester receives fast and does its own buffering, but run it in its
|
||||||
|
// own goroutine anyway because why not
|
||||||
|
go i.sendTootsToIngester(tc)
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (i *Instance) sendTootsToIngester(tc []*toot.Toot) {
|
||||||
|
for _, item := range tc {
|
||||||
|
i.tootDestination <- item
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
*/
|
|
||||||
|
|
||||||
/*
|
|
||||||
func (i *PleromaBackend) fetchRecentToots() ([]byte, error) {
|
|
||||||
//url :=
|
|
||||||
//fmt.Sprintf("https://%s/api/statuses/public_and_external_timeline.json?count=100",
|
|
||||||
//i.hostname)
|
|
||||||
return nil, nil
|
|
||||||
}
|
|
||||||
|
|
||||||
func (i *MastodonBackend) fetchRecentTootsJsonFromMastodon() ([]byte, error) {
|
|
||||||
//url :=
|
|
||||||
//fmt.Sprintf("https://%s/api/v1/timelines/public?limit=40&local=true",
|
|
||||||
//i.hostname)
|
|
||||||
return nil, nil
|
|
||||||
}
|
|
||||||
*/
|
|
10
jsonapis/helpers.go
Normal file
10
jsonapis/helpers.go
Normal file
@ -0,0 +1,10 @@
|
|||||||
|
package jsonapis
|
||||||
|
|
||||||
|
import "fmt"
|
||||||
|
import "encoding/json"
|
||||||
|
|
||||||
|
// apTootList is a list of toots in which every element is kept as its raw,
// still-encoded JSON document.
type apTootList []json.RawMessage

// String renders the list for logs and debugging output as the raw JSON of
// each element, space separated and wrapped in square brackets. A nil
// receiver renders as "<nil>".
func (atl *apTootList) String() string {
	if atl == nil {
		return "<nil>"
	}
	// Format the underlying slice rather than atl itself: atl implements
	// fmt.Stringer, so passing it to fmt would call this method again and
	// recurse until the stack overflows.
	return fmt.Sprintf("%s", []json.RawMessage(*atl))
}
|
@ -1,10 +1,13 @@
|
|||||||
package feta
|
package jsonapis
|
||||||
|
|
||||||
import "time"
|
import "time"
|
||||||
|
|
||||||
// thank fuck for https://mholt.github.io/json-to-go/ otherwise
|
// thank fuck for https://mholt.github.io/json-to-go/ otherwise
|
||||||
// this would have been a giant pain in the dick
|
// this would have been a giant pain in the dick
|
||||||
type mastodonIndexResponse struct {
|
|
||||||
|
// MastodonIndexResponse is the json api shape from the mastodon instance
|
||||||
|
// indexer
|
||||||
|
type MastodonIndexResponse struct {
|
||||||
Instances []struct {
|
Instances []struct {
|
||||||
ID string `json:"_id"`
|
ID string `json:"_id"`
|
||||||
AddedAt time.Time `json:"addedAt"`
|
AddedAt time.Time `json:"addedAt"`
|
||||||
@ -48,7 +51,9 @@ type mastodonIndexResponse struct {
|
|||||||
} `json:"instances"`
|
} `json:"instances"`
|
||||||
}
|
}
|
||||||
|
|
||||||
type pleromaIndexResponse []struct {
|
// PleromaIndexResponse is the json api shape from the pleroma instance
|
||||||
|
// indexer
|
||||||
|
type PleromaIndexResponse []struct {
|
||||||
Domain string `json:"domain"`
|
Domain string `json:"domain"`
|
||||||
Title string `json:"title"`
|
Title string `json:"title"`
|
||||||
Thumbnail string `json:"thumbnail"`
|
Thumbnail string `json:"thumbnail"`
|
||||||
@ -62,7 +67,8 @@ type pleromaIndexResponse []struct {
|
|||||||
TextLimit int `json:"text_limit"`
|
TextLimit int `json:"text_limit"`
|
||||||
}
|
}
|
||||||
|
|
||||||
type nodeInfoVersionTwoSchema struct {
|
// NodeInfoVersionTwoSchema is the json format of nodeinfo 2.0
|
||||||
|
type NodeInfoVersionTwoSchema struct {
|
||||||
Version string `json:"version"`
|
Version string `json:"version"`
|
||||||
Software struct {
|
Software struct {
|
||||||
Name string `json:"name"`
|
Name string `json:"name"`
|
||||||
@ -80,9 +86,34 @@ type nodeInfoVersionTwoSchema struct {
|
|||||||
OpenRegistrations bool `json:"openRegistrations"`
|
OpenRegistrations bool `json:"openRegistrations"`
|
||||||
}
|
}
|
||||||
|
|
||||||
type nodeInfoWellKnownResponse struct {
|
// NodeInfoWellKnownResponse is the json format of the nodeinfo schema
|
||||||
|
type NodeInfoWellKnownResponse struct {
|
||||||
Links []struct {
|
Links []struct {
|
||||||
Rel string `json:"rel"`
|
Rel string `json:"rel"`
|
||||||
Href string `json:"href"`
|
Href string `json:"href"`
|
||||||
} `json:"links"`
|
} `json:"links"`
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// APISerializedToot is a partial description of a toot as the mastodon API
// serializes it to json (pleroma uses the same format). The original json
// from the server is saved as well, so only the minimal subset of fields
// this indexer needs to deserialize is declared here.
type APISerializedToot struct {
	// Account identifies the author of the toot.
	Account struct {
		Acct     string `json:"acct"`
		ID       string `json:"id"`
		URL      string `json:"url"`
		Username string `json:"username"`
	} `json:"account"`

	Content   string    `json:"content"`
	CreatedAt time.Time `json:"created_at"`
	ID        string    `json:"id"`

	// Mentions lists the accounts mentioned in the toot; each entry has the
	// same four fields as Account.
	Mentions []struct {
		Acct     string `json:"acct"`
		ID       string `json:"id"`
		URL      string `json:"url"`
		Username string `json:"username"`
	} `json:"mentions"`

	URI string `json:"uri"`
	URL string `json:"url"`
}
|
@ -1,4 +1,4 @@
|
|||||||
package feta
|
package locator
|
||||||
|
|
||||||
import "encoding/json"
|
import "encoding/json"
|
||||||
import "io/ioutil"
|
import "io/ioutil"
|
||||||
@ -8,10 +8,13 @@ import "sync"
|
|||||||
|
|
||||||
import "github.com/rs/zerolog/log"
|
import "github.com/rs/zerolog/log"
|
||||||
import "golang.org/x/sync/semaphore"
|
import "golang.org/x/sync/semaphore"
|
||||||
|
import "github.com/sneak/feta/jsonapis"
|
||||||
|
import "github.com/sneak/feta/instance"
|
||||||
|
import "github.com/sneak/feta"
|
||||||
|
|
||||||
// IndexAPITimeout is the timeout for fetching json instance lists
|
// IndexAPITimeout is the timeout for fetching json instance lists
|
||||||
// from the listing servers
|
// from the listing servers
|
||||||
const IndexAPITimeout = time.Second * 60
|
const IndexAPITimeout = time.Second * 60 * 3
|
||||||
|
|
||||||
// UserAgent is the user-agent string we provide to servers
|
// UserAgent is the user-agent string we provide to servers
|
||||||
var UserAgent = "feta indexer bot, sneak@sneak.berlin for feedback"
|
var UserAgent = "feta indexer bot, sneak@sneak.berlin for feedback"
|
||||||
@ -24,10 +27,6 @@ var IndexCheckInterval = time.Second * 60 * 60
|
|||||||
// (default: 10m)
|
// (default: 10m)
|
||||||
var IndexErrorInterval = time.Second * 60 * 10
|
var IndexErrorInterval = time.Second * 60 * 10
|
||||||
|
|
||||||
// LogReportInterval defines how long between logging internal
|
|
||||||
// stats/reporting for user supervision
|
|
||||||
var LogReportInterval = time.Second * 10
|
|
||||||
|
|
||||||
const mastodonIndexURL = "https://instances.social/list.json?q%5Busers%5D=&q%5Bsearch%5D=&strict=false"
|
const mastodonIndexURL = "https://instances.social/list.json?q%5Busers%5D=&q%5Bsearch%5D=&strict=false"
|
||||||
const pleromaIndexURL = "https://distsn.org/cgi-bin/distsn-pleroma-instances-api.cgi"
|
const pleromaIndexURL = "https://distsn.org/cgi-bin/distsn-pleroma-instances-api.cgi"
|
||||||
|
|
||||||
@ -36,11 +35,12 @@ const pleromaIndexURL = "https://distsn.org/cgi-bin/distsn-pleroma-instances-api
|
|||||||
type InstanceLocator struct {
|
type InstanceLocator struct {
|
||||||
pleromaIndexNextRefresh *time.Time
|
pleromaIndexNextRefresh *time.Time
|
||||||
mastodonIndexNextRefresh *time.Time
|
mastodonIndexNextRefresh *time.Time
|
||||||
reportInstanceVia chan InstanceHostname
|
reportInstanceVia chan instance.Hostname
|
||||||
mu sync.Mutex
|
mu sync.Mutex
|
||||||
}
|
}
|
||||||
|
|
||||||
func newInstanceLocator() *InstanceLocator {
|
// New returns an InstanceLocator for use by the process.
|
||||||
|
func New() *InstanceLocator {
|
||||||
il := new(InstanceLocator)
|
il := new(InstanceLocator)
|
||||||
n := time.Now()
|
n := time.Now()
|
||||||
il.pleromaIndexNextRefresh = &n
|
il.pleromaIndexNextRefresh = &n
|
||||||
@ -56,13 +56,15 @@ func (il *InstanceLocator) unlock() {
|
|||||||
il.mu.Unlock()
|
il.mu.Unlock()
|
||||||
}
|
}
|
||||||
|
|
||||||
func (il *InstanceLocator) addInstanceNotificationChannel(via chan InstanceHostname) {
|
// SetInstanceNotificationChannel is the way the instanceLocator returns
|
||||||
|
// newly discovered instances back to the manager for query/addition
|
||||||
|
func (il *InstanceLocator) SetInstanceNotificationChannel(via chan instance.Hostname) {
|
||||||
il.lock()
|
il.lock()
|
||||||
defer il.unlock()
|
defer il.unlock()
|
||||||
il.reportInstanceVia = via
|
il.reportInstanceVia = via
|
||||||
}
|
}
|
||||||
|
|
||||||
func (il *InstanceLocator) addInstance(hostname InstanceHostname) {
|
func (il *InstanceLocator) addInstance(hostname instance.Hostname) {
|
||||||
// receiver (InstanceManager) is responsible for de-duping against its
|
// receiver (InstanceManager) is responsible for de-duping against its
|
||||||
// map, we just locate and spray, it manages
|
// map, we just locate and spray, it manages
|
||||||
il.reportInstanceVia <- hostname
|
il.reportInstanceVia <- hostname
|
||||||
@ -84,7 +86,9 @@ func (il *InstanceLocator) durationUntilNextPleromaIndexRefresh() time.Duration
|
|||||||
return (time.Duration(-1) * time.Now().Sub(*il.pleromaIndexNextRefresh))
|
return (time.Duration(-1) * time.Now().Sub(*il.pleromaIndexNextRefresh))
|
||||||
}
|
}
|
||||||
|
|
||||||
func (il *InstanceLocator) locate() {
|
// Locate is the main entrypoint for the instancelocator, designed to be
|
||||||
|
// called once in its own goroutine.
|
||||||
|
func (il *InstanceLocator) Locate() {
|
||||||
log.Info().Msg("InstanceLocator starting")
|
log.Info().Msg("InstanceLocator starting")
|
||||||
x := time.Now()
|
x := time.Now()
|
||||||
var pleromaSemaphore = semaphore.NewWeighted(1)
|
var pleromaSemaphore = semaphore.NewWeighted(1)
|
||||||
@ -116,7 +120,8 @@ func (il *InstanceLocator) locate() {
|
|||||||
|
|
||||||
time.Sleep(1 * time.Second)
|
time.Sleep(1 * time.Second)
|
||||||
|
|
||||||
if time.Now().After(x.Add(LogReportInterval)) {
|
c := feta.GetConfig()
|
||||||
|
if time.Now().After(x.Add(c.LogReportInterval)) {
|
||||||
x = time.Now()
|
x = time.Now()
|
||||||
log.Debug().
|
log.Debug().
|
||||||
Str("nextMastodonIndexRefresh", il.durationUntilNextMastodonIndexRefresh().String()).
|
Str("nextMastodonIndexRefresh", il.durationUntilNextMastodonIndexRefresh().String()).
|
||||||
@ -171,7 +176,7 @@ func (il *InstanceLocator) locateMastodon() {
|
|||||||
il.mastodonIndexNextRefresh = &t
|
il.mastodonIndexNextRefresh = &t
|
||||||
il.unlock()
|
il.unlock()
|
||||||
|
|
||||||
mi := new(mastodonIndexResponse)
|
mi := new(jsonapis.MastodonIndexResponse)
|
||||||
err = json.Unmarshal(body, &mi)
|
err = json.Unmarshal(body, &mi)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
log.Error().Msgf("unable to parse mastodon instance list: %s", err)
|
log.Error().Msgf("unable to parse mastodon instance list: %s", err)
|
||||||
@ -193,7 +198,7 @@ func (il *InstanceLocator) locateMastodon() {
|
|||||||
Msg("received hosts from mastodon index")
|
Msg("received hosts from mastodon index")
|
||||||
|
|
||||||
for k := range hosts {
|
for k := range hosts {
|
||||||
il.addInstance(InstanceHostname(k))
|
il.addInstance(instance.Hostname(k))
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
@ -239,7 +244,7 @@ func (il *InstanceLocator) locatePleroma() {
|
|||||||
il.pleromaIndexNextRefresh = &t
|
il.pleromaIndexNextRefresh = &t
|
||||||
il.unlock()
|
il.unlock()
|
||||||
|
|
||||||
pi := new(pleromaIndexResponse)
|
pi := new(jsonapis.PleromaIndexResponse)
|
||||||
err = json.Unmarshal(body, &pi)
|
err = json.Unmarshal(body, &pi)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
log.Warn().Msgf("unable to parse pleroma instance list: %s", err)
|
log.Warn().Msgf("unable to parse pleroma instance list: %s", err)
|
||||||
@ -261,7 +266,7 @@ func (il *InstanceLocator) locatePleroma() {
|
|||||||
Msg("received hosts from pleroma index")
|
Msg("received hosts from pleroma index")
|
||||||
|
|
||||||
for k := range hosts {
|
for k := range hosts {
|
||||||
il.addInstance(InstanceHostname(k))
|
il.addInstance(instance.Hostname(k))
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
@ -1,4 +1,4 @@
|
|||||||
package feta
|
package manager
|
||||||
|
|
||||||
import "sync"
|
import "sync"
|
||||||
import "time"
|
import "time"
|
||||||
@ -6,30 +6,41 @@ import "runtime"
|
|||||||
|
|
||||||
//import "github.com/gin-gonic/gin"
|
//import "github.com/gin-gonic/gin"
|
||||||
import "github.com/rs/zerolog/log"
|
import "github.com/rs/zerolog/log"
|
||||||
|
import "github.com/sneak/feta/toot"
|
||||||
|
import "github.com/sneak/feta/seeds"
|
||||||
|
import "github.com/sneak/feta/instance"
|
||||||
|
|
||||||
const hostDiscoveryParallelism = 20
|
const hostDiscoveryParallelism = 5
|
||||||
|
|
||||||
type instanceBackend interface {
|
// LogReportInterval defines how long between logging internal
|
||||||
//FIXME
|
// stats/reporting for user supervision
|
||||||
}
|
var LogReportInterval = time.Second * 10
|
||||||
|
|
||||||
// InstanceManager is the main data structure for the goroutine that manages
|
// InstanceManager is the main data structure for the goroutine that manages
|
||||||
// the list of all known instances, fed by the locator
|
// the list of all known instances, fed by the locator
|
||||||
type InstanceManager struct {
|
type InstanceManager struct {
|
||||||
mu sync.Mutex
|
mu sync.Mutex
|
||||||
instances map[InstanceHostname]*instance
|
instances map[instance.Hostname]*instance.Instance
|
||||||
newInstanceNotifications chan InstanceHostname
|
newInstanceNotifications chan instance.Hostname
|
||||||
|
tootDestination chan *toot.Toot
|
||||||
startup time.Time
|
startup time.Time
|
||||||
hostAdderSemaphore chan bool
|
hostAdderSemaphore chan bool
|
||||||
}
|
}
|
||||||
|
|
||||||
func newInstanceManager() *InstanceManager {
|
// New returns a new InstanceManager for use by the Process
|
||||||
|
func New() *InstanceManager {
|
||||||
i := new(InstanceManager)
|
i := new(InstanceManager)
|
||||||
i.hostAdderSemaphore = make(chan bool, hostDiscoveryParallelism)
|
i.hostAdderSemaphore = make(chan bool, hostDiscoveryParallelism)
|
||||||
i.instances = make(map[InstanceHostname]*instance)
|
i.instances = make(map[instance.Hostname]*instance.Instance)
|
||||||
return i
|
return i
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// SetTootDestination provides the instancemanager with a channel to the
|
||||||
|
// ingester that it can give to its instances
|
||||||
|
func (im *InstanceManager) SetTootDestination(td chan *toot.Toot) {
|
||||||
|
im.tootDestination = td
|
||||||
|
}
|
||||||
|
|
||||||
func (im *InstanceManager) logCaller(msg string) {
|
func (im *InstanceManager) logCaller(msg string) {
|
||||||
fpcs := make([]uintptr, 1)
|
fpcs := make([]uintptr, 1)
|
||||||
// Skip 2 levels to get the caller
|
// Skip 2 levels to get the caller
|
||||||
@ -62,19 +73,39 @@ func (im *InstanceManager) unlock() {
|
|||||||
im.mu.Unlock()
|
im.mu.Unlock()
|
||||||
}
|
}
|
||||||
|
|
||||||
func (im *InstanceManager) addInstanceNotificationChannel(via chan InstanceHostname) {
|
// SetInstanceNotificationChannel is how the Process tells the
|
||||||
|
// InstanceManager about the channel from the InstanceLocator so that the
|
||||||
|
// InstanceLocator can provide it/us (the InstanceManager) with new
|
||||||
|
// instance.Hostnames. We (the manager) deduplicate the list ourselves.
|
||||||
|
func (im *InstanceManager) SetInstanceNotificationChannel(via chan instance.Hostname) {
|
||||||
im.lock()
|
im.lock()
|
||||||
defer im.unlock()
|
defer im.unlock()
|
||||||
im.newInstanceNotifications = via
|
im.newInstanceNotifications = via
|
||||||
}
|
}
|
||||||
|
|
||||||
func (im *InstanceManager) manage() {
|
func (im *InstanceManager) receiveSeedInstanceHostnames() {
|
||||||
|
for _, x := range seeds.SeedInstances {
|
||||||
|
go func(tmp instance.Hostname) {
|
||||||
|
im.addInstanceByHostname(tmp)
|
||||||
|
}(instance.Hostname(x))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Manage is the main entrypoint of the InstanceManager, designed to be
|
||||||
|
// called once in its own goroutine.
|
||||||
|
func (im *InstanceManager) Manage() {
|
||||||
log.Info().Msg("InstanceManager starting")
|
log.Info().Msg("InstanceManager starting")
|
||||||
go func() {
|
go func() {
|
||||||
im.receiveNewInstanceHostnames()
|
im.receiveNewInstanceHostnames()
|
||||||
}()
|
}()
|
||||||
|
|
||||||
im.startup = time.Now()
|
im.startup = time.Now()
|
||||||
x := im.startup
|
x := im.startup
|
||||||
|
|
||||||
|
go func() {
|
||||||
|
im.receiveSeedInstanceHostnames()
|
||||||
|
}()
|
||||||
|
|
||||||
for {
|
for {
|
||||||
log.Info().Msg("InstanceManager tick")
|
log.Info().Msg("InstanceManager tick")
|
||||||
im.managerLoop()
|
im.managerLoop()
|
||||||
@ -88,7 +119,7 @@ func (im *InstanceManager) manage() {
|
|||||||
|
|
||||||
func (im *InstanceManager) managerLoop() {
|
func (im *InstanceManager) managerLoop() {
|
||||||
im.lock()
|
im.lock()
|
||||||
il := make([]*instance, 0)
|
il := make([]*instance.Instance, 0)
|
||||||
for _, v := range im.instances {
|
for _, v := range im.instances {
|
||||||
il = append(il, v)
|
il = append(il, v)
|
||||||
}
|
}
|
||||||
@ -96,13 +127,13 @@ func (im *InstanceManager) managerLoop() {
|
|||||||
|
|
||||||
// FIXME is this a bug outside of the mutex above?
|
// FIXME is this a bug outside of the mutex above?
|
||||||
for _, v := range il {
|
for _, v := range il {
|
||||||
go func(i *instance) {
|
go func(i *instance.Instance) {
|
||||||
i.Tick()
|
i.Tick()
|
||||||
}(v)
|
}(v)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func (im *InstanceManager) hostnameExists(newhn InstanceHostname) bool {
|
func (im *InstanceManager) hostnameExists(newhn instance.Hostname) bool {
|
||||||
im.lock()
|
im.lock()
|
||||||
defer im.unlock()
|
defer im.unlock()
|
||||||
for k := range im.instances {
|
for k := range im.instances {
|
||||||
@ -113,7 +144,7 @@ func (im *InstanceManager) hostnameExists(newhn InstanceHostname) bool {
|
|||||||
return false
|
return false
|
||||||
}
|
}
|
||||||
|
|
||||||
func (im *InstanceManager) addInstanceByHostname(newhn InstanceHostname) {
|
func (im *InstanceManager) addInstanceByHostname(newhn instance.Hostname) {
|
||||||
if im.hostnameExists(newhn) {
|
if im.hostnameExists(newhn) {
|
||||||
// ignore adding new if we already know about it
|
// ignore adding new if we already know about it
|
||||||
return
|
return
|
||||||
@ -122,12 +153,13 @@ func (im *InstanceManager) addInstanceByHostname(newhn InstanceHostname) {
|
|||||||
// this blocks on the channel size, limiting concurrency
|
// this blocks on the channel size, limiting concurrency
|
||||||
im.hostAdderSemaphore <- true
|
im.hostAdderSemaphore <- true
|
||||||
|
|
||||||
i := newInstance(func(x *instance) {
|
i := instance.New(func(x *instance.Instance) {
|
||||||
x.hostname = string(newhn)
|
x.Hostname = string(newhn) // set hostname
|
||||||
|
x.SetTootDestination(im.tootDestination) // copy ingester input channel from manager to instance
|
||||||
})
|
})
|
||||||
// we do node detection under the addLock to avoid thundering
|
// we do node detection under the adderSemaphore to avoid thundering
|
||||||
// on startup
|
// on startup
|
||||||
i.detectNodeTypeIfNecessary()
|
i.DetectNodeTypeIfNecessary()
|
||||||
|
|
||||||
// pop an item from the buffered channel
|
// pop an item from the buffered channel
|
||||||
<-im.hostAdderSemaphore
|
<-im.hostAdderSemaphore
|
||||||
@ -140,7 +172,7 @@ func (im *InstanceManager) addInstanceByHostname(newhn InstanceHostname) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
func (im *InstanceManager) receiveNewInstanceHostnames() {
|
func (im *InstanceManager) receiveNewInstanceHostnames() {
|
||||||
var newhn InstanceHostname
|
var newhn instance.Hostname
|
||||||
for {
|
for {
|
||||||
newhn = <-im.newInstanceNotifications
|
newhn = <-im.newInstanceNotifications
|
||||||
// receive them fast out of the channel, let the adding function lock to add
|
// receive them fast out of the channel, let the adding function lock to add
|
||||||
@ -163,8 +195,10 @@ func (im *InstanceManager) logInstanceReport() {
|
|||||||
Msg("instance report")
|
Msg("instance report")
|
||||||
}
|
}
|
||||||
|
|
||||||
func (im *InstanceManager) listInstances() []*instance {
|
// ListInstances dumps a slice of all Instances the InstanceManager knows
|
||||||
var out []*instance
|
// about
|
||||||
|
func (im *InstanceManager) ListInstances() []*instance.Instance {
|
||||||
|
var out []*instance.Instance
|
||||||
im.lock()
|
im.lock()
|
||||||
defer im.unlock()
|
defer im.unlock()
|
||||||
for _, v := range im.instances {
|
for _, v := range im.instances {
|
||||||
@ -175,7 +209,7 @@ func (im *InstanceManager) listInstances() []*instance {
|
|||||||
|
|
||||||
func (im *InstanceManager) instanceSummaryReport() map[string]uint {
|
func (im *InstanceManager) instanceSummaryReport() map[string]uint {
|
||||||
r := make(map[string]uint)
|
r := make(map[string]uint)
|
||||||
for _, v := range im.listInstances() {
|
for _, v := range im.ListInstances() {
|
||||||
v.Lock()
|
v.Lock()
|
||||||
r[v.Status()]++
|
r[v.Status()]++
|
||||||
v.Unlock()
|
v.Unlock()
|
140
process/feta.go
Normal file
140
process/feta.go
Normal file
@ -0,0 +1,140 @@
|
|||||||
|
package process
|
||||||
|
|
||||||
|
import "os"
|
||||||
|
import "time"
|
||||||
|
|
||||||
|
import "github.com/jinzhu/gorm"
|
||||||
|
import _ "github.com/jinzhu/gorm/dialects/sqlite" // required for orm
|
||||||
|
|
||||||
|
import "github.com/rs/zerolog"
|
||||||
|
import "github.com/rs/zerolog/log"
|
||||||
|
import "github.com/mattn/go-isatty"
|
||||||
|
|
||||||
|
import "github.com/sneak/feta/ingester"
|
||||||
|
import "github.com/sneak/feta/storage"
|
||||||
|
import "github.com/sneak/feta/locator"
|
||||||
|
import "github.com/sneak/feta/manager"
|
||||||
|
import "github.com/sneak/feta/instance"
|
||||||
|
|
||||||
|
// CLIEntry is the main entrypoint for the feta process from the cli
|
||||||
|
func CLIEntry(version string, buildarch string) int {
|
||||||
|
f := new(Feta)
|
||||||
|
f.version = version
|
||||||
|
f.buildarch = buildarch
|
||||||
|
f.setupLogging()
|
||||||
|
return f.runForever()
|
||||||
|
}
|
||||||
|
|
||||||
|
// Feta is the main structure/process of this app
|
||||||
|
type Feta struct {
|
||||||
|
version string
|
||||||
|
buildarch string
|
||||||
|
locator *locator.InstanceLocator
|
||||||
|
manager *manager.InstanceManager
|
||||||
|
ingester *ingester.TootIngester
|
||||||
|
api *Server
|
||||||
|
db *gorm.DB
|
||||||
|
startup time.Time
|
||||||
|
}
|
||||||
|
|
||||||
|
func (f *Feta) identify() {
|
||||||
|
log.Info().
|
||||||
|
Str("version", f.version).
|
||||||
|
Str("buildarch", f.buildarch).
|
||||||
|
Msg("starting")
|
||||||
|
}
|
||||||
|
|
||||||
|
func (f *Feta) setupLogging() {
|
||||||
|
|
||||||
|
log.Logger = log.With().Caller().Logger()
|
||||||
|
|
||||||
|
tty := isatty.IsTerminal(os.Stdin.Fd()) || isatty.IsCygwinTerminal(os.Stdin.Fd())
|
||||||
|
|
||||||
|
if tty {
|
||||||
|
out := zerolog.NewConsoleWriter(
|
||||||
|
func(w *zerolog.ConsoleWriter) {
|
||||||
|
// Customize time format
|
||||||
|
w.TimeFormat = time.RFC3339
|
||||||
|
},
|
||||||
|
)
|
||||||
|
log.Logger = log.Output(out)
|
||||||
|
}
|
||||||
|
|
||||||
|
// always log in UTC
|
||||||
|
zerolog.TimestampFunc = func() time.Time {
|
||||||
|
return time.Now().UTC()
|
||||||
|
}
|
||||||
|
|
||||||
|
zerolog.SetGlobalLevel(zerolog.InfoLevel)
|
||||||
|
if os.Getenv("DEBUG") != "" {
|
||||||
|
zerolog.SetGlobalLevel(zerolog.DebugLevel)
|
||||||
|
}
|
||||||
|
|
||||||
|
f.identify()
|
||||||
|
}
|
||||||
|
|
||||||
|
func (f *Feta) uptime() time.Duration {
|
||||||
|
return time.Since(f.startup)
|
||||||
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
func (f *Feta) setupDatabase() {
|
||||||
|
var err error
|
||||||
|
f.db, err = gorm.Open("sqlite3", "feta.sqlite")
|
||||||
|
|
||||||
|
if err != nil {
|
||||||
|
panic(err)
|
||||||
|
}
|
||||||
|
|
||||||
|
//f.databaseMigrations()
|
||||||
|
}
|
||||||
|
*/
|
||||||
|
|
||||||
|
func (f *Feta) runForever() int {
|
||||||
|
f.startup = time.Now()
|
||||||
|
|
||||||
|
//f.setupDatabase()
|
||||||
|
|
||||||
|
// FIXME move this channel creation into the manager's constructor
|
||||||
|
// and add getters/setters on the manager/locator
|
||||||
|
newInstanceHostnameNotifications := make(chan instance.Hostname)
|
||||||
|
|
||||||
|
f.locator = locator.New()
|
||||||
|
f.manager = manager.New()
|
||||||
|
f.ingester = ingester.NewTootIngester()
|
||||||
|
|
||||||
|
home := os.Getenv("HOME")
|
||||||
|
if home == "" {
|
||||||
|
panic("can't find home directory")
|
||||||
|
}
|
||||||
|
|
||||||
|
diskBackend := storage.NewTootFSStorage(home + "/.local/feta")
|
||||||
|
f.ingester.SetStorageBackend(diskBackend)
|
||||||
|
|
||||||
|
f.api = new(Server)
|
||||||
|
f.api.SetFeta(f) // api needs to get to us to access data
|
||||||
|
|
||||||
|
f.locator.SetInstanceNotificationChannel(newInstanceHostnameNotifications)
|
||||||
|
f.manager.SetInstanceNotificationChannel(newInstanceHostnameNotifications)
|
||||||
|
|
||||||
|
f.manager.SetTootDestination(f.ingester.GetDeliveryChannel())
|
||||||
|
|
||||||
|
// ingester goroutine:
|
||||||
|
go f.ingester.Ingest()
|
||||||
|
|
||||||
|
// locator goroutine:
|
||||||
|
go f.locator.Locate()
|
||||||
|
|
||||||
|
// manager goroutine:
|
||||||
|
go f.manager.Manage()
|
||||||
|
|
||||||
|
go f.api.Serve()
|
||||||
|
|
||||||
|
// this goroutine (main) does nothing until we handle signals
|
||||||
|
// FIXME(sneak)
|
||||||
|
for {
|
||||||
|
time.Sleep(1 * time.Second)
|
||||||
|
}
|
||||||
|
|
||||||
|
return 0
|
||||||
|
}
|
@ -1,4 +1,4 @@
|
|||||||
package feta
|
package process
|
||||||
|
|
||||||
import "time"
|
import "time"
|
||||||
import "net/http"
|
import "net/http"
|
||||||
@ -11,25 +11,25 @@ import "github.com/gin-gonic/gin"
|
|||||||
|
|
||||||
type hash map[string]interface{}
|
type hash map[string]interface{}
|
||||||
|
|
||||||
func (a *fetaAPIServer) instances() []hash {
|
func (a *Server) instances() []hash {
|
||||||
resp := make([]hash, 0)
|
resp := make([]hash, 0)
|
||||||
now := time.Now()
|
now := time.Now()
|
||||||
for _, v := range a.feta.manager.listInstances() {
|
for _, v := range a.feta.manager.ListInstances() {
|
||||||
i := make(hash)
|
i := make(hash)
|
||||||
// FIXME figure out why a very short lock here deadlocks
|
// FIXME figure out why a very short lock here deadlocks
|
||||||
v.Lock()
|
v.Lock()
|
||||||
i["hostname"] = v.hostname
|
i["hostname"] = v.Hostname
|
||||||
i["nextCheck"] = v.nextFetch.UTC().Format(time.RFC3339)
|
i["nextCheck"] = v.NextFetch.UTC().Format(time.RFC3339)
|
||||||
i["nextCheckAfter"] = (-1 * now.Sub(v.nextFetch)).String()
|
i["nextCheckAfter"] = (-1 * now.Sub(v.NextFetch)).String()
|
||||||
i["successCount"] = v.successCount
|
i["successCount"] = v.SuccessCount
|
||||||
i["errorCount"] = v.errorCount
|
i["errorCount"] = v.ErrorCount
|
||||||
i["identified"] = v.identified
|
i["identified"] = v.Identified
|
||||||
i["status"] = v.Status()
|
i["status"] = v.Status()
|
||||||
i["software"] = "unknown"
|
i["software"] = "unknown"
|
||||||
i["version"] = "unknown"
|
i["version"] = "unknown"
|
||||||
if v.identified {
|
if v.Identified {
|
||||||
i["software"] = v.serverImplementationString
|
i["software"] = v.ServerImplementationString
|
||||||
i["version"] = v.serverVersionString
|
i["version"] = v.ServerVersionString
|
||||||
}
|
}
|
||||||
v.Unlock()
|
v.Unlock()
|
||||||
resp = append(resp, i)
|
resp = append(resp, i)
|
||||||
@ -37,21 +37,21 @@ func (a *fetaAPIServer) instances() []hash {
|
|||||||
return resp
|
return resp
|
||||||
}
|
}
|
||||||
|
|
||||||
func (a *fetaAPIServer) instanceSummary() map[string]int {
|
func (a *Server) instanceSummary() map[string]int {
|
||||||
resp := make(map[string]int)
|
resp := make(map[string]int)
|
||||||
for _, v := range a.feta.manager.listInstances() {
|
for _, v := range a.feta.manager.ListInstances() {
|
||||||
v.Lock()
|
v.Lock()
|
||||||
resp[fmt.Sprintf("STATUS_%s", v.Status())]++
|
resp[fmt.Sprintf("STATUS_%s", v.Status())]++
|
||||||
if v.serverImplementationString != "" {
|
if v.ServerImplementationString != "" {
|
||||||
//FIXME(sneak) sanitize this to a-z0-9, it is server-provided
|
//FIXME(sneak) sanitize this to a-z0-9, it is server-provided
|
||||||
resp[fmt.Sprintf("SOFTWARE_%s", strings.ToUpper(v.serverImplementationString))]++
|
resp[fmt.Sprintf("SOFTWARE_%s", strings.ToUpper(v.ServerImplementationString))]++
|
||||||
}
|
}
|
||||||
v.Unlock()
|
v.Unlock()
|
||||||
}
|
}
|
||||||
return resp
|
return resp
|
||||||
}
|
}
|
||||||
|
|
||||||
func (a *fetaAPIServer) getInstanceListHandler() http.HandlerFunc {
|
func (a *Server) getInstanceListHandler() http.HandlerFunc {
|
||||||
return func(w http.ResponseWriter, r *http.Request) {
|
return func(w http.ResponseWriter, r *http.Request) {
|
||||||
|
|
||||||
result := &gin.H{
|
result := &gin.H{
|
||||||
@ -69,7 +69,7 @@ func (a *fetaAPIServer) getInstanceListHandler() http.HandlerFunc {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func (a *fetaAPIServer) getIndexHandler() http.HandlerFunc {
|
func (a *Server) getIndexHandler() http.HandlerFunc {
|
||||||
return func(w http.ResponseWriter, r *http.Request) {
|
return func(w http.ResponseWriter, r *http.Request) {
|
||||||
index := &gin.H{
|
index := &gin.H{
|
||||||
"server": &gin.H{
|
"server": &gin.H{
|
||||||
@ -78,9 +78,7 @@ func (a *fetaAPIServer) getIndexHandler() http.HandlerFunc {
|
|||||||
"goroutines": runtime.NumGoroutine(),
|
"goroutines": runtime.NumGoroutine(),
|
||||||
"goversion": runtime.Version(),
|
"goversion": runtime.Version(),
|
||||||
"version": a.feta.version,
|
"version": a.feta.version,
|
||||||
"buildtime": a.feta.buildtime,
|
|
||||||
"buildarch": a.feta.buildarch,
|
"buildarch": a.feta.buildarch,
|
||||||
"builduser": a.feta.builduser,
|
|
||||||
},
|
},
|
||||||
"instanceSummary": a.instanceSummary(),
|
"instanceSummary": a.instanceSummary(),
|
||||||
}
|
}
|
||||||
@ -96,7 +94,7 @@ func (a *fetaAPIServer) getIndexHandler() http.HandlerFunc {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func (a *fetaAPIServer) getHealthCheckHandler() http.HandlerFunc {
|
func (a *Server) getHealthCheckHandler() http.HandlerFunc {
|
||||||
return func(w http.ResponseWriter, r *http.Request) {
|
return func(w http.ResponseWriter, r *http.Request) {
|
||||||
resp := &gin.H{
|
resp := &gin.H{
|
||||||
"status": "ok",
|
"status": "ok",
|
@ -1,4 +1,4 @@
|
|||||||
package feta
|
package process
|
||||||
|
|
||||||
import "fmt"
|
import "fmt"
|
||||||
import "net/http"
|
import "net/http"
|
||||||
@ -10,19 +10,24 @@ import "github.com/rs/zerolog/log"
|
|||||||
import "github.com/gin-gonic/gin"
|
import "github.com/gin-gonic/gin"
|
||||||
import "github.com/dn365/gin-zerolog"
|
import "github.com/dn365/gin-zerolog"
|
||||||
|
|
||||||
type fetaAPIServer struct {
|
// Server is the HTTP webserver object
|
||||||
feta *Process
|
type Server struct {
|
||||||
|
feta *Feta
|
||||||
port uint
|
port uint
|
||||||
router *gin.Engine
|
router *gin.Engine
|
||||||
server *http.Server
|
server *http.Server
|
||||||
debug bool
|
debug bool
|
||||||
}
|
}
|
||||||
|
|
||||||
func (a *fetaAPIServer) setFeta(feta *Process) {
|
// SetFeta tells the http Server where to find the Process object so that it
|
||||||
|
// can pull stats and other information for serving via http
|
||||||
|
func (a *Server) SetFeta(feta *Feta) {
|
||||||
a.feta = feta
|
a.feta = feta
|
||||||
}
|
}
|
||||||
|
|
||||||
func (a *fetaAPIServer) serve() {
|
// Serve is the entrypoint for the Server, which should run in its own
|
||||||
|
// goroutine (started by the Process)
|
||||||
|
func (a *Server) Serve() {
|
||||||
if a.feta == nil {
|
if a.feta == nil {
|
||||||
panic("must have feta app from which to serve stats")
|
panic("must have feta app from which to serve stats")
|
||||||
}
|
}
|
||||||
@ -50,7 +55,7 @@ func (a *fetaAPIServer) serve() {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func (a *fetaAPIServer) initRouter() {
|
func (a *Server) initRouter() {
|
||||||
|
|
||||||
// empty router
|
// empty router
|
||||||
r := gin.New()
|
r := gin.New()
|
||||||
@ -69,7 +74,7 @@ func (a *fetaAPIServer) initRouter() {
|
|||||||
a.router = r
|
a.router = r
|
||||||
}
|
}
|
||||||
|
|
||||||
func (a *fetaAPIServer) initServer() {
|
func (a *Server) initServer() {
|
||||||
if !a.debug {
|
if !a.debug {
|
||||||
gin.SetMode(gin.ReleaseMode)
|
gin.SetMode(gin.ReleaseMode)
|
||||||
}
|
}
|
553
seeds/seeds.go
Normal file
553
seeds/seeds.go
Normal file
@ -0,0 +1,553 @@
|
|||||||
|
package seeds
|
||||||
|
|
||||||
|
// SeedInstances is a list of instance hostnames used to seed the indexer.
|
||||||
|
// This list so far is a bunch of instances that have been
|
||||||
|
// banned/defederated by others so it's important to seed them so that we
|
||||||
|
// can always get their toots for archiving; they will likely not appear in
|
||||||
|
// common mentions/public indices.
|
||||||
|
// update: now includes a bunch of other instances too
|
||||||
|
var SeedInstances = [...]string{
|
||||||
|
"blobturtle.club",
|
||||||
|
"busshi.moe",
|
||||||
|
"fedi.valkyrie.world",
|
||||||
|
"gnosis.systems",
|
||||||
|
"iscute.moe",
|
||||||
|
"kink.town",
|
||||||
|
"kinky.business",
|
||||||
|
"kinkyelephant.com",
|
||||||
|
"kiwec.net",
|
||||||
|
"kiwifarms.cc",
|
||||||
|
"kiwifarms.is",
|
||||||
|
"kiwifarms.net",
|
||||||
|
"kneegrows.top",
|
||||||
|
"knzk.me",
|
||||||
|
"kowai.youkai.town",
|
||||||
|
"koyu.space",
|
||||||
|
"krauser.org",
|
||||||
|
"kuko.hamburg",
|
||||||
|
"kune.gouge.re",
|
||||||
|
"kyot.me",
|
||||||
|
"kys.moe",
|
||||||
|
"lanners.uk",
|
||||||
|
"larvata.com",
|
||||||
|
"latinos.social",
|
||||||
|
"layer8.space",
|
||||||
|
"leftlibertarian.club",
|
||||||
|
"lesbian.energy",
|
||||||
|
"lets.saynoto.lgbt",
|
||||||
|
"letsalllovela.in",
|
||||||
|
"lgbtq.cool",
|
||||||
|
"lgbtqia.is",
|
||||||
|
"liberdon.com",
|
||||||
|
"libertarianism.club",
|
||||||
|
"librem.one",
|
||||||
|
"librenet.co.za",
|
||||||
|
"ligma.pro",
|
||||||
|
"likeable.space",
|
||||||
|
"linuxrocks.online",
|
||||||
|
"litodon.de",
|
||||||
|
"littles.space",
|
||||||
|
"liveview.cf",
|
||||||
|
"loci.onl",
|
||||||
|
"logjam.city",
|
||||||
|
"lol5.tun.a4.io",
|
||||||
|
"loli.estate",
|
||||||
|
"lolis.world",
|
||||||
|
"lost-angles.im",
|
||||||
|
"luvdon.cc",
|
||||||
|
"luvdon.ddns.net",
|
||||||
|
"m.1994.io",
|
||||||
|
"m.apertron.com",
|
||||||
|
"m.bnolet.me",
|
||||||
|
"m.danq.me",
|
||||||
|
"m.eula.dev",
|
||||||
|
"m.fratm.com",
|
||||||
|
"m.kretschmann.social",
|
||||||
|
"m.xorkle.com",
|
||||||
|
"magikarp.fun",
|
||||||
|
"maik.social",
|
||||||
|
"majak.de",
|
||||||
|
"makito.me",
|
||||||
|
"maly.io",
|
||||||
|
"manx.social",
|
||||||
|
"marchgenso.me",
|
||||||
|
"mares.cafe",
|
||||||
|
"mas.korrigan.tech",
|
||||||
|
"mas.to",
|
||||||
|
"mast.astragroup.info",
|
||||||
|
"mastadon.ml",
|
||||||
|
"masto.lost-angles.im",
|
||||||
|
"masto.misell.cymru",
|
||||||
|
"masto.ml",
|
||||||
|
"masto.mywebprojects.co.uk",
|
||||||
|
"masto.polarisfm.net",
|
||||||
|
"masto.powerlot.net",
|
||||||
|
"masto.stanisic.nl",
|
||||||
|
"mastoc.net",
|
||||||
|
"mastodon-network.com",
|
||||||
|
"mastodon.aekrylov.me",
|
||||||
|
"mastodon.alienlebarge.ch",
|
||||||
|
"mastodon.amaseto.com",
|
||||||
|
"mastodon.art",
|
||||||
|
"mastodon.aventer.biz",
|
||||||
|
"mastodon.blue",
|
||||||
|
"mastodon.cipherbliss.com",
|
||||||
|
"mastodon.circlelinego.com",
|
||||||
|
"mastodon.codeplumbers.eu",
|
||||||
|
"mastodon.coder.town",
|
||||||
|
"mastodon.com.pl",
|
||||||
|
"mastodon.corecoding.dev",
|
||||||
|
"mastodon.cyber-tribal.com",
|
||||||
|
"mastodon.dlitz.net",
|
||||||
|
"mastodon.echoz.io",
|
||||||
|
"mastodon.eric.ovh",
|
||||||
|
"mastodon.ericbeckers.nl",
|
||||||
|
"mastodon.fail",
|
||||||
|
"mastodon.freifunk-minden.de",
|
||||||
|
"mastodon.fricloud.dk",
|
||||||
|
"mastodon.funigtor.fr",
|
||||||
|
"mastodon.gamedev.place",
|
||||||
|
"mastodon.gargantia.fr",
|
||||||
|
"mastodon.geofox.org",
|
||||||
|
"mastodon.globalrevolution.tv",
|
||||||
|
"mastodon.gougere.fr",
|
||||||
|
"mastodon.grin.hu",
|
||||||
|
"mastodon.h.etbus.ch",
|
||||||
|
"mastodon.host",
|
||||||
|
"mastodon.hugolecourt.fr",
|
||||||
|
"mastodon.ie",
|
||||||
|
"mastodon.immae.eu",
|
||||||
|
"mastodon.inferiorlattice.com",
|
||||||
|
"mastodon.inhji.de",
|
||||||
|
"mastodon.jectrum.de",
|
||||||
|
"mastodon.jeder.pl",
|
||||||
|
"mastodon.kerenon.com",
|
||||||
|
"mastodon.kliu.io",
|
||||||
|
"mastodon.kosebamse.com",
|
||||||
|
"mastodon.leptonics.com",
|
||||||
|
"mastodon.local.lubar.me",
|
||||||
|
"mastodon.loliandstuff.moe",
|
||||||
|
"mastodon.lubar.me",
|
||||||
|
"mastodon.lunorian.is",
|
||||||
|
"mastodon.macsnet.cz",
|
||||||
|
"mastodon.maescool.be",
|
||||||
|
"mastodon.me.uk",
|
||||||
|
"mastodon.mynameisivan.ru",
|
||||||
|
"mastodon.naoy.fr",
|
||||||
|
"mastodon.nobodysstuff.de",
|
||||||
|
"mastodon.ocf.berkeley.edu",
|
||||||
|
"mastodon.openpsychology.net",
|
||||||
|
"mastodon.org.ua",
|
||||||
|
"mastodon.org.uk",
|
||||||
|
"mastodon.otherreality.net",
|
||||||
|
"mastodon.owls.io",
|
||||||
|
"mastodon.redflag.social",
|
||||||
|
"mastodon.roocita.com",
|
||||||
|
"mastodon.rylees.net",
|
||||||
|
"mastodon.scarletsisters.xyz",
|
||||||
|
"mastodon.schemacs.com",
|
||||||
|
"mastodon.scuttle.org",
|
||||||
|
"mastodon.sebbo.net",
|
||||||
|
"mastodon.sedryk.info",
|
||||||
|
"mastodon.social",
|
||||||
|
"mastodon.social",
|
||||||
|
"mastodon.soses.ca",
|
||||||
|
"mastodon.spiderden.net",
|
||||||
|
"mastodon.starrevolution.org",
|
||||||
|
"mastodon.syntik.fr",
|
||||||
|
"mastodon.technology",
|
||||||
|
"mastodon.technology",
|
||||||
|
"mastodon.toni.im",
|
||||||
|
"mastodon.toniozz75.fr",
|
||||||
|
"mastodon.truf-kin.com",
|
||||||
|
"mastodon.xhrpb.com",
|
||||||
|
"mastodon.yolovision-inc.com",
|
||||||
|
"mastodon.zapashcanon.fr",
|
||||||
|
"mastodon.zwei.net",
|
||||||
|
"mastofant.de",
|
||||||
|
"masttest.zwei.net",
|
||||||
|
"mcphail.uk",
|
||||||
|
"me.frankmeeuwsen.xyz",
|
||||||
|
"megadon.net",
|
||||||
|
"melalandia.tk",
|
||||||
|
"menzel-it.social",
|
||||||
|
"meow.social",
|
||||||
|
"mgub.yt",
|
||||||
|
"mikep.ro",
|
||||||
|
"ministry.moonbutt.science",
|
||||||
|
"misskey.io",
|
||||||
|
"misskey.nl",
|
||||||
|
"mmorpg.social",
|
||||||
|
"mobile.co",
|
||||||
|
"monsterpit.net",
|
||||||
|
"moytura.org",
|
||||||
|
"mst.mpdevel.com",
|
||||||
|
"mst.thewebzone.net",
|
||||||
|
"mst.vsta.org",
|
||||||
|
"mstdn.alternanet.fr",
|
||||||
|
"mstdn.ikebuku.ro",
|
||||||
|
"mstdn.io",
|
||||||
|
"mstdn.jp",
|
||||||
|
"mstdn.maud.io",
|
||||||
|
"mstdn.mx",
|
||||||
|
"mstdn.novium.pw",
|
||||||
|
"mstdn.openalgeria.org",
|
||||||
|
"mstdn.social",
|
||||||
|
"mstdn.tsukiyono.0am.jp",
|
||||||
|
"mstdn.waifu.space",
|
||||||
|
"mstdn.xxil.cc",
|
||||||
|
"mu.zaitcev.nu",
|
||||||
|
"mudl.us",
|
||||||
|
"multicast.social",
|
||||||
|
"music.pawoo.net",
|
||||||
|
"myflog.net",
|
||||||
|
"mypolis.zapto.org",
|
||||||
|
"myprayer.center",
|
||||||
|
"neckbeard.xyz",
|
||||||
|
"neenster.org",
|
||||||
|
"nerdynate.live",
|
||||||
|
"networked.space",
|
||||||
|
"netzsphaere.xyz",
|
||||||
|
"newjack.city",
|
||||||
|
"newsbots.eu",
|
||||||
|
"niedersachsen.social",
|
||||||
|
"ninja.social",
|
||||||
|
"nitro.horse",
|
||||||
|
"niu.moe",
|
||||||
|
"noagenda.social",
|
||||||
|
"noagendasocial.com",
|
||||||
|
"nojack.easydns.ca",
|
||||||
|
"nomoresha.me",
|
||||||
|
"nonexiste.net",
|
||||||
|
"norden.social",
|
||||||
|
"nordenmedia.com",
|
||||||
|
"not-develop.gab.com",
|
||||||
|
"not.phrack.fyi",
|
||||||
|
"npf.mlpol.net",
|
||||||
|
"nsfw.social",
|
||||||
|
"nudie.social",
|
||||||
|
"nyaa.social",
|
||||||
|
"octodon.social",
|
||||||
|
"odin.run",
|
||||||
|
"ohhi.icu",
|
||||||
|
"oneway.masto.host",
|
||||||
|
"opensim.fun",
|
||||||
|
"order.life",
|
||||||
|
"oslo.town",
|
||||||
|
"our.wtf",
|
||||||
|
"oursquad.rocks",
|
||||||
|
"outaouais.club",
|
||||||
|
"pachyder.me",
|
||||||
|
"pars.ee",
|
||||||
|
"patch.cx",
|
||||||
|
"pawoo.net",
|
||||||
|
"penguicon.social",
|
||||||
|
"pettingzoo.co",
|
||||||
|
"photodon.org",
|
||||||
|
"phreedom.tk",
|
||||||
|
"pieville.net",
|
||||||
|
"pifke.social",
|
||||||
|
"pigeon.town",
|
||||||
|
"pixfed.com",
|
||||||
|
"pl.765racing.com",
|
||||||
|
"pl.apelsin.la",
|
||||||
|
"pl.knotteye.cc",
|
||||||
|
"pl.kotobank.ch",
|
||||||
|
"pl.koyu.space",
|
||||||
|
"pl.kys.moe",
|
||||||
|
"pl.ohno.host",
|
||||||
|
"pl.smuglo.li",
|
||||||
|
"pl.wowana.me",
|
||||||
|
"pla.social",
|
||||||
|
"plag.masto.host",
|
||||||
|
"plankton.cz",
|
||||||
|
"playvicious.social",
|
||||||
|
"pleroma.1d4.us",
|
||||||
|
"pleroma.ch405.xyz",
|
||||||
|
"pleroma.cloud",
|
||||||
|
"pleroma.comfy.moe",
|
||||||
|
"pleroma.cucked.me",
|
||||||
|
"pleroma.fr",
|
||||||
|
"pleroma.kiwifarms.net",
|
||||||
|
"pleroma.miniwa.moe",
|
||||||
|
"pleroma.quaylessed.icu",
|
||||||
|
"pleroma.rareome.ga",
|
||||||
|
"pleroma.soykaf.com",
|
||||||
|
"pleroma.teromene.fr",
|
||||||
|
"pleroma.travnewmatic.com",
|
||||||
|
"pleroma.tuxcrafting.cf",
|
||||||
|
"pleroma.yorha.club",
|
||||||
|
"pltest.feminism.lgbt",
|
||||||
|
"plural.cafe",
|
||||||
|
"pokemon.men",
|
||||||
|
"polycule.club",
|
||||||
|
"pornfed.social",
|
||||||
|
"porntoot.com",
|
||||||
|
"post.mashek.net",
|
||||||
|
"pouet.jablon.fr",
|
||||||
|
"ppl.town",
|
||||||
|
"preteengirls.biz",
|
||||||
|
"pridelands.io",
|
||||||
|
"princess.cat",
|
||||||
|
"privacytools.io",
|
||||||
|
"producers.masto.host",
|
||||||
|
"programmer.technology",
|
||||||
|
"programmingsocks.com",
|
||||||
|
"project.social",
|
||||||
|
"protohype.net",
|
||||||
|
"prsm.space",
|
||||||
|
"psyopshop.com",
|
||||||
|
"pumba.space",
|
||||||
|
"pyyython.org",
|
||||||
|
"qoto.org",
|
||||||
|
"quasi.social",
|
||||||
|
"queer.farm",
|
||||||
|
"queersin.space",
|
||||||
|
"quey.org",
|
||||||
|
"quitter.pw",
|
||||||
|
"r3bl.social",
|
||||||
|
"rainbowdash.net",
|
||||||
|
"raki.social",
|
||||||
|
"rapefeminists.network",
|
||||||
|
"rebels.rest",
|
||||||
|
"redliberal.com",
|
||||||
|
"redroo.ml",
|
||||||
|
"redterrorcollective.net",
|
||||||
|
"relay-mypolis.zapto.org",
|
||||||
|
"relay.selfhosting.rocks",
|
||||||
|
"remotenode.host",
|
||||||
|
"rhubarb.land",
|
||||||
|
"rigcz.club",
|
||||||
|
"rightmastodon.com",
|
||||||
|
"rivals.space",
|
||||||
|
"rly.wtf",
|
||||||
|
"roar.killtheradio.net",
|
||||||
|
"ronin.world",
|
||||||
|
"roughseas.xyz",
|
||||||
|
"rrfarmbot.appspot.com",
|
||||||
|
"rubber.social",
|
||||||
|
"rva.party",
|
||||||
|
"s.b252.gq",
|
||||||
|
"s.huggingservers.uk",
|
||||||
|
"sackheads.social",
|
||||||
|
"sadposting.space",
|
||||||
|
"sammiesweetie.com",
|
||||||
|
"sangha.social",
|
||||||
|
"sapphos.be",
|
||||||
|
"scouts.devosmium.xyz",
|
||||||
|
"sealion.club",
|
||||||
|
"secure.kiwi",
|
||||||
|
"serious.ferret.business",
|
||||||
|
"shigusegubu.club",
|
||||||
|
"shinomiya.group",
|
||||||
|
"shiro.dog",
|
||||||
|
"shitasstits.life",
|
||||||
|
"shitposter.club",
|
||||||
|
"shpposter.club",
|
||||||
|
"simstodon.com",
|
||||||
|
"simulacron.de",
|
||||||
|
"sinblr.com",
|
||||||
|
"skippers-bin.com",
|
||||||
|
"skoops.social",
|
||||||
|
"slum.cloud",
|
||||||
|
"smuglo.li",
|
||||||
|
"sn.angry.im",
|
||||||
|
"snabelen.no",
|
||||||
|
"snaggletooth.life",
|
||||||
|
"snel.social",
|
||||||
|
"snuskete.net",
|
||||||
|
"soc.psychedelic.cat",
|
||||||
|
"social.1in9.net",
|
||||||
|
"social.adlerweb.info",
|
||||||
|
"social.allthefallen.ninja",
|
||||||
|
"social.au2pb.net",
|
||||||
|
"social.avareborn.de",
|
||||||
|
"social.azkware.net",
|
||||||
|
"social.b252.gq",
|
||||||
|
"social.backbord.net",
|
||||||
|
"social.bam.yt",
|
||||||
|
"social.bau-ha.us",
|
||||||
|
"social.beepboop.ga",
|
||||||
|
"social.cereza.de",
|
||||||
|
"social.cloudsumu.com",
|
||||||
|
"social.culturewar.us",
|
||||||
|
"social.cutienaut.club",
|
||||||
|
"social.digimortal.org",
|
||||||
|
"social.elqhost.net",
|
||||||
|
"social.end-the-stigma.com",
|
||||||
|
"social.enyutech.io",
|
||||||
|
"social.fab-l3.org",
|
||||||
|
"social.fedi.farm",
|
||||||
|
"social.fff-du.de",
|
||||||
|
"social.firc.de",
|
||||||
|
"social.florianjensen.com",
|
||||||
|
"social.foxfam.club",
|
||||||
|
"social.gattai.net",
|
||||||
|
"social.gnu.one",
|
||||||
|
"social.guizzyordi.info",
|
||||||
|
"social.headsca.la",
|
||||||
|
"social.heldscal.la",
|
||||||
|
"social.heroicwisdom.com",
|
||||||
|
"social.hidamari.blue",
|
||||||
|
"social.hodakov.me",
|
||||||
|
"social.homunyan.com",
|
||||||
|
"social.i2p.rocks",
|
||||||
|
"social.imirhil.fr",
|
||||||
|
"social.ingobernable.net",
|
||||||
|
"social.joshuacasey.net",
|
||||||
|
"social.lansky.name",
|
||||||
|
"social.librem.one",
|
||||||
|
"social.longden.me",
|
||||||
|
"social.louisoft01.moe",
|
||||||
|
"social.lucci.xyz",
|
||||||
|
"social.luschmar.ch",
|
||||||
|
"social.lyte.dev",
|
||||||
|
"social.mark.atwood.name",
|
||||||
|
"social.mhtube.de",
|
||||||
|
"social.minkenstein.de",
|
||||||
|
"social.mjb.im",
|
||||||
|
"social.mochi.academy",
|
||||||
|
"social.moseskaranja.com",
|
||||||
|
"social.mylinux.cz",
|
||||||
|
"social.net.ua",
|
||||||
|
"social.netdc.ca",
|
||||||
|
"social.niicow974.fr",
|
||||||
|
"social.nobodyhasthe.biz",
|
||||||
|
"social.nofftopia.com",
|
||||||
|
"social.noscraft.cf",
|
||||||
|
"social.offline.network",
|
||||||
|
"social.omniatv.com",
|
||||||
|
"social.panthermodern.net",
|
||||||
|
"social.privacytools.io",
|
||||||
|
"social.proyectolanuevatierra.com",
|
||||||
|
"social.puri.sm",
|
||||||
|
"social.putz.space",
|
||||||
|
"social.quodverum.com",
|
||||||
|
"social.radio.af",
|
||||||
|
"social.raptorengineering.io",
|
||||||
|
"social.rosnovsky.us",
|
||||||
|
"social.ryankes.eu",
|
||||||
|
"social.seattle.wa.us",
|
||||||
|
"social.secline.de",
|
||||||
|
"social.skankhunt42.pw",
|
||||||
|
"social.sunshinegardens.org",
|
||||||
|
"social.super-niche.club",
|
||||||
|
"social.taker.fr",
|
||||||
|
"social.targaryen.house",
|
||||||
|
"social.tchncs.de",
|
||||||
|
"social.thisisjoes.site",
|
||||||
|
"social.tomica.me",
|
||||||
|
"social.troll.academy",
|
||||||
|
"social.wiuwiu.de",
|
||||||
|
"social.zwei.net",
|
||||||
|
"sociala.me",
|
||||||
|
"socialnetwork.ninja",
|
||||||
|
"socl.win",
|
||||||
|
"socnet.supes.com",
|
||||||
|
"soderstrom.social",
|
||||||
|
"soteria.mastodon.host",
|
||||||
|
"souk.getloci.com",
|
||||||
|
"southflorida.social",
|
||||||
|
"spacetime.social",
|
||||||
|
"speakfree.world",
|
||||||
|
"spinster.dev",
|
||||||
|
"spinster.xyz",
|
||||||
|
"splat.soy",
|
||||||
|
"sprocket.group",
|
||||||
|
"starship.coffee",
|
||||||
|
"stereophonic.space",
|
||||||
|
"sunbeam.city",
|
||||||
|
"sunshinegardens.org",
|
||||||
|
"sweet.sugarcube.pw",
|
||||||
|
"swingset.social",
|
||||||
|
"switter.at",
|
||||||
|
"switter.co",
|
||||||
|
"syrup.zone",
|
||||||
|
"take.iteasy.club",
|
||||||
|
"takeoverthe.world",
|
||||||
|
"tamiltoot.online",
|
||||||
|
"tank.im",
|
||||||
|
"taosforum.com",
|
||||||
|
"tardis.world",
|
||||||
|
"tassaron.com",
|
||||||
|
"techflake.ch",
|
||||||
|
"the.hedgehoghunter.club",
|
||||||
|
"the.scream.zone",
|
||||||
|
"thechad.zone",
|
||||||
|
"thefreestate.xyz",
|
||||||
|
"thelballwiki.gq",
|
||||||
|
"thetower.xyz",
|
||||||
|
"thewired.xyz",
|
||||||
|
"thicc.horse",
|
||||||
|
"toot.brussels",
|
||||||
|
"toot.cat",
|
||||||
|
"toot.chemnitz.social",
|
||||||
|
"toot.devfs.xyz",
|
||||||
|
"toot.flairy.de",
|
||||||
|
"toot.forumanalogue.fr",
|
||||||
|
"toot.kiez.xyz",
|
||||||
|
"toot.love",
|
||||||
|
"toot.my",
|
||||||
|
"toot.nx-pod.de",
|
||||||
|
"toot.onl",
|
||||||
|
"toot.party",
|
||||||
|
"toot.site",
|
||||||
|
"toot.temsa.me",
|
||||||
|
"toot.wales",
|
||||||
|
"toot.world",
|
||||||
|
"toot.worldrovine.com",
|
||||||
|
"toot.ws",
|
||||||
|
"tooting.ch",
|
||||||
|
"toots.slothy.win",
|
||||||
|
"toucans.social",
|
||||||
|
"travel-friends.chat",
|
||||||
|
"tri.cash",
|
||||||
|
"triangletoot.party",
|
||||||
|
"triggerhub.ru",
|
||||||
|
"tron.buzz",
|
||||||
|
"twimblr.xyz",
|
||||||
|
"twitter.1d4.us",
|
||||||
|
"uelfte.club",
|
||||||
|
"underscore.world",
|
||||||
|
"unsafe.space",
|
||||||
|
"unsocial.pztrn.name",
|
||||||
|
"va11hal.la",
|
||||||
|
"vampire.estate",
|
||||||
|
"veenus.art",
|
||||||
|
"veenus.art",
|
||||||
|
"voice.masto.host",
|
||||||
|
"voluntaryism.club",
|
||||||
|
"vulpine.club",
|
||||||
|
"wagesofsinisdeath.com",
|
||||||
|
"waifu.social",
|
||||||
|
"waifuappreciation.club",
|
||||||
|
"warc.space",
|
||||||
|
"weeaboo.space",
|
||||||
|
"weedis.life",
|
||||||
|
"weirder.earth",
|
||||||
|
"welovela.in",
|
||||||
|
"wetfish.space",
|
||||||
|
"whitespashe.uk",
|
||||||
|
"witches.live",
|
||||||
|
"witches.town",
|
||||||
|
"wogan.im",
|
||||||
|
"woofer.alfter.us",
|
||||||
|
"wrestlr.social",
|
||||||
|
"wrongthink.net",
|
||||||
|
"www.misanthropebazaar.com",
|
||||||
|
"wxw.moe",
|
||||||
|
"x0r.stream",
|
||||||
|
"xa0.uk",
|
||||||
|
"xn--6r8h.tk",
|
||||||
|
"xoldie.com",
|
||||||
|
"yang.social",
|
||||||
|
"yarr.io",
|
||||||
|
"yeehaw.town",
|
||||||
|
"yeet.social",
|
||||||
|
"yiff.rocks",
|
||||||
|
"yorishiro.space",
|
||||||
|
"youkai.town",
|
||||||
|
"zerohack.xyz",
|
||||||
|
"zion-techs.com",
|
||||||
|
"zomglol.wtf",
|
||||||
|
}
|
110
storage/tootstore.go
Normal file
110
storage/tootstore.go
Normal file
@ -0,0 +1,110 @@
|
|||||||
|
package storage
|
||||||
|
|
||||||
|
import "errors"
|
||||||
|
import "io/ioutil"
|
||||||
|
import "path/filepath"
|
||||||
|
import "os"
|
||||||
|
import "strings"
|
||||||
|
import "sync"
|
||||||
|
|
||||||
|
import "github.com/sneak/feta/toot"
|
||||||
|
|
||||||
|
// TootStorageBackend is the interface to which storage backends must
|
||||||
|
// conform for storing toots
|
||||||
|
type TootStorageBackend interface {
|
||||||
|
TootExists(t toot.Toot) bool
|
||||||
|
StoreToot(t toot.Toot) error
|
||||||
|
StoreToots(tc []*toot.Toot) error
|
||||||
|
}
|
||||||
|
|
||||||
|
// TootFSStorage is a toot store backed by the local filesystem. Toots are
// written as individual files below the root directory.
type TootFSStorage struct {
	// root is the directory under which all toot files are placed.
	root string
}

// NewTootFSStorage constructs a *TootFSStorage that places its files under
// the given root directory.
func NewTootFSStorage(root string) *TootFSStorage {
	return &TootFSStorage{root: root}
}
|
||||||
|
|
||||||
|
// StoreToots writes a slice of pointers to toots to disk
|
||||||
|
func (ts *TootFSStorage) StoreToots(tc []*toot.Toot) error {
|
||||||
|
var returnErrors []string
|
||||||
|
for _, item := range tc {
|
||||||
|
err := ts.StoreToot(*item)
|
||||||
|
if err != nil {
|
||||||
|
returnErrors = append(returnErrors, err.Error())
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if len(returnErrors) == 0 {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
return errors.New(strings.Join(returnErrors, "; "))
|
||||||
|
}
|
||||||
|
|
||||||
|
// TootExists checks to see if we have already written a toot to disk or
|
||||||
|
// not. Note that the ingester de-dupes with a table in memory so that this
|
||||||
|
// will only really get used on app restarts
|
||||||
|
func (ts *TootFSStorage) TootExists(t toot.Toot) bool {
|
||||||
|
path := t.DiskStoragePath()
|
||||||
|
full := ts.root + "/" + path
|
||||||
|
_, err := os.Stat(full)
|
||||||
|
if os.IsNotExist(err) {
|
||||||
|
return false
|
||||||
|
}
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
|
||||||
|
// StoreToot writes a single toot to disk
|
||||||
|
func (ts *TootFSStorage) StoreToot(t toot.Toot) error {
|
||||||
|
path := t.DiskStoragePath()
|
||||||
|
full := ts.root + "/" + path
|
||||||
|
dir := filepath.Dir(full)
|
||||||
|
err := os.MkdirAll(dir, 0755)
|
||||||
|
if err != nil {
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
return ioutil.WriteFile(full, t.Original, 0644)
|
||||||
|
}
|
||||||
|
|
||||||
|
// TootMemoryStorage is a TootStorageBackend that just stores all ingested
|
||||||
|
// toots in ram forever until the computer fills up and catches fire and explodes
|
||||||
|
type TootMemoryStorage struct {
|
||||||
|
sync.Mutex
|
||||||
|
toots map[toot.Hash]toot.Toot
|
||||||
|
//maxSize uint // FIXME support eviction
|
||||||
|
}
|
||||||
|
|
||||||
|
// NewTootMemoryStorage returns a *TootMemoryStorage for storing toots in
|
||||||
|
// ram forever
|
||||||
|
func NewTootMemoryStorage() *TootMemoryStorage {
|
||||||
|
ts := new(TootMemoryStorage)
|
||||||
|
ts.toots = make(map[toot.Hash]toot.Toot)
|
||||||
|
return ts
|
||||||
|
}
|
||||||
|
|
||||||
|
// StoreToot saves a single toot into an in-memory hashtable
|
||||||
|
func (ts *TootMemoryStorage) StoreToot(t toot.Toot) {
|
||||||
|
if ts.TootExists(t) {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
ts.Lock()
|
||||||
|
defer ts.Unlock()
|
||||||
|
ts.toots[t.Hash] = t
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
// TootExists checks to see if we have a toot in memory already
|
||||||
|
func (ts *TootMemoryStorage) TootExists(t toot.Toot) bool {
|
||||||
|
ts.Lock()
|
||||||
|
defer ts.Unlock()
|
||||||
|
if _, ok := ts.toots[t.Hash]; ok { //this syntax is so gross
|
||||||
|
return true
|
||||||
|
}
|
||||||
|
return false
|
||||||
|
}
|
11
toot.go
11
toot.go
@ -1,11 +0,0 @@
|
|||||||
package feta
|
|
||||||
|
|
||||||
//import "github.com/rs/zerolog/log"
|
|
||||||
|
|
||||||
type toot struct {
|
|
||||||
}
|
|
||||||
|
|
||||||
func newToot(input []byte) *toot {
|
|
||||||
t := new(toot)
|
|
||||||
return t
|
|
||||||
}
|
|
117
toot/toot.go
Normal file
117
toot/toot.go
Normal file
@ -0,0 +1,117 @@
|
|||||||
|
package toot
|
||||||
|
|
||||||
|
import "fmt"
|
||||||
|
import "encoding/json"
|
||||||
|
import "errors"
|
||||||
|
import "strings"
|
||||||
|
import "github.com/sneak/feta/jsonapis"
|
||||||
|
|
||||||
|
//import "github.com/davecgh/go-spew/spew"
|
||||||
|
import "github.com/rs/zerolog/log"
|
||||||
|
|
||||||
|
//import "encoding/hex"
|
||||||
|
import mh "github.com/multiformats/go-multihash"
|
||||||
|
import mhopts "github.com/multiformats/go-multihash/opts"
|
||||||
|
|
||||||
|
// Hash holds the identity of a toot as a string: the base58 encoding of a
// multihash computed over the toot's identifying fields.
type Hash string
|
||||||
|
|
||||||
|
// Toot is an object we use internally for storing a discovered toot
|
||||||
|
type Toot struct {
|
||||||
|
Original []byte
|
||||||
|
Parsed *jsonapis.APISerializedToot
|
||||||
|
Hash Hash
|
||||||
|
FromHost string
|
||||||
|
}
|
||||||
|
|
||||||
|
// NewTootCollectionFromMastodonAPIResponse takes a byte array from a masto
|
||||||
|
// api response and provides you with a nice array of pointers to parsed
|
||||||
|
// toots
|
||||||
|
func NewTootCollectionFromMastodonAPIResponse(in []byte, hostname string) ([]*Toot, error) {
|
||||||
|
var rt []json.RawMessage
|
||||||
|
err := json.Unmarshal(in, &rt)
|
||||||
|
if err != nil {
|
||||||
|
return nil, errors.New("unable to parse api response")
|
||||||
|
}
|
||||||
|
|
||||||
|
var tc []*Toot
|
||||||
|
|
||||||
|
// iterate over rawtoots from api
|
||||||
|
for _, item := range rt {
|
||||||
|
parsed := new(jsonapis.APISerializedToot)
|
||||||
|
err := json.Unmarshal(item, parsed)
|
||||||
|
if err != nil {
|
||||||
|
log.Error().Msg("unable to parse toot, skipping")
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
t := new(Toot)
|
||||||
|
t.Parsed = parsed
|
||||||
|
o, err := item.MarshalJSON()
|
||||||
|
if err != nil {
|
||||||
|
panic(err)
|
||||||
|
}
|
||||||
|
t.Original = o
|
||||||
|
t.FromHost = hostname
|
||||||
|
t.calcHash()
|
||||||
|
tc = append(tc, t)
|
||||||
|
}
|
||||||
|
return tc, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (t *Toot) String() string {
|
||||||
|
return fmt.Sprintf("%#v", t)
|
||||||
|
}
|
||||||
|
|
||||||
|
func (t *Toot) multiHash(in []byte) string {
|
||||||
|
opts := new(mhopts.Options)
|
||||||
|
opts.Algorithm = "sha2-256"
|
||||||
|
opts.Encoding = "base58"
|
||||||
|
var found bool
|
||||||
|
opts.AlgorithmCode, found = mh.Names[opts.Algorithm]
|
||||||
|
if !found {
|
||||||
|
panic("oops")
|
||||||
|
}
|
||||||
|
opts.Length = mh.DefaultLengths[opts.AlgorithmCode]
|
||||||
|
r := strings.NewReader(string(in))
|
||||||
|
h, err := opts.Multihash(r)
|
||||||
|
if err != nil {
|
||||||
|
panic(err)
|
||||||
|
}
|
||||||
|
return h.B58String()
|
||||||
|
}
|
||||||
|
|
||||||
|
// DiskStoragePath is a helper function on a Toot that allows it to provide
|
||||||
|
// a storage path on disk. This should probably be moved into the FSStorage
|
||||||
|
// backend instead. FIXME
|
||||||
|
// It's here because it's a pure function that just formats its own toot attributes
|
||||||
|
// into a string.
|
||||||
|
func (t *Toot) DiskStoragePath() string {
|
||||||
|
// FIXME make this error if fields are missing
|
||||||
|
// '/YYYYMMDD/example.com/username/YYYY-MM-DD.HHMMSS.username@fromHost.multihash.json'
|
||||||
|
return fmt.Sprintf("%s/%s/%s/%s.%s@%s.%s.json",
|
||||||
|
t.Parsed.CreatedAt.Format("20060102"),
|
||||||
|
strings.ToLower(t.FromHost),
|
||||||
|
t.Parsed.Account.Acct,
|
||||||
|
t.Parsed.CreatedAt.Format("2006-01-02.150405"),
|
||||||
|
t.Parsed.Account.Acct,
|
||||||
|
strings.ToLower(t.FromHost),
|
||||||
|
t.Hash,
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
|
func (t *Toot) identityHashInput() string {
|
||||||
|
return fmt.Sprintf(
|
||||||
|
"%s.%s.%s.%s.%s",
|
||||||
|
t.Parsed.Account.URL,
|
||||||
|
t.Parsed.CreatedAt,
|
||||||
|
t.Parsed.ID,
|
||||||
|
t.Parsed.Content,
|
||||||
|
strings.ToLower(t.FromHost),
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
|
func (t *Toot) calcHash() {
|
||||||
|
hi := t.identityHashInput()
|
||||||
|
t.Hash = Hash(t.multiHash([]byte(hi)))
|
||||||
|
}
|
Loading…
Reference in New Issue
Block a user