feta/locator/locator.go
2020-03-27 18:17:52 -07:00

273 lines
6.4 KiB
Go

package locator
import (
"encoding/json"
"io/ioutil"
"net/http"
"sync"
"time"
"git.eeqj.de/sneak/feta/jsonapis"
"github.com/rs/zerolog/log"
"github.com/spf13/viper"
"golang.org/x/sync/semaphore"
)
// IndexAPITimeout is the overall HTTP client timeout applied when fetching
// a JSON instance list from one of the listing servers.
const IndexAPITimeout = 3 * time.Minute

var (
	// UserAgent is the User-Agent header value sent with index requests.
	UserAgent = "feta indexer bot, sneak@sneak.berlin for feedback"

	// IndexCheckInterval is how long to wait after a successful index
	// fetch before downloading that index again (default: 1h).
	IndexCheckInterval = time.Hour

	// IndexErrorInterval is how long to wait before retrying after an
	// index fetch or parse has failed (default: 10m).
	IndexErrorInterval = 10 * time.Minute
)

// URLs of the instance listings queried by the locator.
const (
	mastodonIndexURL = "https://instances.social/list.json?q%5Busers%5D=&q%5Bsearch%5D=&strict=false"
	pleromaIndexURL  = "https://distsn.org/cgi-bin/distsn-pleroma-instances-api.cgi"
)
// InstanceLocator holds the state of the locator goroutine, which fetches
// the public instance indexes and sends every discovered hostname to the
// manager over a channel.
type InstanceLocator struct {
	// mu is held while the fields below are written.
	mu sync.Mutex

	// reportInstanceVia is the channel discovered hostnames are sent on.
	reportInstanceVia chan string

	// mastodonIndexNextRefresh and pleromaIndexNextRefresh are the
	// earliest times at which each index should be fetched again.
	mastodonIndexNextRefresh *time.Time
	pleromaIndexNextRefresh  *time.Time
}
// New builds an InstanceLocator whose mastodon and pleroma refresh times
// are both set to the moment of construction.
func New() *InstanceLocator {
	now := time.Now()
	return &InstanceLocator{
		pleromaIndexNextRefresh:  &now,
		mastodonIndexNextRefresh: &now,
	}
}
// lock acquires the locator's mutex; pair every call with unlock.
func (il *InstanceLocator) lock() {
il.mu.Lock()
}
// unlock releases the locator's mutex previously acquired with lock.
func (il *InstanceLocator) unlock() {
il.mu.Unlock()
}
// SetInstanceNotificationChannel installs the channel on which the locator
// hands newly discovered instance hostnames back to the manager for
// query/addition. The assignment is made while holding the locator's mutex.
func (il *InstanceLocator) SetInstanceNotificationChannel(via chan string) {
	il.lock()
	il.reportInstanceVia = via
	il.unlock()
}
// addInstance sends hostname to the notification channel.
//
// The receiver (InstanceManager) is responsible for de-duping against its
// map; the locator just locates and sprays.
//
// The channel is read under the mutex because
// SetInstanceNotificationChannel writes it under the mutex; the send itself
// happens outside the lock so a slow receiver does not block other users of
// the mutex. A send on a nil channel blocks forever, so the channel must be
// set before any index is fetched.
func (il *InstanceLocator) addInstance(hostname string) {
	il.lock()
	via := il.reportInstanceVia
	il.unlock()

	via <- hostname
}
// mastodonIndexRefreshDue reports whether the scheduled mastodon refresh
// time lies strictly in the past.
//
// The field is read under the mutex because locateMastodon replaces it
// under the mutex from another goroutine.
func (il *InstanceLocator) mastodonIndexRefreshDue() bool {
	il.lock()
	defer il.unlock()
	return il.mastodonIndexNextRefresh.Before(time.Now())
}
// durationUntilNextMastodonIndexRefresh returns how long remains until the
// scheduled mastodon refresh; the result is negative once that time has
// passed.
//
// The scheduled time is copied under the mutex because locateMastodon
// replaces it under the mutex from another goroutine.
func (il *InstanceLocator) durationUntilNextMastodonIndexRefresh() time.Duration {
	il.lock()
	next := *il.mastodonIndexNextRefresh
	il.unlock()

	return time.Until(next)
}
// pleromaIndexRefreshDue reports whether the scheduled pleroma refresh
// time lies strictly in the past.
//
// The field is read under the mutex because locatePleroma replaces it
// under the mutex from another goroutine.
func (il *InstanceLocator) pleromaIndexRefreshDue() bool {
	il.lock()
	defer il.unlock()
	return il.pleromaIndexNextRefresh.Before(time.Now())
}
// durationUntilNextPleromaIndexRefresh returns how long remains until the
// scheduled pleroma refresh; the result is negative once that time has
// passed.
//
// The scheduled time is copied under the mutex because locatePleroma
// replaces it under the mutex from another goroutine.
func (il *InstanceLocator) durationUntilNextPleromaIndexRefresh() time.Duration {
	il.lock()
	next := *il.pleromaIndexNextRefresh
	il.unlock()

	return time.Until(next)
}
// Locate is the main entrypoint for the InstanceLocator, designed to be
// called once in its own goroutine. It never returns.
//
// Once per second it starts one goroutine per index. Each goroutine tries
// to take that index's semaphore and, if the index is due, fetches it. The
// semaphore is acquired before the due check so that a goroutine cannot
// observe "due", then acquire right after the previous fetch has
// rescheduled and released, and fetch the same index a second time.
//
// Every "LogReportInterval" (read from viper) the time remaining until each
// index's next refresh is logged at debug level.
func (il *InstanceLocator) Locate() {
	log.Info().Msg("InstanceLocator starting")

	lastReport := time.Now()
	// weight-1 semaphores: at most one fetch in flight per index
	pleromaSemaphore := semaphore.NewWeighted(1)
	mastodonSemaphore := semaphore.NewWeighted(1)

	for {
		log.Info().Msg("InstanceLocator tick")

		go func() {
			if !pleromaSemaphore.TryAcquire(1) {
				// a pleroma fetch is already running
				return
			}
			defer pleromaSemaphore.Release(1)
			if il.pleromaIndexRefreshDue() {
				il.locatePleroma()
			}
		}()

		go func() {
			if !mastodonSemaphore.TryAcquire(1) {
				// a mastodon fetch is already running
				return
			}
			defer mastodonSemaphore.Release(1)
			if il.mastodonIndexRefreshDue() {
				il.locateMastodon()
			}
		}()

		time.Sleep(1 * time.Second)

		if time.Now().After(lastReport.Add(viper.GetDuration("LogReportInterval"))) {
			lastReport = time.Now()
			log.Debug().
				Str("nextMastodonIndexRefresh", il.durationUntilNextMastodonIndexRefresh().String()).
				Msg("refresh countdown")
			log.Debug().
				Str("nextPleromaIndexRefresh", il.durationUntilNextPleromaIndexRefresh().String()).
				Msg("refresh countdown")
		}
	}
}
// locateMastodon downloads the mastodon instance index, schedules the next
// refresh, and reports every distinct instance name through addInstance.
//
// A transport error, a non-2xx HTTP status, a body read error or a JSON
// parse error schedules a retry after IndexErrorInterval. A successful
// fetch and parse schedules the next refresh after IndexCheckInterval; this
// is recorded before the hostnames are reported.
func (il *InstanceLocator) locateMastodon() {
	// scheduleNext records when the mastodon index should next be fetched.
	scheduleNext := func(after time.Duration) {
		t := time.Now().Add(after)
		il.lock()
		il.mastodonIndexNextRefresh = &t
		il.unlock()
	}

	c := &http.Client{Timeout: IndexAPITimeout}

	req, err := http.NewRequest("GET", mastodonIndexURL, nil)
	if err != nil {
		// method and URL are constants, so a failure here is a programming error
		panic(err)
	}
	req.Header.Set("User-Agent", UserAgent)

	resp, err := c.Do(req)
	if err != nil {
		log.Error().Msgf("unable to fetch mastodon instance list: %s", err)
		scheduleNext(IndexErrorInterval)
		return
	}
	defer resp.Body.Close()

	// an error page must not be treated as an instance list
	if resp.StatusCode < 200 || resp.StatusCode > 299 {
		log.Error().Msgf("unable to fetch mastodon instance list: unexpected http status %s", resp.Status)
		scheduleNext(IndexErrorInterval)
		return
	}

	log.Info().
		Msg("fetched mastodon index")

	body, err := ioutil.ReadAll(resp.Body)
	if err != nil {
		log.Error().Msgf("unable to fetch mastodon instance list: %s", err)
		scheduleNext(IndexErrorInterval)
		return
	}

	// Decode into the struct pointer itself. Passing a pointer to the
	// pointer lets a JSON `null` body reset it to nil, which would panic on
	// the field access below.
	mi := new(jsonapis.MastodonIndexResponse)
	if err := json.Unmarshal(body, mi); err != nil {
		log.Error().Msgf("unable to parse mastodon instance list: %s", err)
		scheduleNext(IndexErrorInterval)
		return
	}

	scheduleNext(IndexCheckInterval)

	// collapse duplicate names so each host is reported once per fetch
	hosts := make(map[string]bool, len(mi.Instances))
	for _, instance := range mi.Instances {
		hosts[instance.Name] = true
	}

	log.Info().
		Int("count", len(mi.Instances)).
		Msg("received hosts from mastodon index")

	for k := range hosts {
		il.addInstance(k)
	}
}
// locatePleroma downloads the pleroma instance index, schedules the next
// refresh, and reports every distinct instance domain through addInstance.
//
// A transport error, a non-2xx HTTP status, a body read error or a JSON
// parse error schedules a retry after IndexErrorInterval. A successful
// fetch and parse schedules the next refresh after IndexCheckInterval; this
// is recorded before the hostnames are reported.
func (il *InstanceLocator) locatePleroma() {
	// scheduleNext records when the pleroma index should next be fetched.
	scheduleNext := func(after time.Duration) {
		t := time.Now().Add(after)
		il.lock()
		il.pleromaIndexNextRefresh = &t
		il.unlock()
	}

	c := &http.Client{Timeout: IndexAPITimeout}

	req, err := http.NewRequest("GET", pleromaIndexURL, nil)
	if err != nil {
		// method and URL are constants, so a failure here is a programming error
		panic(err)
	}
	req.Header.Set("User-Agent", UserAgent)

	resp, err := c.Do(req)
	if err != nil {
		log.Error().Msgf("unable to fetch pleroma instance list: %s", err)
		scheduleNext(IndexErrorInterval)
		return
	}
	defer resp.Body.Close()

	// an error page must not be treated as an instance list
	if resp.StatusCode < 200 || resp.StatusCode > 299 {
		log.Error().Msgf("unable to fetch pleroma instance list: unexpected http status %s", resp.Status)
		scheduleNext(IndexErrorInterval)
		return
	}

	body, err := ioutil.ReadAll(resp.Body)
	if err != nil {
		log.Error().Msgf("unable to fetch pleroma instance list: %s", err)
		scheduleNext(IndexErrorInterval)
		return
	}

	// Decode into a value and pass a single pointer. Passing a pointer to a
	// pointer lets a JSON `null` body reset it to nil, which would panic
	// when dereferenced for the loop below.
	var pi jsonapis.PleromaIndexResponse
	if err := json.Unmarshal(body, &pi); err != nil {
		log.Warn().Msgf("unable to parse pleroma instance list: %s", err)
		scheduleNext(IndexErrorInterval)
		return
	}

	// fetch and parse worked
	scheduleNext(IndexCheckInterval)

	// collapse duplicate domains so each host is reported once per fetch
	hosts := make(map[string]bool, len(pi))
	for _, instance := range pi {
		hosts[instance.Domain] = true
	}

	log.Info().
		Int("count", len(pi)).
		Msg("received hosts from pleroma index")

	for k := range hosts {
		il.addInstance(k)
	}
}