feta/locator/locator.go

273 lines
6.4 KiB
Go
Raw Normal View History

2019-12-19 14:24:26 +00:00
package locator
2019-10-24 12:14:36 +00:00
2020-03-27 23:02:36 +00:00
import (
"encoding/json"
"io/ioutil"
"net/http"
"sync"
"time"
"git.eeqj.de/sneak/feta/jsonapis"
"github.com/rs/zerolog/log"
"github.com/spf13/viper"
"golang.org/x/sync/semaphore"
)
// IndexAPITimeout bounds how long a single request to one of the listing
// servers may take when fetching its JSON instance list (three minutes).
const IndexAPITimeout = 3 * time.Minute

// UserAgent is sent as the User-Agent header on every request made to the
// listing servers.
var UserAgent = "feta indexer bot, sneak@sneak.berlin for feedback"
2019-11-03 18:00:01 +00:00
var (
	// IndexCheckInterval is how long to wait after a successful download
	// before fetching a fresh list from the index APIs run by
	// mastodon/pleroma (default: 1h).
	IndexCheckInterval = time.Hour

	// IndexErrorInterval is how long to wait before retrying after an
	// index fetch or parse has failed (default: 10m).
	IndexErrorInterval = 10 * time.Minute
)
// URLs of the public listing services queried for instance hostnames.
const (
	// mastodonIndexURL is fetched by locateMastodon.
	mastodonIndexURL = "https://instances.social/list.json?q%5Busers%5D=&q%5Bsearch%5D=&strict=false"

	// pleromaIndexURL is fetched by locatePleroma.
	pleromaIndexURL = "https://distsn.org/cgi-bin/distsn-pleroma-instances-api.cgi"
)
2019-10-24 12:14:36 +00:00
// InstanceLocator is the main data structure for the locator goroutine,
// which sprays discovered instance hostnames into the manager.
type InstanceLocator struct {
	// mu is taken (via lock/unlock) whenever the fields below are
	// reassigned.
	mu sync.Mutex

	// reportInstanceVia is the channel every discovered hostname is sent
	// on; it is installed by SetInstanceNotificationChannel.
	reportInstanceVia chan string

	// mastodonIndexNextRefresh and pleromaIndexNextRefresh hold the
	// earliest time at which the respective index should be fetched again.
	mastodonIndexNextRefresh *time.Time
	pleromaIndexNextRefresh  *time.Time
}
2019-12-19 14:24:26 +00:00
// New returns an InstanceLocator for use by the process.
func New() *InstanceLocator {
il := new(InstanceLocator)
n := time.Now()
il.pleromaIndexNextRefresh = &n
il.mastodonIndexNextRefresh = &n
return il
2019-10-24 12:14:36 +00:00
}
2019-10-24 12:41:05 +00:00
func (il *InstanceLocator) lock() {
2019-12-14 15:49:35 +00:00
il.mu.Lock()
2019-11-03 13:17:00 +00:00
}
func (il *InstanceLocator) unlock() {
2019-12-14 15:49:35 +00:00
il.mu.Unlock()
}
2019-12-19 14:24:26 +00:00
// SetInstanceNotificationChannel is the way the instanceLocator returns
// newly discovered instances back to the manager for query/addition
2020-03-28 01:17:52 +00:00
func (il *InstanceLocator) SetInstanceNotificationChannel(via chan string) {
il.lock()
defer il.unlock()
il.reportInstanceVia = via
}
2020-03-28 01:17:52 +00:00
func (il *InstanceLocator) addInstance(hostname string) {
2019-11-03 18:00:01 +00:00
// receiver (InstanceManager) is responsible for de-duping against its
// map, we just locate and spray, it manages
il.reportInstanceVia <- hostname
}
func (il *InstanceLocator) mastodonIndexRefreshDue() bool {
return il.mastodonIndexNextRefresh.Before(time.Now())
2019-11-06 07:03:42 +00:00
}
func (il *InstanceLocator) durationUntilNextMastodonIndexRefresh() time.Duration {
return (time.Duration(-1) * time.Now().Sub(*il.mastodonIndexNextRefresh))
2019-11-06 07:03:42 +00:00
}
func (il *InstanceLocator) pleromaIndexRefreshDue() bool {
return il.pleromaIndexNextRefresh.Before(time.Now())
2019-11-06 07:03:42 +00:00
}
func (il *InstanceLocator) durationUntilNextPleromaIndexRefresh() time.Duration {
return (time.Duration(-1) * time.Now().Sub(*il.pleromaIndexNextRefresh))
2019-11-06 07:03:42 +00:00
}
// Locate is the main entrypoint for the instancelocator, designed to be
// called once in its own gorutine.
func (il *InstanceLocator) Locate() {
2019-11-03 18:00:01 +00:00
log.Info().Msg("InstanceLocator starting")
2019-11-03 13:17:00 +00:00
x := time.Now()
2019-11-06 00:46:52 +00:00
var pleromaSemaphore = semaphore.NewWeighted(1)
var mastodonSemaphore = semaphore.NewWeighted(1)
for {
2019-11-06 00:46:52 +00:00
2019-11-03 18:00:01 +00:00
log.Info().Msg("InstanceLocator tick")
2019-11-06 00:46:52 +00:00
2019-11-05 23:32:09 +00:00
go func() {
if il.pleromaIndexRefreshDue() {
2019-11-06 00:46:52 +00:00
if !pleromaSemaphore.TryAcquire(1) {
return
}
il.locatePleroma()
2019-11-06 00:46:52 +00:00
pleromaSemaphore.Release(1)
2019-11-05 23:32:09 +00:00
}
}()
2019-11-06 00:46:52 +00:00
2019-11-05 23:32:09 +00:00
go func() {
if il.mastodonIndexRefreshDue() {
2019-11-06 00:46:52 +00:00
if !mastodonSemaphore.TryAcquire(1) {
return
}
il.locateMastodon()
2019-11-06 00:46:52 +00:00
mastodonSemaphore.Release(1)
2019-11-05 23:32:09 +00:00
}
}()
2019-11-06 00:46:52 +00:00
time.Sleep(1 * time.Second)
2019-11-06 00:46:52 +00:00
2020-03-27 23:02:36 +00:00
if time.Now().After(x.Add(viper.GetDuration("LogReportInterval"))) {
2019-11-03 13:17:00 +00:00
x = time.Now()
log.Debug().
Str("nextMastodonIndexRefresh", il.durationUntilNextMastodonIndexRefresh().String()).
2019-11-06 07:03:42 +00:00
Msg("refresh countdown")
log.Debug().
Str("nextPleromaIndexRefresh", il.durationUntilNextPleromaIndexRefresh().String()).
2019-11-06 07:03:42 +00:00
Msg("refresh countdown")
}
}
}
func (il *InstanceLocator) locateMastodon() {
var c = &http.Client{
Timeout: IndexAPITimeout,
2019-10-24 12:41:05 +00:00
}
req, err := http.NewRequest("GET", mastodonIndexURL, nil)
2019-11-03 18:00:01 +00:00
if err != nil {
panic(err)
}
req.Header.Set("User-Agent", UserAgent)
2019-11-03 18:00:01 +00:00
resp, err := c.Do(req)
if err != nil {
log.Error().Msgf("unable to fetch mastodon instance list: %s", err)
t := time.Now().Add(IndexErrorInterval)
il.lock()
il.mastodonIndexNextRefresh = &t
il.unlock()
return
}
2019-12-14 15:49:35 +00:00
log.Info().
Msg("fetched mastodon index")
2019-10-24 12:41:05 +00:00
defer resp.Body.Close()
body, err := ioutil.ReadAll(resp.Body)
2019-10-24 12:41:05 +00:00
if err != nil {
log.Error().Msgf("unable to fetch mastodon instance list: %s", err)
t := time.Now().Add(IndexErrorInterval)
il.lock()
il.mastodonIndexNextRefresh = &t
il.unlock()
return
}
t := time.Now().Add(IndexCheckInterval)
il.lock()
il.mastodonIndexNextRefresh = &t
il.unlock()
2019-11-06 00:46:52 +00:00
mi := new(jsonapis.MastodonIndexResponse)
err = json.Unmarshal(body, &mi)
if err != nil {
log.Error().Msgf("unable to parse mastodon instance list: %s", err)
t := time.Now().Add(IndexErrorInterval)
il.lock()
il.mastodonIndexNextRefresh = &t
il.unlock()
return
}
2019-11-06 00:46:52 +00:00
hosts := make(map[string]bool)
x := 0
for _, instance := range mi.Instances {
2019-11-06 00:46:52 +00:00
hosts[instance.Name] = true
x++
}
log.Info().
Int("count", x).
Msg("received hosts from mastodon index")
for k := range hosts {
2020-03-28 01:17:52 +00:00
il.addInstance(k)
}
}
func (il *InstanceLocator) locatePleroma() {
var c = &http.Client{
Timeout: IndexAPITimeout,
}
2019-11-03 18:00:01 +00:00
req, err := http.NewRequest("GET", pleromaIndexURL, nil)
2019-11-03 18:00:01 +00:00
if err != nil {
panic(err)
}
req.Header.Set("User-Agent", UserAgent)
2019-11-03 18:00:01 +00:00
resp, err := c.Do(req)
if err != nil {
log.Error().Msgf("unable to fetch pleroma instance list: %s", err)
t := time.Now().Add(IndexErrorInterval)
il.lock()
il.pleromaIndexNextRefresh = &t
il.unlock()
return
}
defer resp.Body.Close()
body, err := ioutil.ReadAll(resp.Body)
if err != nil {
log.Error().Msgf("unable to fetch pleroma instance list: %s", err)
t := time.Now().Add(IndexErrorInterval)
il.lock()
il.pleromaIndexNextRefresh = &t
il.unlock()
return
}
// fetch worked
2019-11-06 00:46:52 +00:00
t := time.Now().Add(IndexCheckInterval)
il.lock()
il.pleromaIndexNextRefresh = &t
il.unlock()
2019-11-06 00:46:52 +00:00
pi := new(jsonapis.PleromaIndexResponse)
err = json.Unmarshal(body, &pi)
if err != nil {
log.Warn().Msgf("unable to parse pleroma instance list: %s", err)
t := time.Now().Add(IndexErrorInterval)
il.lock()
il.pleromaIndexNextRefresh = &t
il.unlock()
return
}
2019-11-06 00:46:52 +00:00
hosts := make(map[string]bool)
x := 0
for _, instance := range *pi {
2019-11-06 00:46:52 +00:00
hosts[instance.Domain] = true
x++
}
2019-11-06 00:46:52 +00:00
log.Info().
Int("count", x).
Msg("received hosts from pleroma index")
for k := range hosts {
2020-03-28 01:17:52 +00:00
il.addInstance(k)
2019-11-06 00:46:52 +00:00
}
2019-10-24 12:41:05 +00:00
}