Getting there. Need to use a channel buffer to avoid fetching too many

hostname resolutions at once, I think.
This commit is contained in:
Jeffrey Paul 2019-10-24 08:11:07 -07:00
parent 8b926e8409
commit 8a029686d8
3 changed files with 349 additions and 33 deletions

View File

@ -32,6 +32,11 @@ func (a *Archiver) Run() int {
a.startup = &t
a.locator = NewInstanceLocator()
log.Info().Msg(fmt.Sprintf("in %#v.Run()", a))
a.locator.Locate()
go a.locator.Locate()
for {
time.Sleep(1 * time.Second)
}
return 0
}

View File

@ -1,34 +1,51 @@
package main
import (
"encoding/json"
"fmt"
"github.com/rs/zerolog/log"
"net/http"
"strings"
"time"
//"github.com/bitly/go-simplejson"
)
// Constants used while probing a host's nodeinfo endpoints.
const (
	// NodeInfoSchemaVersionTwoName is the "rel" value that identifies a
	// schema 2.0 link in a /.well-known/nodeinfo response.
	NodeInfoSchemaVersionTwoName = "http://nodeinfo.diaspora.software/ns/schema/2.0"

	// NODE_TIMEOUT is the timeout given to HTTP clients that talk to a
	// single instance.
	NODE_TIMEOUT = 30 * time.Second
)
// ServerImplementation enumerates the server software an Instance can be
// identified as running.
type ServerImplementation int

// Known ServerImplementation values. ServerUnknown is the zero value.
// NOTE(review): ServerPleorama looks like a misspelling of ServerPleroma
// left over from an earlier revision — confirm whether it can be removed.
const (
	ServerUnknown = ServerImplementation(iota)
	ServerMastodon
	ServerPleorama
	ServerPleroma
)
// Instance holds what the archiver knows about a single remote host.
//
// Every field name is declared exactly once; Go rejects a struct that
// repeats a field name.
type Instance struct {
	// hostName is the host the instance is reached at.
	hostName string
	// impl is the detected server software; ServerUnknown until identified.
	impl ServerImplementation
	// errorCount is incremented by registerError.
	errorCount uint
	lastFetch  *time.Time
	highestId  int
	skip       bool
	// lastFailure is set by registerError.
	lastFailure *time.Time
	// lastSuccess is set by registerSuccess.
	lastSuccess *time.Time
	// nodeInfoUrl is the schema 2.0 nodeinfo URL found by fetchNodeInfoURL.
	nodeInfoUrl string
	// serverVersion is the software version reported by the nodeinfo document.
	serverVersion string
	// identified is true once the server software has been recognised.
	identified bool
	// up is true once the host has answered an HTTP request.
	up         bool
	shouldSkip bool
}
// NewInstance builds an Instance for hostname and starts detecting its
// server software in a separate goroutine before returning.
//
// lastSuccess and lastFailure both start out pointing at one shared
// timestamp one hundred 365-day years in the past.
func NewInstance(hostname string) *Instance {
	longAgo := time.Now().Add(-100 * 365 * 24 * time.Hour)
	inst := &Instance{
		hostName:    hostname,
		lastSuccess: &longAgo,
		lastFailure: &longAgo,
		// identified and up keep their zero value, false.
	}
	// Detection runs concurrently; the caller gets the Instance right away.
	go inst.detectNodeType()
	return inst
}
@ -36,33 +53,174 @@ func (i *Instance) detectNodeType() {
if i.impl > ServerUnknown {
return
}
i.fetchNodeInfo()
}
// NodeInfoWellKnownResponse mirrors the JSON document that fetchNodeInfoURL
// decodes from https://<host>/.well-known/nodeinfo.
type NodeInfoWellKnownResponse struct {
// Links lists the nodeinfo documents advertised by the host.
Links []struct {
// Rel is compared against NodeInfoSchemaVersionTwoName.
Rel string `json:"rel"`
// Href is stored as the instance's nodeInfoUrl when Rel matches.
Href string `json:"href"`
} `json:"links"`
}
// NodeInfoVersionTwoSchema mirrors the JSON document that fetchNodeInfo
// decodes from an instance's nodeinfo URL.
type NodeInfoVersionTwoSchema struct {
Version string `json:"version"`
// Software identifies the server; fetchNodeInfo lower-cases Name to pick
// the implementation and stores Version as the instance's serverVersion.
Software struct {
Name string `json:"name"`
Version string `json:"version"`
} `json:"software"`
Protocols []string `json:"protocols"`
// Usage is decoded but not read by the code visible here.
Usage struct {
Users struct {
Total int `json:"total"`
ActiveMonth int `json:"activeMonth"`
ActiveHalfyear int `json:"activeHalfyear"`
} `json:"users"`
LocalPosts int `json:"localPosts"`
} `json:"usage"`
OpenRegistrations bool `json:"openRegistrations"`
}
// registerError counts one more failure for the instance and stamps
// lastFailure with the current time.
func (i *Instance) registerError() {
	i.errorCount++
	now := time.Now()
	i.lastFailure = &now
}
// registerSuccess stamps lastSuccess with the current time.
func (i *Instance) registerSuccess() {
	now := time.Now()
	i.lastSuccess = &now
}
// fetchNodeInfoURL asks the instance's /.well-known/nodeinfo endpoint where
// its schema 2.0 nodeinfo document lives and stores that URL in
// i.nodeInfoUrl.
//
// Any HTTP response marks the instance as up. A transport failure, an
// unparseable body, or a document without a schema 2.0 link is recorded via
// registerError; a usable link is recorded via registerSuccess.
func (i *Instance) fetchNodeInfoURL() {
	url := fmt.Sprintf("https://%s/.well-known/nodeinfo", i.hostName)
	c := &http.Client{Timeout: NODE_TIMEOUT}

	log.Debug().
		Str("url", url).
		Str("hostname", i.hostName).
		Msg("fetching nodeinfo reference URL")

	resp, err := c.Get(url)
	if err != nil {
		log.Error().
			Err(err).
			Str("hostname", i.hostName).
			Msg("unable to fetch nodeinfo, node is down?")
		i.registerError()
		return
	}
	// Close the body on every path so the underlying connection is released.
	defer resp.Body.Close()

	i.up = true // node is alive and responding to us

	var nir NodeInfoWellKnownResponse
	if err := json.NewDecoder(resp.Body).Decode(&nir); err != nil {
		log.Error().
			Err(err).
			Str("hostname", i.hostName).
			Msg("unable to parse nodeinfo")
		i.registerError()
		return
	}

	for _, item := range nir.Links {
		if item.Rel != NodeInfoSchemaVersionTwoName {
			continue
		}
		log.Info().
			Str("hostname", i.hostName).
			Str("nodeinfourl", item.Href).
			Msg("success fetching url for nodeinfo")
		i.nodeInfoUrl = item.Href
		i.registerSuccess()
		return
	}

	// The document parsed but did not advertise a schema 2.0 link.
	log.Error().
		Str("hostname", i.hostName).
		Msg("incomplete nodeinfo")
	i.registerError()
}
func (i *Instance) fetchNodeInfo() {
url := fmt.Sprintf("https://%s/.well-known/nodeinfo")
var c = &http.Client{
Timeout: time.Second * 10,
}
response, err := c.Get(url)
if err != nil {
log.Debug().Msg("unable to fetch nodeinfo, node is down?")
i.skip = true
i.fetchNodeInfoURL()
if i.nodeInfoUrl == "" {
log.Error().
Str("hostname", i.hostName).
Msg("unable to fetch nodeinfo as nodeinfo URL cannot be determined")
return
}
log.Debug().Msgf("%#v", response)
var c = &http.Client{
Timeout: NODE_TIMEOUT,
}
//FIXME make sure the nodeinfourl is on the same domain as the instance
//hostname
resp, err := c.Get(i.nodeInfoUrl)
if err != nil {
log.Error().
Str("hostname", i.hostName).
Msgf("unable to fetch nodeinfo data: %s", err)
i.registerError()
} else {
ni := new(NodeInfoVersionTwoSchema)
err = json.NewDecoder(resp.Body).Decode(&ni)
if err != nil {
log.Error().
Str("hostname", i.hostName).
Msgf("unable to parse nodeinfo: %s", err)
i.registerError()
return
}
log.Info().
Str("serverVersion", ni.Software.Version).
Str("software", ni.Software.Name).
Str("hostName", i.hostName).
Str("nodeInfoUrl", i.nodeInfoUrl).
Msg("received nodeinfo from instance")
i.serverVersion = ni.Software.Version
ni.Software.Name = strings.ToLower(ni.Software.Name)
if ni.Software.Name == "pleroma" {
log.Info().
Str("hostname", i.hostName).
Str("software", ni.Software.Name).
Msg("detected server software")
i.registerSuccess()
i.identified = true
i.impl = ServerPleroma
} else if ni.Software.Name == "mastodon" {
log.Info().
Str("hostname", i.hostName).
Str("software", ni.Software.Name).
Msg("detected server software")
i.registerSuccess()
i.identified = true
i.impl = ServerMastodon
} else {
log.Error().
Str("hostname", i.hostName).
Str("software", ni.Software.Name).
Msg("unknown implementation on server")
i.registerError()
}
return
}
}
// fetchRecentToots dispatches to the fetcher that matches the instance's
// detected server implementation and returns that fetcher's result.
//
// An instance whose implementation has not been identified yields an error
// instead of panicking, so one unidentified host cannot take the whole
// process down.
func (i *Instance) fetchRecentToots() ([]byte, error) {
	switch i.impl {
	case ServerMastodon:
		return i.fetchRecentTootsJsonFromMastodon()
	case ServerPleroma, ServerPleorama:
		// ServerPleorama is the older, misspelled constant; both values are
		// served by the Pleroma fetcher.
		return i.fetchRecentTootsJsonFromPleroma()
	default:
		return nil, fmt.Errorf("cannot fetch toots from %q: unsupported server implementation %d", i.hostName, i.impl)
	}
}
func (i *Instance) fetchRecentTootsJsonFromPleorama() ([]byte, error) {
func (i *Instance) fetchRecentTootsJsonFromPleroma() ([]byte, error) {
//url := fmt.Sprintf("https://%s/api/statuses/public_and_external_timeline.json?count=100", i.hostName)
return nil, nil
}

View File

@ -1,36 +1,189 @@
package main
import (
"encoding/json"
"github.com/rs/zerolog/log"
"io/ioutil"
"net/http"
"sync"
"time"
)
// mastodonIndexUrl is the instances.social listing fetched by locateMastodon.
const mastodonIndexUrl = "https://instances.social/list.json?q%5Busers%5D=&q%5Bsearch%5D=&strict=false"

// foreverago is a timestamp one hundred 365-day years before package
// initialisation. NewInstanceLocator uses it as the initial value of both
// index refresh times.
var foreverago = time.Now().Add(-100 * 365 * 24 * time.Hour)
// MastodonIndexResponse mirrors the JSON document served at mastodonIndexUrl.
// locateMastodon decodes into it and registers each entry's Name as an
// instance hostname.
//
// Per the original author's note, the struct layout was produced with
// https://mholt.github.io/json-to-go/ rather than written by hand.
type MastodonIndexResponse struct {
Instances []struct {
ID string `json:"_id"`
AddedAt time.Time `json:"addedAt"`
Name string `json:"name"`
Downchecks int `json:"downchecks"`
Upchecks int `json:"upchecks"`
HTTPSRank interface{} `json:"https_rank"`
HTTPSScore int `json:"https_score"`
ObsRank string `json:"obs_rank"`
ObsScore int `json:"obs_score"`
Ipv6 bool `json:"ipv6"`
Up bool `json:"up"`
Users int `json:"users"`
Statuses string `json:"statuses"`
Connections int `json:"connections"`
OpenRegistrations bool `json:"openRegistrations"`
Uptime float64 `json:"uptime"`
Version string `json:"version"`
VersionScore int `json:"version_score"`
UpdatedAt time.Time `json:"updatedAt"`
CheckedAt time.Time `json:"checkedAt"`
Dead bool `json:"dead"`
ObsDate time.Time `json:"obs_date"`
Second60 int `json:"second60"`
Second int `json:"second"`
ActiveUserCount interface{} `json:"active_user_count,omitempty"`
FirstUserCreatedAt interface{} `json:"first_user_created_at,omitempty"`
Thumbnail string `json:"thumbnail"`
ApUpdatedAt time.Time `json:"apUpdatedAt"`
Second5 int `json:"second5"`
RawVersion string `json:"raw_version"`
ActivityPrevw struct {
Statuses int `json:"statuses"`
Logins int `json:"logins"`
Registrations int `json:"registrations"`
} `json:"activity_prevw,omitempty"`
Mastodon bool `json:"mastodon"`
UptimeStr string `json:"uptime_str"`
Score int `json:"score"`
ScoreStr string `json:"score_str"`
} `json:"instances"`
}
// pleromaIndexUrl is the instance listing fetched by locatePleroma.
const pleromaIndexUrl = "https://distsn.org/cgi-bin/distsn-pleroma-instances-api.cgi"
// PleromaIndexResponse mirrors the JSON array served at pleromaIndexUrl.
// locatePleroma decodes into it and registers each entry's Domain as an
// instance hostname; the remaining fields are decoded but not read there.
type PleromaIndexResponse []struct {
Domain string `json:"domain"`
Title string `json:"title"`
Thumbnail string `json:"thumbnail"`
Registration bool `json:"registration"`
Chat bool `json:"chat"`
Gopher bool `json:"gopher"`
WhoToFollow bool `json:"who_to_follow"`
MediaProxy bool `json:"media_proxy"`
ScopeOptions bool `json:"scope_options"`
AccountActivationRequired bool `json:"account_activation_required"`
TextLimit int `json:"text_limit"`
}
// InstanceLocator discovers instance hostnames from the public index
// services and keeps one *Instance per hostname.
//
// The embedded Mutex is taken by the locator's methods when they update the
// instances map and the refresh timestamps.
type InstanceLocator struct {
	// pleromaIndexLastRefresh is set after a successful Pleroma index fetch.
	pleromaIndexLastRefresh *time.Time
	// mastodonIndexLastRefresh is set after a successful Mastodon index fetch.
	mastodonIndexLastRefresh *time.Time
	// instances maps hostname to its Instance. Values are pointers so that
	// the background detection started by NewInstance updates the stored
	// entry rather than a copy.
	instances map[string]*Instance
	sync.Mutex
}
// NewInstanceLocator returns a locator with an empty instance map whose two
// index refresh times both point at the package-level foreverago timestamp.
func NewInstanceLocator() *InstanceLocator {
	return &InstanceLocator{
		instances:                make(map[string]*Instance),
		pleromaIndexLastRefresh:  &foreverago,
		mastodonIndexLastRefresh: &foreverago,
	}
}
// addInstance registers hostname with the locator unless it is already
// known. A newly seen hostname gets a fresh Instance from NewInstance.
//
// The membership check and the insert happen under the same lock, so
// concurrent callers cannot both decide a hostname is new and the map is
// never read while another goroutine writes it.
func (i *InstanceLocator) addInstance(hostname string) {
	i.Lock()
	defer i.Unlock()

	// only add it if we haven't seen the hostname before
	if i.instances[hostname] != nil {
		return
	}
	log.Debug().Str("hostname", hostname).Msg("adding discovered instance")
	i.instances[hostname] = NewInstance(hostname)
}
// Locate logs the last refresh time of both indexes, refreshes the Mastodon
// index and then the Pleroma index, waits thirty seconds and finally logs an
// instance report.
func (i *InstanceLocator) Locate() {
	mastodonStamp := i.mastodonIndexLastRefresh.Format(time.RFC3339)
	log.Debug().Str("lastmastodonupdate", mastodonStamp).Send()

	pleromaStamp := i.pleromaIndexLastRefresh.Format(time.RFC3339)
	log.Debug().Str("lastpleromaupdate", pleromaStamp).Send()

	i.locateMastodon()
	i.locatePleroma()

	// Pause before reporting; presumably this gives the per-instance
	// detection goroutines time to finish — confirm the intent.
	time.Sleep(30 * time.Second)
	i.instanceReport()
}
// instanceReport logs how many instances are known in total, how many have
// answered a request (up) and how many have been identified.
func (i *InstanceLocator) instanceReport() {
	total := len(i.instances)
	up, identified := 0, 0

	// One pass over the map gathers both counters.
	for _, inst := range i.instances {
		if inst.identified {
			identified++
		}
		if inst.up {
			up++
		}
	}

	log.Info().
		Int("up", up).
		Int("total", total).
		Int("identified", identified).
		Msg("instance report")
}
// locateMastodon downloads the Mastodon instance index from
// mastodonIndexUrl, registers every listed instance name via addInstance
// and, on success, records the refresh time.
//
// Fetch, read and parse failures are logged as warnings and leave the
// locator unchanged.
func (i *InstanceLocator) locateMastodon() {
	netClient := &http.Client{Timeout: 20 * time.Second}

	resp, err := netClient.Get(mastodonIndexUrl)
	if err != nil {
		log.Warn().Msgf("unable to fetch mastodon instance list: %s", err)
		return
	}
	// resp is only non-nil once the error check has passed, so the deferred
	// close must come after it.
	defer resp.Body.Close()

	// The body is read exactly once and parsed from memory; reading it and
	// then handing the drained reader to a decoder would always fail.
	body, err := ioutil.ReadAll(resp.Body)
	if err != nil {
		log.Warn().Msgf("unable to fetch mastodon instance list: %s", err)
		return
	}
	log.Debug().Int("bytes", len(body)).Msg("fetched mastodon instance list")

	var mi MastodonIndexResponse
	if err := json.Unmarshal(body, &mi); err != nil {
		log.Warn().Msgf("unable to parse mastodon instance list: %s", err)
		return
	}

	// Index instead of ranging by value: each entry is a large struct and
	// only its Name is needed.
	for idx := range mi.Instances {
		i.addInstance(mi.Instances[idx].Name)
	}

	i.Lock()
	t := time.Now()
	i.mastodonIndexLastRefresh = &t
	i.Unlock()
}
// locatePleroma downloads the Pleroma instance index from pleromaIndexUrl,
// registers every listed domain via addInstance and, on success, records
// the refresh time.
//
// Fetch and parse failures are logged as warnings and leave the locator
// unchanged.
func (i *InstanceLocator) locatePleroma() {
	netClient := &http.Client{Timeout: 20 * time.Second}

	resp, err := netClient.Get(pleromaIndexUrl)
	if err != nil {
		log.Warn().Msgf("unable to fetch pleroma instance list: %s", err)
		return
	}
	// Close the body on every path so the underlying connection is released.
	defer resp.Body.Close()

	var list PleromaIndexResponse
	if err := json.NewDecoder(resp.Body).Decode(&list); err != nil {
		log.Warn().Msgf("unable to parse pleroma instance list: %s", err)
		return
	}

	for idx := range list {
		i.addInstance(list[idx].Domain)
	}

	i.Lock()
	t := time.Now()
	i.pleromaIndexLastRefresh = &t
	i.Unlock()
}