diff --git a/archiver.go b/archiver.go
index 202a546..ee2c3f7 100644
--- a/archiver.go
+++ b/archiver.go
@@ -32,6 +32,12 @@ func (a *Archiver) Run() int {
 	a.startup = &t
 	a.locator = NewInstanceLocator()
 	log.Info().Msg(fmt.Sprintf("in %#v.Run()", a))
-	a.locator.Locate()
+	go a.locator.Locate()
+
+	// Locate now runs in its own goroutine; loop here so Run does not return.
+	for {
+		time.Sleep(1 * time.Second)
+	}
+
 	return 0
 }
diff --git a/instance.go b/instance.go
index 0d4cbf2..a762497 100644
--- a/instance.go
+++ b/instance.go
@@ -1,34 +1,51 @@
 package main
 
 import (
+	"encoding/json"
 	"fmt"
 	"github.com/rs/zerolog/log"
 	"net/http"
+	"strings"
 	"time"
-	//"github.com/bitly/go-simplejson"
 )
 
+const NodeInfoSchemaVersionTwoName = "http://nodeinfo.diaspora.software/ns/schema/2.0"
+
+const NODE_TIMEOUT = time.Second * 30
+
 type ServerImplementation int
 
 const (
 	ServerUnknown ServerImplementation = iota
 	ServerMastodon
-	ServerPleorama
+	ServerPleroma
 )
 
 type Instance struct {
-	hostName   string
-	impl       ServerImplementation
-	errorCount uint
-	lastFetch  *time.Time
-	highestId  int
-	skip       bool
+	errorCount    uint
+	highestId     int
+	hostName      string
+	impl          ServerImplementation
+	lastFailure   *time.Time
+	lastSuccess   *time.Time
+	nodeInfoUrl   string
+	serverVersion string
+	identified    bool
+	up            bool
+	shouldSkip    bool
 }
 
 func NewInstance(hostname string) *Instance {
 	i := new(Instance)
 	i.hostName = hostname
-	i.detectNodeType()
+	foreverago := time.Now().Add((-1 * 86400 * 365 * 100) * time.Second)
+	i.lastSuccess = &foreverago
+	i.lastFailure = &foreverago
+	i.identified = false
+	i.up = false
+	go func() {
+		i.detectNodeType()
+	}()
 	return i
 }
 
@@ -36,33 +53,184 @@ func (i *Instance) detectNodeType() {
 	if i.impl > ServerUnknown {
 		return
 	}
+	i.fetchNodeInfo()
+}
+
+// NodeInfoWellKnownResponse is the JSON body decoded from /.well-known/nodeinfo.
+type NodeInfoWellKnownResponse struct {
+	Links []struct {
+		Rel  string `json:"rel"`
+		Href string `json:"href"`
+	} `json:"links"`
+}
+
+// NodeInfoVersionTwoSchema is the JSON document decoded from an instance's nodeinfo URL.
+type NodeInfoVersionTwoSchema struct {
+	Version  string `json:"version"`
+	Software struct {
+		Name    string `json:"name"`
+		Version string `json:"version"`
+	} `json:"software"`
+	Protocols []string `json:"protocols"`
+	Usage     struct {
+		Users struct {
+			Total          int `json:"total"`
+			ActiveMonth    int `json:"activeMonth"`
+			ActiveHalfyear int `json:"activeHalfyear"`
+		} `json:"users"`
+		LocalPosts int `json:"localPosts"`
+	} `json:"usage"`
+	OpenRegistrations bool `json:"openRegistrations"`
+}
+
+// registerError increments errorCount and sets lastFailure to the current time.
+func (i *Instance) registerError() {
+	i.errorCount = i.errorCount + 1
+	t := time.Now()
+	i.lastFailure = &t
+}
+
+// registerSuccess sets lastSuccess to the current time.
+func (i *Instance) registerSuccess() {
+	t := time.Now()
+	i.lastSuccess = &t
+}
+
+// fetchNodeInfoURL GETs https://<hostName>/.well-known/nodeinfo and, when a link
+// with rel NodeInfoSchemaVersionTwoName is listed, stores its href in nodeInfoUrl.
+func (i *Instance) fetchNodeInfoURL() {
+	url := fmt.Sprintf("https://%s/.well-known/nodeinfo", i.hostName)
+	var c = &http.Client{
+		Timeout: NODE_TIMEOUT,
+	}
+
+	log.Debug().
+		Str("url", url).
+		Str("hostname", i.hostName).
+		Msg("fetching nodeinfo reference URL")
+
+	resp, err := c.Get(url)
+	if err != nil {
+		log.Error().
+			Str("hostname", i.hostName).
+			Msg("unable to fetch nodeinfo, node is down?")
+		i.registerError()
+	} else {
+		defer resp.Body.Close() // every return path below must release the body
+		i.up = true // node is alive and responding to us
+		nir := new(NodeInfoWellKnownResponse)
+		err = json.NewDecoder(resp.Body).Decode(&nir)
+		if err != nil {
+			log.Error().
+				Str("hostname", i.hostName).
+				Msg("unable to parse nodeinfo")
+			i.registerError()
+			return
+		}
+		for _, item := range nir.Links {
+			if item.Rel == NodeInfoSchemaVersionTwoName {
+				log.Info().
+					Str("hostname", i.hostName).
+					Str("nodeinfourl", item.Href).
+					Msg("success fetching url for nodeinfo")
+				i.nodeInfoUrl = item.Href
+				i.registerSuccess()
+				return
+			}
+		}
+		log.Error().
+			Str("hostname", i.hostName).
+			Msg("incomplete nodeinfo")
+		i.registerError()
+		return
+	}
 }
 
+// fetchNodeInfo resolves the nodeinfo URL, downloads that document, and records
+// the server version and the detected implementation (Pleroma or Mastodon).
 func (i *Instance) fetchNodeInfo() {
-	url := fmt.Sprintf("https://%s/.well-known/nodeinfo")
-	var c = &http.Client{
-		Timeout: time.Second * 10,
-	}
-	response, err := c.Get(url)
-	if err != nil {
-		log.Debug().Msg("unable to fetch nodeinfo, node is down?")
-		i.skip = true
+	i.fetchNodeInfoURL()
+	if i.nodeInfoUrl == "" {
+		log.Error().
+			Str("hostname", i.hostName).
+			Msg("unable to fetch nodeinfo as nodeinfo URL cannot be determined")
+		return
 	}
 
-	log.Debug().Msgf("%#v", response)
+	var c = &http.Client{
+		Timeout: NODE_TIMEOUT,
+	}
+
+	//FIXME make sure the nodeinfourl is on the same domain as the instance
+	//hostname
+	resp, err := c.Get(i.nodeInfoUrl)
+
+	if err != nil {
+		log.Error().
+			Str("hostname", i.hostName).
+			Msgf("unable to fetch nodeinfo data: %s", err)
+		i.registerError()
+	} else {
+		defer resp.Body.Close() // every return path below must release the body
+		ni := new(NodeInfoVersionTwoSchema)
+		err = json.NewDecoder(resp.Body).Decode(&ni)
+		if err != nil {
+			log.Error().
+				Str("hostname", i.hostName).
+				Msgf("unable to parse nodeinfo: %s", err)
+			i.registerError()
+			return
+		}
+
+		log.Info().
+			Str("serverVersion", ni.Software.Version).
+			Str("software", ni.Software.Name).
+			Str("hostName", i.hostName).
+			Str("nodeInfoUrl", i.nodeInfoUrl).
+			Msg("received nodeinfo from instance")
+
+		i.serverVersion = ni.Software.Version
+
+		ni.Software.Name = strings.ToLower(ni.Software.Name)
+
+		if ni.Software.Name == "pleroma" {
+			log.Info().
+				Str("hostname", i.hostName).
+				Str("software", ni.Software.Name).
+				Msg("detected server software")
+			i.registerSuccess()
+			i.identified = true
+			i.impl = ServerPleroma
+		} else if ni.Software.Name == "mastodon" {
+			log.Info().
+				Str("hostname", i.hostName).
+				Str("software", ni.Software.Name).
+				Msg("detected server software")
+			i.registerSuccess()
+			i.identified = true
+			i.impl = ServerMastodon
+		} else {
+			log.Error().
+				Str("hostname", i.hostName).
+				Str("software", ni.Software.Name).
+				Msg("unknown implementation on server")
+			i.registerError()
+		}
+		return
+	}
 }
 
 func (i *Instance) fetchRecentToots() ([]byte, error) {
 	if i.impl == ServerMastodon {
 		return i.fetchRecentTootsJsonFromMastodon()
-	} else if i.impl == ServerPleorama {
-		return i.fetchRecentTootsJsonFromPleorama()
+	} else if i.impl == ServerPleroma {
+		return i.fetchRecentTootsJsonFromPleroma()
 	} else {
 		panic("nope")
 	}
 }
 
-func (i *Instance) fetchRecentTootsJsonFromPleorama() ([]byte, error) {
+func (i *Instance) fetchRecentTootsJsonFromPleroma() ([]byte, error) {
 	//url := fmt.Sprintf("https://%s/api/statuses/public_and_external_timeline.json?count=100", i.hostName)
 	return nil, nil
 }
diff --git a/locator.go b/locator.go
index 74524f5..0238f2f 100644
--- a/locator.go
+++ b/locator.go
@@ -1,36 +1,197 @@
 package main
 
 import (
+	"encoding/json"
 	"github.com/rs/zerolog/log"
-	"io/ioutil"
 	"net/http"
+	"sync"
 	"time"
 )
 
 const mastodonIndexUrl = "https://instances.social/list.json?q%5Busers%5D=&q%5Bsearch%5D=&strict=false"
 
+var foreverago = time.Now().Add((-1 * 86400 * 365 * 100) * time.Second)
+
+// MastodonIndexResponse is the JSON object decoded from mastodonIndexUrl.
+// The struct was generated with https://mholt.github.io/json-to-go/.
+type MastodonIndexResponse struct {
+	Instances []struct {
+		ID                 string      `json:"_id"`
+		AddedAt            time.Time   `json:"addedAt"`
+		Name               string      `json:"name"`
+		Downchecks         int         `json:"downchecks"`
+		Upchecks           int         `json:"upchecks"`
+		HTTPSRank          interface{} `json:"https_rank"`
+		HTTPSScore         int         `json:"https_score"`
+		ObsRank            string      `json:"obs_rank"`
+		ObsScore           int         `json:"obs_score"`
+		Ipv6               bool        `json:"ipv6"`
+		Up                 bool        `json:"up"`
+		Users              int         `json:"users"`
+		Statuses           string      `json:"statuses"`
+		Connections        int         `json:"connections"`
+		OpenRegistrations  bool        `json:"openRegistrations"`
+		Uptime             float64     `json:"uptime"`
+		Version            string      `json:"version"`
+		VersionScore       int         `json:"version_score"`
+		UpdatedAt          time.Time   `json:"updatedAt"`
+		CheckedAt          time.Time   `json:"checkedAt"`
+		Dead               bool        `json:"dead"`
+		ObsDate            time.Time   `json:"obs_date"`
+		Second60           int         `json:"second60"`
+		Second             int         `json:"second"`
+		ActiveUserCount    interface{} `json:"active_user_count,omitempty"`
+		FirstUserCreatedAt interface{} `json:"first_user_created_at,omitempty"`
+		Thumbnail          string      `json:"thumbnail"`
+		ApUpdatedAt        time.Time   `json:"apUpdatedAt"`
+		Second5            int         `json:"second5"`
+		RawVersion         string      `json:"raw_version"`
+		ActivityPrevw      struct {
+			Statuses      int `json:"statuses"`
+			Logins        int `json:"logins"`
+			Registrations int `json:"registrations"`
+		} `json:"activity_prevw,omitempty"`
+		Mastodon  bool   `json:"mastodon"`
+		UptimeStr string `json:"uptime_str"`
+		Score     int    `json:"score"`
+		ScoreStr  string `json:"score_str"`
+	} `json:"instances"`
+}
+
 const pleromaIndexUrl = "https://distsn.org/cgi-bin/distsn-pleroma-instances-api.cgi"
 
+// PleromaIndexResponse is the JSON array decoded from pleromaIndexUrl.
+type PleromaIndexResponse []struct {
+	Domain                    string `json:"domain"`
+	Title                     string `json:"title"`
+	Thumbnail                 string `json:"thumbnail"`
+	Registration              bool   `json:"registration"`
+	Chat                      bool   `json:"chat"`
+	Gopher                    bool   `json:"gopher"`
+	WhoToFollow               bool   `json:"who_to_follow"`
+	MediaProxy                bool   `json:"media_proxy"`
+	ScopeOptions              bool   `json:"scope_options"`
+	AccountActivationRequired bool   `json:"account_activation_required"`
+	TextLimit                 int    `json:"text_limit"`
+}
+
 type InstanceLocator struct {
 	pleromaIndexLastRefresh  *time.Time
 	mastodonIndexLastRefresh *time.Time
-	instances                map[string]Instance
+	instances                map[string]*Instance
+	sync.Mutex
 }
 
 func NewInstanceLocator() *InstanceLocator {
 	i := new(InstanceLocator)
+	i.instances = make(map[string]*Instance)
+	i.pleromaIndexLastRefresh = &foreverago
+	i.mastodonIndexLastRefresh = &foreverago
 	return i
 }
 
-func (i *InstanceLocator) Locate() {
-	//var hostnames []string
-
-	var netClient = &http.Client{
-		Timeout: time.Second * 10,
+// addInstance records a new Instance for hostname unless one is already known; the mutex covers both the lookup and the insert.
+func (i *InstanceLocator) addInstance(hostname string) {
+	i.Lock()
+	defer i.Unlock()
+
+	// only add it if we haven't seen the hostname before
+	if i.instances[hostname] != nil {
+		return
 	}
 
-	resp, _ := netClient.Get(mastodonIndexUrl)
-	defer resp.Body.Close()
-	body, _ := ioutil.ReadAll(resp.Body)
-	log.Debug().Msgf("%#v", body)
+	log.Debug().Str("hostname", hostname).Msgf("adding discovered instance")
+	i.instances[hostname] = NewInstance(hostname)
+}
+
+// Locate refreshes the Mastodon and Pleroma indexes, sleeps 30 seconds, then logs an instance report.
+func (i *InstanceLocator) Locate() {
+	log.Debug().Str("lastmastodonupdate", i.mastodonIndexLastRefresh.Format(time.RFC3339)).Send()
+	log.Debug().Str("lastpleromaupdate", i.pleromaIndexLastRefresh.Format(time.RFC3339)).Send()
+	i.locateMastodon()
+	i.locatePleroma()
+	time.Sleep(30 * time.Second)
+	i.instanceReport()
+}
+
+// instanceReport logs how many known instances are up, how many are identified, and the total.
+func (i *InstanceLocator) instanceReport() {
+	var upInstances int = 0
+	var identifiedInstances int = 0
+	var totalInstances int = 0
+
+	totalInstances = len(i.instances)
+
+	for _, elem := range i.instances {
+		if elem.identified == true {
+			identifiedInstances = identifiedInstances + 1
+		}
+	}
+
+	for _, elem := range i.instances {
+		if elem.up == true {
+			upInstances = upInstances + 1
+		}
+	}
+
+	log.Info().
+		Int("up", upInstances).
+		Int("total", totalInstances).
+		Int("identified", identifiedInstances).
+		Msg("instance report")
+}
+
+// locateMastodon fetches mastodonIndexUrl and adds every instance name it lists.
+func (i *InstanceLocator) locateMastodon() {
+	var netClient = &http.Client{
+		Timeout: time.Second * 20,
+	}
+	resp, err := netClient.Get(mastodonIndexUrl)
+	if err != nil {
+		log.Warn().Msgf("unable to fetch mastodon instance list: %s", err)
+	} else {
+		// it worked; resp is only usable on this branch, so the close is deferred here
+		defer resp.Body.Close()
+		mi := new(MastodonIndexResponse)
+		err = json.NewDecoder(resp.Body).Decode(&mi)
+		if err != nil {
+			log.Warn().Msgf("unable to parse mastodon instance list: %s", err)
+		} else {
+			for _, instance := range mi.Instances {
+				i.addInstance(instance.Name)
+			}
+
+			i.Lock()
+			t := time.Now()
+			i.mastodonIndexLastRefresh = &t
+			i.Unlock()
+		}
+	}
+}
+
+// locatePleroma fetches pleromaIndexUrl and adds every instance domain it lists.
+func (i *InstanceLocator) locatePleroma() {
+	var netClient = &http.Client{
+		Timeout: time.Second * 20,
+	}
+	resp, err := netClient.Get(pleromaIndexUrl)
+	if err != nil {
+		log.Warn().Msgf("unable to fetch pleroma instance list: %s", err)
+	} else {
+		// fetch worked
+		defer resp.Body.Close()
+		pi := new(PleromaIndexResponse)
+		err = json.NewDecoder(resp.Body).Decode(&pi)
+		if err != nil {
+			log.Warn().Msgf("unable to parse pleroma instance list: %s", err)
+		} else {
+			for _, instance := range *pi {
+				i.addInstance(instance.Domain)
+			}
+			i.Lock()
+			t := time.Now()
+			i.pleromaIndexLastRefresh = &t
+			i.Unlock()
+		}
+	}
 }