WIP: prep for 1.0 #1

Draft
sneak wants to merge 9 commits from next into master
18 changed files with 300 additions and 94 deletions

1
.gitignore vendored
View File

@ -3,3 +3,4 @@ output/
feta.sqlite
.lintsetup
out
debug.log

View File

@ -17,7 +17,7 @@ FROM alpine
# here are the levers
ENV FETA_HOSTDISCOVERYPARALLELISM 20
ENV FETA_FSSTORAGELOCATION /state/tootstore
ENV FETA_DBSTORAGELOCATION /state/feta.state.sqlite3
ENV FETA_DBURL sqlite:///state/feta.state.sqlite3
ENV FETA_TOOTSTODISK false
ENV FETA_TOOTSTODB true
ENV FETA_DEBUG false

View File

@ -26,7 +26,7 @@ endif
default: build
debug: build
GOTRACEBACK=all FETA_DEBUG=1 ./$(FN)
GOTRACEBACK=all FETA_DEBUG=1 ./$(FN) 2>&1 | tee -a debug.log
run: build
./$(FN)

View File

@ -20,6 +20,25 @@ archives the fediverse
[![Build Status](https://drone.datavi.be/api/badges/sneak/feta/status.svg)](https://drone.datavi.be/sneak/feta)
# getting started
## sqlite
*using default file location:*
`./feta`
*using file:*
`FETA_DBURL=sqlite://<abs path to file> ./feta`
*using memory:*
`FETA_DBURL=sqlite://:memory: ./feta`
## postgres
1. `docker-compose up -d`
2. `FETA_DBURL=postgres://feta_user:password@localhost:5432/feta?sslmode=disable ./feta`
Access the pgweb dashboard at `http://localhost:8081`
# ethics statement
It seems that some splinter groups are not well acquainted with the norms of
@ -41,6 +60,7 @@ legitimately-obtained files from the hard drives of other people.
# Author
Jeffrey Paul &lt;[sneak@sneak.berlin](mailto:sneak@sneak.berlin)&gt;
Jeffrey Paul &lt;[sneak@sneak.berlin](mailto:sneak@sneak.berlin)&gt; and
others
[@sneak@sneak.berlin](https://s.sneak.berlin/@sneak)

View File

@ -2,9 +2,8 @@ package database
import (
"git.eeqj.de/sneak/feta/instance"
"github.com/rs/zerolog/log"
_ "github.com/jinzhu/gorm/dialects/sqlite"
"github.com/rs/zerolog/log"
)
func (m *Manager) SaveInstance(i *instance.Instance) error {
@ -20,6 +19,7 @@ func (m *Manager) SaveInstance(i *instance.Instance) error {
UUID: i.UUID,
Disabled: i.Disabled,
ErrorCount: i.ErrorCount,
ConsecutiveErrorCount: i.ConsecutiveErrorCount,
FSMState: i.Status(),
Fetching: i.Fetching,
HighestID: i.HighestID,
@ -46,6 +46,7 @@ func (m *Manager) SaveInstance(i *instance.Instance) error {
m.db.Where("UUID = ?", i.UUID).First(&ei)
ei.Disabled = i.Disabled
ei.ErrorCount = i.ErrorCount
ei.ConsecutiveErrorCount = i.ConsecutiveErrorCount
ei.FSMState = i.Status()
ei.Fetching = i.Fetching
ei.HighestID = i.HighestID
@ -74,6 +75,7 @@ func (m *Manager) ListInstances() ([]*instance.Instance, error) {
x.UUID = i.UUID
x.Disabled = i.Disabled
x.ErrorCount = i.ErrorCount
x.ConsecutiveErrorCount = i.ConsecutiveErrorCount
x.InitialFSMState = i.FSMState
x.Fetching = i.Fetching
x.HighestID = i.HighestID

View File

@ -1,14 +1,18 @@
package database
import (
"net/url"
"path/filepath"
"strings"
"sync"
"github.com/jinzhu/gorm"
u "git.eeqj.de/sneak/goutil"
"github.com/golang/groupcache/lru"
"github.com/jinzhu/gorm"
_ "github.com/jinzhu/gorm/dialects/postgres"
_ "github.com/jinzhu/gorm/dialects/sqlite"
"github.com/rs/zerolog/log"
"github.com/spf13/viper"
"github.com/golang/groupcache/lru"
)
const cacheEntries = 1000000
@ -28,7 +32,7 @@ func New() *Manager {
}
func (m *Manager) init() {
m.open()
m.open(viper.GetString("DBURL"))
// breaks stuff, do not use:
//m.db.SingularTable(true)
m.db.LogMode(false)
@ -38,22 +42,55 @@ func (m *Manager) init() {
m.recentlyInsertedTootHashCache = lru.New(cacheEntries)
}
func (m *Manager) open() {
func (m *Manager) open(dbURL string) {
log.Info().Msg("opening database")
dirname := filepath.Dir(viper.GetString("DbStorageLocation"))
err := u.Mkdirp(dirname)
dsn, err := url.Parse(dbURL)
if err != nil {
log.Panic().
Err(err).
Msg("db path erro")
Msg("error parsing dbURL")
}
log.Info().
Str("scheme", dsn.Scheme).
Str("user", dsn.User.Username()).
Str("host", dsn.Host).
Str("db", dsn.Path).
Str("args", dsn.RawQuery).
Msg("db connection values")
switch {
case strings.HasPrefix(dbURL, "postgres://"):
log.Info().Msg("using postgres db")
db, err := gorm.Open("postgres", dbURL)
if err != nil {
log.Panic().
Err(err).
Msg("failed to open database")
}
m.db = db
case strings.HasPrefix(dbURL, "sqlite://"):
log.Info().Msg("using sqlite db")
if !strings.HasSuffix(dbURL, ":memory:") {
dirname := filepath.Dir(strings.TrimPrefix(dbURL, "sqlite://"))
err := u.Mkdirp(dirname)
if err != nil {
log.Panic().
Err(err).
Msg("db path error")
}
}
db, err := gorm.Open("sqlite3", strings.TrimPrefix(dbURL, "sqlite://"))
if err != nil {
log.Panic().
Err(err).
Str("dbURL", dbURL).
Msg("failed to open database")
}
m.db = db
default:
log.Panic().
Str("driver", dsn.Scheme).
Msg("unsupported driver in database url, must be 'postgres' or 'sqlite'")
}
db, err := gorm.Open("sqlite3", viper.GetString("DbStorageLocation"))
if err != nil {
log.Panic().
Err(err).
Msg("failed to open database")
}
m.db = db
m.doMigrations()
}

View File

@ -27,6 +27,7 @@ type StoredToot struct {
type APInstance struct {
gorm.Model
UUID uuid.UUID `gorm:"type:uuid;primary_key;"`
ConsecutiveErrorCount uint
ErrorCount uint
SuccessCount uint
HighestID uint

View File

@ -14,6 +14,16 @@ func (m *Manager) TootCountForHostname(hostname string) (uint, error) {
}
}
// TotalTootCount returns the row count the database reports for the
// StoredToot model. If the count query fails it returns 0 together with the
// error from the database layer.
func (m *Manager) TotalTootCount() (uint, error) {
	var c uint
	// Count writes the result into c; any failure is carried on the
	// returned handle's Error field.
	if err := m.db.Model(&StoredToot{}).Count(&c).Error; err != nil {
		return 0, err
	}
	return c, nil
}
func (m *Manager) GetAPInstanceFromUUID(uuid *uuid.UUID) (*APInstance, error) {
var i APInstance
e := m.db.Model(&APInstance{}).Where("uuid = ?", uuid).First(&i)

View File

@ -7,7 +7,7 @@ import (
"git.eeqj.de/sneak/feta/toot"
"github.com/google/uuid"
hstg "github.com/grokify/html-strip-tags-go"
_ "github.com/jinzhu/gorm/dialects/sqlite"
_ "github.com/jinzhu/gorm/dialects/postgres"
)
func (m *Manager) TootInsertHashCacheSize() uint {

25
docker-compose.yml Normal file
View File

@ -0,0 +1,25 @@
version: '3.1'
services:
postgres:
image: postgres:12
restart: always
container_name: postgres
ports:
- "5432:5432"
environment:
POSTGRES_PASSWORD: password
POSTGRES_USER: feta_user
POSTGRES_DB: feta
pgweb:
image: sosedoff/pgweb
restart: always
container_name: pgweb
ports:
- "8081:8081"
links:
- postgres:postgres
environment:
- DATABASE_URL=postgres://feta_user:password@postgres:5432/feta?sslmode=disable
depends_on:
- postgres

1
go.sum
View File

@ -115,6 +115,7 @@ github.com/labstack/gommon v0.3.0 h1:JEeO0bvc78PKdyHxloTKiF8BD5iGrH8T6MSeGvSgob0
github.com/labstack/gommon v0.3.0/go.mod h1:MULnywXg0yavhxWKc+lOruYdAhDwPK9wf0OL7NoOu+k=
github.com/leodido/go-urn v1.2.0 h1:hpXL4XnriNwQ/ABnpepYM/1vCLWNDfUNts8dX3xTG6Y=
github.com/leodido/go-urn v1.2.0/go.mod h1:+8+nEpDfqqsY+g338gtMEUOtuK+4dEMhiQEgxpxOKII=
github.com/lib/pq v1.1.1 h1:sJZmqHoEaY7f+NPP8pgLB/WxulyR3fewgCM2qaSlBb4=
github.com/lib/pq v1.1.1/go.mod h1:5WUZQaWbwv1U+lTReE5YruASi9Al49XbQIvNi/34Woo=
github.com/looplab/fsm v0.1.0 h1:Qte7Zdn/5hBNbXzP7yxVU4OIFHWXBovyTT2LaBTyC20=
github.com/looplab/fsm v0.1.0/go.mod h1:m2VaOfDHxqXBBMgc26m6yUOwkFn8H2AlJDE+jd/uafI=

View File

@ -17,18 +17,19 @@ import (
"github.com/rs/zerolog/log"
)
//import "github.com/gin-gonic/gin"
const nodeInfoSchemaVersionTwoName = "http://nodeinfo.diaspora.software/ns/schema/2.0"
const instanceNodeinfoTimeout = time.Second * 50
const instanceHTTPTimeout = time.Second * 120
const instanceSpiderInterval = time.Second * 120
const instanceErrorInterval = time.Second * 60 * 30
const instanceNodeinfoTimeout = time.Second * 60 * 2 // 2m
const instanceHTTPTimeout = time.Second * 60 * 2 // 2m
const instanceSpiderInterval = time.Second * 60 * 2 // 2m
const instanceErrorInterval = time.Second * 60 * 60 // 1h
const instancePersistentErrorInterval = time.Second * 86400 // 1d
const zeroInterval = time.Second * 0 // 0s
// Instance stores all the information we know about an instance
type Instance struct {
Disabled bool
ErrorCount uint
ConsecutiveErrorCount uint
FSM *fsm.FSM
Fetching bool
HighestID uint
@ -61,6 +62,10 @@ func New(options ...func(i *Instance)) *Instance {
opt(i)
}
if i.InitialFSMState == "FETCHING" {
i.InitialFSMState = "READY_FOR_TOOTFETCH"
}
i.FSM = fsm.NewFSM(
i.InitialFSMState,
fsm.Events{
@ -74,11 +79,13 @@ func New(options ...func(i *Instance)) *Instance {
{Name: "EARLY_FETCH_ERROR", Src: []string{"FETCHING_NODEINFO_URL", "PRE_NODEINFO_FETCH", "FETCHING_NODEINFO"}, Dst: "EARLY_ERROR"},
{Name: "TOOT_FETCH_ERROR", Src: []string{"FETCHING"}, Dst: "TOOT_FETCH_ERROR"},
{Name: "TOOTS_FETCHED", Src: []string{"FETCHING"}, Dst: "READY_FOR_TOOTFETCH"},
{Name: "DISABLEMENT", Src: []string{"WEIRD_NODE", "EARLY_ERROR", "TOOT_FETCH_ERROR"}, Dst: "DISABLED"},
},
fsm.Callbacks{
"enter_state": func(e *fsm.Event) { i.fsmEnterState(e) },
},
)
return i
}
@ -121,10 +128,36 @@ func (i *Instance) Unlock() {
i.structLock.Unlock()
}
func (i *Instance) bumpFetch() {
func (i *Instance) bumpFetchError() {
i.Lock()
defer i.Unlock()
i.NextFetch = time.Now().Add(120 * time.Second)
probablyDead := i.ConsecutiveErrorCount > 3
shouldDisable := i.ConsecutiveErrorCount > 6
i.Unlock()
if shouldDisable {
// auf wiedersehen, felicia
i.Lock()
i.Disabled = true
i.Unlock()
i.Event("DISABLEMENT")
return
}
if probablyDead {
// after more than three consecutive fetch errors, only try once per day:
i.setNextFetchAfter(instancePersistentErrorInterval)
} else {
// otherwise give them 1h
i.setNextFetchAfter(instanceErrorInterval)
}
}
// bumpFetchSuccess schedules the next fetch for this instance by passing
// instanceSpiderInterval to setNextFetchAfter. fetchRecentToots calls it
// after toots were fetched and parsed successfully.
func (i *Instance) bumpFetchSuccess() {
i.setNextFetchAfter(instanceSpiderInterval)
}
// scheduleFetchImmediate passes zeroInterval to setNextFetchAfter.
// NOTE(review): presumably this makes the instance eligible for fetching
// right away; setNextFetchAfter's body is not visible here — confirm.
func (i *Instance) scheduleFetchImmediate() {
i.setNextFetchAfter(zeroInterval)
}
func (i *Instance) setNextFetchAfter(d time.Duration) {
@ -139,8 +172,7 @@ func (i *Instance) Fetch() {
i.fetchingLock.Lock()
defer i.fetchingLock.Unlock()
i.setNextFetchAfter(instanceErrorInterval)
i.bumpFetchError()
err := i.DetectNodeTypeIfNecessary()
if err != nil {
log.Debug().
@ -149,8 +181,7 @@ func (i *Instance) Fetch() {
Msg("unable to fetch instance metadata")
return
}
i.setNextFetchAfter(instanceSpiderInterval)
i.scheduleFetchImmediate()
log.Info().
Str("hostname", i.Hostname).
Msg("instance now ready for fetch")
@ -207,12 +238,14 @@ func (i *Instance) registerError() {
i.Lock()
defer i.Unlock()
i.ErrorCount++
i.ConsecutiveErrorCount++
}
// registerSuccess records one successful operation on the instance: it
// increments SuccessCount and clears ConsecutiveErrorCount. Both fields are
// updated while holding the instance lock.
func (i *Instance) registerSuccess() {
	i.Lock()
	defer i.Unlock()

	// a success ends whatever streak of consecutive errors was running
	i.ConsecutiveErrorCount = 0
	i.SuccessCount++
}
// Up returns true if the success count is >0
@ -405,9 +438,12 @@ func (i *Instance) fetchRecentToots() error {
// it turns out pleroma supports the mastodon api so we'll just use that
// for everything for now
// FIXME would be nice to support non-https
url := fmt.Sprintf("https://%s/api/v1/timelines/public?limit=40&local=true",
i.Hostname)
// FIXME support broken/expired certs
var c = &http.Client{
Timeout: instanceHTTPTimeout,
}
@ -461,7 +497,7 @@ func (i *Instance) fetchRecentToots() error {
Msgf("got and parsed toots")
i.registerSuccess()
i.Event("TOOTS_FETCHED")
i.setNextFetchAfter(instanceSpiderInterval)
i.bumpFetchSuccess()
// this should go fast as either the channel is buffered bigly or the
// ingester receives fast and does its own buffering, but run it in its

View File

@ -1,6 +1,7 @@
package process
import (
"fmt"
"os"
"time"
@ -9,6 +10,7 @@ import (
"git.eeqj.de/sneak/feta/locator"
"git.eeqj.de/sneak/feta/manager"
"git.eeqj.de/sneak/feta/storage"
_ "github.com/jinzhu/gorm/dialects/postgres"
_ "github.com/jinzhu/gorm/dialects/sqlite"
"github.com/k0kubun/pp"
"github.com/mattn/go-isatty"
@ -56,7 +58,7 @@ func (f *Feta) configure() {
viper.SetDefault("TootsToDB", true)
viper.SetDefault("HostDiscoveryParallelism", 5)
viper.SetDefault("FSStorageLocation", os.ExpandEnv("$HOME/Library/ApplicationSupport/feta/tootarchive.d"))
viper.SetDefault("DBStorageLocation", os.ExpandEnv("$HOME/Library/ApplicationSupport/feta/feta.state.db"))
viper.SetDefault("DBURL", fmt.Sprintf("sqlite://%s", os.ExpandEnv("$HOME/Library/ApplicationSupport/feta/feta.state.db")))
viper.SetDefault("LogReportInterval", time.Second*10)
if err := viper.ReadInConfig(); err != nil {

View File

@ -9,7 +9,6 @@ import (
u "git.eeqj.de/sneak/goutil"
"github.com/flosch/pongo2"
"github.com/gin-gonic/gin"
"github.com/google/uuid"
"github.com/labstack/echo"
)
@ -23,7 +22,12 @@ func (a *Server) instances() []hash {
i := make(hash)
// TODO move this locking onto a method on Instance that just
// returns a new hash
// FIXME figure out why a very short lock here deadlocks
//this only locks the FSM, not the whole instance struct
i["status"] = v.Status()
// now do a quick lock of the whole instance just to copy out the
// attrs
v.Lock()
i["hostname"] = v.Hostname
i["uuid"] = v.UUID.String()
@ -31,9 +35,8 @@ func (a *Server) instances() []hash {
i["nextCheckAfter"] = (-1 * now.Sub(v.NextFetch)).String()
i["successCount"] = v.SuccessCount
i["errorCount"] = v.ErrorCount
i["consecutiveErrorCount"] = v.ConsecutiveErrorCount
i["identified"] = v.Identified
//this only locks the FSM, not the whole instance struct
i["status"] = v.Status()
i["software"] = "unknown"
i["version"] = "unknown"
if v.Identified {
@ -41,6 +44,7 @@ func (a *Server) instances() []hash {
i["version"] = v.ServerVersionString
}
v.Unlock()
resp = append(resp, i)
}
@ -54,7 +58,7 @@ func (a *Server) instances() []hash {
return resp
}
func (a *Server) instanceSummary() map[string]int {
func (a *Server) instanceStatusSummary() map[string]int {
resp := make(map[string]int)
for _, v := range a.feta.manager.ListInstances() {
v.Lock()
@ -68,26 +72,6 @@ func (a *Server) instanceSummary() map[string]int {
return resp
}
/*
func (a *Server) getInstanceListHandler() http.HandlerFunc {
return func(w http.ResponseWriter, r *http.Request) {
result := &gin.H{
"instances": a.instances(),
}
json, err := json.Marshal(result)
if err != nil {
http.Error(w, err.Error(), http.StatusInternalServerError)
return
}
w.Header().Set("Content-Type", "application/json")
w.Write(json)
}
}
*/
// notFoundHandler replies with HTTP status 404 and the plain-text body
// "404 not found". It returns whatever error c.String reports.
func (a *Server) notFoundHandler(c echo.Context) error {
return c.String(http.StatusNotFound, "404 not found")
}
@ -117,17 +101,33 @@ func (a *Server) instanceHandler(c echo.Context) error {
}
func (a *Server) indexHandler(c echo.Context) error {
count, err := a.feta.dbm.TotalTootCount()
if err != nil {
count = 0
}
tc := pongo2.Context{
"time": time.Now().UTC().Format(time.RFC3339Nano),
"gitrev": a.feta.version,
"instances": a.instances(),
"time": time.Now().UTC().Format(time.RFC3339Nano),
"gitrev": a.feta.version,
"tootCount": count,
"instances": a.instances(),
"instanceStatusSummary": a.instanceStatusSummary(),
}
return c.Render(http.StatusOK, "index.html", tc)
}
// instanceListHandler renders the "instancelist.html" template with HTTP
// status 200. The template context carries the current UTC time (RFC 3339
// with nanoseconds), the server version under "gitrev", and the instance
// list returned by a.instances().
func (a *Server) instanceListHandler(c echo.Context) error {
	// gather the instance list first, then stamp the time, so the
	// timestamp reflects when the data was ready
	instances := a.instances()
	renderedAt := time.Now().UTC().Format(time.RFC3339Nano)

	return c.Render(http.StatusOK, "instancelist.html", pongo2.Context{
		"time":      renderedAt,
		"gitrev":    a.feta.version,
		"instances": instances,
	})
}
func (a *Server) statsHandler(c echo.Context) error {
index := &gin.H{
"server": &gin.H{
index := &hash{
"server": &hash{
"now": time.Now().UTC().Format(time.RFC3339),
"uptime": a.feta.uptime().String(),
"goroutines": runtime.NumGoroutine(),
@ -135,14 +135,14 @@ func (a *Server) statsHandler(c echo.Context) error {
"version": a.feta.version,
"buildarch": a.feta.buildarch,
},
"instanceSummary": a.instanceSummary(),
"instanceStatusSummary": a.instanceStatusSummary(),
}
return c.JSONPretty(http.StatusOK, index, " ")
}
func (a *Server) healthCheckHandler(c echo.Context) error {
resp := &gin.H{
resp := &hash{
"status": "ok",
"now": time.Now().UTC().Format(time.RFC3339),
"uptime": a.feta.uptime().String(),

View File

@ -96,6 +96,7 @@ func (s *Server) initRouter() {
// Routes
s.e.GET("/", s.indexHandler)
s.e.GET("/instance/:uuid", s.instanceHandler)
s.e.GET("/instances", s.instanceListHandler)
s.e.GET("/stats.json", s.statsHandler)
s.e.GET("/.well-known/healthcheck.json", s.healthCheckHandler)
//a.e.GET("/about", s.aboutHandler)

View File

@ -4,34 +4,44 @@
<div class="col-lg-12">
<h2>indexer stats</h2>
<h2>feta overview</h2>
<div class="card m-5">
<h5 class="card-header">Instances</h5>
<div class="card-body">
<h5 class="card-title">Tracking {{ instances | length }} instances
across the Fediverse.</h5>
<!--
<p class="card-text">
</p> -->
<a href="/instances" class="btn btn-primary">View Instance List</a>
</div>
</div>
<div class="card m-5">
<h5 class="card-header">Toots</h5>
<div class="card-body">
<h5 class="card-title">I have {{ tootCount }} toots
in my database.</h5>
<a href="/toots" class="btn btn-primary">View Latest Toots</a>
</div>
</div>
<div class="card m-5">
<h5 class="card-header">Recent Events</h5>
<div class="card-body">
<h5 class="card-title">Last n System Events</h5>
<p class="card-text"> Discovered instance toot1.example.com </p>
<p class="card-text"> Discovered instance toot2.example.com </p>
<p class="card-text"> Discovered instance toot3.example.com </p>
<p class="card-text"> Discovered instance toot4.example.com </p>
</div>
</div>
<table class="table table-striped table-hover">
<thead class="thead-dark">
<tr>
<th scope="col">instance id</th>
<th scope="col">hostname</th>
<th scope="col">status</th>
<th scope="col">tootCount</th>
<th scope="col">Detail</th>
</tr>
</thead>
<tbody>
{% for instance in instances %}
<tr>
<td><a href="/instance/{{instance.uuid}}">{{instance.uuid}}</a></td>
<td><a href="https://{{instance.hostname}}">{{instance.hostname}}</a></td>
<td>{{instance.status}}</td>
<td>{{instance.tootCount}}</td>
<td><a
href="/instance/{{instance.uuid}}"
class="btn btn-info">
<i class="fab fa-mastodon"></i>
</button></td>
</tr>
{% endfor %}
</tbody>
</table>
</div>
{% endblock %}

24
view/instance.html Normal file
View File

@ -0,0 +1,24 @@
{% extends "page.html" %}
{% block content %}
<div class="col-lg-12">
<h2>instance {{instance.hostname}}</h2>
<div class="card m-5">
<div class="card-header">
<a href="/instance/{{instance.uuid}}">{{ instance.hostname }}</a>
({{instance.tootCount}} toots)
</div>
<div class="card-body">
<h5 class="card-title">{{instance.status}}</h5>
<p class="card-text">First Stat</p>
<p class="card-text">Second Stat</p>
<p class="card-text">Third Stat</p>
<a href="https://{{instance.hostname}}" class="btn btn-primary">View Instance Website</a>
</div>
</div>
</div>
{% endblock %}

36
view/instancelist.html Normal file
View File

@ -0,0 +1,36 @@
{% extends "page.html" %}
{% block content %}
<div class="col-lg-12">
<h2>instance list</h2>
<table class="table table-striped table-hover">
<thead class="thead-dark">
<tr>
<th scope="col">hostname</th>
<th scope="col">status</th>
<th scope="col">tootCount</th>
<th scope="col">nextFetch</th>
<th scope="col">Detail</th>
</tr>
</thead>
<tbody>
{% for instance in instances %}
<tr>
<td><a href="https://{{instance.hostname}}">{{instance.hostname}}</a></td>
<td>{{instance.status}}</td>
<td>{{instance.tootCount}}</td>
<td>{{instance.nextFetch}}</td>
<td><a
href="/instance/{{instance.uuid}}"
class="btn btn-info">
<i class="fab fa-mastodon"></i>
</a></td>
</tr>
{% endfor %}
</tbody>
</table>
</div>
{% endblock %}