From 9655265d85dd465dc89dadb8b0933a5217377166 Mon Sep 17 00:00:00 2001 From: sneak Date: Fri, 27 Mar 2020 20:18:55 -0700 Subject: [PATCH] builds again, not sure how i broke it, also: * fixes truncated content col * adds text_content for plain text (has space/tag strip bug) * update readme --- README.md | 7 +++++-- database/model.go | 2 ++ database/storageconnector.go | 9 ++++++++- go.mod | 1 + go.sum | 2 ++ 5 files changed, 18 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 8843119..9d2f1b0 100644 --- a/README.md +++ b/README.md @@ -7,11 +7,14 @@ archives the fediverse * scan toots for mentions and feed to locator * put toots in a separate db file * test with a real database -* save instances to store more often +* save instancelist to store more often (maybe on each new one added not + during initial load) * verify instances load properly on startup * do some simple in-memory dedupe for toot storage * make some templates using pongo2 and a simple website -* update APIs +* update json APIs +* index hashtags +* index seen urls # status diff --git a/database/model.go b/database/model.go index b6404af..fe49a6d 100644 --- a/database/model.go +++ b/database/model.go @@ -19,8 +19,10 @@ type StoredToot struct { ServerCreated time.Time Acct string Content []byte + TextContent []byte URL string Hostname string + Fetched time.Time } type APInstance struct { diff --git a/database/storageconnector.go b/database/storageconnector.go index 2e16e5c..a2c2272 100644 --- a/database/storageconnector.go +++ b/database/storageconnector.go @@ -2,11 +2,14 @@ package database import ( "fmt" + "html" "strings" + "time" "git.eeqj.de/sneak/feta/toot" "github.com/google/uuid" + hstg "github.com/grokify/html-strip-tags-go" _ "github.com/jinzhu/gorm/dialects/sqlite" ) @@ -28,9 +31,13 @@ func (m *Manager) StoreToot(t *toot.Toot) error { // FIXME normalize this, check for @ and append hostname if none nt.Acct = fmt.Sprintf("%s@%s", t.Parsed.Account.Acct, strings.ToLower(t.FromHost)) nt.URL = t.Parsed.URL - nt.Content = t.Parsed.Content + nt.Content = []byte(t.Parsed.Content) + // FIXME replace tags with spaces, don't just strip them, otherwise text + // gets messed up. + nt.TextContent = []byte(html.UnescapeString(hstg.StripTags(t.Parsed.Content))) nt.Hostname = strings.ToLower(t.FromHost) nt.Hash = t.GetHash() + nt.Fetched = time.Now() r := m.db.Create(&nt) //panic(fmt.Sprintf("%+v", t)) return r.Error diff --git a/go.mod b/go.mod index 641faa0..53043eb 100644 --- a/go.mod +++ b/go.mod @@ -6,6 +6,7 @@ require ( github.com/dn365/gin-zerolog v0.0.0-20171227063204-b43714b00db1 github.com/gin-gonic/gin v1.6.2 github.com/google/uuid v1.1.1 + github.com/grokify/html-strip-tags-go v0.0.0-20200322061010-ea0c1cf2f119 github.com/jinzhu/gorm v1.9.12 github.com/k0kubun/pp v3.0.1+incompatible github.com/looplab/fsm v0.1.0 diff --git a/go.sum b/go.sum index b6d4abf..096adc4 100644 --- a/go.sum +++ b/go.sum @@ -57,6 +57,8 @@ github.com/google/uuid v1.1.1 h1:Gkbcsh/GbpXz7lPftLA3P6TYMwjCLYm83jiFQZF/3gY= github.com/google/uuid v1.1.1/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= github.com/gopherjs/gopherjs v0.0.0-20181017120253-0766667cb4d1/go.mod h1:wJfORRmW1u3UXTncJ5qlYoELFm8eSnnEO6hX4iZ3EWY= github.com/gorilla/websocket v1.4.0/go.mod h1:E7qHFY5m1UJ88s3WnNqhKjPHQ0heANvMoAMk2YaljkQ= +github.com/grokify/html-strip-tags-go v0.0.0-20200322061010-ea0c1cf2f119 h1:h3iGUlU8HyW4baKd6D+h1mwOHnM2kwskSuG6Bv4tSbc= +github.com/grokify/html-strip-tags-go v0.0.0-20200322061010-ea0c1cf2f119/go.mod h1:2Su6romC5/1VXOQMaWL2yb618ARB8iVo6/DR99A6d78= github.com/grpc-ecosystem/go-grpc-middleware v1.0.0/go.mod h1:FiyG127CGDf3tlThmgyCl78X/SZQqEOJBCDaAfeWzPs= github.com/grpc-ecosystem/go-grpc-prometheus v1.2.0/go.mod h1:8NvIoxWQoOIhqOTXgfV/d3M/q6VIi02HzZEHgUlZvzk= github.com/grpc-ecosystem/grpc-gateway v1.9.0/go.mod h1:vNeuVxBJEsws4ogUvrchl83t/GYV9WGTSLVdBhOQFDY=