writer half almost working
All checks were successful
continuous-integration/drone/push Build is passing

This commit is contained in:
Jeffrey Paul 2020-03-24 18:32:40 -07:00
parent c61227293e
commit 7e2704ce7d
2 changed files with 101 additions and 3 deletions

View File

@ -6,13 +6,26 @@ import (
"github.com/jinzhu/gorm"
)
// this schema is quite redundant, i know

// HNFrontPage records one stay of a story on the HN front page: when it
// appeared, when it disappeared, and the ranks it held in between.
type HNFrontPage struct {
	gorm.Model
	// InternalID carries the gorm primary-key/auto-increment tag. The tag
	// was previously missing its closing quote, which made it unparseable
	// by reflect.StructTag and therefore ignored by gorm.
	// NOTE(review): gorm.Model already declares ID as a primary key, so
	// with the tag now effective this table has two primary-key columns;
	// confirm the target database accepts that before migrating.
	InternalID uint64 `gorm:"primary_key;auto_increment:true"`
	// HNID is the HN integer id of the story.
	HNID uint
	// Appeared is the fetch time at which the story was first recorded on
	// the front page.
	Appeared time.Time
	// Disappeared is the fetch time at which the story was no longer
	// among the scraped front page items.
	Disappeared time.Time
	// HighestRank is the numerically smallest front page index recorded
	// for the story.
	HighestRank uint
	// Rank is the most recently recorded front page index.
	Rank uint
	// Title is the submission title.
	Title string
	// URL is the submission URL.
	URL string
}
// HNStoryRank is a single observation of a story at a given front page
// rank, recorded once per story per fetch batch.
type HNStoryRank struct {
	gorm.Model
	// InternalStoryID carries the gorm primary-key/auto-increment tag. The
	// tag was previously missing its closing quote, which made it
	// unparseable by reflect.StructTag and therefore ignored by gorm.
	// NOTE(review): gorm.Model already declares ID as a primary key, so
	// with the tag now effective this table has two primary-key columns;
	// confirm the target database accepts that before migrating.
	InternalStoryID uint64 `gorm:"primary_key;auto_increment:true"`
	// HNID is the HN integer id of the story.
	HNID uint
	// Title is the submission title.
	Title string
	// URL is the submission URL.
	URL string
	// FetchID is an integer identifying the fetch batch.
	FetchID uint
	// Rank is the front page index of the story in this fetch.
	Rank uint
	// FetchedAt is the fetch timestamp; identical within a FetchID.
	FetchedAt time.Time
}

View File

@ -1,6 +1,7 @@
package hn
import (
"fmt"
"net/http"
"time"
@ -54,7 +55,10 @@ func (f *Fetcher) run() {
// StoreFrontPage snapshots the current HN front page into the database.
// For each of the first 30 top-story ids it stores an HNStoryRank row, then
// creates or updates the matching open HNFrontPage row (rank and highest
// rank). Finally it walks the HNFrontPage rows selected as still open and
// stamps Disappeared on those whose HNID is not among the scraped ids.
// It returns nil at the end; the early return visible below propagates err.
// NOTE(review): results of the f.db Create/Save/ScanRows calls are not
// checked anywhere in the visible code.
func (f *Fetcher) StoreFrontPage() error {
// FIXME set fetchid
// NOTE(review): HNStoryRank looks like a type name, so &HNStoryRank is
// probably not a valid expression, and r is not used in the visible code.
r := f.db.Select("max(FetchID)").Find(&HNStoryRank)
//r, err := f.db.Table("hn_story_rank").Select("MAX(FetchID)").Rows()
//pp.Print(r)
//Select("max(FetchID)").Find(&HNStoryRank)
ids, err := f.hn.TopStories()
// t is the single timestamp used for every row written by this call.
t := time.Now()
@ -62,6 +66,7 @@ func (f *Fetcher) StoreFrontPage() error {
return err
}
// 30 items on HN frontpage.
// NOTE(review): ids[:30] panics if fewer than 30 ids were returned.
for i, id := range ids[:30] {
item, err := f.hn.Item(id)
@ -70,7 +75,6 @@ func (f *Fetcher) StoreFrontPage() error {
}
// Per-fetch rank observation; rank is 1-based (i + 1). FetchID is
// hard-coded to 0 until the FIXME at the top is resolved.
s := HNStoryRank{
HNID: uint(id),
FetchID: uint(0),
Rank: uint(i + 1),
URL: item.URL,
Title: item.Title,
@ -78,6 +82,87 @@ func (f *Fetcher) StoreFrontPage() error {
}
f.log.Info().Msgf("storing story with rank %d in db", (i + 1))
f.db.Create(&s)
// check to see if the item was on the frontpage already or not
// NOTE(review): HNFrontPage.Disappeared is a non-pointer time.Time;
// confirm gorm stores its zero value as NULL, otherwise this filter
// never matches and every story is treated as a first appearance.
var c int
f.db.Model(&HNFrontPage{}).Where("HNID = ? and Disappeared is NULL", id).Count(&c)
if c == 0 {
// first appearance on frontpage
// Rank is left at its zero value here; only HighestRank is set.
r := HNFrontPage{
HNID: uint(id),
Appeared: t,
HighestRank: uint(i + 1),
Title: item.Title,
URL: item.URL,
}
f.db.Create(&r)
f.log.Info().
Uint("hnid", uint(id)).
Uint("rank", uint(i+1)).
Str("title", item.Title).
Str("url", item.URL).
Msg("HN new story on frontpage")
} else {
// it's still here, compare its ranking
var old HNFrontPage
f.db.Model(&HNFrontPage{}).Where("HNID = ? and Disappeared is NULL", id).First(&old)
// needSave tracks whether either rank field changed so the row is
// written back at most once.
needSave := false
if old.Rank != uint(i+1) {
f.log.Info().
Uint("hnid", uint(id)).
Uint("oldrank", old.Rank).
Uint("newrank", uint(i+1)).
Str("title", item.Title).
Str("url", item.URL).
Msg("HN story rank changed, recording new rank")
old.Rank = uint(i + 1)
needSave = true
}
// A numerically smaller index is a higher position, so lower
// HighestRank when the new rank is below the stored one.
if old.HighestRank > uint(i+1) {
old.HighestRank = uint(i + 1)
// NOTE(review): old.Rank may already have been overwritten with
// the new rank above, so "oldrank" can equal "newrank" here.
f.log.Info().
Uint("hnid", uint(id)).
Uint("oldrank", old.Rank).
Uint("newrank", uint(i+1)).
Msg("recording new record high rank for story")
needSave = true
}
if needSave {
f.db.Save(&old)
}
}
}
// Iterate over the frontpage rows selected as still active in the DB and
// mark any whose HNID is no longer in the scraped ids as disappeared.
// NOTE(review): err from Rows() is not checked before fpitems is used;
// if the query fails, fpitems may be nil when Close/Next are called.
fpitems, err := f.db.Model(&HNFrontPage{}).Where("Disappeared is NULL").Rows()
defer fpitems.Close()
for fpitems.Next() {
var item HNFrontPage
f.db.ScanRows(fpitems, &item)
// NOTE(review): prints every scanned row to stdout; looks like debug output.
fmt.Println(item)
// Assume the story left the front page unless its id is found among
// the first 30 scraped ids.
exitedFrontPage := true
for _, xd := range ids[:30] {
if item.HNID == uint(xd) {
exitedFrontPage = false
}
}
if exitedFrontPage {
item.Disappeared = t
// dur is how long the story was recorded on the front page.
dur := item.Disappeared.Sub(item.Appeared)
f.db.Save(&item)
f.log.Info().
Uint("hnid", item.HNID).
Uint("HighestRank", item.HighestRank).
Str("title", item.Title).
Dur("fpduration", dur).
Str("url", item.URL).
Msg("HN story exited frontpage")
}
}
return nil
}