package hn

import (
	"fmt"
	"net/http"
	"time"

	"github.com/jinzhu/gorm"
	_ "github.com/jinzhu/gorm/dialects/sqlite"
	"github.com/peterhellberg/hn"
	"github.com/rs/zerolog"
)
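
// NewFetcher returns a Fetcher backed by db that polls Hacker News every
// 60 seconds using an HTTP client with a 5-second timeout.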
func NewFetcher(db *gorm.DB) *Fetcher {
	f := new(Fetcher)
	f.db = db
	f.fetchIntervalSecs = 60
	f.hn = hn.NewClient(&http.Client{
		Timeout: 5 * time.Second,
	})
	return f
}
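
// Fetcher periodically fetches the Hacker News front page and records
// story ranks in the database.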
type Fetcher struct {
	nextFetch         time.Time
	fetchIntervalSecs uint
	db                *gorm.DB
	hn                *hn.Client
	log               *zerolog.Logger
}
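
// AddLogger sets the zerolog logger used by the Fetcher.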
func (f *Fetcher) AddLogger(l *zerolog.Logger) {
	f.log = l
}
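
// run migrates the HNStoryRank and FrontPageCache tables and then fetches
// the front page once per fetch interval, panicking if a fetch fails.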
func (f *Fetcher) run() {
	f.db.AutoMigrate(&HNStoryRank{})
	f.db.AutoMigrate(&FrontPageCache{})

	for {
		f.log.Info().Msg("fetching top stories from HN")
		f.nextFetch = time.Now().Add(time.Duration(f.fetchIntervalSecs) * time.Second)
		err := f.StoreFrontPage()
		if err != nil {
			panic(err)
		}
		until := time.Until(f.nextFetch)
		countdown := time.NewTimer(until)
		f.log.Info().Msgf("waiting %s until next fetch", until)
		<-countdown.C
	}
}
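
// StoreFrontPage fetches the current top stories, records a rank entry for
// each of the first 30, tracks first appearances and rank changes in
// HNFrontPage, and marks stories that have dropped off the front page.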
func (f *Fetcher) StoreFrontPage() error {
	// FIXME set fetchid
	//r, err := f.db.Table("hn_story_rank").Select("MAX(FetchID)").Rows()
	//pp.Print(r)
	//Select("max(FetchID)").Find(&HNStoryRank)

	ids, err := f.hn.TopStories()
	if err != nil {
		return err
	}
	t := time.Now()
	// 30 items on HN frontpage.
	for i, id := range ids[:30] {
		item, err := f.hn.Item(id)
		if err != nil {
			return err
		}
		s := HNStoryRank{
			HNID:      uint(id),
			Rank:      uint(i + 1),
			URL:       item.URL,
			Title:     item.Title,
			FetchedAt: t,
		}
		f.log.Info().Msgf("storing story with rank %d in db", i+1)
		f.db.Create(&s)

		// check to see if the item was on the frontpage already or not
		var c int
		f.db.Model(&HNFrontPage{}).Where("HNID = ? and Disappeared is NULL", id).Count(&c)
		if c == 0 {
			// first appearance on frontpage
			r := HNFrontPage{
				HNID:        uint(id),
				Appeared:    t,
				HighestRank: uint(i + 1),
				Title:       item.Title,
				URL:         item.URL,
			}
			f.db.Create(&r)
			f.log.Info().
				Uint("hnid", uint(id)).
				Uint("rank", uint(i+1)).
				Str("title", item.Title).
				Str("url", item.URL).
				Msg("HN new story on frontpage")
		} else {
			// it's still here, compare its ranking
			var old HNFrontPage
			f.db.Model(&HNFrontPage{}).Where("HNID = ? and Disappeared is NULL", id).First(&old)
			// HighestRank is updated below when the new rank is numerically lower (better)
			needSave := false
			if old.Rank != uint(i+1) {
				f.log.Info().
					Uint("hnid", uint(id)).
					Uint("oldrank", old.Rank).
					Uint("newrank", uint(i+1)).
					Str("title", item.Title).
					Str("url", item.URL).
					Msg("HN story rank changed, recording new rank")
				old.Rank = uint(i + 1)
				needSave = true
			}

			if old.HighestRank > uint(i+1) {
				old.HighestRank = uint(i + 1)
				f.log.Info().
					Uint("hnid", uint(id)).
					Uint("oldrank", old.Rank).
					Uint("newrank", uint(i+1)).
					Msg("recording new record high rank for story")
				needSave = true
			}
			if needSave {
				f.db.Save(&old)
			}
		}
	}

	// FIXME iterate over frontpage items still active in DB and note any
	// that are no longer on the scrape
	fpitems, err := f.db.Model(&HNFrontPage{}).Where("Disappeared is NULL").Rows()
	if err != nil {
		return err
	}
	defer fpitems.Close()
	for fpitems.Next() {
		var item HNFrontPage
		f.db.ScanRows(fpitems, &item)
		fmt.Println(item)
		exitedFrontPage := true
		for _, xd := range ids[:30] {
			if item.HNID == uint(xd) {
				exitedFrontPage = false
			}
		}
		if exitedFrontPage {
			item.Disappeared = t
			dur := item.Disappeared.Sub(item.Appeared)
			f.db.Save(&item)
			f.log.Info().
				Uint("hnid", item.HNID).
				Uint("HighestRank", item.HighestRank).
				Str("title", item.Title).
				Dur("fpduration", dur).
				Str("url", item.URL).
				Msg("HN story exited frontpage")
		}
	}

	return nil
}