From 7e2704ce7d36f2c929676c36f803f5a83e2a7448 Mon Sep 17 00:00:00 2001
From: sneak
Date: Tue, 24 Mar 2020 18:32:40 -0700
Subject: [PATCH] writer half almost working

---
 hn/db.go      | 15 ++++++++-
 hn/fetcher.go | 89 +++++++++++++++++++++++++++++++++++++++++++++++++--
 2 files changed, 101 insertions(+), 3 deletions(-)

diff --git a/hn/db.go b/hn/db.go
index df78766..c6f8438 100644
--- a/hn/db.go
+++ b/hn/db.go
@@ -6,13 +6,26 @@ import (
 	"github.com/jinzhu/gorm"
 )
 
+// NOTE: this schema is knowingly redundant: HNFrontPage repeats the HNID, Title, URL and Rank columns of HNStoryRank.
+
+type HNFrontPage struct {
+	gorm.Model
+	InternalID  uint64 `gorm:"primary_key;auto_increment:true` // NOTE(review): tag value is missing its closing double quote
+	HNID        uint      // HN integer id
+	Appeared    time.Time // fetch time at which the story was first recorded on the frontpage
+	Disappeared time.Time // fetch time at which the story was no longer among the 30 scraped ids
+	HighestRank uint      // best (numerically lowest) frontpage index recorded
+	Rank        uint      // frontpage index, rewritten when it changes
+	Title       string    // submission title
+	URL         string    // submission URL
+}
+
 type HNStoryRank struct {
 	gorm.Model
 	InternalStoryID uint64 `gorm:"primary_key;auto_increment:true`
 	HNID            uint      // HN integer id
 	Title           string    // submission title
 	URL             string    // duh
-	FetchID         uint      // integer identifying fetch batch
 	Rank            uint      // frontpage index
 	FetchedAt       time.Time // identical within fetchid
 }
diff --git a/hn/fetcher.go b/hn/fetcher.go
index 8d37570..379bee4 100644
--- a/hn/fetcher.go
+++ b/hn/fetcher.go
@@ -1,6 +1,7 @@
 package hn
 
 import (
+	"fmt"
 	"net/http"
 	"time"
 
@@ -54,7 +55,10 @@ func (f *Fetcher) run() {
 
 func (f *Fetcher) StoreFrontPage() error {
 	// FIXME set fetchid
-	r := f.db.Select("max(FetchID)").Find(&HNStoryRank)
+	//r, err := f.db.Table("hn_story_rank").Select("MAX(FetchID)").Rows()
+
+	//pp.Print(r)
+	//Select("max(FetchID)").Find(&HNStoryRank)
 
 	ids, err := f.hn.TopStories()
 	t := time.Now()
@@ -62,6 +66,7 @@ func (f *Fetcher) StoreFrontPage() error {
 		return err
 	}
 
+	// 30 items on HN frontpage. NOTE(review): ids[:30] assumes at least 30 ids were returned.
 	for i, id := range ids[:30] {
 
 		item, err := f.hn.Item(id)
@@ -70,7 +75,6 @@ func (f *Fetcher) StoreFrontPage() error {
 		}
 		s := HNStoryRank{
 			HNID:      uint(id),
-			FetchID:   uint(0),
 			Rank:      uint(i + 1),
 			URL:       item.URL,
 			Title:     item.Title,
@@ -78,6 +82,87 @@ func (f *Fetcher) StoreFrontPage() error {
 		}
 		f.log.Info().Msgf("storing story with rank %d in db", (i + 1))
 		f.db.Create(&s)
+
+		// check to see if the item already has an open frontpage row (Disappeared is NULL)
+		var c int
+		f.db.Model(&HNFrontPage{}).Where("HNID = ? and Disappeared is NULL", id).Count(&c) // NOTE(review): query error is ignored; Disappeared is a non-pointer time.Time - confirm unset values are stored as NULL
+		if c == 0 {
+			// first appearance on frontpage
+			r := HNFrontPage{
+				HNID:        uint(id),
+				Appeared:    t,
+				HighestRank: uint(i + 1),
+				Title:       item.Title,
+				URL:         item.URL,
+			} // Rank is not set here, only HighestRank
+			f.db.Create(&r)
+			f.log.Info().
+				Uint("hnid", uint(id)).
+				Uint("rank", uint(i+1)).
+				Str("title", item.Title).
+				Str("url", item.URL).
+				Msg("HN new story on frontpage")
+		} else {
+			// it's still here, compare its ranking
+			var old HNFrontPage
+			f.db.Model(&HNFrontPage{}).Where("HNID = ? and Disappeared is NULL", id).First(&old)
+			// save only if the current rank or the best (numerically lowest) rank changed
+			needSave := false
+			if old.Rank != uint(i+1) {
+				f.log.Info().
+					Uint("hnid", uint(id)).
+					Uint("oldrank", old.Rank).
+					Uint("newrank", uint(i+1)).
+					Str("title", item.Title).
+					Str("url", item.URL).
+					Msg("HN story rank changed, recording new rank")
+				old.Rank = uint(i + 1)
+				needSave = true
+			}
+
+			if old.HighestRank > uint(i+1) {
+				old.HighestRank = uint(i + 1)
+				f.log.Info().
+					Uint("hnid", uint(id)).
+					Uint("oldrank", old.Rank). // NOTE(review): old.Rank may already hold the new rank (assigned above when the rank changed)
+					Uint("newrank", uint(i+1)).
+					Msg("recording new record high rank for story")
+				needSave = true
+			}
+			if needSave {
+				f.db.Save(&old)
+			}
+		}
+
+	}
+
+	// iterate over frontpage items still active in DB and mark any
+	// that are no longer on the scrape as disappeared
+	fpitems, err := f.db.Model(&HNFrontPage{}).Where("Disappeared is NULL").Rows() // NOTE(review): err is not checked before fpitems is used
+	defer fpitems.Close()
+	for fpitems.Next() {
+		var item HNFrontPage
+		f.db.ScanRows(fpitems, &item)
+		fmt.Println(item) // debug output to stdout
+		exitedFrontPage := true // stays true unless the HNID is found among the 30 scraped ids
+		for _, xd := range ids[:30] {
+			if item.HNID == uint(xd) {
+				exitedFrontPage = false
+			}
+		}
+		if exitedFrontPage {
+			item.Disappeared = t
+			dur := item.Disappeared.Sub(item.Appeared) // time between first appearance and this fetch
+			f.db.Save(&item)
+			f.log.Info().
+				Uint("hnid", item.HNID).
+				Uint("HighestRank", item.HighestRank).
+				Str("title", item.Title).
+				Dur("fpduration", dur).
+				Str("url", item.URL).
+				Msg("HN story exited frontpage")
+
+		}
 	}
 	return nil
 }