// Package hn polls the Hacker News top stories and records, in a database,
// when each story first shows up on the front page, how its rank and score
// change, and when it drops off again.
package hn

import (
	"fmt"
	"net/http"
	"os"
	"time"

	"github.com/jinzhu/gorm"
	_ "github.com/jinzhu/gorm/dialects/sqlite"
	"github.com/peterhellberg/hn"
	"github.com/rs/zerolog"
)

// frontPageSize is the number of top stories treated as "the front page".
const frontPageSize = 30

// NewFetcher returns a Fetcher that stores its results in db, fetches every
// 60 seconds and talks to the HN API with a 5 second HTTP timeout.
//
// The returned Fetcher logs to a no-op logger until AddLogger is called, so
// it is safe to use without configuring logging.
func NewFetcher(db *gorm.DB) *Fetcher {
	nop := zerolog.Nop()
	return &Fetcher{
		db:                db,
		fetchIntervalSecs: 60,
		hn:                hn.NewClient(&http.Client{Timeout: 5 * time.Second}),
		log:               &nop,
	}
}

// Fetcher periodically scrapes the HN top stories and keeps the HNFrontPage
// table up to date.
type Fetcher struct {
	nextFetch         time.Time // when the next fetch is due
	fetchIntervalSecs uint      // seconds between the start of two fetches
	db                *gorm.DB
	hn                *hn.Client
	log               *zerolog.Logger
}

// AddLogger sets the logger used by the Fetcher. A nil logger is ignored so
// that the Fetcher always has something valid to log to.
func (f *Fetcher) AddLogger(l *zerolog.Logger) {
	if l == nil {
		return
	}
	f.log = l
}

// run migrates the schema and then fetches the front page forever, once per
// fetch interval. It never returns.
//
// Setting the DEBUG environment variable to a non-empty value turns on gorm's
// query logging.
func (f *Fetcher) run() {
	if os.Getenv("DEBUG") != "" {
		f.db.LogMode(true)
	}

	if err := f.db.AutoMigrate(&HNStoryRank{}, &FrontPageCache{}, &HNFrontPage{}).Error; err != nil {
		f.log.Error().Err(err).Msg("database migration failed")
	}

	for {
		f.log.Info().Msg("fetching top stories from HN")

		// The interval is measured from the start of a fetch, so the time
		// spent fetching is not added on top of it.
		f.nextFetch = time.Now().Add(time.Duration(f.fetchIntervalSecs) * time.Second)

		// A failed fetch (network timeout, DB hiccup) must not take the whole
		// process down; log it and try again at the next interval.
		if err := f.StoreFrontPage(); err != nil {
			f.log.Error().Err(err).Msg("storing HN front page failed, will retry at next fetch")
		}

		until := time.Until(f.nextFetch)
		f.log.Info().Msgf("waiting %s until next fetch", until)
		time.Sleep(until)
	}
}

// StoreFrontPage fetches the current top stories from HN and reconciles them
// with the HNFrontPage table: new stories are inserted, known stories get
// their rank, best rank and score refreshed, and stories that were active but
// are no longer among the top frontPageSize are marked as disappeared.
//
// It returns the first error encountered while talking to the HN API or the
// database. Stories processed before the error stay updated.
func (f *Fetcher) StoreFrontPage() error {
	// FIXME set fetchid

	ids, err := f.hn.TopStories()
	now := time.Now()
	if err != nil {
		return err
	}

	// An empty answer would otherwise mark every active story as gone.
	if len(ids) == 0 {
		return fmt.Errorf("HN returned no top stories")
	}

	// The API may return fewer ids than a full front page.
	top := ids
	if len(top) > frontPageSize {
		top = top[:frontPageSize]
	}

	for i, id := range top {
		item, err := f.hn.Item(id)
		if err != nil {
			return err
		}

		// FIXME storing one HNStoryRank row per story per fetch is disabled:
		// it grows unbounded and makes the database file too big unless it
		// is cleaned up or otherwise limited.

		if err := f.recordStory(id, uint(i+1), item.Title, item.URL, item.Score, now); err != nil {
			return err
		}
	}

	return f.markDeparted(top, now)
}

// recordStory inserts the story if it has never been on the front page, or
// otherwise refreshes its rank, best rank and score and clears its
// disappearance time.
func (f *Fetcher) recordStory(id int, rank uint, title, url string, score int, seen time.Time) error {
	var old HNFrontPage
	res := f.db.Where("hn_id = ?", id).First(&old)

	if res.RecordNotFound() {
		// first appearance on frontpage
		r := HNFrontPage{
			HNID:        uint(id),
			Appeared:    seen,
			Disappeared: time.Time{},
			HighestRank: rank,
			Rank:        rank,
			Title:       title,
			Score:       uint(score),
			URL:         url,
		}
		if err := f.db.Create(&r).Error; err != nil {
			return fmt.Errorf("creating front page record for story %d: %w", id, err)
		}
		f.log.Info().
			Uint("hnid", uint(id)).
			Uint("rank", rank).
			Str("title", title).
			Int("score", score).
			Str("url", url).
			Msg("HN new story on frontpage")
		return nil
	}
	if res.Error != nil {
		return fmt.Errorf("looking up front page record for story %d: %w", id, res.Error)
	}

	// it's still here, (or back)
	if old.Rank != rank {
		f.log.Info().
			Uint("hnid", uint(id)).
			Uint("oldrank", old.Rank).
			Uint("newrank", rank).
			Int("score", score).
			Str("title", title).
			Str("url", url).
			Msg("HN story rank changed, recording new rank")
		old.Rank = rank
	}

	// A numerically smaller rank is a better position on the page.
	if old.HighestRank > rank {
		f.log.Info().
			Uint("hnid", uint(id)).
			Uint("oldrecord", old.HighestRank).
			Uint("newrecord", rank).
			Msg("recording new record high rank for story")
		old.HighestRank = rank
	}

	old.Score = uint(score)

	// in any case it's here now
	old.Disappeared = time.Time{}

	if err := f.db.Save(&old).Error; err != nil {
		return fmt.Errorf("saving front page record for story %d: %w", id, err)
	}
	return nil
}

// markDeparted stamps every story that is still flagged as active in the
// database but is not in top with seen as its disappearance time.
func (f *Fetcher) markDeparted(top []int, seen time.Time) error {
	// Load the active rows completely before updating, so no result set is
	// open while the update runs.
	var active []HNFrontPage
	if err := f.db.Where("disappeared is ?", time.Time{}).Find(&active).Error; err != nil {
		return fmt.Errorf("listing active front page stories: %w", err)
	}

	onPage := make(map[uint]struct{}, len(top))
	for _, id := range top {
		onPage[uint(id)] = struct{}{}
	}

	var gone []uint
	for i := range active {
		item := &active[i]
		if _, ok := onPage[item.HNID]; ok {
			continue
		}
		gone = append(gone, item.HNID)
		f.log.Info().
			Uint("hnid", item.HNID).
			Uint("HighestRank", item.HighestRank).
			Str("title", item.Title).
			Str("time_on_frontpage", seen.Sub(item.Appeared).String()).
			Str("url", item.URL).
			Msg("HN story exited frontpage")
	}

	if len(gone) == 0 {
		return nil
	}

	err := f.db.Model(&HNFrontPage{}).
		Where("disappeared is ? and hn_id in (?)", time.Time{}, gone).
		Update("Disappeared", seen).Error
	if err != nil {
		return fmt.Errorf("marking departed front page stories: %w", err)
	}
	return nil
}