package hn

import (
	"net/http"
	"os"
	"time"

	"github.com/jinzhu/gorm"
	_ "github.com/jinzhu/gorm/dialects/sqlite"
	"github.com/peterhellberg/hn"
	"github.com/rs/zerolog"
)

// SQLITE_NULL_DATETIME is how gorm/SQLite serializes a zero time.Time;
// an active frontpage row still has this value in its "disappeared" column.
const SQLITE_NULL_DATETIME = "0001-01-01 00:00:00+00:00"

func NewFetcher(db *gorm.DB) *Fetcher {
	f := new(Fetcher)
	f.db = db
	f.fetchIntervalSecs = 60
	f.hn = hn.NewClient(&http.Client{
		Timeout: 5 * time.Second,
	})
	return f
}

type Fetcher struct {
	nextFetch         time.Time
	fetchIntervalSecs uint
	db                *gorm.DB
	hn                *hn.Client
	log               *zerolog.Logger
}

// AddLogger sets the logger; run assumes it has been called first.
func (f *Fetcher) AddLogger(l *zerolog.Logger) {
	f.log = l
}

func (f *Fetcher) run() {
	if os.Getenv("DEBUG") != "" {
		f.db.LogMode(true)
	}
	f.db.AutoMigrate(&HNStoryRank{})
	f.db.AutoMigrate(&FrontPageCache{})
	f.db.AutoMigrate(&HNFrontPage{})

	for {
		f.log.Info().
			Msg("fetching top stories from HN")
		f.nextFetch = time.Now().Add(time.Duration(f.fetchIntervalSecs) * time.Second)
		err := f.StoreFrontPage()
		if err != nil {
			panic(err)
		}
		until := time.Until(f.nextFetch)
		countdown := time.NewTimer(until)
		f.log.Info().Msgf("waiting %s until next fetch", until)
		<-countdown.C
	}
}

func (f *Fetcher) StoreFrontPage() error {
	// FIXME set fetchid
	//r, err := f.db.Table("hn_story_rank").Select("MAX(FetchID)").Rows()
	//pp.Print(r)
	//Select("max(FetchID)").Find(&HNStoryRank)
	ids, err := f.hn.TopStories()
	t := time.Now()
	if err != nil {
		return err
	}

	// HN shows 30 items on its front page; guard in case the API
	// returns fewer.
	if len(ids) > 30 {
		ids = ids[:30]
	}
	for i, id := range ids {
		item, err := f.hn.Item(id)
		if err != nil {
			return err
		}
		/*
			s := HNStoryRank{
				HNID:      uint(id),
				Rank:      uint(i + 1),
				URL:       item.URL,
				Title:     item.Title,
				FetchedAt: t,
			}
		*/
		//f.log.Debug().Msgf("storing story with rank %d in db", (i + 1))
		// FIXME this will grow unbounded and make the file too big if
		// I don't clean this up or otherwise limit the data in here;
		// disabled for now
		//f.db.Create(&s)

		// Check whether the item was already on the front page: an
		// active row is one whose "disappeared" column is still the
		// zero datetime.
		var c int
		f.db.Model(&HNFrontPage{}).Where("hn_id = ? and disappeared is ?", id, SQLITE_NULL_DATETIME).Count(&c)
		if c == 0 {
			// First appearance on the front page.
			r := HNFrontPage{
				HNID:        uint(id),
				Appeared:    t,
				HighestRank: uint(i + 1),
				Rank:        uint(i + 1),
				Title:       item.Title,
				URL:         item.URL,
			}
			f.db.Create(&r)
			f.log.Info().
				Uint("hnid", uint(id)).
				Uint("rank", uint(i+1)).
				Str("title", item.Title).
				Str("url", item.URL).
				Msg("HN new story on frontpage")
		} else {
			// It's still here; compare its ranking.
			var old HNFrontPage
			f.db.Model(&HNFrontPage{}).Where("hn_id = ? and disappeared is ?", id, SQLITE_NULL_DATETIME).First(&old)
			needSave := false
			if old.Rank != uint(i+1) {
				f.log.Info().
					Uint("hnid", uint(id)).
					Uint("oldrank", old.Rank).
					Uint("newrank", uint(i+1)).
					Str("title", item.Title).
					Str("url", item.URL).
					Msg("HN story rank changed, recording new rank")
				old.Rank = uint(i + 1)
				needSave = true
			}
			// Lower numbers are better ranks, so a smaller value is a
			// new record high.
			if old.HighestRank > uint(i+1) {
				f.log.Info().
					Uint("hnid", uint(id)).
					Uint("oldrecord", old.HighestRank).
					Uint("newrecord", uint(i+1)).
					Msg("recording new record high rank for story")
				old.HighestRank = uint(i + 1)
				needSave = true
			}
			if needSave {
				f.db.Save(&old)
			}
		}
	}

	// Sweep rows that are still marked active in the DB but no longer
	// appear in this scrape, and stamp them as disappeared.
	fpitems, err := f.db.Model(&HNFrontPage{}).Where("disappeared is ?", SQLITE_NULL_DATETIME).Rows()
	if err != nil {
		f.log.Error().
			Err(err).
			Msg("querying active frontpage rows failed")
		return err
	}
	var toupdate []uint
	for fpitems.Next() {
		var item HNFrontPage
		f.db.ScanRows(fpitems, &item)
		exitedFrontPage := true
		for _, xd := range ids {
			if item.HNID == uint(xd) {
				exitedFrontPage = false
			}
		}
		if exitedFrontPage {
			toupdate = append(toupdate, item.HNID)
			dur := t.Sub(item.Appeared).String()
			f.log.Info().
				Uint("hnid", item.HNID).
				Uint("HighestRank", item.HighestRank).
				Str("title", item.Title).
				Str("time_on_frontpage", dur).
				Str("url", item.URL).
				Msg("HN story exited frontpage")
		}
	}
	fpitems.Close() // close the rows before running the update below

	f.db.Model(&HNFrontPage{}).Where("disappeared is ? and hn_id in (?)", SQLITE_NULL_DATETIME, toupdate).Update("Disappeared", t)
	return nil
}
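
// A minimal wiring sketch, kept as a comment because run is unexported
// and this file defines no entry point. Everything outside this file
// (the "hn.db" path, the logger setup) is an illustrative assumption,
// not part of the package's committed API:
//
//	db, err := gorm.Open("sqlite3", "hn.db")
//	if err != nil {
//		panic(err)
//	}
//	defer db.Close()
//	logger := zerolog.New(os.Stderr).With().Timestamp().Logger()
//	f := NewFetcher(db)
//	f.AddLogger(&logger)
//	f.run() // blocks, fetching every fetchIntervalSecs seconds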