package main

import (
	"net/http"
	"sync"
	"time"

	"github.com/mmcdole/gofeed"
)

// Map of source names to their abbreviations
var sourceAbbreviations = map[string]string{
	"BBC":             "BBC",
	"CNN":             "CNN",
	"NYTimes":         "NYT",
	"Guardian":        "Grd",
	"Al Jazeera":      "AlJ",
	"NBC":             "NBC",
	"ABC":             "ABC",
	"CBS":             "CBS",
	"Sky News":        "Sky",
	"Time":            "Time",
	"NPR":             "NPR",
	"Deutsche Welle":  "DW",
	"France 24":       "F24",
	"The Independent": "Ind",
	"Washington Post": "WaPo",
	"WSJ":             "WSJ",
}

// Map of source names to their RSS feed URLs
var feeds = map[string]string{
	"BBC":             "https://feeds.bbci.co.uk/news/world/rss.xml",
	"CNN":             "http://rss.cnn.com/rss/edition.rss",
	"NYTimes":         "https://rss.nytimes.com/services/xml/rss/nyt/World.xml",
	"Guardian":        "https://www.theguardian.com/world/rss",
	"Al Jazeera":      "https://www.aljazeera.com/xml/rss/all.xml",
	"NBC":             "http://feeds.nbcnews.com/nbcnews/public/news",
	"ABC":             "https://abcnews.go.com/abcnews/topstories",
	"CBS":             "https://www.cbsnews.com/latest/rss/world",
	"Sky News":        "https://feeds.skynews.com/feeds/rss/world.xml",
	"Time":            "https://time.com/feed/",
	"NPR":             "https://feeds.npr.org/1001/rss.xml",
	"Deutsche Welle":  "https://rss.dw.com/rdf/rss-en-world",
	"France 24":       "https://www.france24.com/en/rss",
	"The Independent": "https://www.independent.co.uk/news/world/rss",
	"Washington Post": "https://feeds.washingtonpost.com/rss/world",
	"WSJ":             "https://feeds.a.dj.com/rss/RSSWorldNews.xml",
}

// This function was unused and removed to satisfy linter

// rssFeedChecker checks RSS feeds every 15 minutes and adds new articles to the database
func rssFeedChecker(shutdown chan struct{}, ollamaURL, ollamaModel string) {
	logInfo("rss", "Starting RSS feed checker", map[string]interface{}{
		"interval": RSS_CHECK_INTERVAL.String(),
	})

	// Run immediately on startup
	checkRSSFeeds()

	// Then run on interval
	ticker := time.NewTicker(RSS_CHECK_INTERVAL)
	defer ticker.Stop()

	for {
		select {
		case <-ticker.C:
			checkRSSFeeds()
		case <-shutdown:
			logInfo("rss", "Shutting down RSS feed checker", nil)
			return
		}
	}
}

// checkRSSFeeds fetches all RSS feeds and adds new articles to the database
func checkRSSFeeds() {
	articles := loadArticles()
	oldCount := len(articles)

	logInfo("rss", "Checking RSS feeds", map[string]interface{}{
		"time":                time.Now().Format("15:04:05"),
		"articlesBeforeFetch": oldCount,
	})

	now := time.Now()
	newArticles := fetchAllFeedsParallel(now)
	newCount := 0

	for _, a := range newArticles {
		if _, ok := articles[a.Link]; !ok {
			if a.ID == "" {
				a.ID = generateID(a.Link)
			}
			articles[a.Link] = a
			if err := saveArticle(a); err != nil {
				logInfo("rss", "Error saving article", map[string]interface{}{
					"id":    a.ID,
					"error": err.Error(),
				})
			}
			newCount++
			logInfo("new", "Found new article", map[string]interface{}{
				"id":        a.ID,
				"title":     a.Title,
				"source":    a.Source,
				"published": a.Published.Format(time.RFC3339),
			})
		}
	}

	logInfo("rss", "Completed RSS check", map[string]interface{}{
		"articlesBeforeFetch": oldCount,
		"articlesAfterFetch":  oldCount + newCount,
		"newArticles":         newCount,
	})
}

// fetchAllFeedsParallel fetches every configured feed concurrently and returns
// the articles published within the last 7 days.
func fetchAllFeedsParallel(now time.Time) []Article {
	type fetchResult struct {
		Source     string
		URL        string
		Feed       *gofeed.Feed
		Err        error
		HTTPStatus int
		Duration   time.Duration
	}

	var wg sync.WaitGroup
	results := make(chan fetchResult, len(feeds))

	for source, url := range feeds {
		wg.Add(1)
		go func(source, url string) {
			defer wg.Done()
			start := time.Now()
			fp := gofeed.NewParser()
			client := &http.Client{Timeout: 20 * time.Second}

			var httpStatus int
			var feed *gofeed.Feed
			resp, err := client.Get(url)
			if err == nil {
				httpStatus = resp.StatusCode
				defer resp.Body.Close()
				feed, err = fp.Parse(resp.Body)
			}
			duration := time.Since(start)

			details := map[string]interface{}{
				"source":   source,
				"url":      url,
				"status":   httpStatus,
				"duration": duration.Seconds(),
			}
			if err != nil {
				details["error"] = err.Error()
			}
			logEvent("rss_fetch_result", details)

			if err != nil {
				logInfo("rss", "Feed fetch failed", map[string]interface{}{
					"source":   source,
					"url":      url,
					"duration": duration.Seconds(),
					"error":    err.Error(),
				})
				results <- fetchResult{Source: source, URL: url, Err: err, Duration: duration, HTTPStatus: httpStatus}
				return
			}

			logInfo("rss", "Feed fetch succeeded", map[string]interface{}{
				"source":   source,
				"url":      url,
				"duration": duration.Seconds(),
				"status":   httpStatus,
				"items":    len(feed.Items),
			})
			results <- fetchResult{
				Source:     source,
				URL:        url,
				Feed:       feed,
				Duration:   duration,
				HTTPStatus: httpStatus,
			}
		}(source, url)
	}

	wg.Wait()
	close(results)

	var all []Article
	for result := range results {
		if result.Err != nil || result.Feed == nil {
			continue
		}
		for _, item := range result.Feed.Items {
			// Use the feed's publication date when available; fall back to now
			published := now
			if item.PublishedParsed != nil {
				published = *item.PublishedParsed
			}
			// originalDate records the feed's publication date
			originalDate := published

			// Skip articles older than 7 days based on the feed's publication date
			cutoffDate := now.AddDate(0, 0, -7) // 7 days ago
			if published.Before(cutoffDate) {
				continue
			}

			all = append(all, Article{
				Title:        item.Title,
				Description:  item.Description,
				Link:         item.Link,
				Published:    now,          // When we first saw the article
				OriginalDate: originalDate, // Original publication date from the feed
				Source:       result.Source,
				FirstSeen:    now,
				ID:           generateID(item.Link),
			})
		}
	}
	return all
}
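
// Example wiring for rssFeedChecker: a minimal sketch of how the checker can be
// started and stopped via the shutdown channel it selects on. startRSSChecker is
// a hypothetical helper, not part of this file; the caller is assumed to obtain
// ollamaURL and ollamaModel elsewhere in this package.
//
//	func startRSSChecker(ollamaURL, ollamaModel string) (stop func()) {
//		shutdown := make(chan struct{})
//		go rssFeedChecker(shutdown, ollamaURL, ollamaModel)
//		// Closing the channel unblocks the <-shutdown case and ends the goroutine.
//		return func() { close(shutdown) }
//	}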