package main

import (
	"crypto/sha256"
	"database/sql"
	"encoding/hex"
	"encoding/json"
	"log/slog"
	"os"
	"time"

	"github.com/oklog/ulid/v2"
)

// setupDatabase creates the articles and logs tables if they do not exist
// and migrates older databases in place by adding any missing columns.
func setupDatabase() error {
	_, err := db.Exec(`
		CREATE TABLE IF NOT EXISTS articles (
			link TEXT PRIMARY KEY,
			title TEXT NOT NULL,
			description TEXT,
			published TIMESTAMP NOT NULL,
			originalDate TIMESTAMP,
			source TEXT NOT NULL,
			firstseen TIMESTAMP NOT NULL,
			seen TIMESTAMP,
			summary TEXT,
			importance INTEGER,
			id TEXT,
			broadcastTime TIMESTAMP
		)
	`)
	if err != nil {
		return err
	}

	// Create logs table for structured log entries
	_, err = db.Exec(`
		CREATE TABLE IF NOT EXISTS logs (
			id TEXT PRIMARY KEY,
			timestamp TIMESTAMP NOT NULL,
			log JSON NOT NULL
		)
	`)
	if err != nil {
		return err
	}

	// Create index on timestamp for efficient querying and deletion
	_, err = db.Exec(`
		CREATE INDEX IF NOT EXISTS idx_logs_timestamp ON logs (timestamp)
	`)
	if err != nil {
		return err
	}

	// Check which columns already exist
	rows, err := db.Query(`PRAGMA table_info(articles)`)
	if err != nil {
		return err
	}
	defer rows.Close()

	hasIDColumn := false
	hasBroadcastTimeColumn := false
	hasOriginalDateColumn := false
	for rows.Next() {
		var cid, notnull, pk int
		var name, typeName string
		var dfltValue interface{}
		if err := rows.Scan(&cid, &name, &typeName, &notnull, &dfltValue, &pk); err != nil {
			return err
		}
		switch name {
		case "id":
			hasIDColumn = true
		case "broadcastTime":
			hasBroadcastTimeColumn = true
		case "originalDate":
			hasOriginalDateColumn = true
		}
	}

	// Add missing columns if needed
	if !hasIDColumn {
		if _, err = db.Exec(`ALTER TABLE articles ADD COLUMN id TEXT`); err != nil {
			return err
		}
	}
	if !hasBroadcastTimeColumn {
		if _, err = db.Exec(`ALTER TABLE articles ADD COLUMN broadcastTime TIMESTAMP`); err != nil {
			return err
		}
	}
	if !hasOriginalDateColumn {
		if _, err = db.Exec(`ALTER TABLE articles ADD COLUMN originalDate TIMESTAMP`); err != nil {
			return err
		}
		logInfo("db", "Added originalDate column to articles table", nil)
	}

	return nil
}

// generateID derives a deterministic ID from a URL: the first 26 hex
// characters of its SHA-256 hash.
func generateID(url string) string {
	hash := sha256.Sum256([]byte(url))
	return hex.EncodeToString(hash[:])[:26]
}

// saveArticle inserts an article, silently ignoring the write if the link
// already exists.
func saveArticle(article Article) error {
	_, err := db.Exec(`
		INSERT OR IGNORE INTO articles
		(link, title, description, published, originalDate, source, firstseen, seen, summary, importance, id, broadcastTime)
		VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
	`, article.Link, article.Title, article.Description, article.Published,
		article.OriginalDate, article.Source, article.FirstSeen, article.Seen,
		article.Summary, article.Importance, article.ID, article.BroadcastTime)
	if err != nil {
		logEvent("db_insert_error", map[string]interface{}{
			"article": article.Link,
			"id":      article.ID,
			"error":   err.Error(),
		})
	}
	return err
}
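// Example (sketch, not from the original program): how a newly fetched item
// might be keyed and persisted. The fetch context and field choices are
// assumptions; only generateID, saveArticle, and the Article fields used in
// saveArticle above come from this file.
func saveFetchedItemExample(link, title, source string) error {
	now := time.Now()
	return saveArticle(Article{
		Link:      link,
		Title:     title,
		Source:    source,
		Published: now,
		FirstSeen: now,
		ID:        generateID(link), // deterministic: the same link always yields the same ID
	})
}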
// updateArticle rewrites every mutable column for the article identified by link.
func updateArticle(article Article) error {
	_, err := db.Exec(`
		UPDATE articles
		SET title = ?, description = ?, published = ?, originalDate = ?, source = ?,
			firstseen = ?, seen = ?, summary = ?, importance = ?, id = ?, broadcastTime = ?
		WHERE link = ?
	`, article.Title, article.Description, article.Published, article.OriginalDate,
		article.Source, article.FirstSeen, article.Seen, article.Summary,
		article.Importance, article.ID, article.BroadcastTime, article.Link)
	if err != nil {
		logEvent("db_update_error", map[string]interface{}{
			"article": article.Link,
			"id":      article.ID,
			"error":   err.Error(),
		})
	}
	return err
}

// loadArticles reads every stored article into a map keyed by link.
func loadArticles() map[string]Article {
	articles := make(map[string]Article)

	rows, err := db.Query(`
		SELECT link, title, description, published, originalDate, source,
			firstseen, seen, summary, importance, id, broadcastTime
		FROM articles
	`)
	if err != nil {
		logEvent("db_query_error", map[string]interface{}{
			"error": err.Error(),
		})
		return articles
	}
	defer rows.Close()

	for rows.Next() {
		var a Article
		var seen, broadcastTime, originalDate sql.NullTime
		err := rows.Scan(
			&a.Link, &a.Title, &a.Description, &a.Published, &originalDate,
			&a.Source, &a.FirstSeen, &seen, &a.Summary, &a.Importance,
			&a.ID, &broadcastTime,
		)
		if err != nil {
			logEvent("db_scan_error", map[string]interface{}{
				"error": err.Error(),
			})
			continue
		}
		if seen.Valid {
			a.Seen = seen.Time
		}
		if broadcastTime.Valid {
			a.BroadcastTime = broadcastTime.Time
		}
		if originalDate.Valid {
			a.OriginalDate = originalDate.Time
		}
		articles[a.Link] = a
	}
	return articles
}

// getBroadcastArticles is a common function for retrieving broadcast articles
// with consistent filtering criteria: rows whose broadcastTime is set and is
// not a zero-value or epoch sentinel.
func getBroadcastArticles(limit int) ([]Article, error) {
	rows, err := db.Query(`
		SELECT link, title, description, published, originalDate, source,
			firstseen, seen, summary, importance, id, broadcastTime
		FROM articles
		WHERE broadcastTime IS NOT NULL
			AND broadcastTime > 1
			AND broadcastTime != 0
			AND datetime(broadcastTime) != '1970-01-01 00:00:00'
			AND datetime(broadcastTime) != '0001-01-01 00:00:00'
			AND strftime('%Y', broadcastTime) > '2000' -- Ensure year is after 2000
		ORDER BY broadcastTime DESC
		LIMIT ?
	`, limit)
	if err != nil {
		return nil, err
	}
	defer rows.Close()

	var articles []Article
	for rows.Next() {
		var a Article
		var seen, broadcastTime, originalDate sql.NullTime
		err := rows.Scan(
			&a.Link, &a.Title, &a.Description, &a.Published, &originalDate,
			&a.Source, &a.FirstSeen, &seen, &a.Summary, &a.Importance,
			&a.ID, &broadcastTime,
		)
		if err != nil {
			continue
		}
		if seen.Valid {
			a.Seen = seen.Time
		}
		if broadcastTime.Valid {
			a.BroadcastTime = broadcastTime.Time
		}
		if originalDate.Valid {
			a.OriginalDate = originalDate.Time
		}
		articles = append(articles, a)
	}
	return articles, nil
}

// getBroadcastHistory gets the most recent broadcast articles
func getBroadcastHistory(limit int) ([]Article, error) {
	return getBroadcastArticles(limit)
}
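// Sketch: an in-Go equivalent of the sentinel filtering that
// getBroadcastArticles performs in SQL, handy when validating a single
// Article outside a query. The year-2000 cutoff mirrors the
// strftime('%Y', ...) > '2000' clause above; treat this helper as an
// assumption, not part of the original flow.
func hasValidBroadcastTimeExample(a Article) bool {
	// IsZero rejects the Go zero time (0001-01-01); the year check also
	// rejects the Unix epoch and other bogus pre-2001 values.
	return !a.BroadcastTime.IsZero() && a.BroadcastTime.Year() > 2000
}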
// getNextUpArticles gets the top 25 articles eligible for broadcast, sorted
// by importance: not yet broadcast, summarized, rated, and first seen within
// the freshness window.
func getNextUpArticles() ([]Article, error) {
	now := time.Now()
	cutoff := now.Add(-ARTICLE_FRESHNESS_WINDOW) // Time window for considering articles fresh

	rows, err := db.Query(`
		SELECT link, title, description, published, originalDate, source,
			firstseen, seen, summary, importance, id, broadcastTime
		FROM articles
		WHERE broadcastTime IS NULL
			AND summary IS NOT NULL
			AND importance > 0
			AND firstseen > ?
		ORDER BY importance DESC
		LIMIT 25
	`, cutoff)
	if err != nil {
		return nil, err
	}
	defer rows.Close()

	var articles []Article
	for rows.Next() {
		var a Article
		var seen, broadcastTime, originalDate sql.NullTime
		err := rows.Scan(
			&a.Link, &a.Title, &a.Description, &a.Published, &originalDate,
			&a.Source, &a.FirstSeen, &seen, &a.Summary, &a.Importance,
			&a.ID, &broadcastTime,
		)
		if err != nil {
			continue
		}
		if seen.Valid {
			a.Seen = seen.Time
		}
		if broadcastTime.Valid {
			a.BroadcastTime = broadcastTime.Time
		}
		if originalDate.Valid {
			a.OriginalDate = originalDate.Time
		}
		articles = append(articles, a)
	}
	return articles, nil
}

// getRecentBroadcasts retrieves the n most recently broadcast articles,
// returning an empty slice on error.
func getRecentBroadcasts(n int) []Article {
	articles, err := getBroadcastArticles(n)
	if err != nil {
		logInfo("db", "Error retrieving recent broadcasts", map[string]interface{}{
			"error": err.Error(),
		})
		return []Article{}
	}
	return articles
}

// setupLogging opens the log file and installs a JSON slog handler as the
// default logger.
func setupLogging() {
	var err error
	logFile, err = os.Create(logPath)
	if err != nil {
		slog.Error("Could not create log file", "error", err)
		os.Exit(1)
	}

	// Set up structured logger
	jsonHandler := slog.NewJSONHandler(os.Stderr, &slog.HandlerOptions{
		Level: slog.LevelInfo,
	})
	slog.SetDefault(slog.New(jsonHandler))
}

// flushLog writes the accumulated log entries to the log file and closes it.
func flushLog() {
	logMutex.Lock()
	defer logMutex.Unlock()
	if logFile == nil {
		return
	}
	enc := json.NewEncoder(logFile)
	enc.SetIndent("", " ")
	_ = enc.Encode(logData)
	logFile.Close()
}

// logEvent logs a structured message to both console and database
func logEvent(event string, details map[string]interface{}) {
	logMutex.Lock()
	defer logMutex.Unlock()

	// Set timestamp if not already provided
	if _, exists := details["timestamp"]; !exists {
		details["timestamp"] = time.Now()
	}
	// Set event if not already in details
	if _, exists := details["event"]; !exists {
		details["event"] = event
	}

	timestamp := time.Now()
	entry := LogEntry{
		Timestamp: timestamp,
		Event:     event,
		Details:   details,
	}

	// Add to the permanent log data (for file-based logging)
	logData = append(logData, entry)

	// Store log in database
	logBytes, err := json.Marshal(entry)
	if err != nil {
		slog.Error("Error marshaling log entry", "error", err)
		return
	}

	// Generate a ULID for the log entry so rows sort by creation time
	entropy := ulid.DefaultEntropy()
	id := ulid.MustNew(ulid.Timestamp(timestamp), entropy).String()

	// Insert into database
	_, err = db.Exec("INSERT INTO logs (id, timestamp, log) VALUES (?, ?, ?)",
		id, timestamp, string(logBytes))
	if err != nil {
		slog.Error("Error storing log in database", "error", err)
	}
}
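// Sketch of a typical logEvent call site. The event name and detail keys are
// illustrative assumptions, not values used elsewhere in the program.
func logFeedFetchedExample(feedURL string, articleCount int) {
	logEvent("feed_fetched", map[string]interface{}{
		"url":   feedURL,
		"count": articleCount,
	})
}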
// getRecentLogs retrieves recent log entries from the database
func getRecentLogs(limit int) ([]LogEntry, error) {
	rows, err := db.Query(`
		SELECT log FROM logs
		ORDER BY timestamp DESC
		LIMIT ?
	`, limit)
	if err != nil {
		return nil, err
	}
	defer rows.Close()

	var logs []LogEntry
	for rows.Next() {
		var logJSON string
		if err := rows.Scan(&logJSON); err != nil {
			return nil, err
		}
		var entry LogEntry
		if err := json.Unmarshal([]byte(logJSON), &entry); err != nil {
			return nil, err
		}
		logs = append(logs, entry)
	}
	return logs, nil
}

// cleanupOldLogs deletes logs older than one month
func cleanupOldLogs() error {
	// Calculate cutoff date (one month ago)
	cutoff := time.Now().AddDate(0, -1, 0)

	result, err := db.Exec("DELETE FROM logs WHERE timestamp < ?", cutoff)
	if err != nil {
		return err
	}

	rowsDeleted, _ := result.RowsAffected()
	if rowsDeleted > 0 {
		logInfo("logs", "Deleted old log entries", map[string]interface{}{
			"count":      rowsDeleted,
			"olderThan":  "1 month",
			"cutoffDate": cutoff.Format(time.RFC3339),
		})
	}
	return nil
}

// logCleanupWorker runs periodically to clean up old logs
func logCleanupWorker(shutdown chan struct{}) {
	logInfo("logs", "Starting log cleanup worker", map[string]interface{}{
		"interval":  "15 minutes",
		"retention": "1 month",
	})

	// Run cleanup immediately on startup
	if err := cleanupOldLogs(); err != nil {
		logInfo("logs", "Error cleaning up old logs", map[string]interface{}{
			"error": err.Error(),
		})
	}

	// Then run on interval
	ticker := time.NewTicker(15 * time.Minute)
	defer ticker.Stop()

	for {
		select {
		case <-ticker.C:
			if err := cleanupOldLogs(); err != nil {
				logInfo("logs", "Error cleaning up old logs", map[string]interface{}{
					"error": err.Error(),
				})
			}
		case <-shutdown:
			logInfo("logs", "Shutting down log cleanup worker", nil)
			return
		}
	}
}

// logInfo logs a structured message to both console and log file
func logInfo(component string, message string, data map[string]interface{}) {
	// Copy the data map so the caller's map is not modified (and to avoid
	// confusion with the package-level logData slice, the copy is named fields)
	fields := make(map[string]interface{})
	for k, v := range data {
		fields[k] = v
	}

	// Add component and message to the log data
	fields["component"] = component
	fields["message"] = message

	// Use slog for structured logging to console
	attrs := []any{}
	for k, v := range fields {
		attrs = append(attrs, k, v)
	}
	slog.Info(message, attrs...)

	// Log to structured log file and database
	logEvent("info", fields)
}
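// Sketch of how the cleanup worker might be wired up at startup; the
// goroutine placement and shutdown handling are assumptions about the
// surrounding program, not code taken from it.
func startLogCleanupExample() (stop func()) {
	shutdown := make(chan struct{})
	go logCleanupWorker(shutdown)
	return func() { close(shutdown) } // call to stop the worker
}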