529 lines
12 KiB
Go
529 lines
12 KiB
Go
package main
|
|
|
|
import (
|
|
"crypto/sha256"
|
|
"database/sql"
|
|
"encoding/hex"
|
|
"encoding/json"
|
|
"fmt"
|
|
"os"
|
|
"sort"
|
|
"time"
|
|
|
|
"github.com/oklog/ulid/v2"
|
|
)
|
|
|
|
func setupDatabase() error {
|
|
_, err := db.Exec(`
|
|
CREATE TABLE IF NOT EXISTS articles (
|
|
link TEXT PRIMARY KEY,
|
|
title TEXT NOT NULL,
|
|
description TEXT,
|
|
published TIMESTAMP NOT NULL,
|
|
originalDate TIMESTAMP,
|
|
source TEXT NOT NULL,
|
|
firstseen TIMESTAMP NOT NULL,
|
|
seen TIMESTAMP,
|
|
summary TEXT,
|
|
importance INTEGER,
|
|
id TEXT,
|
|
broadcastTime TIMESTAMP
|
|
)
|
|
`)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
// Create logs table for structured log entries
|
|
_, err = db.Exec(`
|
|
CREATE TABLE IF NOT EXISTS logs (
|
|
id TEXT PRIMARY KEY,
|
|
timestamp TIMESTAMP NOT NULL,
|
|
log JSON NOT NULL
|
|
)
|
|
`)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
// Create index on timestamp for efficient querying and deletion
|
|
_, err = db.Exec(`
|
|
CREATE INDEX IF NOT EXISTS idx_logs_timestamp ON logs (timestamp)
|
|
`)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
// Check if columns exist
|
|
rows, err := db.Query(`PRAGMA table_info(articles)`)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
defer rows.Close()
|
|
|
|
hasIDColumn := false
|
|
hasBroadcastTimeColumn := false
|
|
hasOriginalDateColumn := false
|
|
|
|
for rows.Next() {
|
|
var cid, notnull, pk int
|
|
var name, type_name string
|
|
var dflt_value interface{}
|
|
if err := rows.Scan(&cid, &name, &type_name, ¬null, &dflt_value, &pk); err != nil {
|
|
return err
|
|
}
|
|
if name == "id" {
|
|
hasIDColumn = true
|
|
}
|
|
if name == "broadcastTime" {
|
|
hasBroadcastTimeColumn = true
|
|
}
|
|
if name == "originalDate" {
|
|
hasOriginalDateColumn = true
|
|
}
|
|
}
|
|
|
|
// Add missing columns if needed
|
|
if !hasIDColumn {
|
|
_, err = db.Exec(`ALTER TABLE articles ADD COLUMN id TEXT`)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
}
|
|
|
|
if !hasBroadcastTimeColumn {
|
|
_, err = db.Exec(`ALTER TABLE articles ADD COLUMN broadcastTime TIMESTAMP`)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
}
|
|
|
|
if !hasOriginalDateColumn {
|
|
_, err = db.Exec(`ALTER TABLE articles ADD COLUMN originalDate TIMESTAMP`)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
logInfo("db", "Added originalDate column to articles table", nil)
|
|
}
|
|
|
|
return nil
|
|
}
|
|
|
|
// generateID derives a stable, deterministic identifier for a URL: the
// SHA-256 digest of the URL, hex-encoded, truncated to its first 26
// characters (matching the length of a ULID, used elsewhere for log IDs).
func generateID(url string) string {
	digest := sha256.Sum256([]byte(url))
	encoded := hex.EncodeToString(digest[:])
	return encoded[:26]
}
|
|
|
|
func saveArticle(article Article) error {
|
|
_, err := db.Exec(`
|
|
INSERT OR IGNORE INTO articles
|
|
(link, title, description, published, originalDate, source, firstseen, seen, summary, importance, id, broadcastTime)
|
|
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
|
|
`,
|
|
article.Link, article.Title, article.Description, article.Published, article.OriginalDate,
|
|
article.Source, article.FirstSeen, article.Seen, article.Summary, article.Importance, article.ID,
|
|
article.BroadcastTime)
|
|
|
|
if err != nil {
|
|
logEvent("db_insert_error", map[string]interface{}{
|
|
"article": article.Link,
|
|
"id": article.ID,
|
|
"error": err.Error(),
|
|
})
|
|
}
|
|
return err
|
|
}
|
|
|
|
func updateArticle(article Article) error {
|
|
_, err := db.Exec(`
|
|
UPDATE articles SET
|
|
title = ?,
|
|
description = ?,
|
|
published = ?,
|
|
originalDate = ?,
|
|
source = ?,
|
|
firstseen = ?,
|
|
seen = ?,
|
|
summary = ?,
|
|
importance = ?,
|
|
id = ?,
|
|
broadcastTime = ?
|
|
WHERE link = ?
|
|
`,
|
|
article.Title, article.Description, article.Published, article.OriginalDate, article.Source,
|
|
article.FirstSeen, article.Seen, article.Summary, article.Importance, article.ID,
|
|
article.BroadcastTime, article.Link)
|
|
|
|
if err != nil {
|
|
logEvent("db_update_error", map[string]interface{}{
|
|
"article": article.Link,
|
|
"id": article.ID,
|
|
"error": err.Error(),
|
|
})
|
|
}
|
|
return err
|
|
}
|
|
|
|
func loadArticles() map[string]Article {
|
|
articles := make(map[string]Article)
|
|
|
|
rows, err := db.Query(`
|
|
SELECT link, title, description, published, originalDate, source, firstseen, seen, summary, importance, id, broadcastTime
|
|
FROM articles
|
|
`)
|
|
if err != nil {
|
|
logEvent("db_query_error", map[string]interface{}{
|
|
"error": err.Error(),
|
|
})
|
|
return articles
|
|
}
|
|
defer rows.Close()
|
|
|
|
for rows.Next() {
|
|
var a Article
|
|
var seen sql.NullTime
|
|
var broadcastTime sql.NullTime
|
|
var originalDate sql.NullTime
|
|
|
|
err := rows.Scan(
|
|
&a.Link, &a.Title, &a.Description, &a.Published, &originalDate, &a.Source,
|
|
&a.FirstSeen, &seen, &a.Summary, &a.Importance, &a.ID, &broadcastTime,
|
|
)
|
|
|
|
if err != nil {
|
|
logEvent("db_scan_error", map[string]interface{}{
|
|
"error": err.Error(),
|
|
})
|
|
continue
|
|
}
|
|
|
|
if seen.Valid {
|
|
a.Seen = seen.Time
|
|
}
|
|
|
|
if broadcastTime.Valid {
|
|
a.BroadcastTime = broadcastTime.Time
|
|
}
|
|
|
|
if originalDate.Valid {
|
|
a.OriginalDate = originalDate.Time
|
|
}
|
|
|
|
articles[a.Link] = a
|
|
}
|
|
|
|
return articles
|
|
}
|
|
|
|
// getBroadcastArticles is a common function for retrieving broadcast articles
|
|
// with consistent filtering criteria
|
|
func getBroadcastArticles(limit int) ([]Article, error) {
|
|
rows, err := db.Query(`
|
|
SELECT link, title, description, published, originalDate, source, firstseen, seen, summary, importance, id, broadcastTime
|
|
FROM articles
|
|
WHERE broadcastTime IS NOT NULL
|
|
AND broadcastTime > 1
|
|
AND broadcastTime != 0
|
|
AND datetime(broadcastTime) != '1970-01-01 00:00:00'
|
|
AND datetime(broadcastTime) != '0001-01-01 00:00:00'
|
|
AND strftime('%Y', broadcastTime) > '2000' -- Ensure year is at least 2000
|
|
ORDER BY broadcastTime DESC
|
|
LIMIT ?
|
|
`, limit)
|
|
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
defer rows.Close()
|
|
|
|
var articles []Article
|
|
for rows.Next() {
|
|
var a Article
|
|
var seen sql.NullTime
|
|
var broadcastTime sql.NullTime
|
|
var originalDate sql.NullTime
|
|
|
|
err := rows.Scan(
|
|
&a.Link, &a.Title, &a.Description, &a.Published, &originalDate, &a.Source,
|
|
&a.FirstSeen, &seen, &a.Summary, &a.Importance, &a.ID, &broadcastTime,
|
|
)
|
|
|
|
if err != nil {
|
|
continue
|
|
}
|
|
|
|
if seen.Valid {
|
|
a.Seen = seen.Time
|
|
}
|
|
|
|
if broadcastTime.Valid {
|
|
a.BroadcastTime = broadcastTime.Time
|
|
}
|
|
|
|
if originalDate.Valid {
|
|
a.OriginalDate = originalDate.Time
|
|
}
|
|
|
|
articles = append(articles, a)
|
|
}
|
|
|
|
return articles, nil
|
|
}
|
|
|
|
// getBroadcastHistory gets the most recent broadcast articles
// (newest first, at most limit entries). It is a thin alias for
// getBroadcastArticles, kept so call sites read as "history" queries.
func getBroadcastHistory(limit int) ([]Article, error) {
	return getBroadcastArticles(limit)
}
|
|
|
|
// getNextUpArticles gets the top 25 articles eligible for broadcast sorted by importance
|
|
func getNextUpArticles() ([]Article, error) {
|
|
now := time.Now()
|
|
cutoff := now.Add(-ARTICLE_FRESHNESS_WINDOW) // Time window for considering articles fresh
|
|
|
|
rows, err := db.Query(`
|
|
SELECT link, title, description, published, originalDate, source, firstseen, seen, summary, importance, id, broadcastTime
|
|
FROM articles
|
|
WHERE broadcastTime IS NULL
|
|
AND summary IS NOT NULL
|
|
AND importance > 0
|
|
AND firstseen > ?
|
|
ORDER BY importance DESC
|
|
LIMIT 25
|
|
`, cutoff)
|
|
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
defer rows.Close()
|
|
|
|
var articles []Article
|
|
for rows.Next() {
|
|
var a Article
|
|
var seen sql.NullTime
|
|
var broadcastTime sql.NullTime
|
|
var originalDate sql.NullTime
|
|
|
|
err := rows.Scan(
|
|
&a.Link, &a.Title, &a.Description, &a.Published, &originalDate, &a.Source,
|
|
&a.FirstSeen, &seen, &a.Summary, &a.Importance, &a.ID, &broadcastTime,
|
|
)
|
|
|
|
if err != nil {
|
|
continue
|
|
}
|
|
|
|
if seen.Valid {
|
|
a.Seen = seen.Time
|
|
}
|
|
|
|
if broadcastTime.Valid {
|
|
a.BroadcastTime = broadcastTime.Time
|
|
}
|
|
|
|
if originalDate.Valid {
|
|
a.OriginalDate = originalDate.Time
|
|
}
|
|
|
|
articles = append(articles, a)
|
|
}
|
|
|
|
return articles, nil
|
|
}
|
|
|
|
// getRecentBroadcasts retrieves the n most recently broadcast articles
|
|
func getRecentBroadcasts(n int) []Article {
|
|
articles, err := getBroadcastArticles(n)
|
|
if err != nil {
|
|
logInfo("db", "Error retrieving recent broadcasts", map[string]interface{}{
|
|
"error": err.Error(),
|
|
})
|
|
return []Article{}
|
|
}
|
|
return articles
|
|
}
|
|
|
|
func setupLogging() {
|
|
var err error
|
|
logFile, err = os.Create(logPath)
|
|
if err != nil {
|
|
fmt.Fprintf(os.Stderr, "could not create log file: %v\n", err)
|
|
os.Exit(1)
|
|
}
|
|
}
|
|
|
|
func flushLog() {
|
|
logMutex.Lock()
|
|
defer logMutex.Unlock()
|
|
|
|
if logFile == nil {
|
|
return
|
|
}
|
|
|
|
enc := json.NewEncoder(logFile)
|
|
enc.SetIndent("", " ")
|
|
_ = enc.Encode(logData)
|
|
logFile.Close()
|
|
}
|
|
|
|
// logEvent logs a structured message to both console and database
|
|
func logEvent(event string, details map[string]interface{}) {
|
|
logMutex.Lock()
|
|
defer logMutex.Unlock()
|
|
|
|
// Set timestamp if not already provided
|
|
if _, exists := details["timestamp"]; !exists {
|
|
details["timestamp"] = time.Now()
|
|
}
|
|
|
|
// Set event if not already in details
|
|
if _, exists := details["event"]; !exists {
|
|
details["event"] = event
|
|
}
|
|
|
|
timestamp := time.Now()
|
|
entry := LogEntry{
|
|
Timestamp: timestamp,
|
|
Event: event,
|
|
Details: details,
|
|
}
|
|
|
|
// Add to the permanent log data (for file-based logging)
|
|
logData = append(logData, entry)
|
|
|
|
// Store log in database
|
|
logBytes, err := json.Marshal(entry)
|
|
if err != nil {
|
|
fmt.Fprintf(os.Stderr, "Error marshaling log entry: %v\n", err)
|
|
return
|
|
}
|
|
|
|
// Generate ULID for the log entry
|
|
entropy := ulid.DefaultEntropy()
|
|
id := ulid.MustNew(ulid.Timestamp(timestamp), entropy).String()
|
|
|
|
// Insert into database
|
|
_, err = db.Exec("INSERT INTO logs (id, timestamp, log) VALUES (?, ?, ?)",
|
|
id, timestamp, string(logBytes))
|
|
if err != nil {
|
|
fmt.Fprintf(os.Stderr, "Error storing log in database: %v\n", err)
|
|
}
|
|
}
|
|
|
|
// getRecentLogs retrieves recent log entries from the database
|
|
func getRecentLogs(limit int) ([]LogEntry, error) {
|
|
rows, err := db.Query(`
|
|
SELECT log FROM logs
|
|
ORDER BY timestamp DESC
|
|
LIMIT ?
|
|
`, limit)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
defer rows.Close()
|
|
|
|
var logs []LogEntry
|
|
for rows.Next() {
|
|
var logJSON string
|
|
if err := rows.Scan(&logJSON); err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
var entry LogEntry
|
|
if err := json.Unmarshal([]byte(logJSON), &entry); err != nil {
|
|
return nil, err
|
|
}
|
|
|
|
logs = append(logs, entry)
|
|
}
|
|
|
|
return logs, nil
|
|
}
|
|
|
|
// cleanupOldLogs deletes logs older than one month
|
|
func cleanupOldLogs() error {
|
|
// Calculate cutoff date (one month ago)
|
|
cutoff := time.Now().AddDate(0, -1, 0)
|
|
|
|
result, err := db.Exec("DELETE FROM logs WHERE timestamp < ?", cutoff)
|
|
if err != nil {
|
|
return err
|
|
}
|
|
|
|
rowsDeleted, _ := result.RowsAffected()
|
|
if rowsDeleted > 0 {
|
|
fmt.Fprintf(os.Stderr, "[logs] Deleted %d log entries older than one month\n", rowsDeleted)
|
|
}
|
|
|
|
return nil
|
|
}
|
|
|
|
// logCleanupWorker runs periodically to clean up old logs
|
|
func logCleanupWorker(shutdown chan struct{}) {
|
|
logInfo("logs", "Starting log cleanup worker", map[string]interface{}{
|
|
"interval": "15 minutes",
|
|
"retention": "1 month",
|
|
})
|
|
|
|
// Run cleanup immediately on startup
|
|
if err := cleanupOldLogs(); err != nil {
|
|
logInfo("logs", "Error cleaning up old logs", map[string]interface{}{
|
|
"error": err.Error(),
|
|
})
|
|
}
|
|
|
|
// Then run on interval
|
|
ticker := time.NewTicker(15 * time.Minute)
|
|
defer ticker.Stop()
|
|
|
|
for {
|
|
select {
|
|
case <-ticker.C:
|
|
if err := cleanupOldLogs(); err != nil {
|
|
logInfo("logs", "Error cleaning up old logs", map[string]interface{}{
|
|
"error": err.Error(),
|
|
})
|
|
}
|
|
case <-shutdown:
|
|
logInfo("logs", "Shutting down log cleanup worker", nil)
|
|
return
|
|
}
|
|
}
|
|
}
|
|
|
|
// logInfo logs a structured message to both console and log file
|
|
func logInfo(component, message string, data map[string]interface{}) {
|
|
// Create a copy of the data map to avoid modifying the original
|
|
logData := make(map[string]interface{})
|
|
for k, v := range data {
|
|
logData[k] = v
|
|
}
|
|
|
|
// Add component and message to the log data
|
|
logData["component"] = component
|
|
logData["message"] = message
|
|
|
|
// Format console output: timestamp component: message key1=val1 key2=val2
|
|
timestamp := time.Now().Format("15:04:05.000")
|
|
console := fmt.Sprintf("[%s] [%s] %s", timestamp, component, message)
|
|
|
|
// Add key-value pairs to console output
|
|
keys := make([]string, 0, len(data))
|
|
for k := range data {
|
|
keys = append(keys, k)
|
|
}
|
|
sort.Strings(keys) // Sort keys for consistent output
|
|
|
|
for _, k := range keys {
|
|
v := data[k]
|
|
console += fmt.Sprintf(" %s=%v", k, v)
|
|
}
|
|
|
|
// Print to console
|
|
fmt.Fprintln(os.Stderr, console)
|
|
|
|
// Log to structured log file
|
|
logEvent("info", logData)
|
|
}
|