gomeshalerter/storage.go

532 lines
12 KiB
Go

package main
import (
"crypto/sha256"
"database/sql"
"encoding/hex"
"encoding/json"
"log/slog"
"os"
"time"
"github.com/oklog/ulid/v2"
)
// setupDatabase creates the articles and logs tables and the logs timestamp
// index when they do not exist yet, then inspects the articles table and adds
// any of the id, broadcastTime and originalDate columns that are missing.
//
// It returns the first error reported by the database, or nil on success.
func setupDatabase() error {
	if _, err := db.Exec(`
CREATE TABLE IF NOT EXISTS articles (
link TEXT PRIMARY KEY,
title TEXT NOT NULL,
description TEXT,
published TIMESTAMP NOT NULL,
originalDate TIMESTAMP,
source TEXT NOT NULL,
firstseen TIMESTAMP NOT NULL,
seen TIMESTAMP,
summary TEXT,
importance INTEGER,
id TEXT,
broadcastTime TIMESTAMP
)
`); err != nil {
		return err
	}

	// Create logs table for structured log entries.
	if _, err := db.Exec(`
CREATE TABLE IF NOT EXISTS logs (
id TEXT PRIMARY KEY,
timestamp TIMESTAMP NOT NULL,
log JSON NOT NULL
)
`); err != nil {
		return err
	}

	// Create index on timestamp for efficient querying and deletion.
	if _, err := db.Exec(`
CREATE INDEX IF NOT EXISTS idx_logs_timestamp ON logs (timestamp)
`); err != nil {
		return err
	}

	// Collect the names of the columns the articles table currently has.
	rows, err := db.Query(`PRAGMA table_info(articles)`)
	if err != nil {
		return err
	}
	defer rows.Close()

	present := make(map[string]bool)
	for rows.Next() {
		// Only name is used; the other fields exist to satisfy Scan.
		var cid, notNull, pk int
		var name, typeName string
		var defaultValue interface{}
		if err := rows.Scan(&cid, &name, &typeName, &notNull, &defaultValue, &pk); err != nil {
			return err
		}
		present[name] = true
	}
	// rows.Next also returns false when iteration fails; without this check a
	// failed read would be mistaken for "column missing" below.
	if err := rows.Err(); err != nil {
		return err
	}

	// Add missing columns if needed.
	if !present["id"] {
		if _, err := db.Exec(`ALTER TABLE articles ADD COLUMN id TEXT`); err != nil {
			return err
		}
	}
	if !present["broadcastTime"] {
		if _, err := db.Exec(`ALTER TABLE articles ADD COLUMN broadcastTime TIMESTAMP`); err != nil {
			return err
		}
	}
	if !present["originalDate"] {
		if _, err := db.Exec(`ALTER TABLE articles ADD COLUMN originalDate TIMESTAMP`); err != nil {
			return err
		}
		logInfo("db", "Added originalDate column to articles table", nil)
	}
	return nil
}
// generateID derives a deterministic 26-character identifier from url: the
// leading 26 hexadecimal digits of the URL's SHA-256 digest.
func generateID(url string) string {
	digest := sha256.Sum256([]byte(url))
	// 13 digest bytes encode to exactly 26 hex characters.
	return hex.EncodeToString(digest[:13])
}
// saveArticle inserts article into the articles table. The statement is
// INSERT OR IGNORE, so a row that conflicts with an existing one is left
// untouched. A database error is recorded as a "db_insert_error" event and
// returned to the caller.
func saveArticle(article Article) error {
	const insertSQL = `
INSERT OR IGNORE INTO articles
(link, title, description, published, originalDate, source, firstseen, seen, summary, importance, id, broadcastTime)
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
`
	// Values in the same order as the column list above.
	values := []interface{}{
		article.Link,
		article.Title,
		article.Description,
		article.Published,
		article.OriginalDate,
		article.Source,
		article.FirstSeen,
		article.Seen,
		article.Summary,
		article.Importance,
		article.ID,
		article.BroadcastTime,
	}

	if _, err := db.Exec(insertSQL, values...); err != nil {
		logEvent("db_insert_error", map[string]interface{}{
			"article": article.Link,
			"id":      article.ID,
			"error":   err.Error(),
		})
		return err
	}
	return nil
}
// updateArticle overwrites every non-key column of the row whose link equals
// article.Link with the values held in article. A database error is recorded
// as a "db_update_error" event and returned to the caller.
func updateArticle(article Article) error {
	const updateSQL = `
UPDATE articles SET
title = ?,
description = ?,
published = ?,
originalDate = ?,
source = ?,
firstseen = ?,
seen = ?,
summary = ?,
importance = ?,
id = ?,
broadcastTime = ?
WHERE link = ?
`
	// Values in the order of the placeholders; the link is last because it
	// feeds the WHERE clause.
	values := []interface{}{
		article.Title,
		article.Description,
		article.Published,
		article.OriginalDate,
		article.Source,
		article.FirstSeen,
		article.Seen,
		article.Summary,
		article.Importance,
		article.ID,
		article.BroadcastTime,
		article.Link,
	}

	if _, err := db.Exec(updateSQL, values...); err != nil {
		logEvent("db_update_error", map[string]interface{}{
			"article": article.Link,
			"id":      article.ID,
			"error":   err.Error(),
		})
		return err
	}
	return nil
}
// loadArticles reads every row of the articles table and returns the articles
// keyed by their link.
//
// Failures are logged rather than returned: a query error yields an empty
// map, a row that cannot be scanned is skipped, and an iteration error leaves
// the map holding only the rows read before the failure.
func loadArticles() map[string]Article {
	articles := make(map[string]Article)
	rows, err := db.Query(`
SELECT link, title, description, published, originalDate, source, firstseen, seen, summary, importance, id, broadcastTime
FROM articles
`)
	if err != nil {
		logEvent("db_query_error", map[string]interface{}{
			"error": err.Error(),
		})
		return articles
	}
	defer rows.Close()

	for rows.Next() {
		var a Article
		// These timestamp columns may be NULL, so they are scanned through
		// sql.NullTime and copied over only when a value is present.
		var seen, broadcastTime, originalDate sql.NullTime
		err := rows.Scan(
			&a.Link, &a.Title, &a.Description, &a.Published, &originalDate, &a.Source,
			&a.FirstSeen, &seen, &a.Summary, &a.Importance, &a.ID, &broadcastTime,
		)
		if err != nil {
			logEvent("db_scan_error", map[string]interface{}{
				"error": err.Error(),
			})
			continue
		}
		if seen.Valid {
			a.Seen = seen.Time
		}
		if broadcastTime.Valid {
			a.BroadcastTime = broadcastTime.Time
		}
		if originalDate.Valid {
			a.OriginalDate = originalDate.Time
		}
		articles[a.Link] = a
	}
	// rows.Next returns false on failure as well as at the end of the result
	// set; report the failure so a truncated load does not go unnoticed.
	if err := rows.Err(); err != nil {
		logEvent("db_query_error", map[string]interface{}{
			"error": err.Error(),
		})
	}
	return articles
}
// getBroadcastArticles is the common function for retrieving broadcast
// articles with consistent filtering criteria. It returns at most limit
// articles whose broadcastTime passes the query's sanity filters (not NULL,
// not a zero or epoch placeholder, year after 2000), most recently broadcast
// first.
//
// A row that cannot be scanned is logged as a "db_scan_error" event and
// skipped. A query or iteration failure is returned as the error, in which
// case the slice is nil.
func getBroadcastArticles(limit int) ([]Article, error) {
	rows, err := db.Query(`
SELECT link, title, description, published, originalDate, source, firstseen, seen, summary, importance, id, broadcastTime
FROM articles
WHERE broadcastTime IS NOT NULL
AND broadcastTime > 1
AND broadcastTime != 0
AND datetime(broadcastTime) != '1970-01-01 00:00:00'
AND datetime(broadcastTime) != '0001-01-01 00:00:00'
AND strftime('%Y', broadcastTime) > '2000' -- Ensure year is at least 2000
ORDER BY broadcastTime DESC
LIMIT ?
`, limit)
	if err != nil {
		return nil, err
	}
	defer rows.Close()

	var articles []Article
	for rows.Next() {
		var a Article
		// These timestamp columns may be NULL, so they are scanned through
		// sql.NullTime and copied over only when a value is present.
		var seen, broadcastTime, originalDate sql.NullTime
		err := rows.Scan(
			&a.Link, &a.Title, &a.Description, &a.Published, &originalDate, &a.Source,
			&a.FirstSeen, &seen, &a.Summary, &a.Importance, &a.ID, &broadcastTime,
		)
		if err != nil {
			// Skip the bad row but leave a trace, matching loadArticles.
			logEvent("db_scan_error", map[string]interface{}{
				"error": err.Error(),
			})
			continue
		}
		if seen.Valid {
			a.Seen = seen.Time
		}
		if broadcastTime.Valid {
			a.BroadcastTime = broadcastTime.Time
		}
		if originalDate.Valid {
			a.OriginalDate = originalDate.Time
		}
		articles = append(articles, a)
	}
	// rows.Next returns false on failure as well as at the end of the result
	// set; do not pass off a truncated list as a complete one.
	if err := rows.Err(); err != nil {
		return nil, err
	}
	return articles, nil
}
// getBroadcastHistory returns up to limit of the most recently broadcast
// articles. It forwards to getBroadcastArticles and passes its result and
// error through unchanged.
func getBroadcastHistory(limit int) ([]Article, error) {
	history, err := getBroadcastArticles(limit)
	return history, err
}
// getNextUpArticles returns the top 25 articles eligible for broadcast,
// sorted by descending importance. An article is eligible when it has not
// been broadcast yet, has a summary, has importance above zero, and was first
// seen within ARTICLE_FRESHNESS_WINDOW of now.
//
// A row that cannot be scanned is logged as a "db_scan_error" event and
// skipped. A query or iteration failure is returned as the error, in which
// case the slice is nil.
func getNextUpArticles() ([]Article, error) {
	// Articles first seen before this instant are no longer considered fresh.
	cutoff := time.Now().Add(-ARTICLE_FRESHNESS_WINDOW)
	rows, err := db.Query(`
SELECT link, title, description, published, originalDate, source, firstseen, seen, summary, importance, id, broadcastTime
FROM articles
WHERE broadcastTime IS NULL
AND summary IS NOT NULL
AND importance > 0
AND firstseen > ?
ORDER BY importance DESC
LIMIT 25
`, cutoff)
	if err != nil {
		return nil, err
	}
	defer rows.Close()

	var articles []Article
	for rows.Next() {
		var a Article
		// These timestamp columns may be NULL, so they are scanned through
		// sql.NullTime and copied over only when a value is present.
		var seen, broadcastTime, originalDate sql.NullTime
		err := rows.Scan(
			&a.Link, &a.Title, &a.Description, &a.Published, &originalDate, &a.Source,
			&a.FirstSeen, &seen, &a.Summary, &a.Importance, &a.ID, &broadcastTime,
		)
		if err != nil {
			// Skip the bad row but leave a trace, matching loadArticles.
			logEvent("db_scan_error", map[string]interface{}{
				"error": err.Error(),
			})
			continue
		}
		if seen.Valid {
			a.Seen = seen.Time
		}
		if broadcastTime.Valid {
			a.BroadcastTime = broadcastTime.Time
		}
		if originalDate.Valid {
			a.OriginalDate = originalDate.Time
		}
		articles = append(articles, a)
	}
	// rows.Next returns false on failure as well as at the end of the result
	// set; do not pass off a truncated list as a complete one.
	if err := rows.Err(); err != nil {
		return nil, err
	}
	return articles, nil
}
// getRecentBroadcasts returns the n most recently broadcast articles. When
// the lookup fails the error is logged and an empty, non-nil slice is
// returned instead.
func getRecentBroadcasts(n int) []Article {
	recent, err := getBroadcastArticles(n)
	if err == nil {
		return recent
	}

	logInfo("db", "Error retrieving recent broadcasts", map[string]interface{}{
		"error": err.Error(),
	})
	return []Article{}
}
// setupLogging creates (or truncates) the file at logPath, keeps it in
// logFile, and installs a JSON slog handler that writes records at Info
// level and above to stderr as the default logger. If the file cannot be
// created the process exits with status 1.
func setupLogging() {
	file, err := os.Create(logPath)
	if err != nil {
		slog.Error("Could not create log file", "error", err)
		os.Exit(1)
	}
	logFile = file

	// Set up structured logger.
	opts := &slog.HandlerOptions{Level: slog.LevelInfo}
	slog.SetDefault(slog.New(slog.NewJSONHandler(os.Stderr, opts)))
}
// flushLog writes the accumulated logData to logFile as indented JSON and
// closes the file. It holds logMutex for the duration of the call.
//
// Encoding and close failures are reported through slog rather than
// returned. After the call logFile is nil, so calling flushLog again is a
// no-op instead of a write to a closed file.
func flushLog() {
	logMutex.Lock()
	defer logMutex.Unlock()

	if logFile == nil {
		return
	}

	enc := json.NewEncoder(logFile)
	enc.SetIndent("", " ")
	if err := enc.Encode(logData); err != nil {
		slog.Error("Error writing log file", "error", err)
	}
	if err := logFile.Close(); err != nil {
		slog.Error("Error closing log file", "error", err)
	}
	// The file is unusable once closed; drop the handle so later calls
	// return at the nil check above.
	logFile = nil
}
// logEvent records a structured log entry. The entry is appended to the
// in-memory logData slice (later written out by flushLog) and, once the
// database handle is initialized, inserted into the logs table as JSON under
// a freshly generated ULID.
//
// details may be nil. When non-nil it is modified in place: "timestamp" and
// "event" keys are added if the caller did not supply them. Failures while
// marshaling, generating the ID or inserting are reported through slog and
// otherwise ignored, so logging never aborts the caller.
//
// The function holds logMutex for the whole call.
func logEvent(event string, details map[string]interface{}) {
	logMutex.Lock()
	defer logMutex.Unlock()

	// Writing to a nil map panics, so give nil callers a fresh one.
	if details == nil {
		details = make(map[string]interface{}, 2)
	}

	// A single instant is used for the details, the entry and the ULID so
	// all three agree.
	timestamp := time.Now()

	// Set timestamp if not already provided.
	if _, exists := details["timestamp"]; !exists {
		details["timestamp"] = timestamp
	}
	// Set event if not already in details.
	if _, exists := details["event"]; !exists {
		details["event"] = event
	}

	entry := LogEntry{
		Timestamp: timestamp,
		Event:     event,
		Details:   details,
	}

	// Add to the permanent log data (for file-based logging).
	logData = append(logData, entry)

	// Only try to store in database if the database is initialized.
	if db == nil {
		slog.Debug("Skipping database log storage - database not yet initialized")
		return
	}

	// Store log in database.
	logBytes, err := json.Marshal(entry)
	if err != nil {
		slog.Error("Error marshaling log entry", "error", err)
		return
	}

	// Generate ULID for the log entry. ulid.New is used instead of
	// ulid.MustNew so an ID failure is reported rather than panicking.
	id, err := ulid.New(ulid.Timestamp(timestamp), ulid.DefaultEntropy())
	if err != nil {
		slog.Error("Error generating log entry ID", "error", err)
		return
	}

	// Insert into database.
	_, err = db.Exec("INSERT INTO logs (id, timestamp, log) VALUES (?, ?, ?)",
		id.String(), timestamp, string(logBytes))
	if err != nil {
		slog.Error("Error storing log in database", "error", err)
	}
}
// getRecentLogs retrieves up to limit log entries from the logs table,
// newest first, decoding each stored JSON document into a LogEntry.
//
// Any query, scan, decode or iteration failure aborts the read and is
// returned as the error, in which case the slice is nil.
func getRecentLogs(limit int) ([]LogEntry, error) {
	rows, err := db.Query(`
SELECT log FROM logs
ORDER BY timestamp DESC
LIMIT ?
`, limit)
	if err != nil {
		return nil, err
	}
	defer rows.Close()

	var logs []LogEntry
	for rows.Next() {
		var logJSON string
		if err := rows.Scan(&logJSON); err != nil {
			return nil, err
		}
		var entry LogEntry
		if err := json.Unmarshal([]byte(logJSON), &entry); err != nil {
			return nil, err
		}
		logs = append(logs, entry)
	}
	// rows.Next returns false on failure as well as at the end of the result
	// set; do not pass off a truncated list as a complete one.
	if err := rows.Err(); err != nil {
		return nil, err
	}
	return logs, nil
}
// cleanupOldLogs removes every row of the logs table whose timestamp is more
// than one calendar month in the past. When at least one row was removed the
// count and cutoff are logged. Only a failure of the DELETE itself is
// returned.
func cleanupOldLogs() error {
	// One month before now; anything older is deleted.
	threshold := time.Now().AddDate(0, -1, 0)

	res, err := db.Exec("DELETE FROM logs WHERE timestamp < ?", threshold)
	if err != nil {
		return err
	}

	// The RowsAffected error is deliberately ignored: the count is only used
	// for the informational log line below.
	deleted, _ := res.RowsAffected()
	if deleted <= 0 {
		return nil
	}

	logInfo("logs", "Deleted old log entries", map[string]interface{}{
		"count":      deleted,
		"olderThan":  "1 month",
		"cutoffDate": threshold.Format(time.RFC3339),
	})
	return nil
}
// logCleanupWorker deletes old log entries once at startup and then every 15
// minutes until a value is received from shutdown or shutdown is closed.
// Cleanup failures are logged and do not stop the worker. The function blocks
// until shutdown fires.
func logCleanupWorker(shutdown chan struct{}) {
	logInfo("logs", "Starting log cleanup worker", map[string]interface{}{
		"interval":  "15 minutes",
		"retention": "1 month",
	})

	// sweep runs one cleanup pass and reports a failure, if any.
	sweep := func() {
		err := cleanupOldLogs()
		if err == nil {
			return
		}
		logInfo("logs", "Error cleaning up old logs", map[string]interface{}{
			"error": err.Error(),
		})
	}

	// Run cleanup immediately on startup, then on every tick.
	sweep()

	ticker := time.NewTicker(15 * time.Minute)
	defer ticker.Stop()

	for {
		select {
		case <-shutdown:
			logInfo("logs", "Shutting down log cleanup worker", nil)
			return
		case <-ticker.C:
			sweep()
		}
	}
}
// logInfo emits message through slog at Info level and then forwards it to
// logEvent as an "info" event. The attributes are the entries of data plus
// "component" and "message" keys. data is copied first, so the caller's map
// is never modified; a nil data is allowed.
func logInfo(component string, message string, data map[string]interface{}) {
	// Work on a private copy so the additions below stay local.
	fields := make(map[string]interface{}, len(data)+2)
	for key, value := range data {
		fields[key] = value
	}
	fields["component"] = component
	fields["message"] = message

	// slog takes attributes as a flat key, value, key, value... list.
	attrs := make([]any, 0, 2*len(fields))
	for key, value := range fields {
		attrs = append(attrs, key, value)
	}
	slog.Info(message, attrs...)

	// Hand the same fields to the structured event log.
	logEvent("info", fields)
}