diff --git a/internal/database/database.go b/internal/database/database.go index 91c93bd..659b902 100644 --- a/internal/database/database.go +++ b/internal/database/database.go @@ -1633,18 +1633,16 @@ func (d *Database) GetRandomPrefixesByLengthContext( return routes, nil } -// GetNextStaleASN returns an ASN that needs WHOIS data refresh. -// Priority: ASNs with no whois_updated_at, then oldest whois_updated_at. +// GetNextStaleASN returns a random ASN that needs WHOIS data refresh. func (d *Database) GetNextStaleASN(ctx context.Context, staleThreshold time.Duration) (int, error) { cutoff := time.Now().Add(-staleThreshold) + // Select a random stale ASN using ORDER BY RANDOM() query := ` SELECT asn FROM asns WHERE whois_updated_at IS NULL OR whois_updated_at < ? - ORDER BY - CASE WHEN whois_updated_at IS NULL THEN 0 ELSE 1 END, - whois_updated_at ASC + ORDER BY RANDOM() LIMIT 1 ` @@ -1661,6 +1659,41 @@ func (d *Database) GetNextStaleASN(ctx context.Context, staleThreshold time.Dura return asn, nil } +// WHOISStats contains statistics about WHOIS data freshness. +type WHOISStats struct { + TotalASNs int `json:"total_asns"` + StaleASNs int `json:"stale_asns"` + FreshASNs int `json:"fresh_asns"` + NeverFetched int `json:"never_fetched"` +} + +// GetWHOISStats returns statistics about WHOIS data freshness. +func (d *Database) GetWHOISStats(ctx context.Context, staleThreshold time.Duration) (*WHOISStats, error) { + cutoff := time.Now().Add(-staleThreshold) + + query := ` + SELECT + COUNT(*) as total, + SUM(CASE WHEN whois_updated_at IS NULL THEN 1 ELSE 0 END) as never_fetched, + SUM(CASE WHEN whois_updated_at IS NOT NULL AND whois_updated_at < ? THEN 1 ELSE 0 END) as stale, + SUM(CASE WHEN whois_updated_at IS NOT NULL AND whois_updated_at >= ? THEN 1 ELSE 0 END) as fresh + FROM asns + ` + + var stats WHOISStats + err := d.db.QueryRowContext(ctx, query, cutoff, cutoff).Scan( + &stats.TotalASNs, + &stats.NeverFetched, + &stats.StaleASNs, + &stats.FreshASNs, + ) + if err != nil { + return nil, fmt.Errorf("failed to get WHOIS stats: %w", err) + } + + return &stats, nil +} + // UpdateASNWHOIS updates an ASN record with WHOIS data. func (d *Database) UpdateASNWHOIS(ctx context.Context, update *ASNWHOISUpdate) error { d.lock("UpdateASNWHOIS") diff --git a/internal/database/interface.go b/internal/database/interface.go index 8874b6f..9eca3cb 100644 --- a/internal/database/interface.go +++ b/internal/database/interface.go @@ -67,6 +67,7 @@ type Store interface { // ASN WHOIS operations GetNextStaleASN(ctx context.Context, staleThreshold time.Duration) (int, error) UpdateASNWHOIS(ctx context.Context, update *ASNWHOISUpdate) error + GetWHOISStats(ctx context.Context, staleThreshold time.Duration) (*WHOISStats, error) // AS and prefix detail operations GetASDetails(asn int) (*ASN, []LiveRoute, error) diff --git a/internal/database/schema.sql b/internal/database/schema.sql index 366525c..cc2f35c 100644 --- a/internal/database/schema.sql +++ b/internal/database/schema.sql @@ -90,6 +90,7 @@ CREATE INDEX IF NOT EXISTS idx_peerings_lookup ON peerings(as_a, as_b); -- Indexes for asns table CREATE INDEX IF NOT EXISTS idx_asns_asn ON asns(asn); +CREATE INDEX IF NOT EXISTS idx_asns_whois_updated_at ON asns(whois_updated_at); -- Indexes for bgp_peers table CREATE INDEX IF NOT EXISTS idx_bgp_peers_asn ON bgp_peers(peer_asn); diff --git a/internal/routewatch/app_integration_test.go b/internal/routewatch/app_integration_test.go index 2e0fe39..2a176ce 100644 --- a/internal/routewatch/app_integration_test.go +++ b/internal/routewatch/app_integration_test.go @@ -335,6 +335,19 @@ func (m *mockStore) UpdateASNWHOIS(ctx context.Context, update *database.ASNWHOI return nil } +// GetWHOISStats mock implementation +func (m *mockStore) GetWHOISStats(ctx context.Context, staleThreshold time.Duration) (*database.WHOISStats, error) { + m.mu.Lock() + defer m.mu.Unlock() + + return &database.WHOISStats{ + TotalASNs: len(m.ASNs), + FreshASNs: 0, + StaleASNs: 0, + NeverFetched: len(m.ASNs), + }, nil +} + // UpsertLiveRouteBatch mock implementation func (m *mockStore) UpsertLiveRouteBatch(routes []*database.LiveRoute) error { m.mu.Lock() diff --git a/internal/routewatch/asnfetcher.go b/internal/routewatch/asnfetcher.go index 6117f11..8f1d5b6 100644 --- a/internal/routewatch/asnfetcher.go +++ b/internal/routewatch/asnfetcher.go @@ -8,6 +8,7 @@ import ( "time" "git.eeqj.de/sneak/routewatch/internal/database" + "git.eeqj.de/sneak/routewatch/internal/server" "git.eeqj.de/sneak/routewatch/internal/whois" ) @@ -17,7 +18,7 @@ const ( baseInterval = 15 * time.Second // minInterval is the minimum interval after successes (rate limit). - minInterval = 10 * time.Second + minInterval = 1 * time.Second // maxInterval is the maximum interval after failures (backoff cap). maxInterval = 5 * time.Minute @@ -30,8 +31,12 @@ const ( // immediateQueueSize is the buffer size for immediate fetch requests. immediateQueueSize = 100 + + // statsWindow is how long to keep stats for. + statsWindow = time.Hour ) + // ASNFetcher handles background WHOIS lookups for ASNs. type ASNFetcher struct { db database.Store @@ -45,9 +50,14 @@ type ASNFetcher struct { fetchMu sync.Mutex // interval tracking with mutex protection - intervalMu sync.Mutex - currentInterval time.Duration + intervalMu sync.Mutex + currentInterval time.Duration consecutiveFails int + + // hourly stats tracking + statsMu sync.Mutex + successTimes []time.Time + errorTimes []time.Time } // NewASNFetcher creates a new ASN fetcher. @@ -59,6 +69,8 @@ func NewASNFetcher(db database.Store, logger *slog.Logger) *ASNFetcher { immediateQueue: make(chan int, immediateQueueSize), stopCh: make(chan struct{}), currentInterval: baseInterval, + successTimes: make([]time.Time, 0), + errorTimes: make([]time.Time, 0), } } @@ -91,6 +103,41 @@ func (f *ASNFetcher) QueueImmediate(asn int) { } } +// GetStats returns statistics about fetcher activity. +func (f *ASNFetcher) GetStats() server.ASNFetcherStats { + f.statsMu.Lock() + defer f.statsMu.Unlock() + + f.intervalMu.Lock() + interval := f.currentInterval + fails := f.consecutiveFails + f.intervalMu.Unlock() + + // Prune old entries and count + cutoff := time.Now().Add(-statsWindow) + f.successTimes = pruneOldTimes(f.successTimes, cutoff) + f.errorTimes = pruneOldTimes(f.errorTimes, cutoff) + + return server.ASNFetcherStats{ + SuccessesLastHour: len(f.successTimes), + ErrorsLastHour: len(f.errorTimes), + CurrentInterval: interval, + ConsecutiveFails: fails, + } +} + +// pruneOldTimes removes times older than cutoff and returns the pruned slice. +func pruneOldTimes(times []time.Time, cutoff time.Time) []time.Time { + result := make([]time.Time, 0, len(times)) + for _, t := range times { + if t.After(cutoff) { + result = append(result, t) + } + } + + return result +} + // getInterval returns the current fetch interval. func (f *ASNFetcher) getInterval() time.Duration { f.intervalMu.Lock() @@ -102,8 +149,6 @@ func (f *ASNFetcher) getInterval() time.Duration { // recordSuccess decreases the interval on successful fetch. func (f *ASNFetcher) recordSuccess() { f.intervalMu.Lock() - defer f.intervalMu.Unlock() - f.consecutiveFails = 0 // Decrease interval by half, but not below minimum @@ -119,13 +164,17 @@ func (f *ASNFetcher) recordSuccess() { ) f.currentInterval = newInterval } + f.intervalMu.Unlock() + + // Record success time for stats + f.statsMu.Lock() + f.successTimes = append(f.successTimes, time.Now()) + f.statsMu.Unlock() } // recordFailure increases the interval on failed fetch using exponential backoff. func (f *ASNFetcher) recordFailure() { f.intervalMu.Lock() - defer f.intervalMu.Unlock() - f.consecutiveFails++ // Exponential backoff: multiply by 2, capped at max @@ -142,6 +191,12 @@ func (f *ASNFetcher) recordFailure() { ) f.currentInterval = newInterval } + f.intervalMu.Unlock() + + // Record error time for stats + f.statsMu.Lock() + f.errorTimes = append(f.errorTimes, time.Now()) + f.statsMu.Unlock() } // run is the main background loop. @@ -263,3 +318,8 @@ func (f *ASNFetcher) fetchAndUpdate(asn int) bool { return true } + +// GetStaleThreshold returns the WHOIS stale threshold duration. +func GetStaleThreshold() time.Duration { + return whoisStaleThreshold +} diff --git a/internal/server/handlers.go b/internal/server/handlers.go index 292d9c1..58333ab 100644 --- a/internal/server/handlers.go +++ b/internal/server/handlers.go @@ -58,6 +58,19 @@ func writeJSONSuccess(w http.ResponseWriter, data interface{}) error { }) } +// WHOISStatsInfo contains WHOIS fetcher statistics for the status page. +type WHOISStatsInfo struct { + TotalASNs int `json:"total_asns"` + FreshASNs int `json:"fresh_asns"` + StaleASNs int `json:"stale_asns"` + NeverFetched int `json:"never_fetched"` + SuccessesLastHour int `json:"successes_last_hour"` + ErrorsLastHour int `json:"errors_last_hour"` + CurrentInterval string `json:"current_interval"` + ConsecutiveFails int `json:"consecutive_fails"` + FreshPercent float64 `json:"fresh_percent"` +} + // handleStatusJSON returns a handler that serves JSON statistics including // uptime, message counts, database stats, and route information. func (s *Server) handleStatusJSON() http.HandlerFunc { @@ -88,6 +101,7 @@ func (s *Server) handleStatusJSON() http.HandlerFunc { IPv6UpdatesPerSec float64 `json:"ipv6_updates_per_sec"` IPv4PrefixDistribution []database.PrefixDistribution `json:"ipv4_prefix_distribution"` IPv6PrefixDistribution []database.PrefixDistribution `json:"ipv6_prefix_distribution"` + WHOISStats *WHOISStatsInfo `json:"whois_stats,omitempty"` } return func(w http.ResponseWriter, r *http.Request) { @@ -149,6 +163,12 @@ func (s *Server) handleStatusJSON() http.HandlerFunc { var memStats runtime.MemStats runtime.ReadMemStats(&memStats) + // Get WHOIS stats if fetcher is available + var whoisStats *WHOISStatsInfo + if s.asnFetcher != nil { + whoisStats = s.getWHOISStats(ctx) + } + stats := Stats{ Uptime: uptime, TotalMessages: metrics.TotalMessages, @@ -175,6 +195,7 @@ func (s *Server) handleStatusJSON() http.HandlerFunc { IPv6UpdatesPerSec: routeMetrics.IPv6UpdatesPerSec, IPv4PrefixDistribution: dbStats.IPv4PrefixDistribution, IPv6PrefixDistribution: dbStats.IPv6PrefixDistribution, + WHOISStats: whoisStats, } if err := writeJSONSuccess(w, stats); err != nil { @@ -183,6 +204,44 @@ func (s *Server) handleStatusJSON() http.HandlerFunc { } } +// getWHOISStats builds WHOIS statistics from database and fetcher. +func (s *Server) getWHOISStats(ctx context.Context) *WHOISStatsInfo { + // Get database WHOIS stats + dbStats, err := s.db.GetWHOISStats(ctx, whoisStaleThreshold) + if err != nil { + s.logger.Warn("Failed to get WHOIS stats", "error", err) + + return nil + } + + // Get fetcher stats + fetcherStats := s.asnFetcher.GetStats() + + // Calculate fresh percentage + var freshPercent float64 + if dbStats.TotalASNs > 0 { + freshPercent = float64(dbStats.FreshASNs) / float64(dbStats.TotalASNs) * percentMultiplier + } + + return &WHOISStatsInfo{ + TotalASNs: dbStats.TotalASNs, + FreshASNs: dbStats.FreshASNs, + StaleASNs: dbStats.StaleASNs, + NeverFetched: dbStats.NeverFetched, + SuccessesLastHour: fetcherStats.SuccessesLastHour, + ErrorsLastHour: fetcherStats.ErrorsLastHour, + CurrentInterval: fetcherStats.CurrentInterval.String(), + ConsecutiveFails: fetcherStats.ConsecutiveFails, + FreshPercent: freshPercent, + } +} + +// whoisStaleThreshold matches the fetcher's threshold for consistency. +const whoisStaleThreshold = 30 * 24 * time.Hour + +// percentMultiplier converts a ratio to a percentage. +const percentMultiplier = 100 + // handleStats returns a handler that serves API v1 statistics including // detailed handler queue statistics and performance metrics. func (s *Server) handleStats() http.HandlerFunc { @@ -227,6 +286,7 @@ func (s *Server) handleStats() http.HandlerFunc { HandlerStats []HandlerStatsInfo `json:"handler_stats"` IPv4PrefixDistribution []database.PrefixDistribution `json:"ipv4_prefix_distribution"` IPv6PrefixDistribution []database.PrefixDistribution `json:"ipv6_prefix_distribution"` + WHOISStats *WHOISStatsInfo `json:"whois_stats,omitempty"` } return func(w http.ResponseWriter, r *http.Request) { @@ -314,6 +374,12 @@ func (s *Server) handleStats() http.HandlerFunc { var memStats runtime.MemStats runtime.ReadMemStats(&memStats) + // Get WHOIS stats if fetcher is available + var whoisStats *WHOISStatsInfo + if s.asnFetcher != nil { + whoisStats = s.getWHOISStats(ctx) + } + stats := StatsResponse{ Uptime: uptime, TotalMessages: metrics.TotalMessages, @@ -341,6 +407,7 @@ func (s *Server) handleStats() http.HandlerFunc { HandlerStats: handlerStatsInfo, IPv4PrefixDistribution: dbStats.IPv4PrefixDistribution, IPv6PrefixDistribution: dbStats.IPv6PrefixDistribution, + WHOISStats: whoisStats, } if err := writeJSONSuccess(w, stats); err != nil { diff --git a/internal/server/server.go b/internal/server/server.go index ace5df6..f4498da 100644 --- a/internal/server/server.go +++ b/internal/server/server.go @@ -13,9 +13,18 @@ import ( "github.com/go-chi/chi/v5" ) +// ASNFetcherStats contains WHOIS fetcher statistics. +type ASNFetcherStats struct { + SuccessesLastHour int + ErrorsLastHour int + CurrentInterval time.Duration + ConsecutiveFails int +} + // ASNFetcher is an interface for queuing ASN WHOIS lookups. type ASNFetcher interface { QueueImmediate(asn int) + GetStats() ASNFetcherStats } // Server provides HTTP endpoints for status monitoring diff --git a/internal/templates/status.html b/internal/templates/status.html index bad37a2..578bb67 100644 --- a/internal/templates/status.html +++ b/internal/templates/status.html @@ -177,8 +177,40 @@ - + +
+

WHOIS Fetcher

+
+ Fresh ASNs + - +
+
+ Stale ASNs + - +
+
+ Never Fetched + - +
+
+ Fresh % + - +
+
+ Successes (1h) + - +
+
+ Errors (1h) + - +
+
+ Current Interval + - +
+
- +

IPv4 Prefix Distribution

@@ -318,7 +350,15 @@ document.getElementById('ipv6_routes').textContent = '-'; document.getElementById('ipv4_updates_per_sec').textContent = '-'; document.getElementById('ipv6_updates_per_sec').textContent = '-'; - + document.getElementById('whois_fresh').textContent = '-'; + document.getElementById('whois_stale').textContent = '-'; + document.getElementById('whois_never').textContent = '-'; + document.getElementById('whois_percent').textContent = '-'; + document.getElementById('whois_successes').textContent = '-'; + document.getElementById('whois_errors').textContent = '-'; + document.getElementById('whois_errors').className = 'metric-value'; + document.getElementById('whois_interval').textContent = '-'; + // Clear handler stats document.getElementById('handler-stats-container').innerHTML = ''; @@ -368,7 +408,20 @@ document.getElementById('ipv6_routes').textContent = formatNumber(data.ipv6_routes); document.getElementById('ipv4_updates_per_sec').textContent = data.ipv4_updates_per_sec.toFixed(1); document.getElementById('ipv6_updates_per_sec').textContent = data.ipv6_updates_per_sec.toFixed(1); - + + // Update WHOIS stats + if (data.whois_stats) { + document.getElementById('whois_fresh').textContent = formatNumber(data.whois_stats.fresh_asns); + document.getElementById('whois_stale').textContent = formatNumber(data.whois_stats.stale_asns); + document.getElementById('whois_never').textContent = formatNumber(data.whois_stats.never_fetched); + document.getElementById('whois_percent').textContent = data.whois_stats.fresh_percent.toFixed(1) + '%'; + document.getElementById('whois_successes').textContent = formatNumber(data.whois_stats.successes_last_hour); + const errorsEl = document.getElementById('whois_errors'); + errorsEl.textContent = formatNumber(data.whois_stats.errors_last_hour); + errorsEl.className = 'metric-value' + (data.whois_stats.errors_last_hour > 0 ? ' disconnected' : ''); + document.getElementById('whois_interval').textContent = data.whois_stats.current_interval; + } + // Update handler stats updateHandlerStats(data.handler_stats || []);