Add WHOIS stats to status page with adaptive fetcher improvements

- Add WHOIS Fetcher card showing fresh/stale/never-fetched ASN counts
- Display hourly success/error counts and current fetch interval
- Increase max WHOIS rate to 1/sec (down from 10 sec minimum)
- Select random stale ASN instead of oldest for better distribution
- Add index on whois_updated_at for query performance
- Track success/error timestamps for hourly stats
- Add GetWHOISStats database method for freshness statistics
This commit is contained in:
Jeffrey Paul 2025-12-27 16:20:09 +07:00
parent f8b7d3b773
commit d2041a5a55
8 changed files with 252 additions and 15 deletions

View File

@ -1633,18 +1633,16 @@ func (d *Database) GetRandomPrefixesByLengthContext(
return routes, nil
}
// GetNextStaleASN returns an ASN that needs WHOIS data refresh.
// Priority: ASNs with no whois_updated_at, then oldest whois_updated_at.
// GetNextStaleASN returns a random ASN that needs WHOIS data refresh.
func (d *Database) GetNextStaleASN(ctx context.Context, staleThreshold time.Duration) (int, error) {
cutoff := time.Now().Add(-staleThreshold)
// Select a random stale ASN using ORDER BY RANDOM()
query := `
SELECT asn FROM asns
WHERE whois_updated_at IS NULL
OR whois_updated_at < ?
ORDER BY
CASE WHEN whois_updated_at IS NULL THEN 0 ELSE 1 END,
whois_updated_at ASC
ORDER BY RANDOM()
LIMIT 1
`
@ -1661,6 +1659,41 @@ func (d *Database) GetNextStaleASN(ctx context.Context, staleThreshold time.Dura
return asn, nil
}
// WHOISStats contains statistics about WHOIS data freshness.
type WHOISStats struct {
TotalASNs int `json:"total_asns"`
StaleASNs int `json:"stale_asns"`
FreshASNs int `json:"fresh_asns"`
NeverFetched int `json:"never_fetched"`
}
// GetWHOISStats returns statistics about WHOIS data freshness.
func (d *Database) GetWHOISStats(ctx context.Context, staleThreshold time.Duration) (*WHOISStats, error) {
cutoff := time.Now().Add(-staleThreshold)
query := `
SELECT
COUNT(*) as total,
SUM(CASE WHEN whois_updated_at IS NULL THEN 1 ELSE 0 END) as never_fetched,
SUM(CASE WHEN whois_updated_at IS NOT NULL AND whois_updated_at < ? THEN 1 ELSE 0 END) as stale,
SUM(CASE WHEN whois_updated_at IS NOT NULL AND whois_updated_at >= ? THEN 1 ELSE 0 END) as fresh
FROM asns
`
var stats WHOISStats
err := d.db.QueryRowContext(ctx, query, cutoff, cutoff).Scan(
&stats.TotalASNs,
&stats.NeverFetched,
&stats.StaleASNs,
&stats.FreshASNs,
)
if err != nil {
return nil, fmt.Errorf("failed to get WHOIS stats: %w", err)
}
return &stats, nil
}
// UpdateASNWHOIS updates an ASN record with WHOIS data.
func (d *Database) UpdateASNWHOIS(ctx context.Context, update *ASNWHOISUpdate) error {
d.lock("UpdateASNWHOIS")

View File

@ -67,6 +67,7 @@ type Store interface {
// ASN WHOIS operations
GetNextStaleASN(ctx context.Context, staleThreshold time.Duration) (int, error)
UpdateASNWHOIS(ctx context.Context, update *ASNWHOISUpdate) error
GetWHOISStats(ctx context.Context, staleThreshold time.Duration) (*WHOISStats, error)
// AS and prefix detail operations
GetASDetails(asn int) (*ASN, []LiveRoute, error)

View File

@ -90,6 +90,7 @@ CREATE INDEX IF NOT EXISTS idx_peerings_lookup ON peerings(as_a, as_b);
-- Indexes for asns table
CREATE INDEX IF NOT EXISTS idx_asns_asn ON asns(asn);
CREATE INDEX IF NOT EXISTS idx_asns_whois_updated_at ON asns(whois_updated_at);
-- Indexes for bgp_peers table
CREATE INDEX IF NOT EXISTS idx_bgp_peers_asn ON bgp_peers(peer_asn);

View File

@ -335,6 +335,19 @@ func (m *mockStore) UpdateASNWHOIS(ctx context.Context, update *database.ASNWHOI
return nil
}
// GetWHOISStats mock implementation
func (m *mockStore) GetWHOISStats(ctx context.Context, staleThreshold time.Duration) (*database.WHOISStats, error) {
m.mu.Lock()
defer m.mu.Unlock()
return &database.WHOISStats{
TotalASNs: len(m.ASNs),
FreshASNs: 0,
StaleASNs: 0,
NeverFetched: len(m.ASNs),
}, nil
}
// UpsertLiveRouteBatch mock implementation
func (m *mockStore) UpsertLiveRouteBatch(routes []*database.LiveRoute) error {
m.mu.Lock()

View File

@ -8,6 +8,7 @@ import (
"time"
"git.eeqj.de/sneak/routewatch/internal/database"
"git.eeqj.de/sneak/routewatch/internal/server"
"git.eeqj.de/sneak/routewatch/internal/whois"
)
@ -17,7 +18,7 @@ const (
baseInterval = 15 * time.Second
// minInterval is the minimum interval after successes (rate limit).
minInterval = 10 * time.Second
minInterval = 1 * time.Second
// maxInterval is the maximum interval after failures (backoff cap).
maxInterval = 5 * time.Minute
@ -30,8 +31,12 @@ const (
// immediateQueueSize is the buffer size for immediate fetch requests.
immediateQueueSize = 100
// statsWindow is how long to keep stats for.
statsWindow = time.Hour
)
// ASNFetcher handles background WHOIS lookups for ASNs.
type ASNFetcher struct {
db database.Store
@ -45,9 +50,14 @@ type ASNFetcher struct {
fetchMu sync.Mutex
// interval tracking with mutex protection
intervalMu sync.Mutex
currentInterval time.Duration
intervalMu sync.Mutex
currentInterval time.Duration
consecutiveFails int
// hourly stats tracking
statsMu sync.Mutex
successTimes []time.Time
errorTimes []time.Time
}
// NewASNFetcher creates a new ASN fetcher.
@ -59,6 +69,8 @@ func NewASNFetcher(db database.Store, logger *slog.Logger) *ASNFetcher {
immediateQueue: make(chan int, immediateQueueSize),
stopCh: make(chan struct{}),
currentInterval: baseInterval,
successTimes: make([]time.Time, 0),
errorTimes: make([]time.Time, 0),
}
}
@ -91,6 +103,41 @@ func (f *ASNFetcher) QueueImmediate(asn int) {
}
}
// GetStats returns statistics about fetcher activity.
func (f *ASNFetcher) GetStats() server.ASNFetcherStats {
f.statsMu.Lock()
defer f.statsMu.Unlock()
f.intervalMu.Lock()
interval := f.currentInterval
fails := f.consecutiveFails
f.intervalMu.Unlock()
// Prune old entries and count
cutoff := time.Now().Add(-statsWindow)
f.successTimes = pruneOldTimes(f.successTimes, cutoff)
f.errorTimes = pruneOldTimes(f.errorTimes, cutoff)
return server.ASNFetcherStats{
SuccessesLastHour: len(f.successTimes),
ErrorsLastHour: len(f.errorTimes),
CurrentInterval: interval,
ConsecutiveFails: fails,
}
}
// pruneOldTimes removes times older than cutoff and returns the pruned slice.
func pruneOldTimes(times []time.Time, cutoff time.Time) []time.Time {
result := make([]time.Time, 0, len(times))
for _, t := range times {
if t.After(cutoff) {
result = append(result, t)
}
}
return result
}
// getInterval returns the current fetch interval.
func (f *ASNFetcher) getInterval() time.Duration {
f.intervalMu.Lock()
@ -102,8 +149,6 @@ func (f *ASNFetcher) getInterval() time.Duration {
// recordSuccess decreases the interval on successful fetch.
func (f *ASNFetcher) recordSuccess() {
f.intervalMu.Lock()
defer f.intervalMu.Unlock()
f.consecutiveFails = 0
// Decrease interval by half, but not below minimum
@ -119,13 +164,17 @@ func (f *ASNFetcher) recordSuccess() {
)
f.currentInterval = newInterval
}
f.intervalMu.Unlock()
// Record success time for stats
f.statsMu.Lock()
f.successTimes = append(f.successTimes, time.Now())
f.statsMu.Unlock()
}
// recordFailure increases the interval on failed fetch using exponential backoff.
func (f *ASNFetcher) recordFailure() {
f.intervalMu.Lock()
defer f.intervalMu.Unlock()
f.consecutiveFails++
// Exponential backoff: multiply by 2, capped at max
@ -142,6 +191,12 @@ func (f *ASNFetcher) recordFailure() {
)
f.currentInterval = newInterval
}
f.intervalMu.Unlock()
// Record error time for stats
f.statsMu.Lock()
f.errorTimes = append(f.errorTimes, time.Now())
f.statsMu.Unlock()
}
// run is the main background loop.
@ -263,3 +318,8 @@ func (f *ASNFetcher) fetchAndUpdate(asn int) bool {
return true
}
// GetStaleThreshold returns the WHOIS stale threshold duration.
func GetStaleThreshold() time.Duration {
return whoisStaleThreshold
}

View File

@ -58,6 +58,19 @@ func writeJSONSuccess(w http.ResponseWriter, data interface{}) error {
})
}
// WHOISStatsInfo contains WHOIS fetcher statistics for the status page.
type WHOISStatsInfo struct {
TotalASNs int `json:"total_asns"`
FreshASNs int `json:"fresh_asns"`
StaleASNs int `json:"stale_asns"`
NeverFetched int `json:"never_fetched"`
SuccessesLastHour int `json:"successes_last_hour"`
ErrorsLastHour int `json:"errors_last_hour"`
CurrentInterval string `json:"current_interval"`
ConsecutiveFails int `json:"consecutive_fails"`
FreshPercent float64 `json:"fresh_percent"`
}
// handleStatusJSON returns a handler that serves JSON statistics including
// uptime, message counts, database stats, and route information.
func (s *Server) handleStatusJSON() http.HandlerFunc {
@ -88,6 +101,7 @@ func (s *Server) handleStatusJSON() http.HandlerFunc {
IPv6UpdatesPerSec float64 `json:"ipv6_updates_per_sec"`
IPv4PrefixDistribution []database.PrefixDistribution `json:"ipv4_prefix_distribution"`
IPv6PrefixDistribution []database.PrefixDistribution `json:"ipv6_prefix_distribution"`
WHOISStats *WHOISStatsInfo `json:"whois_stats,omitempty"`
}
return func(w http.ResponseWriter, r *http.Request) {
@ -149,6 +163,12 @@ func (s *Server) handleStatusJSON() http.HandlerFunc {
var memStats runtime.MemStats
runtime.ReadMemStats(&memStats)
// Get WHOIS stats if fetcher is available
var whoisStats *WHOISStatsInfo
if s.asnFetcher != nil {
whoisStats = s.getWHOISStats(ctx)
}
stats := Stats{
Uptime: uptime,
TotalMessages: metrics.TotalMessages,
@ -175,6 +195,7 @@ func (s *Server) handleStatusJSON() http.HandlerFunc {
IPv6UpdatesPerSec: routeMetrics.IPv6UpdatesPerSec,
IPv4PrefixDistribution: dbStats.IPv4PrefixDistribution,
IPv6PrefixDistribution: dbStats.IPv6PrefixDistribution,
WHOISStats: whoisStats,
}
if err := writeJSONSuccess(w, stats); err != nil {
@ -183,6 +204,44 @@ func (s *Server) handleStatusJSON() http.HandlerFunc {
}
}
// getWHOISStats builds WHOIS statistics from database and fetcher.
func (s *Server) getWHOISStats(ctx context.Context) *WHOISStatsInfo {
// Get database WHOIS stats
dbStats, err := s.db.GetWHOISStats(ctx, whoisStaleThreshold)
if err != nil {
s.logger.Warn("Failed to get WHOIS stats", "error", err)
return nil
}
// Get fetcher stats
fetcherStats := s.asnFetcher.GetStats()
// Calculate fresh percentage
var freshPercent float64
if dbStats.TotalASNs > 0 {
freshPercent = float64(dbStats.FreshASNs) / float64(dbStats.TotalASNs) * percentMultiplier
}
return &WHOISStatsInfo{
TotalASNs: dbStats.TotalASNs,
FreshASNs: dbStats.FreshASNs,
StaleASNs: dbStats.StaleASNs,
NeverFetched: dbStats.NeverFetched,
SuccessesLastHour: fetcherStats.SuccessesLastHour,
ErrorsLastHour: fetcherStats.ErrorsLastHour,
CurrentInterval: fetcherStats.CurrentInterval.String(),
ConsecutiveFails: fetcherStats.ConsecutiveFails,
FreshPercent: freshPercent,
}
}
// whoisStaleThreshold matches the fetcher's threshold for consistency.
const whoisStaleThreshold = 30 * 24 * time.Hour
// percentMultiplier converts a ratio to a percentage.
const percentMultiplier = 100
// handleStats returns a handler that serves API v1 statistics including
// detailed handler queue statistics and performance metrics.
func (s *Server) handleStats() http.HandlerFunc {
@ -227,6 +286,7 @@ func (s *Server) handleStats() http.HandlerFunc {
HandlerStats []HandlerStatsInfo `json:"handler_stats"`
IPv4PrefixDistribution []database.PrefixDistribution `json:"ipv4_prefix_distribution"`
IPv6PrefixDistribution []database.PrefixDistribution `json:"ipv6_prefix_distribution"`
WHOISStats *WHOISStatsInfo `json:"whois_stats,omitempty"`
}
return func(w http.ResponseWriter, r *http.Request) {
@ -314,6 +374,12 @@ func (s *Server) handleStats() http.HandlerFunc {
var memStats runtime.MemStats
runtime.ReadMemStats(&memStats)
// Get WHOIS stats if fetcher is available
var whoisStats *WHOISStatsInfo
if s.asnFetcher != nil {
whoisStats = s.getWHOISStats(ctx)
}
stats := StatsResponse{
Uptime: uptime,
TotalMessages: metrics.TotalMessages,
@ -341,6 +407,7 @@ func (s *Server) handleStats() http.HandlerFunc {
HandlerStats: handlerStatsInfo,
IPv4PrefixDistribution: dbStats.IPv4PrefixDistribution,
IPv6PrefixDistribution: dbStats.IPv6PrefixDistribution,
WHOISStats: whoisStats,
}
if err := writeJSONSuccess(w, stats); err != nil {

View File

@ -13,9 +13,18 @@ import (
"github.com/go-chi/chi/v5"
)
// ASNFetcherStats contains WHOIS fetcher statistics.
type ASNFetcherStats struct {
SuccessesLastHour int
ErrorsLastHour int
CurrentInterval time.Duration
ConsecutiveFails int
}
// ASNFetcher is an interface for queuing ASN WHOIS lookups.
type ASNFetcher interface {
QueueImmediate(asn int)
GetStats() ASNFetcherStats
}
// Server provides HTTP endpoints for status monitoring

View File

@ -177,8 +177,40 @@
<span class="metric-value" id="ipv6_updates_per_sec">-</span>
</div>
</div>
<div class="status-card">
<h2>WHOIS Fetcher</h2>
<div class="metric">
<span class="metric-label">Fresh ASNs</span>
<span class="metric-value" id="whois_fresh">-</span>
</div>
<div class="metric">
<span class="metric-label">Stale ASNs</span>
<span class="metric-value" id="whois_stale">-</span>
</div>
<div class="metric">
<span class="metric-label">Never Fetched</span>
<span class="metric-value" id="whois_never">-</span>
</div>
<div class="metric">
<span class="metric-label">Fresh %</span>
<span class="metric-value" id="whois_percent">-</span>
</div>
<div class="metric">
<span class="metric-label">Successes (1h)</span>
<span class="metric-value" id="whois_successes">-</span>
</div>
<div class="metric">
<span class="metric-label">Errors (1h)</span>
<span class="metric-value" id="whois_errors">-</span>
</div>
<div class="metric">
<span class="metric-label">Current Interval</span>
<span class="metric-value" id="whois_interval">-</span>
</div>
</div>
</div>
<div class="status-grid">
<div class="status-card">
<h2>IPv4 Prefix Distribution</h2>
@ -318,7 +350,15 @@
document.getElementById('ipv6_routes').textContent = '-';
document.getElementById('ipv4_updates_per_sec').textContent = '-';
document.getElementById('ipv6_updates_per_sec').textContent = '-';
document.getElementById('whois_fresh').textContent = '-';
document.getElementById('whois_stale').textContent = '-';
document.getElementById('whois_never').textContent = '-';
document.getElementById('whois_percent').textContent = '-';
document.getElementById('whois_successes').textContent = '-';
document.getElementById('whois_errors').textContent = '-';
document.getElementById('whois_errors').className = 'metric-value';
document.getElementById('whois_interval').textContent = '-';
// Clear handler stats
document.getElementById('handler-stats-container').innerHTML = '';
@ -368,7 +408,20 @@
document.getElementById('ipv6_routes').textContent = formatNumber(data.ipv6_routes);
document.getElementById('ipv4_updates_per_sec').textContent = data.ipv4_updates_per_sec.toFixed(1);
document.getElementById('ipv6_updates_per_sec').textContent = data.ipv6_updates_per_sec.toFixed(1);
// Update WHOIS stats
if (data.whois_stats) {
document.getElementById('whois_fresh').textContent = formatNumber(data.whois_stats.fresh_asns);
document.getElementById('whois_stale').textContent = formatNumber(data.whois_stats.stale_asns);
document.getElementById('whois_never').textContent = formatNumber(data.whois_stats.never_fetched);
document.getElementById('whois_percent').textContent = data.whois_stats.fresh_percent.toFixed(1) + '%';
document.getElementById('whois_successes').textContent = formatNumber(data.whois_stats.successes_last_hour);
const errorsEl = document.getElementById('whois_errors');
errorsEl.textContent = formatNumber(data.whois_stats.errors_last_hour);
errorsEl.className = 'metric-value' + (data.whois_stats.errors_last_hour > 0 ? ' disconnected' : '');
document.getElementById('whois_interval').textContent = data.whois_stats.current_interval;
}
// Update handler stats
updateHandlerStats(data.handler_stats || []);