Add WHOIS stats to status page with adaptive fetcher improvements

- Add WHOIS Fetcher card showing fresh/stale/never-fetched ASN counts
- Display hourly success/error counts and current fetch interval
- Increase max WHOIS fetch rate to 1/sec (minimum fetch interval lowered from 10s to 1s)
- Select random stale ASN instead of oldest for better distribution
- Add index on whois_updated_at for query performance
- Track success/error timestamps for hourly stats
- Add GetWHOISStats database method for freshness statistics
This commit is contained in:
Jeffrey Paul 2025-12-27 16:20:09 +07:00
parent f8b7d3b773
commit d2041a5a55
8 changed files with 252 additions and 15 deletions

View File

@ -1633,18 +1633,16 @@ func (d *Database) GetRandomPrefixesByLengthContext(
return routes, nil return routes, nil
} }
// GetNextStaleASN returns an ASN that needs WHOIS data refresh. // GetNextStaleASN returns a random ASN that needs WHOIS data refresh.
// Priority: ASNs with no whois_updated_at, then oldest whois_updated_at.
func (d *Database) GetNextStaleASN(ctx context.Context, staleThreshold time.Duration) (int, error) { func (d *Database) GetNextStaleASN(ctx context.Context, staleThreshold time.Duration) (int, error) {
cutoff := time.Now().Add(-staleThreshold) cutoff := time.Now().Add(-staleThreshold)
// Select a random stale ASN using ORDER BY RANDOM()
query := ` query := `
SELECT asn FROM asns SELECT asn FROM asns
WHERE whois_updated_at IS NULL WHERE whois_updated_at IS NULL
OR whois_updated_at < ? OR whois_updated_at < ?
ORDER BY ORDER BY RANDOM()
CASE WHEN whois_updated_at IS NULL THEN 0 ELSE 1 END,
whois_updated_at ASC
LIMIT 1 LIMIT 1
` `
@ -1661,6 +1659,41 @@ func (d *Database) GetNextStaleASN(ctx context.Context, staleThreshold time.Dura
return asn, nil return asn, nil
} }
// WHOISStats contains statistics about WHOIS data freshness.
// It is filled in by GetWHOISStats from a single aggregate query over the
// asns table, classifying each row by its whois_updated_at column.
type WHOISStats struct {
// TotalASNs is the number of rows in the asns table.
TotalASNs int `json:"total_asns"`
// StaleASNs counts ASNs whose whois_updated_at is set but older than the cutoff.
StaleASNs int `json:"stale_asns"`
// FreshASNs counts ASNs whose whois_updated_at is set and at or after the cutoff.
FreshASNs int `json:"fresh_asns"`
// NeverFetched counts ASNs whose whois_updated_at is NULL.
NeverFetched int `json:"never_fetched"`
}
// GetWHOISStats returns statistics about WHOIS data freshness.
//
// Every row of the asns table is placed in exactly one bucket based on its
// whois_updated_at column: never fetched (NULL), stale (older than now minus
// staleThreshold), or fresh (at or after that cutoff). The total row count is
// reported alongside the three buckets.
//
// It returns a wrapped error if the query or the scan of its result fails.
func (d *Database) GetWHOISStats(ctx context.Context, staleThreshold time.Duration) (*WHOISStats, error) {
	cutoff := time.Now().Add(-staleThreshold)

	// SUM() evaluates to NULL when the table has no rows, and Scan cannot
	// store NULL into a plain int. COALESCE turns that case into 0 so an
	// empty asns table yields all-zero stats instead of an error.
	query := `
		SELECT
			COUNT(*) as total,
			COALESCE(SUM(CASE WHEN whois_updated_at IS NULL THEN 1 ELSE 0 END), 0) as never_fetched,
			COALESCE(SUM(CASE WHEN whois_updated_at IS NOT NULL AND whois_updated_at < ? THEN 1 ELSE 0 END), 0) as stale,
			COALESCE(SUM(CASE WHEN whois_updated_at IS NOT NULL AND whois_updated_at >= ? THEN 1 ELSE 0 END), 0) as fresh
		FROM asns
	`

	var stats WHOISStats

	// The cutoff is bound twice: once for the stale bucket, once for the fresh bucket.
	err := d.db.QueryRowContext(ctx, query, cutoff, cutoff).Scan(
		&stats.TotalASNs,
		&stats.NeverFetched,
		&stats.StaleASNs,
		&stats.FreshASNs,
	)
	if err != nil {
		return nil, fmt.Errorf("failed to get WHOIS stats: %w", err)
	}

	return &stats, nil
}
// UpdateASNWHOIS updates an ASN record with WHOIS data. // UpdateASNWHOIS updates an ASN record with WHOIS data.
func (d *Database) UpdateASNWHOIS(ctx context.Context, update *ASNWHOISUpdate) error { func (d *Database) UpdateASNWHOIS(ctx context.Context, update *ASNWHOISUpdate) error {
d.lock("UpdateASNWHOIS") d.lock("UpdateASNWHOIS")

View File

@ -67,6 +67,7 @@ type Store interface {
// ASN WHOIS operations // ASN WHOIS operations
GetNextStaleASN(ctx context.Context, staleThreshold time.Duration) (int, error) GetNextStaleASN(ctx context.Context, staleThreshold time.Duration) (int, error)
UpdateASNWHOIS(ctx context.Context, update *ASNWHOISUpdate) error UpdateASNWHOIS(ctx context.Context, update *ASNWHOISUpdate) error
GetWHOISStats(ctx context.Context, staleThreshold time.Duration) (*WHOISStats, error)
// AS and prefix detail operations // AS and prefix detail operations
GetASDetails(asn int) (*ASN, []LiveRoute, error) GetASDetails(asn int) (*ASN, []LiveRoute, error)

View File

@ -90,6 +90,7 @@ CREATE INDEX IF NOT EXISTS idx_peerings_lookup ON peerings(as_a, as_b);
-- Indexes for asns table -- Indexes for asns table
CREATE INDEX IF NOT EXISTS idx_asns_asn ON asns(asn); CREATE INDEX IF NOT EXISTS idx_asns_asn ON asns(asn);
CREATE INDEX IF NOT EXISTS idx_asns_whois_updated_at ON asns(whois_updated_at);
-- Indexes for bgp_peers table -- Indexes for bgp_peers table
CREATE INDEX IF NOT EXISTS idx_bgp_peers_asn ON bgp_peers(peer_asn); CREATE INDEX IF NOT EXISTS idx_bgp_peers_asn ON bgp_peers(peer_asn);

View File

@ -335,6 +335,19 @@ func (m *mockStore) UpdateASNWHOIS(ctx context.Context, update *database.ASNWHOI
return nil return nil
} }
// GetWHOISStats mock implementation.
// It reports every ASN held by the mock as never fetched, with zero fresh
// and zero stale entries; ctx and staleThreshold are not consulted.
func (m *mockStore) GetWHOISStats(ctx context.Context, staleThreshold time.Duration) (*database.WHOISStats, error) {
	m.mu.Lock()
	defer m.mu.Unlock()

	known := len(m.ASNs)
	stats := database.WHOISStats{
		TotalASNs:    known,
		NeverFetched: known,
	}

	return &stats, nil
}
// UpsertLiveRouteBatch mock implementation // UpsertLiveRouteBatch mock implementation
func (m *mockStore) UpsertLiveRouteBatch(routes []*database.LiveRoute) error { func (m *mockStore) UpsertLiveRouteBatch(routes []*database.LiveRoute) error {
m.mu.Lock() m.mu.Lock()

View File

@ -8,6 +8,7 @@ import (
"time" "time"
"git.eeqj.de/sneak/routewatch/internal/database" "git.eeqj.de/sneak/routewatch/internal/database"
"git.eeqj.de/sneak/routewatch/internal/server"
"git.eeqj.de/sneak/routewatch/internal/whois" "git.eeqj.de/sneak/routewatch/internal/whois"
) )
@ -17,7 +18,7 @@ const (
baseInterval = 15 * time.Second baseInterval = 15 * time.Second
// minInterval is the minimum interval after successes (rate limit). // minInterval is the minimum interval after successes (rate limit).
minInterval = 10 * time.Second minInterval = 1 * time.Second
// maxInterval is the maximum interval after failures (backoff cap). // maxInterval is the maximum interval after failures (backoff cap).
maxInterval = 5 * time.Minute maxInterval = 5 * time.Minute
@ -30,8 +31,12 @@ const (
// immediateQueueSize is the buffer size for immediate fetch requests. // immediateQueueSize is the buffer size for immediate fetch requests.
immediateQueueSize = 100 immediateQueueSize = 100
// statsWindow is how long to keep stats for.
statsWindow = time.Hour
) )
// ASNFetcher handles background WHOIS lookups for ASNs. // ASNFetcher handles background WHOIS lookups for ASNs.
type ASNFetcher struct { type ASNFetcher struct {
db database.Store db database.Store
@ -45,9 +50,14 @@ type ASNFetcher struct {
fetchMu sync.Mutex fetchMu sync.Mutex
// interval tracking with mutex protection // interval tracking with mutex protection
intervalMu sync.Mutex intervalMu sync.Mutex
currentInterval time.Duration currentInterval time.Duration
consecutiveFails int consecutiveFails int
// hourly stats tracking
statsMu sync.Mutex
successTimes []time.Time
errorTimes []time.Time
} }
// NewASNFetcher creates a new ASN fetcher. // NewASNFetcher creates a new ASN fetcher.
@ -59,6 +69,8 @@ func NewASNFetcher(db database.Store, logger *slog.Logger) *ASNFetcher {
immediateQueue: make(chan int, immediateQueueSize), immediateQueue: make(chan int, immediateQueueSize),
stopCh: make(chan struct{}), stopCh: make(chan struct{}),
currentInterval: baseInterval, currentInterval: baseInterval,
successTimes: make([]time.Time, 0),
errorTimes: make([]time.Time, 0),
} }
} }
@ -91,6 +103,41 @@ func (f *ASNFetcher) QueueImmediate(asn int) {
} }
} }
// GetStats returns statistics about fetcher activity.
//
// The result holds the current fetch interval, the consecutive failure
// count, and the number of recorded successes and errors newer than
// statsWindow. As a side effect, timestamps older than the window are
// dropped from the fetcher's success and error histories.
func (f *ASNFetcher) GetStats() server.ASNFetcherStats {
	f.statsMu.Lock()
	defer f.statsMu.Unlock()

	var snapshot server.ASNFetcherStats

	// Copy the interval state under its own mutex and release it right away.
	f.intervalMu.Lock()
	snapshot.CurrentInterval = f.currentInterval
	snapshot.ConsecutiveFails = f.consecutiveFails
	f.intervalMu.Unlock()

	// Discard entries that fell out of the stats window, then count what is left.
	oldest := time.Now().Add(-statsWindow)
	f.successTimes = pruneOldTimes(f.successTimes, oldest)
	f.errorTimes = pruneOldTimes(f.errorTimes, oldest)

	snapshot.SuccessesLastHour = len(f.successTimes)
	snapshot.ErrorsLastHour = len(f.errorTimes)

	return snapshot
}
// pruneOldTimes removes times older than cutoff and returns the pruned slice.
// Only entries strictly after cutoff are kept; an entry equal to cutoff is
// dropped. The input slice is left untouched and the result is always a
// freshly allocated, non-nil slice in the original order.
func pruneOldTimes(times []time.Time, cutoff time.Time) []time.Time {
	kept := make([]time.Time, 0, len(times))

	for i := range times {
		if !times[i].After(cutoff) {
			continue
		}

		kept = append(kept, times[i])
	}

	return kept
}
// getInterval returns the current fetch interval. // getInterval returns the current fetch interval.
func (f *ASNFetcher) getInterval() time.Duration { func (f *ASNFetcher) getInterval() time.Duration {
f.intervalMu.Lock() f.intervalMu.Lock()
@ -102,8 +149,6 @@ func (f *ASNFetcher) getInterval() time.Duration {
// recordSuccess decreases the interval on successful fetch. // recordSuccess decreases the interval on successful fetch.
func (f *ASNFetcher) recordSuccess() { func (f *ASNFetcher) recordSuccess() {
f.intervalMu.Lock() f.intervalMu.Lock()
defer f.intervalMu.Unlock()
f.consecutiveFails = 0 f.consecutiveFails = 0
// Decrease interval by half, but not below minimum // Decrease interval by half, but not below minimum
@ -119,13 +164,17 @@ func (f *ASNFetcher) recordSuccess() {
) )
f.currentInterval = newInterval f.currentInterval = newInterval
} }
f.intervalMu.Unlock()
// Record success time for stats
f.statsMu.Lock()
f.successTimes = append(f.successTimes, time.Now())
f.statsMu.Unlock()
} }
// recordFailure increases the interval on failed fetch using exponential backoff. // recordFailure increases the interval on failed fetch using exponential backoff.
func (f *ASNFetcher) recordFailure() { func (f *ASNFetcher) recordFailure() {
f.intervalMu.Lock() f.intervalMu.Lock()
defer f.intervalMu.Unlock()
f.consecutiveFails++ f.consecutiveFails++
// Exponential backoff: multiply by 2, capped at max // Exponential backoff: multiply by 2, capped at max
@ -142,6 +191,12 @@ func (f *ASNFetcher) recordFailure() {
) )
f.currentInterval = newInterval f.currentInterval = newInterval
} }
f.intervalMu.Unlock()
// Record error time for stats
f.statsMu.Lock()
f.errorTimes = append(f.errorTimes, time.Now())
f.statsMu.Unlock()
} }
// run is the main background loop. // run is the main background loop.
@ -263,3 +318,8 @@ func (f *ASNFetcher) fetchAndUpdate(asn int) bool {
return true return true
} }
// GetStaleThreshold returns the WHOIS stale threshold duration.
// It exposes the package-level whoisStaleThreshold value through an exported
// accessor; the value itself is declared elsewhere in this package.
func GetStaleThreshold() time.Duration {
return whoisStaleThreshold
}

View File

@ -58,6 +58,19 @@ func writeJSONSuccess(w http.ResponseWriter, data interface{}) error {
}) })
} }
// WHOISStatsInfo contains WHOIS fetcher statistics for the status page.
// It merges the database freshness counts with the fetcher's activity
// counters and is serialized under the "whois_stats" key of the stats JSON.
type WHOISStatsInfo struct {
// TotalASNs, FreshASNs, StaleASNs and NeverFetched are copied from the
// database WHOIS stats.
TotalASNs int `json:"total_asns"`
FreshASNs int `json:"fresh_asns"`
StaleASNs int `json:"stale_asns"`
NeverFetched int `json:"never_fetched"`
// SuccessesLastHour and ErrorsLastHour are copied from the fetcher stats.
SuccessesLastHour int `json:"successes_last_hour"`
ErrorsLastHour int `json:"errors_last_hour"`
// CurrentInterval is the fetcher's current interval rendered with
// time.Duration.String().
CurrentInterval string `json:"current_interval"`
// ConsecutiveFails is copied from the fetcher stats.
ConsecutiveFails int `json:"consecutive_fails"`
// FreshPercent is FreshASNs / TotalASNs as a percentage; it stays 0 when
// TotalASNs is 0.
FreshPercent float64 `json:"fresh_percent"`
}
// handleStatusJSON returns a handler that serves JSON statistics including // handleStatusJSON returns a handler that serves JSON statistics including
// uptime, message counts, database stats, and route information. // uptime, message counts, database stats, and route information.
func (s *Server) handleStatusJSON() http.HandlerFunc { func (s *Server) handleStatusJSON() http.HandlerFunc {
@ -88,6 +101,7 @@ func (s *Server) handleStatusJSON() http.HandlerFunc {
IPv6UpdatesPerSec float64 `json:"ipv6_updates_per_sec"` IPv6UpdatesPerSec float64 `json:"ipv6_updates_per_sec"`
IPv4PrefixDistribution []database.PrefixDistribution `json:"ipv4_prefix_distribution"` IPv4PrefixDistribution []database.PrefixDistribution `json:"ipv4_prefix_distribution"`
IPv6PrefixDistribution []database.PrefixDistribution `json:"ipv6_prefix_distribution"` IPv6PrefixDistribution []database.PrefixDistribution `json:"ipv6_prefix_distribution"`
WHOISStats *WHOISStatsInfo `json:"whois_stats,omitempty"`
} }
return func(w http.ResponseWriter, r *http.Request) { return func(w http.ResponseWriter, r *http.Request) {
@ -149,6 +163,12 @@ func (s *Server) handleStatusJSON() http.HandlerFunc {
var memStats runtime.MemStats var memStats runtime.MemStats
runtime.ReadMemStats(&memStats) runtime.ReadMemStats(&memStats)
// Get WHOIS stats if fetcher is available
var whoisStats *WHOISStatsInfo
if s.asnFetcher != nil {
whoisStats = s.getWHOISStats(ctx)
}
stats := Stats{ stats := Stats{
Uptime: uptime, Uptime: uptime,
TotalMessages: metrics.TotalMessages, TotalMessages: metrics.TotalMessages,
@ -175,6 +195,7 @@ func (s *Server) handleStatusJSON() http.HandlerFunc {
IPv6UpdatesPerSec: routeMetrics.IPv6UpdatesPerSec, IPv6UpdatesPerSec: routeMetrics.IPv6UpdatesPerSec,
IPv4PrefixDistribution: dbStats.IPv4PrefixDistribution, IPv4PrefixDistribution: dbStats.IPv4PrefixDistribution,
IPv6PrefixDistribution: dbStats.IPv6PrefixDistribution, IPv6PrefixDistribution: dbStats.IPv6PrefixDistribution,
WHOISStats: whoisStats,
} }
if err := writeJSONSuccess(w, stats); err != nil { if err := writeJSONSuccess(w, stats); err != nil {
@ -183,6 +204,44 @@ func (s *Server) handleStatusJSON() http.HandlerFunc {
} }
} }
// getWHOISStats builds WHOIS statistics from database and fetcher.
//
// It returns nil (after logging a warning) when the database query fails.
// The caller is expected to have checked that s.asnFetcher is non-nil,
// because the fetcher is dereferenced here without a guard.
func (s *Server) getWHOISStats(ctx context.Context) *WHOISStatsInfo {
	freshness, err := s.db.GetWHOISStats(ctx, whoisStaleThreshold)
	if err != nil {
		s.logger.Warn("Failed to get WHOIS stats", "error", err)

		return nil
	}

	activity := s.asnFetcher.GetStats()

	info := &WHOISStatsInfo{
		TotalASNs:         freshness.TotalASNs,
		FreshASNs:         freshness.FreshASNs,
		StaleASNs:         freshness.StaleASNs,
		NeverFetched:      freshness.NeverFetched,
		SuccessesLastHour: activity.SuccessesLastHour,
		ErrorsLastHour:    activity.ErrorsLastHour,
		CurrentInterval:   activity.CurrentInterval.String(),
		ConsecutiveFails:  activity.ConsecutiveFails,
	}

	// Leave the percentage at zero when there are no ASNs, avoiding a
	// division by zero.
	if freshness.TotalASNs > 0 {
		info.FreshPercent = float64(freshness.FreshASNs) / float64(freshness.TotalASNs) * percentMultiplier
	}

	return info
}
const (
	// whoisStaleThreshold matches the fetcher's threshold for consistency.
	// WHOIS data older than this (30 days) is reported as stale.
	whoisStaleThreshold = 30 * 24 * time.Hour

	// percentMultiplier converts a ratio to a percentage.
	percentMultiplier = 100
)
// handleStats returns a handler that serves API v1 statistics including // handleStats returns a handler that serves API v1 statistics including
// detailed handler queue statistics and performance metrics. // detailed handler queue statistics and performance metrics.
func (s *Server) handleStats() http.HandlerFunc { func (s *Server) handleStats() http.HandlerFunc {
@ -227,6 +286,7 @@ func (s *Server) handleStats() http.HandlerFunc {
HandlerStats []HandlerStatsInfo `json:"handler_stats"` HandlerStats []HandlerStatsInfo `json:"handler_stats"`
IPv4PrefixDistribution []database.PrefixDistribution `json:"ipv4_prefix_distribution"` IPv4PrefixDistribution []database.PrefixDistribution `json:"ipv4_prefix_distribution"`
IPv6PrefixDistribution []database.PrefixDistribution `json:"ipv6_prefix_distribution"` IPv6PrefixDistribution []database.PrefixDistribution `json:"ipv6_prefix_distribution"`
WHOISStats *WHOISStatsInfo `json:"whois_stats,omitempty"`
} }
return func(w http.ResponseWriter, r *http.Request) { return func(w http.ResponseWriter, r *http.Request) {
@ -314,6 +374,12 @@ func (s *Server) handleStats() http.HandlerFunc {
var memStats runtime.MemStats var memStats runtime.MemStats
runtime.ReadMemStats(&memStats) runtime.ReadMemStats(&memStats)
// Get WHOIS stats if fetcher is available
var whoisStats *WHOISStatsInfo
if s.asnFetcher != nil {
whoisStats = s.getWHOISStats(ctx)
}
stats := StatsResponse{ stats := StatsResponse{
Uptime: uptime, Uptime: uptime,
TotalMessages: metrics.TotalMessages, TotalMessages: metrics.TotalMessages,
@ -341,6 +407,7 @@ func (s *Server) handleStats() http.HandlerFunc {
HandlerStats: handlerStatsInfo, HandlerStats: handlerStatsInfo,
IPv4PrefixDistribution: dbStats.IPv4PrefixDistribution, IPv4PrefixDistribution: dbStats.IPv4PrefixDistribution,
IPv6PrefixDistribution: dbStats.IPv6PrefixDistribution, IPv6PrefixDistribution: dbStats.IPv6PrefixDistribution,
WHOISStats: whoisStats,
} }
if err := writeJSONSuccess(w, stats); err != nil { if err := writeJSONSuccess(w, stats); err != nil {

View File

@ -13,9 +13,18 @@ import (
"github.com/go-chi/chi/v5" "github.com/go-chi/chi/v5"
) )
// ASNFetcherStats contains WHOIS fetcher statistics.
// It is the value returned by ASNFetcher.GetStats.
type ASNFetcherStats struct {
// SuccessesLastHour is the number of successful fetches in the last hour.
SuccessesLastHour int
// ErrorsLastHour is the number of failed fetches in the last hour.
ErrorsLastHour int
// CurrentInterval is the fetcher's current delay between fetches.
CurrentInterval time.Duration
// ConsecutiveFails is the fetcher's current consecutive failure count.
ConsecutiveFails int
}
// ASNFetcher is an interface for queuing ASN WHOIS lookups. // ASNFetcher is an interface for queuing ASN WHOIS lookups.
type ASNFetcher interface { type ASNFetcher interface {
QueueImmediate(asn int) QueueImmediate(asn int)
GetStats() ASNFetcherStats
} }
// Server provides HTTP endpoints for status monitoring // Server provides HTTP endpoints for status monitoring

View File

@ -177,6 +177,38 @@
<span class="metric-value" id="ipv6_updates_per_sec">-</span> <span class="metric-value" id="ipv6_updates_per_sec">-</span>
</div> </div>
</div> </div>
<div class="status-card">
<h2>WHOIS Fetcher</h2>
<div class="metric">
<span class="metric-label">Fresh ASNs</span>
<span class="metric-value" id="whois_fresh">-</span>
</div>
<div class="metric">
<span class="metric-label">Stale ASNs</span>
<span class="metric-value" id="whois_stale">-</span>
</div>
<div class="metric">
<span class="metric-label">Never Fetched</span>
<span class="metric-value" id="whois_never">-</span>
</div>
<div class="metric">
<span class="metric-label">Fresh %</span>
<span class="metric-value" id="whois_percent">-</span>
</div>
<div class="metric">
<span class="metric-label">Successes (1h)</span>
<span class="metric-value" id="whois_successes">-</span>
</div>
<div class="metric">
<span class="metric-label">Errors (1h)</span>
<span class="metric-value" id="whois_errors">-</span>
</div>
<div class="metric">
<span class="metric-label">Current Interval</span>
<span class="metric-value" id="whois_interval">-</span>
</div>
</div>
</div> </div>
<div class="status-grid"> <div class="status-grid">
@ -318,6 +350,14 @@
document.getElementById('ipv6_routes').textContent = '-'; document.getElementById('ipv6_routes').textContent = '-';
document.getElementById('ipv4_updates_per_sec').textContent = '-'; document.getElementById('ipv4_updates_per_sec').textContent = '-';
document.getElementById('ipv6_updates_per_sec').textContent = '-'; document.getElementById('ipv6_updates_per_sec').textContent = '-';
document.getElementById('whois_fresh').textContent = '-';
document.getElementById('whois_stale').textContent = '-';
document.getElementById('whois_never').textContent = '-';
document.getElementById('whois_percent').textContent = '-';
document.getElementById('whois_successes').textContent = '-';
document.getElementById('whois_errors').textContent = '-';
document.getElementById('whois_errors').className = 'metric-value';
document.getElementById('whois_interval').textContent = '-';
// Clear handler stats // Clear handler stats
document.getElementById('handler-stats-container').innerHTML = ''; document.getElementById('handler-stats-container').innerHTML = '';
@ -369,6 +409,19 @@
document.getElementById('ipv4_updates_per_sec').textContent = data.ipv4_updates_per_sec.toFixed(1); document.getElementById('ipv4_updates_per_sec').textContent = data.ipv4_updates_per_sec.toFixed(1);
document.getElementById('ipv6_updates_per_sec').textContent = data.ipv6_updates_per_sec.toFixed(1); document.getElementById('ipv6_updates_per_sec').textContent = data.ipv6_updates_per_sec.toFixed(1);
// Update WHOIS stats
if (data.whois_stats) {
document.getElementById('whois_fresh').textContent = formatNumber(data.whois_stats.fresh_asns);
document.getElementById('whois_stale').textContent = formatNumber(data.whois_stats.stale_asns);
document.getElementById('whois_never').textContent = formatNumber(data.whois_stats.never_fetched);
document.getElementById('whois_percent').textContent = data.whois_stats.fresh_percent.toFixed(1) + '%';
document.getElementById('whois_successes').textContent = formatNumber(data.whois_stats.successes_last_hour);
const errorsEl = document.getElementById('whois_errors');
errorsEl.textContent = formatNumber(data.whois_stats.errors_last_hour);
errorsEl.className = 'metric-value' + (data.whois_stats.errors_last_hour > 0 ? ' disconnected' : '');
document.getElementById('whois_interval').textContent = data.whois_stats.current_interval;
}
// Update handler stats // Update handler stats
updateHandlerStats(data.handler_stats || []); updateHandlerStats(data.handler_stats || []);