Add WHOIS stats to status page with adaptive fetcher improvements
- Add WHOIS Fetcher card showing fresh/stale/never-fetched ASN counts - Display hourly success/error counts and current fetch interval - Increase max WHOIS rate to 1/sec (down from 10 sec minimum) - Select random stale ASN instead of oldest for better distribution - Add index on whois_updated_at for query performance - Track success/error timestamps for hourly stats - Add GetWHOISStats database method for freshness statistics
This commit is contained in:
parent
f8b7d3b773
commit
d2041a5a55
@ -1633,18 +1633,16 @@ func (d *Database) GetRandomPrefixesByLengthContext(
|
||||
return routes, nil
|
||||
}
|
||||
|
||||
// GetNextStaleASN returns an ASN that needs WHOIS data refresh.
|
||||
// Priority: ASNs with no whois_updated_at, then oldest whois_updated_at.
|
||||
// GetNextStaleASN returns a random ASN that needs WHOIS data refresh.
|
||||
func (d *Database) GetNextStaleASN(ctx context.Context, staleThreshold time.Duration) (int, error) {
|
||||
cutoff := time.Now().Add(-staleThreshold)
|
||||
|
||||
// Select a random stale ASN using ORDER BY RANDOM()
|
||||
query := `
|
||||
SELECT asn FROM asns
|
||||
WHERE whois_updated_at IS NULL
|
||||
OR whois_updated_at < ?
|
||||
ORDER BY
|
||||
CASE WHEN whois_updated_at IS NULL THEN 0 ELSE 1 END,
|
||||
whois_updated_at ASC
|
||||
ORDER BY RANDOM()
|
||||
LIMIT 1
|
||||
`
|
||||
|
||||
@ -1661,6 +1659,41 @@ func (d *Database) GetNextStaleASN(ctx context.Context, staleThreshold time.Dura
|
||||
return asn, nil
|
||||
}
|
||||
|
||||
// WHOISStats contains statistics about WHOIS data freshness.
|
||||
type WHOISStats struct {
|
||||
TotalASNs int `json:"total_asns"`
|
||||
StaleASNs int `json:"stale_asns"`
|
||||
FreshASNs int `json:"fresh_asns"`
|
||||
NeverFetched int `json:"never_fetched"`
|
||||
}
|
||||
|
||||
// GetWHOISStats returns statistics about WHOIS data freshness.
|
||||
func (d *Database) GetWHOISStats(ctx context.Context, staleThreshold time.Duration) (*WHOISStats, error) {
|
||||
cutoff := time.Now().Add(-staleThreshold)
|
||||
|
||||
query := `
|
||||
SELECT
|
||||
COUNT(*) as total,
|
||||
SUM(CASE WHEN whois_updated_at IS NULL THEN 1 ELSE 0 END) as never_fetched,
|
||||
SUM(CASE WHEN whois_updated_at IS NOT NULL AND whois_updated_at < ? THEN 1 ELSE 0 END) as stale,
|
||||
SUM(CASE WHEN whois_updated_at IS NOT NULL AND whois_updated_at >= ? THEN 1 ELSE 0 END) as fresh
|
||||
FROM asns
|
||||
`
|
||||
|
||||
var stats WHOISStats
|
||||
err := d.db.QueryRowContext(ctx, query, cutoff, cutoff).Scan(
|
||||
&stats.TotalASNs,
|
||||
&stats.NeverFetched,
|
||||
&stats.StaleASNs,
|
||||
&stats.FreshASNs,
|
||||
)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to get WHOIS stats: %w", err)
|
||||
}
|
||||
|
||||
return &stats, nil
|
||||
}
|
||||
|
||||
// UpdateASNWHOIS updates an ASN record with WHOIS data.
|
||||
func (d *Database) UpdateASNWHOIS(ctx context.Context, update *ASNWHOISUpdate) error {
|
||||
d.lock("UpdateASNWHOIS")
|
||||
|
||||
@ -67,6 +67,7 @@ type Store interface {
|
||||
// ASN WHOIS operations
|
||||
GetNextStaleASN(ctx context.Context, staleThreshold time.Duration) (int, error)
|
||||
UpdateASNWHOIS(ctx context.Context, update *ASNWHOISUpdate) error
|
||||
GetWHOISStats(ctx context.Context, staleThreshold time.Duration) (*WHOISStats, error)
|
||||
|
||||
// AS and prefix detail operations
|
||||
GetASDetails(asn int) (*ASN, []LiveRoute, error)
|
||||
|
||||
@ -90,6 +90,7 @@ CREATE INDEX IF NOT EXISTS idx_peerings_lookup ON peerings(as_a, as_b);
|
||||
|
||||
-- Indexes for asns table
|
||||
CREATE INDEX IF NOT EXISTS idx_asns_asn ON asns(asn);
|
||||
CREATE INDEX IF NOT EXISTS idx_asns_whois_updated_at ON asns(whois_updated_at);
|
||||
|
||||
-- Indexes for bgp_peers table
|
||||
CREATE INDEX IF NOT EXISTS idx_bgp_peers_asn ON bgp_peers(peer_asn);
|
||||
|
||||
@ -335,6 +335,19 @@ func (m *mockStore) UpdateASNWHOIS(ctx context.Context, update *database.ASNWHOI
|
||||
return nil
|
||||
}
|
||||
|
||||
// GetWHOISStats mock implementation
|
||||
func (m *mockStore) GetWHOISStats(ctx context.Context, staleThreshold time.Duration) (*database.WHOISStats, error) {
|
||||
m.mu.Lock()
|
||||
defer m.mu.Unlock()
|
||||
|
||||
return &database.WHOISStats{
|
||||
TotalASNs: len(m.ASNs),
|
||||
FreshASNs: 0,
|
||||
StaleASNs: 0,
|
||||
NeverFetched: len(m.ASNs),
|
||||
}, nil
|
||||
}
|
||||
|
||||
// UpsertLiveRouteBatch mock implementation
|
||||
func (m *mockStore) UpsertLiveRouteBatch(routes []*database.LiveRoute) error {
|
||||
m.mu.Lock()
|
||||
|
||||
@ -8,6 +8,7 @@ import (
|
||||
"time"
|
||||
|
||||
"git.eeqj.de/sneak/routewatch/internal/database"
|
||||
"git.eeqj.de/sneak/routewatch/internal/server"
|
||||
"git.eeqj.de/sneak/routewatch/internal/whois"
|
||||
)
|
||||
|
||||
@ -17,7 +18,7 @@ const (
|
||||
baseInterval = 15 * time.Second
|
||||
|
||||
// minInterval is the minimum interval after successes (rate limit).
|
||||
minInterval = 10 * time.Second
|
||||
minInterval = 1 * time.Second
|
||||
|
||||
// maxInterval is the maximum interval after failures (backoff cap).
|
||||
maxInterval = 5 * time.Minute
|
||||
@ -30,8 +31,12 @@ const (
|
||||
|
||||
// immediateQueueSize is the buffer size for immediate fetch requests.
|
||||
immediateQueueSize = 100
|
||||
|
||||
// statsWindow is how long to keep stats for.
|
||||
statsWindow = time.Hour
|
||||
)
|
||||
|
||||
|
||||
// ASNFetcher handles background WHOIS lookups for ASNs.
|
||||
type ASNFetcher struct {
|
||||
db database.Store
|
||||
@ -45,9 +50,14 @@ type ASNFetcher struct {
|
||||
fetchMu sync.Mutex
|
||||
|
||||
// interval tracking with mutex protection
|
||||
intervalMu sync.Mutex
|
||||
currentInterval time.Duration
|
||||
intervalMu sync.Mutex
|
||||
currentInterval time.Duration
|
||||
consecutiveFails int
|
||||
|
||||
// hourly stats tracking
|
||||
statsMu sync.Mutex
|
||||
successTimes []time.Time
|
||||
errorTimes []time.Time
|
||||
}
|
||||
|
||||
// NewASNFetcher creates a new ASN fetcher.
|
||||
@ -59,6 +69,8 @@ func NewASNFetcher(db database.Store, logger *slog.Logger) *ASNFetcher {
|
||||
immediateQueue: make(chan int, immediateQueueSize),
|
||||
stopCh: make(chan struct{}),
|
||||
currentInterval: baseInterval,
|
||||
successTimes: make([]time.Time, 0),
|
||||
errorTimes: make([]time.Time, 0),
|
||||
}
|
||||
}
|
||||
|
||||
@ -91,6 +103,41 @@ func (f *ASNFetcher) QueueImmediate(asn int) {
|
||||
}
|
||||
}
|
||||
|
||||
// GetStats returns statistics about fetcher activity.
|
||||
func (f *ASNFetcher) GetStats() server.ASNFetcherStats {
|
||||
f.statsMu.Lock()
|
||||
defer f.statsMu.Unlock()
|
||||
|
||||
f.intervalMu.Lock()
|
||||
interval := f.currentInterval
|
||||
fails := f.consecutiveFails
|
||||
f.intervalMu.Unlock()
|
||||
|
||||
// Prune old entries and count
|
||||
cutoff := time.Now().Add(-statsWindow)
|
||||
f.successTimes = pruneOldTimes(f.successTimes, cutoff)
|
||||
f.errorTimes = pruneOldTimes(f.errorTimes, cutoff)
|
||||
|
||||
return server.ASNFetcherStats{
|
||||
SuccessesLastHour: len(f.successTimes),
|
||||
ErrorsLastHour: len(f.errorTimes),
|
||||
CurrentInterval: interval,
|
||||
ConsecutiveFails: fails,
|
||||
}
|
||||
}
|
||||
|
||||
// pruneOldTimes removes times older than cutoff and returns the pruned slice.
|
||||
func pruneOldTimes(times []time.Time, cutoff time.Time) []time.Time {
|
||||
result := make([]time.Time, 0, len(times))
|
||||
for _, t := range times {
|
||||
if t.After(cutoff) {
|
||||
result = append(result, t)
|
||||
}
|
||||
}
|
||||
|
||||
return result
|
||||
}
|
||||
|
||||
// getInterval returns the current fetch interval.
|
||||
func (f *ASNFetcher) getInterval() time.Duration {
|
||||
f.intervalMu.Lock()
|
||||
@ -102,8 +149,6 @@ func (f *ASNFetcher) getInterval() time.Duration {
|
||||
// recordSuccess decreases the interval on successful fetch.
|
||||
func (f *ASNFetcher) recordSuccess() {
|
||||
f.intervalMu.Lock()
|
||||
defer f.intervalMu.Unlock()
|
||||
|
||||
f.consecutiveFails = 0
|
||||
|
||||
// Decrease interval by half, but not below minimum
|
||||
@ -119,13 +164,17 @@ func (f *ASNFetcher) recordSuccess() {
|
||||
)
|
||||
f.currentInterval = newInterval
|
||||
}
|
||||
f.intervalMu.Unlock()
|
||||
|
||||
// Record success time for stats
|
||||
f.statsMu.Lock()
|
||||
f.successTimes = append(f.successTimes, time.Now())
|
||||
f.statsMu.Unlock()
|
||||
}
|
||||
|
||||
// recordFailure increases the interval on failed fetch using exponential backoff.
|
||||
func (f *ASNFetcher) recordFailure() {
|
||||
f.intervalMu.Lock()
|
||||
defer f.intervalMu.Unlock()
|
||||
|
||||
f.consecutiveFails++
|
||||
|
||||
// Exponential backoff: multiply by 2, capped at max
|
||||
@ -142,6 +191,12 @@ func (f *ASNFetcher) recordFailure() {
|
||||
)
|
||||
f.currentInterval = newInterval
|
||||
}
|
||||
f.intervalMu.Unlock()
|
||||
|
||||
// Record error time for stats
|
||||
f.statsMu.Lock()
|
||||
f.errorTimes = append(f.errorTimes, time.Now())
|
||||
f.statsMu.Unlock()
|
||||
}
|
||||
|
||||
// run is the main background loop.
|
||||
@ -263,3 +318,8 @@ func (f *ASNFetcher) fetchAndUpdate(asn int) bool {
|
||||
|
||||
return true
|
||||
}
|
||||
|
||||
// GetStaleThreshold returns the WHOIS stale threshold duration.
|
||||
func GetStaleThreshold() time.Duration {
|
||||
return whoisStaleThreshold
|
||||
}
|
||||
|
||||
@ -58,6 +58,19 @@ func writeJSONSuccess(w http.ResponseWriter, data interface{}) error {
|
||||
})
|
||||
}
|
||||
|
||||
// WHOISStatsInfo contains WHOIS fetcher statistics for the status page.
|
||||
type WHOISStatsInfo struct {
|
||||
TotalASNs int `json:"total_asns"`
|
||||
FreshASNs int `json:"fresh_asns"`
|
||||
StaleASNs int `json:"stale_asns"`
|
||||
NeverFetched int `json:"never_fetched"`
|
||||
SuccessesLastHour int `json:"successes_last_hour"`
|
||||
ErrorsLastHour int `json:"errors_last_hour"`
|
||||
CurrentInterval string `json:"current_interval"`
|
||||
ConsecutiveFails int `json:"consecutive_fails"`
|
||||
FreshPercent float64 `json:"fresh_percent"`
|
||||
}
|
||||
|
||||
// handleStatusJSON returns a handler that serves JSON statistics including
|
||||
// uptime, message counts, database stats, and route information.
|
||||
func (s *Server) handleStatusJSON() http.HandlerFunc {
|
||||
@ -88,6 +101,7 @@ func (s *Server) handleStatusJSON() http.HandlerFunc {
|
||||
IPv6UpdatesPerSec float64 `json:"ipv6_updates_per_sec"`
|
||||
IPv4PrefixDistribution []database.PrefixDistribution `json:"ipv4_prefix_distribution"`
|
||||
IPv6PrefixDistribution []database.PrefixDistribution `json:"ipv6_prefix_distribution"`
|
||||
WHOISStats *WHOISStatsInfo `json:"whois_stats,omitempty"`
|
||||
}
|
||||
|
||||
return func(w http.ResponseWriter, r *http.Request) {
|
||||
@ -149,6 +163,12 @@ func (s *Server) handleStatusJSON() http.HandlerFunc {
|
||||
var memStats runtime.MemStats
|
||||
runtime.ReadMemStats(&memStats)
|
||||
|
||||
// Get WHOIS stats if fetcher is available
|
||||
var whoisStats *WHOISStatsInfo
|
||||
if s.asnFetcher != nil {
|
||||
whoisStats = s.getWHOISStats(ctx)
|
||||
}
|
||||
|
||||
stats := Stats{
|
||||
Uptime: uptime,
|
||||
TotalMessages: metrics.TotalMessages,
|
||||
@ -175,6 +195,7 @@ func (s *Server) handleStatusJSON() http.HandlerFunc {
|
||||
IPv6UpdatesPerSec: routeMetrics.IPv6UpdatesPerSec,
|
||||
IPv4PrefixDistribution: dbStats.IPv4PrefixDistribution,
|
||||
IPv6PrefixDistribution: dbStats.IPv6PrefixDistribution,
|
||||
WHOISStats: whoisStats,
|
||||
}
|
||||
|
||||
if err := writeJSONSuccess(w, stats); err != nil {
|
||||
@ -183,6 +204,44 @@ func (s *Server) handleStatusJSON() http.HandlerFunc {
|
||||
}
|
||||
}
|
||||
|
||||
// getWHOISStats builds WHOIS statistics from database and fetcher.
|
||||
func (s *Server) getWHOISStats(ctx context.Context) *WHOISStatsInfo {
|
||||
// Get database WHOIS stats
|
||||
dbStats, err := s.db.GetWHOISStats(ctx, whoisStaleThreshold)
|
||||
if err != nil {
|
||||
s.logger.Warn("Failed to get WHOIS stats", "error", err)
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// Get fetcher stats
|
||||
fetcherStats := s.asnFetcher.GetStats()
|
||||
|
||||
// Calculate fresh percentage
|
||||
var freshPercent float64
|
||||
if dbStats.TotalASNs > 0 {
|
||||
freshPercent = float64(dbStats.FreshASNs) / float64(dbStats.TotalASNs) * percentMultiplier
|
||||
}
|
||||
|
||||
return &WHOISStatsInfo{
|
||||
TotalASNs: dbStats.TotalASNs,
|
||||
FreshASNs: dbStats.FreshASNs,
|
||||
StaleASNs: dbStats.StaleASNs,
|
||||
NeverFetched: dbStats.NeverFetched,
|
||||
SuccessesLastHour: fetcherStats.SuccessesLastHour,
|
||||
ErrorsLastHour: fetcherStats.ErrorsLastHour,
|
||||
CurrentInterval: fetcherStats.CurrentInterval.String(),
|
||||
ConsecutiveFails: fetcherStats.ConsecutiveFails,
|
||||
FreshPercent: freshPercent,
|
||||
}
|
||||
}
|
||||
|
||||
// whoisStaleThreshold matches the fetcher's threshold for consistency.
|
||||
const whoisStaleThreshold = 30 * 24 * time.Hour
|
||||
|
||||
// percentMultiplier converts a ratio to a percentage.
|
||||
const percentMultiplier = 100
|
||||
|
||||
// handleStats returns a handler that serves API v1 statistics including
|
||||
// detailed handler queue statistics and performance metrics.
|
||||
func (s *Server) handleStats() http.HandlerFunc {
|
||||
@ -227,6 +286,7 @@ func (s *Server) handleStats() http.HandlerFunc {
|
||||
HandlerStats []HandlerStatsInfo `json:"handler_stats"`
|
||||
IPv4PrefixDistribution []database.PrefixDistribution `json:"ipv4_prefix_distribution"`
|
||||
IPv6PrefixDistribution []database.PrefixDistribution `json:"ipv6_prefix_distribution"`
|
||||
WHOISStats *WHOISStatsInfo `json:"whois_stats,omitempty"`
|
||||
}
|
||||
|
||||
return func(w http.ResponseWriter, r *http.Request) {
|
||||
@ -314,6 +374,12 @@ func (s *Server) handleStats() http.HandlerFunc {
|
||||
var memStats runtime.MemStats
|
||||
runtime.ReadMemStats(&memStats)
|
||||
|
||||
// Get WHOIS stats if fetcher is available
|
||||
var whoisStats *WHOISStatsInfo
|
||||
if s.asnFetcher != nil {
|
||||
whoisStats = s.getWHOISStats(ctx)
|
||||
}
|
||||
|
||||
stats := StatsResponse{
|
||||
Uptime: uptime,
|
||||
TotalMessages: metrics.TotalMessages,
|
||||
@ -341,6 +407,7 @@ func (s *Server) handleStats() http.HandlerFunc {
|
||||
HandlerStats: handlerStatsInfo,
|
||||
IPv4PrefixDistribution: dbStats.IPv4PrefixDistribution,
|
||||
IPv6PrefixDistribution: dbStats.IPv6PrefixDistribution,
|
||||
WHOISStats: whoisStats,
|
||||
}
|
||||
|
||||
if err := writeJSONSuccess(w, stats); err != nil {
|
||||
|
||||
@ -13,9 +13,18 @@ import (
|
||||
"github.com/go-chi/chi/v5"
|
||||
)
|
||||
|
||||
// ASNFetcherStats contains WHOIS fetcher statistics.
|
||||
type ASNFetcherStats struct {
|
||||
SuccessesLastHour int
|
||||
ErrorsLastHour int
|
||||
CurrentInterval time.Duration
|
||||
ConsecutiveFails int
|
||||
}
|
||||
|
||||
// ASNFetcher is an interface for queuing ASN WHOIS lookups.
|
||||
type ASNFetcher interface {
|
||||
QueueImmediate(asn int)
|
||||
GetStats() ASNFetcherStats
|
||||
}
|
||||
|
||||
// Server provides HTTP endpoints for status monitoring
|
||||
|
||||
@ -177,8 +177,40 @@
|
||||
<span class="metric-value" id="ipv6_updates_per_sec">-</span>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<div class="status-card">
|
||||
<h2>WHOIS Fetcher</h2>
|
||||
<div class="metric">
|
||||
<span class="metric-label">Fresh ASNs</span>
|
||||
<span class="metric-value" id="whois_fresh">-</span>
|
||||
</div>
|
||||
<div class="metric">
|
||||
<span class="metric-label">Stale ASNs</span>
|
||||
<span class="metric-value" id="whois_stale">-</span>
|
||||
</div>
|
||||
<div class="metric">
|
||||
<span class="metric-label">Never Fetched</span>
|
||||
<span class="metric-value" id="whois_never">-</span>
|
||||
</div>
|
||||
<div class="metric">
|
||||
<span class="metric-label">Fresh %</span>
|
||||
<span class="metric-value" id="whois_percent">-</span>
|
||||
</div>
|
||||
<div class="metric">
|
||||
<span class="metric-label">Successes (1h)</span>
|
||||
<span class="metric-value" id="whois_successes">-</span>
|
||||
</div>
|
||||
<div class="metric">
|
||||
<span class="metric-label">Errors (1h)</span>
|
||||
<span class="metric-value" id="whois_errors">-</span>
|
||||
</div>
|
||||
<div class="metric">
|
||||
<span class="metric-label">Current Interval</span>
|
||||
<span class="metric-value" id="whois_interval">-</span>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
|
||||
<div class="status-grid">
|
||||
<div class="status-card">
|
||||
<h2>IPv4 Prefix Distribution</h2>
|
||||
@ -318,7 +350,15 @@
|
||||
document.getElementById('ipv6_routes').textContent = '-';
|
||||
document.getElementById('ipv4_updates_per_sec').textContent = '-';
|
||||
document.getElementById('ipv6_updates_per_sec').textContent = '-';
|
||||
|
||||
document.getElementById('whois_fresh').textContent = '-';
|
||||
document.getElementById('whois_stale').textContent = '-';
|
||||
document.getElementById('whois_never').textContent = '-';
|
||||
document.getElementById('whois_percent').textContent = '-';
|
||||
document.getElementById('whois_successes').textContent = '-';
|
||||
document.getElementById('whois_errors').textContent = '-';
|
||||
document.getElementById('whois_errors').className = 'metric-value';
|
||||
document.getElementById('whois_interval').textContent = '-';
|
||||
|
||||
// Clear handler stats
|
||||
document.getElementById('handler-stats-container').innerHTML = '';
|
||||
|
||||
@ -368,7 +408,20 @@
|
||||
document.getElementById('ipv6_routes').textContent = formatNumber(data.ipv6_routes);
|
||||
document.getElementById('ipv4_updates_per_sec').textContent = data.ipv4_updates_per_sec.toFixed(1);
|
||||
document.getElementById('ipv6_updates_per_sec').textContent = data.ipv6_updates_per_sec.toFixed(1);
|
||||
|
||||
|
||||
// Update WHOIS stats
|
||||
if (data.whois_stats) {
|
||||
document.getElementById('whois_fresh').textContent = formatNumber(data.whois_stats.fresh_asns);
|
||||
document.getElementById('whois_stale').textContent = formatNumber(data.whois_stats.stale_asns);
|
||||
document.getElementById('whois_never').textContent = formatNumber(data.whois_stats.never_fetched);
|
||||
document.getElementById('whois_percent').textContent = data.whois_stats.fresh_percent.toFixed(1) + '%';
|
||||
document.getElementById('whois_successes').textContent = formatNumber(data.whois_stats.successes_last_hour);
|
||||
const errorsEl = document.getElementById('whois_errors');
|
||||
errorsEl.textContent = formatNumber(data.whois_stats.errors_last_hour);
|
||||
errorsEl.className = 'metric-value' + (data.whois_stats.errors_last_hour > 0 ? ' disconnected' : '');
|
||||
document.getElementById('whois_interval').textContent = data.whois_stats.current_interval;
|
||||
}
|
||||
|
||||
// Update handler stats
|
||||
updateHandlerStats(data.handler_stats || []);
|
||||
|
||||
|
||||
Loading…
Reference in New Issue
Block a user