Add live routing table with CIDR mask length tracking

- Added new live_routes table with mask_length column for tracking CIDR prefix lengths
- Updated PrefixHandler to maintain live routing table with additions and deletions
- Added route expiration functionality (5 minute timeout) to in-memory routing table
- Added prefix distribution stats showing count of prefixes by mask length
- Added IPv4/IPv6 prefix distribution cards to status page
- Updated database interface with UpsertLiveRoute, DeleteLiveRoute, and GetPrefixDistribution
- Set all handler queue depths to 50000 for consistency
- Doubled DBHandler batch size to 32000 for better throughput
- Fixed withdrawal handling to delete routes when origin ASN is available
This commit is contained in:
Jeffrey Paul 2025-07-28 01:51:42 +02:00
parent cea7c3dfd3
commit 3c46087976
13 changed files with 471 additions and 148 deletions

View File

@ -15,6 +15,9 @@ const (
// dirPermissions for creating directories
dirPermissions = 0750 // rwxr-x---
// defaultRouteExpirationMinutes is the default route expiration timeout in minutes
defaultRouteExpirationMinutes = 5
)
// Config holds configuration for the entire application
@ -27,6 +30,10 @@ type Config struct {
// EnableBatchedDatabaseWrites enables batched database operations for better performance
EnableBatchedDatabaseWrites bool
// RouteExpirationTimeout is how long a route can go without being refreshed before expiring
// Default is 2 hours which is conservative for BGP (typical BGP hold time is 90-180 seconds)
RouteExpirationTimeout time.Duration
}
// New creates a new Config with default paths based on the OS
@ -39,7 +46,8 @@ func New() (*Config, error) {
return &Config{
StateDir: stateDir,
MaxRuntime: 0, // Run forever by default
EnableBatchedDatabaseWrites: true, // Enable batching by default for better performance
EnableBatchedDatabaseWrites: true, // Enable batching by default
RouteExpirationTimeout: defaultRouteExpirationMinutes * time.Minute, // For active route monitoring
}, nil
}

View File

@ -4,6 +4,7 @@ package database
import (
"database/sql"
_ "embed"
"encoding/json"
"fmt"
"os"
"path/filepath"
@ -387,5 +388,120 @@ func (d *Database) GetStats() (Stats, error) {
stats.FileSizeBytes = fileInfo.Size()
}
// Get live routes count
err = d.db.QueryRow("SELECT COUNT(*) FROM live_routes").Scan(&stats.LiveRoutes)
if err != nil {
return stats, fmt.Errorf("failed to count live routes: %w", err)
}
// Get prefix distribution
stats.IPv4PrefixDistribution, stats.IPv6PrefixDistribution, err = d.GetPrefixDistribution()
if err != nil {
// Log but don't fail
d.logger.Warn("Failed to get prefix distribution", "error", err)
}
return stats, nil
}
// UpsertLiveRoute inserts or updates a live route
func (d *Database) UpsertLiveRoute(route *LiveRoute) error {
query := `
INSERT INTO live_routes (id, prefix, mask_length, ip_version, origin_asn, peer_ip, as_path, next_hop, last_updated)
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)
ON CONFLICT(prefix, origin_asn, peer_ip) DO UPDATE SET
mask_length = excluded.mask_length,
ip_version = excluded.ip_version,
as_path = excluded.as_path,
next_hop = excluded.next_hop,
last_updated = excluded.last_updated
`
// Encode AS path as JSON
pathJSON, err := json.Marshal(route.ASPath)
if err != nil {
return fmt.Errorf("failed to encode AS path: %w", err)
}
_, err = d.db.Exec(query,
route.ID.String(),
route.Prefix,
route.MaskLength,
route.IPVersion,
route.OriginASN,
route.PeerIP,
string(pathJSON),
route.NextHop,
route.LastUpdated,
)
return err
}
// DeleteLiveRoute deletes a live route
// If originASN is 0, deletes all routes for the prefix/peer combination
func (d *Database) DeleteLiveRoute(prefix string, originASN int, peerIP string) error {
var query string
var err error
if originASN == 0 {
// Delete all routes for this prefix from this peer
query = `DELETE FROM live_routes WHERE prefix = ? AND peer_ip = ?`
_, err = d.db.Exec(query, prefix, peerIP)
} else {
// Delete specific route
query = `DELETE FROM live_routes WHERE prefix = ? AND origin_asn = ? AND peer_ip = ?`
_, err = d.db.Exec(query, prefix, originASN, peerIP)
}
return err
}
// GetPrefixDistribution returns the distribution of prefixes by mask length
func (d *Database) GetPrefixDistribution() (ipv4 []PrefixDistribution, ipv6 []PrefixDistribution, err error) {
// IPv4 distribution
query := `
SELECT mask_length, COUNT(*) as count
FROM live_routes
WHERE ip_version = 4
GROUP BY mask_length
ORDER BY mask_length
`
rows, err := d.db.Query(query)
if err != nil {
return nil, nil, fmt.Errorf("failed to query IPv4 distribution: %w", err)
}
defer func() { _ = rows.Close() }()
for rows.Next() {
var dist PrefixDistribution
if err := rows.Scan(&dist.MaskLength, &dist.Count); err != nil {
return nil, nil, fmt.Errorf("failed to scan IPv4 distribution: %w", err)
}
ipv4 = append(ipv4, dist)
}
// IPv6 distribution
query = `
SELECT mask_length, COUNT(*) as count
FROM live_routes
WHERE ip_version = 6
GROUP BY mask_length
ORDER BY mask_length
`
rows, err = d.db.Query(query)
if err != nil {
return nil, nil, fmt.Errorf("failed to query IPv6 distribution: %w", err)
}
defer func() { _ = rows.Close() }()
for rows.Next() {
var dist PrefixDistribution
if err := rows.Scan(&dist.MaskLength, &dist.Count); err != nil {
return nil, nil, fmt.Errorf("failed to scan IPv6 distribution: %w", err)
}
ipv6 = append(ipv6, dist)
}
return ipv4, ipv6, nil
}

View File

@ -12,6 +12,9 @@ type Stats struct {
IPv6Prefixes int
Peerings int
FileSizeBytes int64
LiveRoutes int
IPv4PrefixDistribution []PrefixDistribution
IPv6PrefixDistribution []PrefixDistribution
}
// Store defines the interface for database operations
@ -34,6 +37,11 @@ type Store interface {
// Peer operations
UpdatePeer(peerIP string, peerASN int, messageType string, timestamp time.Time) error
// Live route operations
UpsertLiveRoute(route *LiveRoute) error
DeleteLiveRoute(prefix string, originASN int, peerIP string) error
GetPrefixDistribution() (ipv4 []PrefixDistribution, ipv6 []PrefixDistribution, err error)
// Lifecycle
Close() error
}

View File

@ -45,3 +45,22 @@ type ASNPeering struct {
FirstSeen time.Time `json:"first_seen"`
LastSeen time.Time `json:"last_seen"`
}
// LiveRoute represents a route in the live routing table
type LiveRoute struct {
ID uuid.UUID `json:"id"`
Prefix string `json:"prefix"`
MaskLength int `json:"mask_length"`
IPVersion int `json:"ip_version"`
OriginASN int `json:"origin_asn"`
PeerIP string `json:"peer_ip"`
ASPath []int `json:"as_path"`
NextHop string `json:"next_hop"`
LastUpdated time.Time `json:"last_updated"`
}
// PrefixDistribution represents the distribution of prefixes by mask length
type PrefixDistribution struct {
MaskLength int `json:"mask_length"`
Count int `json:"count"`
}

View File

@ -69,3 +69,23 @@ CREATE INDEX IF NOT EXISTS idx_asns_number ON asns(number);
CREATE INDEX IF NOT EXISTS idx_bgp_peers_asn ON bgp_peers(peer_asn);
CREATE INDEX IF NOT EXISTS idx_bgp_peers_last_seen ON bgp_peers(last_seen);
CREATE INDEX IF NOT EXISTS idx_bgp_peers_ip ON bgp_peers(peer_ip);
-- Live routing table maintained by PrefixHandler
CREATE TABLE IF NOT EXISTS live_routes (
id TEXT PRIMARY KEY,
prefix TEXT NOT NULL,
mask_length INTEGER NOT NULL, -- CIDR mask length (0-32 for IPv4, 0-128 for IPv6)
ip_version INTEGER NOT NULL, -- 4 or 6
origin_asn INTEGER NOT NULL,
peer_ip TEXT NOT NULL,
as_path TEXT NOT NULL, -- JSON array
next_hop TEXT NOT NULL,
last_updated DATETIME NOT NULL,
UNIQUE(prefix, origin_asn, peer_ip)
);
-- Indexes for live_routes table
CREATE INDEX IF NOT EXISTS idx_live_routes_prefix ON live_routes(prefix);
CREATE INDEX IF NOT EXISTS idx_live_routes_mask_length ON live_routes(mask_length);
CREATE INDEX IF NOT EXISTS idx_live_routes_ip_version_mask ON live_routes(ip_version, mask_length);
CREATE INDEX IF NOT EXISTS idx_live_routes_last_updated ON live_routes(last_updated);

View File

@ -178,6 +178,9 @@ func (rw *RouteWatch) Shutdown() {
// Stop services
rw.streamer.Stop()
// Stop routing table expiration
rw.routingTable.Stop()
// Stop HTTP server with a timeout
const serverStopTimeout = 5 * time.Second
stopCtx, cancel := context.WithTimeout(context.Background(), serverStopTimeout)

View File

@ -157,6 +157,24 @@ func (m *mockStore) GetStats() (database.Stats, error) {
}, nil
}
// UpsertLiveRoute mock implementation
func (m *mockStore) UpsertLiveRoute(route *database.LiveRoute) error {
// Simple mock - just return nil
return nil
}
// DeleteLiveRoute mock implementation
func (m *mockStore) DeleteLiveRoute(prefix string, originASN int, peerIP string) error {
// Simple mock - just return nil
return nil
}
// GetPrefixDistribution mock implementation
func (m *mockStore) GetPrefixDistribution() (ipv4 []database.PrefixDistribution, ipv6 []database.PrefixDistribution, err error) {
// Return empty distributions for now
return nil, nil, nil
}
func TestRouteWatchLiveFeed(t *testing.T) {
// Disable snapshotter for tests
t.Setenv("ROUTEWATCH_DISABLE_SNAPSHOTTER", "1")

View File

@ -11,10 +11,10 @@ import (
const (
// dbHandlerQueueSize is the queue capacity for database operations
dbHandlerQueueSize = 200000
dbHandlerQueueSize = 50000
// batchSize is the number of operations to batch together
batchSize = 16000
batchSize = 32000
// batchTimeout is the maximum time to wait before flushing a batch
batchTimeout = 5 * time.Second

View File

@ -12,7 +12,7 @@ import (
const (
// peerHandlerQueueSize is the queue capacity for peer tracking operations
peerHandlerQueueSize = 2000
peerHandlerQueueSize = 50000
// peerBatchSize is the number of peer updates to batch together
peerBatchSize = 500

View File

@ -1,13 +1,15 @@
package routewatch
import (
"encoding/json"
"net"
"strings"
"sync"
"time"
"git.eeqj.de/sneak/routewatch/internal/database"
"git.eeqj.de/sneak/routewatch/internal/logger"
"git.eeqj.de/sneak/routewatch/internal/ristypes"
"github.com/google/uuid"
)
const (
@ -19,9 +21,14 @@ const (
// prefixBatchTimeout is the maximum time to wait before flushing a batch
prefixBatchTimeout = 5 * time.Second
// IP version constants
ipv4Version = 4
ipv6Version = 6
)
// PrefixHandler tracks BGP prefixes and maintains a routing table in the database
// PrefixHandler tracks BGP prefixes and maintains a live routing table in the database.
// Routes are added on announcement and deleted on withdrawal.
type PrefixHandler struct {
db database.Store
logger *logger.Logger
@ -185,80 +192,73 @@ func (h *PrefixHandler) flushBatchLocked() {
h.lastFlush = time.Now()
}
// processAnnouncement handles storing an announcement in the database
func (h *PrefixHandler) processAnnouncement(prefix *database.Prefix, update prefixUpdate) {
// Get or create origin ASN
originASN, err := h.db.GetOrCreateASN(update.originASN, update.timestamp)
// parseCIDR extracts the mask length and IP version from a prefix string
func parseCIDR(prefix string) (maskLength int, ipVersion int, err error) {
_, ipNet, err := net.ParseCIDR(prefix)
if err != nil {
h.logger.Error("Failed to get/create origin ASN",
"asn", update.originASN,
return 0, 0, err
}
ones, _ := ipNet.Mask.Size()
if strings.Contains(prefix, ":") {
return ones, ipv6Version, nil
}
return ones, ipv4Version, nil
}
// processAnnouncement handles storing an announcement in the database
func (h *PrefixHandler) processAnnouncement(_ *database.Prefix, update prefixUpdate) {
// Parse CIDR to get mask length
maskLength, ipVersion, err := parseCIDR(update.prefix)
if err != nil {
h.logger.Error("Failed to parse CIDR",
"prefix", update.prefix,
"error", err,
)
return
}
// Get or create peer ASN (first element in path if exists)
var peerASN *database.ASN
if len(update.path) > 0 {
peerASN, err = h.db.GetOrCreateASN(update.path[0], update.timestamp)
if err != nil {
h.logger.Error("Failed to get/create peer ASN",
"asn", update.path[0],
// Create live route record
liveRoute := &database.LiveRoute{
ID: uuid.New(),
Prefix: update.prefix,
MaskLength: maskLength,
IPVersion: ipVersion,
OriginASN: update.originASN,
PeerIP: update.peer,
ASPath: update.path,
NextHop: update.peer, // Using peer as next hop
LastUpdated: update.timestamp,
}
if err := h.db.UpsertLiveRoute(liveRoute); err != nil {
h.logger.Error("Failed to upsert live route",
"prefix", update.prefix,
"error", err,
)
}
}
return
// processWithdrawal handles removing a route from the live routing table
func (h *PrefixHandler) processWithdrawal(_ *database.Prefix, update prefixUpdate) {
// For withdrawals, we need to delete the route from live_routes
// Since we have the origin ASN from the update, we can delete the specific route
if update.originASN > 0 {
if err := h.db.DeleteLiveRoute(update.prefix, update.originASN, update.peer); err != nil {
h.logger.Error("Failed to delete live route",
"prefix", update.prefix,
"origin_asn", update.originASN,
"peer", update.peer,
"error", err,
)
}
} else {
// If no path, use origin as peer
peerASN = originASN
}
// Encode AS path as JSON
pathJSON, err := json.Marshal(update.path)
if err != nil {
h.logger.Error("Failed to encode AS path",
"path", update.path,
"error", err,
)
return
}
// Create announcement record
announcement := &database.Announcement{
PrefixID: prefix.ID,
ASNID: peerASN.ID,
OriginASNID: originASN.ID,
Path: string(pathJSON),
NextHop: update.peer,
Timestamp: update.timestamp,
IsWithdrawal: false,
}
if err := h.db.RecordAnnouncement(announcement); err != nil {
h.logger.Error("Failed to record announcement",
// If no origin ASN, log a warning
h.logger.Warn("Withdrawal without origin ASN",
"prefix", update.prefix,
"error", err,
)
}
}
// processWithdrawal handles storing a withdrawal in the database
func (h *PrefixHandler) processWithdrawal(prefix *database.Prefix, update prefixUpdate) {
// For withdrawals, create a withdrawal record
announcement := &database.Announcement{
PrefixID: prefix.ID,
NextHop: update.peer,
Timestamp: update.timestamp,
IsWithdrawal: true,
}
if err := h.db.RecordAnnouncement(announcement); err != nil {
h.logger.Error("Failed to record withdrawal",
"prefix", update.prefix,
"error", err,
"peer", update.peer,
)
}
}

View File

@ -65,6 +65,11 @@ type RoutingTable struct {
// Configuration
snapshotDir string
routeExpirationTimeout time.Duration
logger *logger.Logger
// Expiration management
stopExpiration chan struct{}
}
// New creates a new routing table, loading from snapshot if available
@ -76,6 +81,9 @@ func New(cfg *config.Config, logger *logger.Logger) *RoutingTable {
byPeerASN: make(map[int]map[RouteKey]*Route),
lastMetricsReset: time.Now(),
snapshotDir: cfg.GetStateDir(),
routeExpirationTimeout: cfg.RouteExpirationTimeout,
logger: logger,
stopExpiration: make(chan struct{}),
}
// Try to load from snapshot
@ -83,6 +91,9 @@ func New(cfg *config.Config, logger *logger.Logger) *RoutingTable {
logger.Warn("Failed to load routing table from snapshot", "error", err)
}
// Start expiration goroutine
go rt.expireRoutesLoop()
return rt
}
@ -522,3 +533,72 @@ func (rt *RoutingTable) loadFromSnapshot(logger *logger.Logger) error {
return nil
}
// expireRoutesLoop periodically removes expired routes
func (rt *RoutingTable) expireRoutesLoop() {
// Run every minute to check for expired routes
ticker := time.NewTicker(1 * time.Minute)
defer ticker.Stop()
for {
select {
case <-ticker.C:
rt.expireStaleRoutes()
case <-rt.stopExpiration:
return
}
}
}
// expireStaleRoutes removes routes that haven't been updated recently
func (rt *RoutingTable) expireStaleRoutes() {
rt.mu.Lock()
defer rt.mu.Unlock()
now := time.Now().UTC()
cutoffTime := now.Add(-rt.routeExpirationTimeout)
expiredCount := 0
// Collect keys to delete (can't delete while iterating)
var keysToDelete []RouteKey
for key, route := range rt.routes {
// Use AnnouncedAt as the last update time
if route.AnnouncedAt.Before(cutoffTime) {
keysToDelete = append(keysToDelete, key)
}
}
// Delete expired routes
for _, key := range keysToDelete {
route, exists := rt.routes[key]
if !exists {
continue
}
rt.removeFromIndexes(key, route)
delete(rt.routes, key)
expiredCount++
// Update metrics
if isIPv6(route.Prefix) {
rt.ipv6Routes--
} else {
rt.ipv4Routes--
}
}
if expiredCount > 0 {
rt.logger.Info("Expired stale routes",
"count", expiredCount,
"timeout", rt.routeExpirationTimeout,
"remaining_routes", len(rt.routes),
)
}
}
// Stop gracefully stops the routing table background tasks
func (rt *RoutingTable) Stop() {
if rt.stopExpiration != nil {
close(rt.stopExpiration)
}
}

View File

@ -131,6 +131,8 @@ func (s *Server) handleStatusJSON() http.HandlerFunc {
IPv6Routes int `json:"ipv6_routes"`
IPv4UpdatesPerSec float64 `json:"ipv4_updates_per_sec"`
IPv6UpdatesPerSec float64 `json:"ipv6_updates_per_sec"`
IPv4PrefixDistribution []database.PrefixDistribution `json:"ipv4_prefix_distribution"`
IPv6PrefixDistribution []database.PrefixDistribution `json:"ipv6_prefix_distribution"`
}
return func(w http.ResponseWriter, r *http.Request) {
@ -210,11 +212,13 @@ func (s *Server) handleStatusJSON() http.HandlerFunc {
IPv6Prefixes: dbStats.IPv6Prefixes,
Peerings: dbStats.Peerings,
DatabaseSizeBytes: dbStats.FileSizeBytes,
LiveRoutes: rtStats.TotalRoutes,
LiveRoutes: dbStats.LiveRoutes,
IPv4Routes: rtStats.IPv4Routes,
IPv6Routes: rtStats.IPv6Routes,
IPv4UpdatesPerSec: rtStats.IPv4UpdatesRate,
IPv6UpdatesPerSec: rtStats.IPv6UpdatesRate,
IPv4PrefixDistribution: dbStats.IPv4PrefixDistribution,
IPv6PrefixDistribution: dbStats.IPv6PrefixDistribution,
}
w.Header().Set("Content-Type", "application/json")
@ -262,6 +266,8 @@ func (s *Server) handleStats() http.HandlerFunc {
IPv4UpdatesPerSec float64 `json:"ipv4_updates_per_sec"`
IPv6UpdatesPerSec float64 `json:"ipv6_updates_per_sec"`
HandlerStats []HandlerStatsInfo `json:"handler_stats"`
IPv4PrefixDistribution []database.PrefixDistribution `json:"ipv4_prefix_distribution"`
IPv6PrefixDistribution []database.PrefixDistribution `json:"ipv6_prefix_distribution"`
}
return func(w http.ResponseWriter, r *http.Request) {
@ -351,12 +357,14 @@ func (s *Server) handleStats() http.HandlerFunc {
IPv6Prefixes: dbStats.IPv6Prefixes,
Peerings: dbStats.Peerings,
DatabaseSizeBytes: dbStats.FileSizeBytes,
LiveRoutes: rtStats.TotalRoutes,
LiveRoutes: dbStats.LiveRoutes,
IPv4Routes: rtStats.IPv4Routes,
IPv6Routes: rtStats.IPv6Routes,
IPv4UpdatesPerSec: rtStats.IPv4UpdatesRate,
IPv6UpdatesPerSec: rtStats.IPv6UpdatesRate,
HandlerStats: handlerStatsInfo,
IPv4PrefixDistribution: dbStats.IPv4PrefixDistribution,
IPv6PrefixDistribution: dbStats.IPv6PrefixDistribution,
}
w.Header().Set("Content-Type", "application/json")

View File

@ -153,6 +153,22 @@
</div>
</div>
<div class="status-grid">
<div class="status-card">
<h2>IPv4 Prefix Distribution</h2>
<div id="ipv4-prefix-distribution">
<!-- Will be populated dynamically -->
</div>
</div>
<div class="status-card">
<h2>IPv6 Prefix Distribution</h2>
<div id="ipv6-prefix-distribution">
<!-- Will be populated dynamically -->
</div>
</div>
</div>
<div id="handler-stats-container" class="status-grid">
<!-- Handler stats will be dynamically added here -->
</div>
@ -170,6 +186,29 @@
return num.toLocaleString();
}
function updatePrefixDistribution(elementId, distribution) {
const container = document.getElementById(elementId);
container.innerHTML = '';
if (!distribution || distribution.length === 0) {
container.innerHTML = '<div class="metric"><span class="metric-label">No data</span></div>';
return;
}
// Sort by mask length
distribution.sort((a, b) => a.mask_length - b.mask_length);
distribution.forEach(item => {
const metric = document.createElement('div');
metric.className = 'metric';
metric.innerHTML = `
<span class="metric-label">/${item.mask_length}</span>
<span class="metric-value">${formatNumber(item.count)}</span>
`;
container.appendChild(metric);
});
}
function updateHandlerStats(handlerStats) {
const container = document.getElementById('handler-stats-container');
container.innerHTML = '';
@ -249,6 +288,10 @@
// Update handler stats
updateHandlerStats(data.handler_stats || []);
// Update prefix distribution
updatePrefixDistribution('ipv4-prefix-distribution', data.ipv4_prefix_distribution);
updatePrefixDistribution('ipv6-prefix-distribution', data.ipv6_prefix_distribution);
// Clear any errors
document.getElementById('error').style.display = 'none';
})