Implement routing table snapshotter with automatic loading on startup
- Create snapshotter package with periodic (every 10 minutes) and on-demand snapshots
- Add JSON serialization with gzip compression and atomic file writes
- Update routing table to track the AddedAt time for each route
- Load snapshots on startup, filtering out stale routes (more than 30 minutes old)
- Add ROUTEWATCH_DISABLE_SNAPSHOTTER env var for tests
- Use OS-appropriate state directories (macOS: ~/Library/Application Support; Linux: /var/lib or XDG_STATE_HOME)
This commit is contained in:
@@ -2,7 +2,13 @@
|
||||
package routingtable
|
||||
|
||||
import (
|
||||
"compress/gzip"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"log/slog"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"runtime"
|
||||
"strings"
|
||||
"sync"
|
||||
"sync/atomic"
|
||||
@@ -11,6 +17,15 @@ import (
|
||||
"github.com/google/uuid"
|
||||
)
|
||||
|
||||
const (
|
||||
// routeStalenessThreshold is the maximum age, measured from a route's AddedAt
|
||||
// time, that a route read from a snapshot may have; older routes are skipped
// when the snapshot is loaded. 30 minutes is a conservative value.
|
||||
routeStalenessThreshold = 30 * time.Minute
|
||||
|
||||
// snapshotFilename is the name of the snapshot file inside the state
// directory; it is read as gzip-compressed JSON.
|
||||
snapshotFilename = "routewatch-snapshot.json.gz"
|
||||
)
|
||||
|
||||
// Route represents a single route entry in the routing table
|
||||
type Route struct {
|
||||
PrefixID uuid.UUID `json:"prefix_id"`
|
||||
@@ -21,6 +36,7 @@ type Route struct {
|
||||
ASPath []int `json:"as_path"` // Full AS path
|
||||
NextHop string `json:"next_hop"`
|
||||
AnnouncedAt time.Time `json:"announced_at"`
|
||||
AddedAt time.Time `json:"added_at"` // When we added this route to our table
|
||||
}
|
||||
|
||||
// RouteKey uniquely identifies a route in the table
|
||||
@@ -48,15 +64,22 @@ type RoutingTable struct {
|
||||
lastMetricsReset time.Time
|
||||
}
|
||||
|
||||
// New creates a new empty routing table
|
||||
func New() *RoutingTable {
|
||||
return &RoutingTable{
|
||||
// New creates a new routing table, loading from snapshot if available
|
||||
func New(logger *slog.Logger) *RoutingTable {
|
||||
rt := &RoutingTable{
|
||||
routes: make(map[RouteKey]*Route),
|
||||
byPrefix: make(map[uuid.UUID]map[RouteKey]*Route),
|
||||
byOriginASN: make(map[uuid.UUID]map[RouteKey]*Route),
|
||||
byPeerASN: make(map[int]map[RouteKey]*Route),
|
||||
lastMetricsReset: time.Now(),
|
||||
}
|
||||
|
||||
// Try to load from snapshot
|
||||
if err := rt.loadFromSnapshot(logger); err != nil {
|
||||
logger.Warn("Failed to load routing table from snapshot", "error", err)
|
||||
}
|
||||
|
||||
return rt
|
||||
}
|
||||
|
||||
// AddRoute adds or updates a route in the routing table
|
||||
@@ -81,6 +104,11 @@ func (rt *RoutingTable) AddRoute(route *Route) {
|
||||
}
|
||||
}
|
||||
|
||||
// Set AddedAt if not already set
|
||||
if route.AddedAt.IsZero() {
|
||||
route.AddedAt = time.Now().UTC()
|
||||
}
|
||||
|
||||
// Add to main map
|
||||
rt.routes[key] = route
|
||||
|
||||
@@ -357,6 +385,7 @@ func (rt *RoutingTable) GetAllRoutesUnsafe() []*Route {
|
||||
for _, route := range rt.routes {
|
||||
routes = append(routes, route)
|
||||
}
|
||||
|
||||
return routes
|
||||
}
|
||||
|
||||
@@ -417,3 +446,108 @@ func (k RouteKey) String() string {
|
||||
// isIPv6 reports whether prefix is written in IPv6 notation. The check is
// purely textual: any string containing a ':' is treated as IPv6.
func isIPv6(prefix string) bool {
	colonAt := strings.IndexByte(prefix, ':')

	return colonAt >= 0
}
|
||||
|
||||
// getStateDirectory returns the appropriate state directory based on the OS
|
||||
// getStateDirectory returns the directory in which routewatch keeps its
// persistent state, chosen by operating system:
//
//   - darwin: ~/Library/Application Support/routewatch
//   - linux, freebsd, openbsd, netbsd: /var/lib/routewatch when the effective
//     UID is 0; otherwise $XDG_STATE_HOME/routewatch when that variable holds
//     an absolute path, falling back to ~/.local/state/routewatch
//
// Any other operating system yields an error, as does a failure to determine
// the user's home directory. The directory is only computed, not created.
func getStateDirectory() (string, error) {
	switch runtime.GOOS {
	case "darwin":
		// macOS: Use ~/Library/Application Support/routewatch
		home, err := os.UserHomeDir()
		if err != nil {
			return "", err
		}

		return filepath.Join(home, "Library", "Application Support", "routewatch"), nil
	case "linux", "freebsd", "openbsd", "netbsd":
		// Unix-like: Use /var/lib/routewatch if running as root, otherwise use XDG_STATE_HOME
		if os.Geteuid() == 0 {
			return "/var/lib/routewatch", nil
		}

		// Check XDG_STATE_HOME first. The XDG Base Directory specification
		// requires absolute paths and says relative values must be ignored;
		// honoring one would make the state location depend on the current
		// working directory. An unset/empty value is also not absolute.
		if xdgState := os.Getenv("XDG_STATE_HOME"); filepath.IsAbs(xdgState) {
			return filepath.Join(xdgState, "routewatch"), nil
		}

		// Fall back to ~/.local/state/routewatch
		home, err := os.UserHomeDir()
		if err != nil {
			return "", err
		}

		return filepath.Join(home, ".local", "state", "routewatch"), nil
	default:
		return "", fmt.Errorf("unsupported operating system: %s", runtime.GOOS)
	}
}
|
||||
|
||||
// loadFromSnapshot attempts to load the routing table from a snapshot file
|
||||
func (rt *RoutingTable) loadFromSnapshot(logger *slog.Logger) error {
|
||||
stateDir, err := getStateDirectory()
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to determine state directory: %w", err)
|
||||
}
|
||||
|
||||
snapshotPath := filepath.Join(stateDir, snapshotFilename)
|
||||
|
||||
// Check if snapshot file exists
|
||||
if _, err := os.Stat(snapshotPath); os.IsNotExist(err) {
|
||||
logger.Info("No snapshot file found, starting with empty routing table")
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// Open the snapshot file
|
||||
file, err := os.Open(filepath.Clean(snapshotPath))
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to open snapshot file: %w", err)
|
||||
}
|
||||
defer func() { _ = file.Close() }()
|
||||
|
||||
// Create gzip reader
|
||||
gzReader, err := gzip.NewReader(file)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to create gzip reader: %w", err)
|
||||
}
|
||||
defer func() { _ = gzReader.Close() }()
|
||||
|
||||
// Decode the snapshot
|
||||
var snapshot struct {
|
||||
Timestamp time.Time `json:"timestamp"`
|
||||
Stats DetailedStats `json:"stats"`
|
||||
Routes []*Route `json:"routes"`
|
||||
}
|
||||
|
||||
decoder := json.NewDecoder(gzReader)
|
||||
if err := decoder.Decode(&snapshot); err != nil {
|
||||
return fmt.Errorf("failed to decode snapshot: %w", err)
|
||||
}
|
||||
|
||||
// Calculate staleness cutoff time
|
||||
now := time.Now().UTC()
|
||||
cutoffTime := now.Add(-routeStalenessThreshold)
|
||||
|
||||
// Load non-stale routes
|
||||
loadedCount := 0
|
||||
staleCount := 0
|
||||
|
||||
for _, route := range snapshot.Routes {
|
||||
// Check if route is stale based on AddedAt time
|
||||
if route.AddedAt.Before(cutoffTime) {
|
||||
staleCount++
|
||||
|
||||
continue
|
||||
}
|
||||
|
||||
// Add the route (this will update counters and indexes)
|
||||
rt.AddRoute(route)
|
||||
loadedCount++
|
||||
}
|
||||
|
||||
logger.Info("Loaded routing table from snapshot",
|
||||
"snapshot_time", snapshot.Timestamp,
|
||||
"loaded_routes", loadedCount,
|
||||
"stale_routes", staleCount,
|
||||
"total_routes_in_snapshot", len(snapshot.Routes),
|
||||
)
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
package routingtable
|
||||
|
||||
import (
|
||||
"log/slog"
|
||||
"sync"
|
||||
"testing"
|
||||
"time"
|
||||
@@ -9,7 +10,9 @@ import (
|
||||
)
|
||||
|
||||
func TestRoutingTable(t *testing.T) {
|
||||
rt := New()
|
||||
// Create a test logger
|
||||
logger := slog.Default()
|
||||
rt := New(logger)
|
||||
|
||||
// Test data
|
||||
prefixID1 := uuid.New()
|
||||
@@ -118,7 +121,9 @@ func TestRoutingTable(t *testing.T) {
|
||||
}
|
||||
|
||||
func TestRoutingTableConcurrency(t *testing.T) {
|
||||
rt := New()
|
||||
// Create a test logger
|
||||
logger := slog.Default()
|
||||
rt := New(logger)
|
||||
|
||||
// Test concurrent access
|
||||
var wg sync.WaitGroup
|
||||
@@ -170,7 +175,9 @@ func TestRoutingTableConcurrency(t *testing.T) {
|
||||
}
|
||||
|
||||
func TestRouteUpdate(t *testing.T) {
|
||||
rt := New()
|
||||
// Create a test logger
|
||||
logger := slog.Default()
|
||||
rt := New(logger)
|
||||
|
||||
prefixID := uuid.New()
|
||||
originASNID := uuid.New()
|
||||
|
||||
Reference in New Issue
Block a user