routewatch/internal/snapshotter/snapshotter.go
sneak ae2ef2ae0c Implement routing table snapshotter with automatic loading on startup
- Create snapshotter package with periodic (10 min) and on-demand snapshots
- Add JSON serialization with gzip compression and atomic file writes
- Update routing table to track AddedAt time for each route
- Load snapshots on startup, filtering out stale routes (>30 minutes old)
- Add ROUTEWATCH_DISABLE_SNAPSHOTTER env var for tests
- Use OS-appropriate state directories (macOS: ~/Library/Application Support, Linux: /var/lib or XDG_STATE_HOME)
2025-07-28 00:03:19 +02:00

261 lines
6.5 KiB
Go

// Package snapshotter provides functionality for creating periodic and on-demand
// snapshots of the routing table state.
package snapshotter
import (
"compress/gzip"
"context"
"encoding/json"
"fmt"
"log/slog"
"os"
"path/filepath"
"runtime"
"sync"
"time"
"git.eeqj.de/sneak/routewatch/internal/routingtable"
)
// Snapshot scheduling and on-disk naming.
const (
	// snapshotFilename is the name of the gzip-compressed JSON snapshot
	// inside the state directory.
	snapshotFilename = "routewatch-snapshot.json.gz"

	// tempFileSuffix is appended to snapshotFilename to name the scratch file
	// that is written first and then renamed over the final snapshot.
	tempFileSuffix = ".tmp"

	// snapshotInterval is the period of the ticker that drives periodic
	// snapshots.
	snapshotInterval = 10 * time.Minute
)
// Snapshotter writes the routing table to disk, both on a fixed schedule and
// whenever a caller asks for a snapshot.
type Snapshotter struct {
	// Dependencies and configuration, populated by New.
	rt       *routingtable.RoutingTable // table whose routes are snapshotted
	logger   *slog.Logger               // destination for progress and error logs
	stateDir string                     // directory that holds the snapshot file

	// Lifecycle of the periodic goroutine, populated by Start.
	ctx    context.Context    // cancelled to stop periodic snapshots
	cancel context.CancelFunc // cancels ctx; nil until Start has run
	wg     sync.WaitGroup     // tracks the periodic snapshot goroutine

	// mu ensures only one snapshot runs at a time. Start and
	// GetLastSnapshotTime hold it as well.
	mu           sync.Mutex
	lastSnapshot time.Time // set when a snapshot finishes successfully
}
// New builds a Snapshotter for rt that logs through logger.
//
// It resolves the state directory via getStateDirectory and creates it (and
// any missing parents) with mode 0750. An error is returned when the
// directory cannot be determined or created. The returned Snapshotter is
// idle: no goroutine runs until Start is called.
func New(rt *routingtable.RoutingTable, logger *slog.Logger) (*Snapshotter, error) {
	const stateDirPerms = 0750

	dir, err := getStateDirectory()
	if err != nil {
		return nil, fmt.Errorf("failed to determine state directory: %w", err)
	}

	// The directory must exist before any snapshot file can be written.
	if mkErr := os.MkdirAll(dir, stateDirPerms); mkErr != nil {
		return nil, fmt.Errorf("failed to create state directory: %w", mkErr)
	}

	return &Snapshotter{
		rt:       rt,
		stateDir: dir,
		logger:   logger,
	}, nil
}
// Start launches the background goroutine that takes periodic snapshots.
//
// The goroutine runs until the context derived from ctx is cancelled, either
// by the caller's ctx or by Shutdown. Calling Start again after it has
// already run is a no-op.
func (s *Snapshotter) Start(ctx context.Context) {
	s.mu.Lock()
	defer s.mu.Unlock()

	// A non-nil stored context means Start has run before.
	alreadyStarted := s.ctx != nil
	if alreadyStarted {
		return
	}

	// Derive a cancellable child so Shutdown can stop the goroutine
	// independently of the caller's context.
	s.ctx, s.cancel = context.WithCancel(ctx)

	s.wg.Add(1)
	go s.periodicSnapshot()
}
// getStateDirectory returns the directory in which routewatch keeps its
// persistent state, chosen by operating system:
//
//   - macOS: ~/Library/Application Support/routewatch
//   - Linux and the BSDs, effective UID 0: /var/lib/routewatch
//   - Linux and the BSDs, otherwise: $XDG_STATE_HOME/routewatch when
//     XDG_STATE_HOME holds an absolute path, else ~/.local/state/routewatch
//
// It returns an error when the home directory is needed but cannot be
// determined, or when the operating system is not one of the above. The
// directory is not created here.
func getStateDirectory() (string, error) {
	const appDir = "routewatch"

	switch runtime.GOOS {
	case "darwin":
		home, err := os.UserHomeDir()
		if err != nil {
			return "", err
		}

		return filepath.Join(home, "Library", "Application Support", appDir), nil

	case "linux", "freebsd", "openbsd", "netbsd":
		// A process with effective UID 0 uses the system-wide location.
		if os.Geteuid() == 0 {
			return "/var/lib/routewatch", nil
		}

		// The XDG Base Directory specification requires absolute paths and
		// says a relative value is invalid and should be ignored. Honouring
		// one would make the state location depend on the working directory.
		// IsAbs is also false for the empty (unset) value.
		if xdgState := os.Getenv("XDG_STATE_HOME"); filepath.IsAbs(xdgState) {
			return filepath.Join(xdgState, appDir), nil
		}

		// Fall back to ~/.local/state/routewatch.
		home, err := os.UserHomeDir()
		if err != nil {
			return "", err
		}

		return filepath.Join(home, ".local", "state", appDir), nil

	default:
		return "", fmt.Errorf("unsupported operating system: %s", runtime.GOOS)
	}
}
// periodicSnapshot is the body of the background goroutine launched by Start.
//
// It takes one snapshot immediately, then one on every tick of a
// snapshotInterval ticker, and returns once the Snapshotter's context is
// cancelled. Snapshot failures are logged and do not stop the loop.
func (s *Snapshotter) periodicSnapshot() {
	defer s.wg.Done()

	tick := time.NewTicker(snapshotInterval)
	defer tick.Stop()

	// attempt takes one snapshot and logs failureMsg if it fails.
	attempt := func(failureMsg string) {
		if err := s.TakeSnapshot(); err != nil {
			s.logger.Error(failureMsg, "error", err)
		}
	}

	// Take a snapshot right away rather than waiting for the first tick.
	attempt("Failed to take initial snapshot")

	stopped := s.ctx.Done()
	for {
		select {
		case <-stopped:
			return
		case <-tick.C:
			attempt("Failed to take periodic snapshot")
		}
	}
}
// TakeSnapshot writes the current routing table state to the snapshot file.
//
// The routes and detailed stats are read under the routing table's read
// lock, marshalled to JSON, gzip-compressed into a temporary file next to
// the final path, synced, and then renamed over the final path so readers
// never observe a partially written snapshot. Calls are serialized by s.mu.
// On success lastSnapshot is updated; on failure the error is returned and
// the temporary file is removed.
func (s *Snapshotter) TakeSnapshot() error {
	// Only one snapshot may run at a time.
	s.mu.Lock()
	defer s.mu.Unlock()

	began := time.Now()
	s.logger.Info("Starting routing table snapshot")

	// Read routes and stats together under the routing table's read lock.
	// NOTE(review): the returned routes are marshalled after RUnlock; confirm
	// GetAllRoutesUnsafe hands back data that is safe to read unlocked.
	s.rt.RLock()
	routes := s.rt.GetAllRoutesUnsafe()
	stats := s.rt.GetDetailedStats()
	s.rt.RUnlock()

	// payload is the JSON document stored in the snapshot file.
	type payload struct {
		Timestamp time.Time                  `json:"timestamp"`
		Stats     routingtable.DetailedStats `json:"stats"`
		Routes    []*routingtable.Route      `json:"routes"`
	}
	snap := payload{
		Timestamp: time.Now().UTC(),
		Stats:     stats,
		Routes:    routes,
	}

	encoded, err := json.Marshal(snap)
	if err != nil {
		return fmt.Errorf("failed to marshal snapshot: %w", err)
	}

	// The temporary file lives in the same directory as the final file.
	tempPath := filepath.Clean(filepath.Join(s.stateDir, snapshotFilename+tempFileSuffix))
	finalPath := filepath.Clean(filepath.Join(s.stateDir, snapshotFilename))

	out, err := os.Create(tempPath)
	if err != nil {
		return fmt.Errorf("failed to create temporary file: %w", err)
	}
	defer func() {
		// Best-effort cleanup with errors deliberately ignored. On the
		// success path the file is already closed and renamed away, so both
		// calls simply fail.
		_ = out.Close()
		_ = os.Remove(tempPath)
	}()

	zw := gzip.NewWriter(out)
	zw.Comment = fmt.Sprintf("RouteWatch snapshot taken at %s", snap.Timestamp.Format(time.RFC3339))

	if _, err = zw.Write(encoded); err != nil {
		return fmt.Errorf("failed to write compressed data: %w", err)
	}

	// Closing the gzip writer flushes the remaining compressed data.
	if err = zw.Close(); err != nil {
		return fmt.Errorf("failed to close gzip writer: %w", err)
	}

	// Sync the file contents to disk before the rename publishes them.
	if err = out.Sync(); err != nil {
		return fmt.Errorf("failed to sync temporary file: %w", err)
	}

	if err = out.Close(); err != nil {
		return fmt.Errorf("failed to close temporary file: %w", err)
	}

	// Rename the finished temporary file over the final path.
	if err = os.Rename(tempPath, finalPath); err != nil {
		return fmt.Errorf("failed to rename temporary file: %w", err)
	}

	elapsed := time.Since(began)
	s.lastSnapshot = time.Now()

	// size_bytes is the length of the uncompressed JSON, not the file size.
	s.logger.Info("Routing table snapshot completed",
		"duration", elapsed,
		"routes", len(routes),
		"ipv4_routes", stats.IPv4Routes,
		"ipv6_routes", stats.IPv6Routes,
		"size_bytes", len(encoded),
		"path", finalPath,
	)

	return nil
}
// Shutdown stops periodic snapshots and then takes one final snapshot.
//
// It cancels the context created by Start (if Start was ever called), waits
// for the periodic goroutine to exit, and calls TakeSnapshot. The error from
// that final snapshot, if any, is returned wrapped.
func (s *Snapshotter) Shutdown() error {
	s.logger.Info("Shutting down snapshotter")

	// Start writes s.cancel while holding s.mu, so read it under the same
	// lock to avoid a data race when Start and Shutdown run on different
	// goroutines. The lock must be released before waiting: the periodic
	// goroutine and the final snapshot both acquire s.mu in TakeSnapshot.
	s.mu.Lock()
	cancel := s.cancel
	s.mu.Unlock()

	// cancel is nil when Start was never called.
	if cancel != nil {
		cancel()
	}

	// Wait for the periodic snapshot goroutine to finish.
	s.wg.Wait()

	if err := s.TakeSnapshot(); err != nil {
		return fmt.Errorf("failed to take final snapshot: %w", err)
	}

	return nil
}
// GetLastSnapshotTime reports when the most recent successful snapshot
// finished. It returns the zero time.Time if none has completed yet. Because
// it takes s.mu, it blocks while a snapshot is in progress.
func (s *Snapshotter) GetLastSnapshotTime() time.Time {
	s.mu.Lock()
	last := s.lastSnapshot
	s.mu.Unlock()

	return last
}
// GetSnapshotPath returns the full path of the snapshot file inside the
// state directory. The file is not guaranteed to exist.
func (s *Snapshotter) GetSnapshotPath() string {
	snapshotPath := filepath.Join(s.stateDir, snapshotFilename)

	return snapshotPath
}