- Remove the immediate snapshot taken when the periodic goroutine starts
- Fix a variable shadowing issue in snapshotter creation
- Add debug logging for snapshotter shutdown
- Snapshots now occur only after 10 minutes or on shutdown
257 lines
6.5 KiB
Go
257 lines
6.5 KiB
Go
// Package snapshotter provides functionality for creating periodic and on-demand
|
|
// snapshots of the routing table state.
|
|
package snapshotter
|
|
|
|
import (
|
|
"compress/gzip"
|
|
"context"
|
|
"encoding/json"
|
|
"fmt"
|
|
"log/slog"
|
|
"os"
|
|
"path/filepath"
|
|
"runtime"
|
|
"sync"
|
|
"time"
|
|
|
|
"git.eeqj.de/sneak/routewatch/internal/routingtable"
|
|
)
|
|
|
|
const (
	// snapshotInterval is the period between automatic snapshots.
	snapshotInterval = time.Minute * 10

	// snapshotFilename is the name of the snapshot file inside the state
	// directory.
	snapshotFilename = "routewatch-snapshot.json.gz"

	// tempFileSuffix is appended to snapshotFilename to name the temporary
	// file a snapshot is written to before being renamed into place.
	tempFileSuffix = ".tmp"
)
|
|
|
|
// Snapshotter handles periodic and on-demand snapshots of the routing table
|
|
type Snapshotter struct {
|
|
rt *routingtable.RoutingTable
|
|
stateDir string
|
|
logger *slog.Logger
|
|
ctx context.Context
|
|
cancel context.CancelFunc
|
|
mu sync.Mutex // Ensures only one snapshot runs at a time
|
|
wg sync.WaitGroup
|
|
lastSnapshot time.Time
|
|
}
|
|
|
|
// New creates a new Snapshotter instance
|
|
func New(rt *routingtable.RoutingTable, logger *slog.Logger) (*Snapshotter, error) {
|
|
stateDir, err := getStateDirectory()
|
|
if err != nil {
|
|
return nil, fmt.Errorf("failed to determine state directory: %w", err)
|
|
}
|
|
|
|
// Ensure state directory exists
|
|
const stateDirPerms = 0750
|
|
if err := os.MkdirAll(stateDir, stateDirPerms); err != nil {
|
|
return nil, fmt.Errorf("failed to create state directory: %w", err)
|
|
}
|
|
|
|
s := &Snapshotter{
|
|
rt: rt,
|
|
stateDir: stateDir,
|
|
logger: logger,
|
|
}
|
|
|
|
return s, nil
|
|
}
|
|
|
|
// Start begins the periodic snapshot process
|
|
func (s *Snapshotter) Start(ctx context.Context) {
|
|
s.mu.Lock()
|
|
defer s.mu.Unlock()
|
|
|
|
if s.ctx != nil {
|
|
// Already started
|
|
return
|
|
}
|
|
|
|
ctx, cancel := context.WithCancel(ctx)
|
|
s.ctx = ctx
|
|
s.cancel = cancel
|
|
|
|
// Start periodic snapshot goroutine
|
|
s.wg.Add(1)
|
|
go s.periodicSnapshot()
|
|
}
|
|
|
|
// getStateDirectory returns the directory in which routewatch keeps its
// persistent state, chosen according to the operating system:
//
//   - darwin: ~/Library/Application Support/routewatch
//   - linux, freebsd, openbsd, netbsd: /var/lib/routewatch when the effective
//     user ID is 0; otherwise $XDG_STATE_HOME/routewatch when XDG_STATE_HOME
//     holds an absolute path, falling back to ~/.local/state/routewatch.
//
// A relative (or empty) XDG_STATE_HOME is ignored, as the XDG Base Directory
// Specification requires; honouring it would make the state location depend
// on the process's working directory.
//
// It returns an error when the home directory cannot be determined or when
// the operating system is not one of those listed above. The directory is not
// created here.
func getStateDirectory() (string, error) {
	const appDir = "routewatch"

	switch runtime.GOOS {
	case "darwin":
		// macOS: use ~/Library/Application Support/routewatch
		home, err := os.UserHomeDir()
		if err != nil {
			return "", err
		}

		return filepath.Join(home, "Library", "Application Support", appDir), nil
	case "linux", "freebsd", "openbsd", "netbsd":
		// Running as root: use the system-wide state location.
		if os.Geteuid() == 0 {
			return "/var/lib/routewatch", nil
		}

		// Prefer XDG_STATE_HOME, but only when it is an absolute path.
		// filepath.IsAbs is false for the empty string, so an unset variable
		// falls through as well.
		if xdgState := os.Getenv("XDG_STATE_HOME"); filepath.IsAbs(xdgState) {
			return filepath.Join(xdgState, appDir), nil
		}

		// Fall back to ~/.local/state/routewatch
		home, err := os.UserHomeDir()
		if err != nil {
			return "", err
		}

		return filepath.Join(home, ".local", "state", appDir), nil
	default:
		return "", fmt.Errorf("unsupported operating system: %s", runtime.GOOS)
	}
}
|
|
|
|
// periodicSnapshot runs periodic snapshots
|
|
func (s *Snapshotter) periodicSnapshot() {
|
|
defer s.wg.Done()
|
|
|
|
ticker := time.NewTicker(snapshotInterval)
|
|
defer ticker.Stop()
|
|
|
|
// Wait for the first interval before taking any snapshots
|
|
for {
|
|
select {
|
|
case <-s.ctx.Done():
|
|
return
|
|
case <-ticker.C:
|
|
if err := s.TakeSnapshot(); err != nil {
|
|
s.logger.Error("Failed to take periodic snapshot", "error", err)
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
// TakeSnapshot creates a snapshot of the current routing table state
|
|
func (s *Snapshotter) TakeSnapshot() error {
|
|
// Ensure only one snapshot runs at a time
|
|
s.mu.Lock()
|
|
defer s.mu.Unlock()
|
|
|
|
start := time.Now()
|
|
s.logger.Info("Starting routing table snapshot")
|
|
|
|
// Get a copy of all routes while holding read lock
|
|
s.rt.RLock()
|
|
routes := s.rt.GetAllRoutesUnsafe() // We'll need to add this method
|
|
stats := s.rt.GetDetailedStats()
|
|
s.rt.RUnlock()
|
|
|
|
// Create snapshot data structure
|
|
snapshot := struct {
|
|
Timestamp time.Time `json:"timestamp"`
|
|
Stats routingtable.DetailedStats `json:"stats"`
|
|
Routes []*routingtable.Route `json:"routes"`
|
|
}{
|
|
Timestamp: time.Now().UTC(),
|
|
Stats: stats,
|
|
Routes: routes,
|
|
}
|
|
|
|
// Serialize to JSON
|
|
jsonData, err := json.Marshal(snapshot)
|
|
if err != nil {
|
|
return fmt.Errorf("failed to marshal snapshot: %w", err)
|
|
}
|
|
|
|
// Write compressed data to temporary file
|
|
tempPath := filepath.Join(s.stateDir, snapshotFilename+tempFileSuffix)
|
|
finalPath := filepath.Join(s.stateDir, snapshotFilename)
|
|
|
|
// Clean the paths to avoid any path traversal issues
|
|
tempPath = filepath.Clean(tempPath)
|
|
finalPath = filepath.Clean(finalPath)
|
|
|
|
tempFile, err := os.Create(tempPath)
|
|
if err != nil {
|
|
return fmt.Errorf("failed to create temporary file: %w", err)
|
|
}
|
|
defer func() {
|
|
_ = tempFile.Close()
|
|
// Clean up temp file if it still exists
|
|
_ = os.Remove(tempPath)
|
|
}()
|
|
|
|
// Create gzip writer
|
|
gzipWriter := gzip.NewWriter(tempFile)
|
|
gzipWriter.Comment = fmt.Sprintf("RouteWatch snapshot taken at %s", snapshot.Timestamp.Format(time.RFC3339))
|
|
|
|
// Write compressed data
|
|
if _, err := gzipWriter.Write(jsonData); err != nil {
|
|
return fmt.Errorf("failed to write compressed data: %w", err)
|
|
}
|
|
|
|
// Close gzip writer to flush all data
|
|
if err := gzipWriter.Close(); err != nil {
|
|
return fmt.Errorf("failed to close gzip writer: %w", err)
|
|
}
|
|
|
|
// Sync to disk
|
|
if err := tempFile.Sync(); err != nil {
|
|
return fmt.Errorf("failed to sync temporary file: %w", err)
|
|
}
|
|
|
|
// Close temp file before rename
|
|
if err := tempFile.Close(); err != nil {
|
|
return fmt.Errorf("failed to close temporary file: %w", err)
|
|
}
|
|
|
|
// Atomically rename temp file to final location
|
|
if err := os.Rename(tempPath, finalPath); err != nil {
|
|
return fmt.Errorf("failed to rename temporary file: %w", err)
|
|
}
|
|
|
|
duration := time.Since(start)
|
|
s.lastSnapshot = time.Now()
|
|
|
|
s.logger.Info("Routing table snapshot completed",
|
|
"duration", duration,
|
|
"routes", len(routes),
|
|
"ipv4_routes", stats.IPv4Routes,
|
|
"ipv6_routes", stats.IPv6Routes,
|
|
"size_bytes", len(jsonData),
|
|
"path", finalPath,
|
|
)
|
|
|
|
return nil
|
|
}
|
|
|
|
// Shutdown performs a final snapshot and cleans up resources
|
|
func (s *Snapshotter) Shutdown() error {
|
|
s.logger.Info("Shutting down snapshotter")
|
|
|
|
// Cancel context to stop periodic snapshots
|
|
if s.cancel != nil {
|
|
s.cancel()
|
|
}
|
|
|
|
// Wait for periodic snapshot goroutine to finish
|
|
s.wg.Wait()
|
|
|
|
// Take final snapshot
|
|
if err := s.TakeSnapshot(); err != nil {
|
|
return fmt.Errorf("failed to take final snapshot: %w", err)
|
|
}
|
|
|
|
return nil
|
|
}
|
|
|
|
// GetLastSnapshotTime returns the time of the last successful snapshot
|
|
func (s *Snapshotter) GetLastSnapshotTime() time.Time {
|
|
s.mu.Lock()
|
|
defer s.mu.Unlock()
|
|
|
|
return s.lastSnapshot
|
|
}
|
|
|
|
// GetSnapshotPath returns the path to the snapshot file
|
|
func (s *Snapshotter) GetSnapshotPath() string {
|
|
return filepath.Join(s.stateDir, snapshotFilename)
|
|
}
|