Add production hardening: health check, streamer panic fix, db maintenance
- Add health check endpoint at /.well-known/healthcheck.json that verifies database and RIS Live connectivity and returns 200 (healthy) or 503 (unhealthy)
- Fix panic in streamer when encountering unknown RIS message types by logging a warning and continuing instead of crashing
- Add DBMaintainer for periodic database maintenance:
  - VACUUM every 6 hours to reclaim space
  - ANALYZE every hour to update query statistics
  - Graceful shutdown support
- Add Vacuum() and Analyze() methods to database interface
This commit is contained in:
@@ -24,8 +24,70 @@ import (
|
||||
// Timeouts applied to request-scoped operations in the HTTP handlers.
const (
	// statsContextTimeout is the timeout for stats API operations.
	statsContextTimeout = time.Second * 4

	// healthCheckTimeout is the timeout for health check operations.
	healthCheckTimeout = time.Second * 2
)
|
||||
|
||||
// HealthCheckResponse is the JSON payload produced by the health check
// endpoint.
type HealthCheckResponse struct {
	// Status is the overall result: "ok" or "error".
	Status string `json:"status"`
	// Timestamp is when the response was built, as a UTC RFC 3339 string.
	Timestamp string `json:"timestamp"`
	// Checks maps each check name (e.g. "database", "ris_live") to its
	// individual result string.
	Checks map[string]string `json:"checks"`
}
|
||||
|
||||
// handleHealthCheck returns a handler that performs health checks.
|
||||
// Returns 200 if healthy, 503 if any check fails.
|
||||
func (s *Server) handleHealthCheck() http.HandlerFunc {
|
||||
return func(w http.ResponseWriter, r *http.Request) {
|
||||
ctx, cancel := context.WithTimeout(r.Context(), healthCheckTimeout)
|
||||
defer cancel()
|
||||
|
||||
checks := make(map[string]string)
|
||||
healthy := true
|
||||
|
||||
// Check database connectivity
|
||||
dbStats, err := s.db.GetStatsContext(ctx)
|
||||
if err != nil {
|
||||
checks["database"] = "error: " + err.Error()
|
||||
healthy = false
|
||||
} else if dbStats.ASNs == 0 && dbStats.Prefixes == 0 {
|
||||
checks["database"] = "warning: empty database"
|
||||
} else {
|
||||
checks["database"] = "ok"
|
||||
}
|
||||
|
||||
// Check streamer connection
|
||||
metrics := s.streamer.GetMetrics()
|
||||
if metrics.Connected {
|
||||
checks["ris_live"] = "ok"
|
||||
} else {
|
||||
checks["ris_live"] = "disconnected"
|
||||
healthy = false
|
||||
}
|
||||
|
||||
// Build response
|
||||
status := "ok"
|
||||
if !healthy {
|
||||
status = "error"
|
||||
}
|
||||
|
||||
response := HealthCheckResponse{
|
||||
Status: status,
|
||||
Timestamp: time.Now().UTC().Format(time.RFC3339),
|
||||
Checks: checks,
|
||||
}
|
||||
|
||||
if !healthy {
|
||||
w.WriteHeader(http.StatusServiceUnavailable)
|
||||
}
|
||||
|
||||
if err := writeJSONSuccess(w, response); err != nil {
|
||||
s.logger.Error("Failed to encode health check response", "error", err)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// handleRoot returns a handler that redirects to /status.
|
||||
func (s *Server) handleRoot() http.HandlerFunc {
|
||||
return func(w http.ResponseWriter, r *http.Request) {
|
||||
|
||||
@@ -24,6 +24,7 @@ func (s *Server) setupRoutes() {
|
||||
r.Get("/", s.handleRoot())
|
||||
r.Get("/status", s.handleStatusHTML())
|
||||
r.Get("/status.json", JSONValidationMiddleware(s.handleStatusJSON()).ServeHTTP)
|
||||
r.Get("/.well-known/healthcheck.json", JSONValidationMiddleware(s.handleHealthCheck()).ServeHTTP)
|
||||
|
||||
// AS and prefix detail pages
|
||||
r.Get("/as/{asn}", s.handleASDetail())
|
||||
|
||||
Reference in New Issue
Block a user