Implement backpressure mechanism to prevent memory exhaustion
- Add semaphore to limit concurrent message handlers to 100
- Drop messages when at capacity instead of creating unbounded goroutines
- Track and log dropped messages (every 1000 drops)
- Remove nested goroutine spawning in handler loop
- Add metrics for dropped messages and active handlers

This prevents the memory usage from growing unboundedly when the database can't keep up with the incoming BGP message stream. Messages are dropped gracefully rather than causing OOM errors.
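For reference, here is a minimal, self-contained sketch of the backpressure pattern this commit applies: a buffered channel used as a counting semaphore, with a non-blocking send so work is dropped rather than queued once all slots are busy. The processor/submit names and the drop-logging threshold are illustrative stand-ins, not the actual routewatch code:

package main

import (
	"fmt"
	"sync/atomic"
	"time"
)

// maxConcurrentHandlers mirrors the limit introduced by this commit.
const maxConcurrentHandlers = 100

// processor is an illustrative stand-in for the Streamer's backpressure state.
type processor struct {
	semaphore chan struct{} // buffered channel used as a counting semaphore
	dropped   uint64        // atomic counter of messages dropped under load
}

func newProcessor() *processor {
	return &processor{semaphore: make(chan struct{}, maxConcurrentHandlers)}
}

// submit processes msg in a goroutine if a handler slot is free; otherwise it
// drops the message instead of queueing it or spawning unbounded goroutines.
func (p *processor) submit(msg []byte, handle func([]byte)) {
	select {
	case p.semaphore <- struct{}{}: // acquired a slot
		go func(m []byte) {
			defer func() { <-p.semaphore }() // release the slot when done
			handle(m)
		}(append([]byte(nil), msg...)) // copy so the caller can reuse its buffer
	default: // all slots busy: drop and count
		if n := atomic.AddUint64(&p.dropped, 1); n%1000 == 0 {
			fmt.Println("dropped so far:", n)
		}
	}
}

func main() {
	p := newProcessor()
	for i := 0; i < 10000; i++ {
		p.submit([]byte("bgp update"), func([]byte) { time.Sleep(time.Millisecond) })
	}
	fmt.Println("total dropped:", atomic.LoadUint64(&p.dropped))
}

The key design choice is the default branch of the select: instead of blocking the read loop (which would let buffered input and process memory grow), the message is counted and discarded so the reader keeps pace with the stream.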
parent 32a540e0bf
commit e14c89e4f1
@@ -11,6 +11,7 @@ import (
     "net/http"
     "os"
     "sync"
+    "sync/atomic"
     "time"

     "git.eeqj.de/sneak/routewatch/internal/metrics"
@@ -24,6 +25,7 @@ const (
     metricsLogInterval = 10 * time.Second
     bytesPerKB = 1024
     bytesPerMB = 1024 * 1024
+    maxConcurrentHandlers = 100 // Maximum number of concurrent message handlers
 )

 // MessageHandler is an interface for handling RIS messages
@@ -48,6 +50,8 @@ type Streamer struct {
     cancel context.CancelFunc
     running bool
     metrics *metrics.Tracker
+    semaphore chan struct{} // Limits concurrent message processing
+    droppedMessages uint64 // Atomic counter for dropped messages
 }

 // New creates a new RIS streamer
@@ -59,6 +63,7 @@ func New(logger *slog.Logger, metrics *metrics.Tracker) *Streamer {
         },
         handlers: make([]MessageHandler, 0),
         metrics: metrics,
+        semaphore: make(chan struct{}, maxConcurrentHandlers),
     }
 }

@@ -124,12 +129,18 @@ func (s *Streamer) GetMetrics() metrics.StreamMetrics {
     return s.metrics.GetStreamMetrics()
 }

+// GetDroppedMessages returns the total number of dropped messages
+func (s *Streamer) GetDroppedMessages() uint64 {
+    return atomic.LoadUint64(&s.droppedMessages)
+}
+
 // logMetrics logs the current streaming statistics
 func (s *Streamer) logMetrics() {
     metrics := s.metrics.GetStreamMetrics()
     uptime := time.Since(metrics.ConnectedSince)

     const bitsPerMegabit = 1000000
+    droppedMessages := atomic.LoadUint64(&s.droppedMessages)
     s.logger.Info("Stream statistics",
         "uptime", uptime,
         "total_messages", metrics.TotalMessages,
@@ -138,6 +149,8 @@ func (s *Streamer) logMetrics() {
         "messages_per_sec", fmt.Sprintf("%.2f", metrics.MessagesPerSec),
         "bits_per_sec", fmt.Sprintf("%.0f", metrics.BitsPerSec),
         "mbps", fmt.Sprintf("%.2f", metrics.BitsPerSec/bitsPerMegabit),
+        "dropped_messages", droppedMessages,
+        "active_handlers", len(s.semaphore),
     )
 }

@@ -219,8 +232,12 @@ func (s *Streamer) stream(ctx context.Context) error {
         copy(handlers, s.handlers)
         s.mu.RUnlock()

-        // Spawn goroutine to parse and process the message
+        // Try to acquire semaphore, drop message if at capacity
+        select {
+        case s.semaphore <- struct{}{}:
+            // Successfully acquired semaphore, process message
         go func(rawLine []byte, messageHandlers []MessageHandler) {
+            defer func() { <-s.semaphore }() // Release semaphore when done

             // Parse the outer wrapper first
             var wrapper ristypes.RISLiveMessage
@@ -280,15 +297,21 @@ func (s *Streamer) stream(ctx context.Context) error {
                 panic(fmt.Sprintf("Unknown RIS message type: %s", msg.Type))
             }

-            // Spawn goroutine for each handler callback that wants this message type
+            // Call handlers synchronously within this goroutine
+            // This prevents unbounded goroutine growth at the handler level
             for _, handler := range messageHandlers {
                 if handler.WantsMessage(msg.Type) {
-                    go func(h MessageHandler) {
-                        h.HandleMessage(&msg)
-                    }(handler)
+                    handler.HandleMessage(&msg)
                 }
             }
         }(append([]byte(nil), line...), handlers) // Copy the line to avoid data races
+        default:
+            // Semaphore is full, drop the message
+            dropped := atomic.AddUint64(&s.droppedMessages, 1)
+            if dropped%1000 == 0 { // Log every 1000 dropped messages
+                s.logger.Warn("Dropping messages due to overload", "total_dropped", dropped, "max_handlers", maxConcurrentHandlers)
+            }
+        }
     }

     if err := scanner.Err(); err != nil {
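As a usage note, the new GetDroppedMessages accessor could be polled to turn the raw counter into a per-interval drop rate. The sketch below is hypothetical: the droppedCounter interface, ticker period, and fakeStreamer are illustrative, and only GetDroppedMessages itself comes from the diff above.

package main

import (
	"context"
	"log/slog"
	"time"
)

// droppedCounter is a hypothetical local interface; the Streamer in this
// commit satisfies it via the new GetDroppedMessages method.
type droppedCounter interface {
	GetDroppedMessages() uint64
}

// watchDrops logs how many messages were dropped during each interval.
func watchDrops(ctx context.Context, c droppedCounter, logger *slog.Logger, every time.Duration) {
	t := time.NewTicker(every)
	defer t.Stop()

	var last uint64
	for {
		select {
		case <-ctx.Done():
			return
		case <-t.C:
			total := c.GetDroppedMessages()
			if delta := total - last; delta > 0 {
				logger.Warn("messages dropped in last interval", "dropped", delta, "total_dropped", total)
			}
			last = total
		}
	}
}

// fakeStreamer is a stand-in so the example runs without the real Streamer.
type fakeStreamer struct{ n uint64 }

func (f *fakeStreamer) GetDroppedMessages() uint64 {
	f.n += 3 // pretend a few messages were dropped since the last poll
	return f.n
}

func main() {
	ctx, cancel := context.WithTimeout(context.Background(), 3*time.Second)
	defer cancel()
	watchDrops(ctx, &fakeStreamer{}, slog.Default(), time.Second)
}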