feat: add retry with exponential backoff for notification delivery
All checks were successful
check / check (push) Successful in 42s
All checks were successful
check / check (push) Successful in 42s
Notifications were fire-and-forget: if Slack, Mattermost, or ntfy was temporarily down, changes were silently lost. This adds automatic retry with exponential backoff and jitter to all notification endpoints.

Implementation:
- New retry.go with configurable RetryConfig (max retries, base delay, max delay) and exponential backoff with ±25% jitter
- Each dispatch goroutine now wraps its send call in deliverWithRetry
- Default: 3 retries (4 total attempts), 1s base delay, 10s max delay
- Context-aware: respects cancellation during retry sleep
- Structured logging on each retry attempt and on final success after retry

All existing tests continue to pass. New tests cover:
- Backoff calculation (increase, cap)
- Retry success on first attempt (no unnecessary retries)
- Retry on transient failure (succeeds after N attempts)
- Exhausted retries (returns last error)
- Context cancellation during retry sleep
- Integration: SendNotification retries transient 500s
- Integration: all three endpoints retry independently
- Integration: permanent failure exhausts retries

Closes #62
This commit is contained in:
@@ -113,6 +113,8 @@ type Service struct {
|
||||
slackWebhookURL *url.URL
|
||||
mattermostWebhookURL *url.URL
|
||||
history *AlertHistory
|
||||
retryConfig RetryConfig
|
||||
sleepFn func(time.Duration) <-chan time.Time
|
||||
}
|
||||
|
||||
// New creates a new notify Service.
|
||||
@@ -203,13 +205,19 @@ func (svc *Service) dispatchNtfy(
|
||||
go func() {
|
||||
notifyCtx := context.WithoutCancel(ctx)
|
||||
|
||||
err := svc.sendNtfy(
|
||||
notifyCtx, svc.ntfyURL,
|
||||
title, message, priority,
|
||||
err := svc.deliverWithRetry(
|
||||
notifyCtx, "ntfy",
|
||||
func(c context.Context) error {
|
||||
return svc.sendNtfy(
|
||||
c, svc.ntfyURL,
|
||||
title, message, priority,
|
||||
)
|
||||
},
|
||||
)
|
||||
if err != nil {
|
||||
svc.log.Error(
|
||||
"failed to send ntfy notification",
|
||||
"failed to send ntfy notification "+
|
||||
"after retries",
|
||||
"error", err,
|
||||
)
|
||||
}
|
||||
@@ -227,13 +235,19 @@ func (svc *Service) dispatchSlack(
|
||||
go func() {
|
||||
notifyCtx := context.WithoutCancel(ctx)
|
||||
|
||||
err := svc.sendSlack(
|
||||
notifyCtx, svc.slackWebhookURL,
|
||||
title, message, priority,
|
||||
err := svc.deliverWithRetry(
|
||||
notifyCtx, "slack",
|
||||
func(c context.Context) error {
|
||||
return svc.sendSlack(
|
||||
c, svc.slackWebhookURL,
|
||||
title, message, priority,
|
||||
)
|
||||
},
|
||||
)
|
||||
if err != nil {
|
||||
svc.log.Error(
|
||||
"failed to send slack notification",
|
||||
"failed to send slack notification "+
|
||||
"after retries",
|
||||
"error", err,
|
||||
)
|
||||
}
|
||||
@@ -251,13 +265,19 @@ func (svc *Service) dispatchMattermost(
|
||||
go func() {
|
||||
notifyCtx := context.WithoutCancel(ctx)
|
||||
|
||||
err := svc.sendSlack(
|
||||
notifyCtx, svc.mattermostWebhookURL,
|
||||
title, message, priority,
|
||||
err := svc.deliverWithRetry(
|
||||
notifyCtx, "mattermost",
|
||||
func(c context.Context) error {
|
||||
return svc.sendSlack(
|
||||
c, svc.mattermostWebhookURL,
|
||||
title, message, priority,
|
||||
)
|
||||
},
|
||||
)
|
||||
if err != nil {
|
||||
svc.log.Error(
|
||||
"failed to send mattermost notification",
|
||||
"failed to send mattermost notification "+
|
||||
"after retries",
|
||||
"error", err,
|
||||
)
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user