package notify

import (
	"context"
	"math"
	"math/rand/v2"
	"time"
)

// Package-level retry defaults and backoff tuning constants.
const (
	// DefaultMaxRetries is how many extra attempts are made once the
	// initial attempt has failed.
	DefaultMaxRetries = 3

	// DefaultBaseDelay is the un-jittered wait before the first retry.
	DefaultBaseDelay = time.Second

	// DefaultMaxDelay is the upper bound applied to the exponential
	// delay before jitter is added.
	DefaultMaxDelay = 10 * time.Second

	// backoffMultiplier is the factor by which the delay grows from one
	// attempt to the next.
	backoffMultiplier = 2

	// jitterFraction is the relative half-width of the random spread
	// applied to every delay (0.25 means ±25%).
	jitterFraction = 0.25
)

// RetryConfig carries the tunable parameters of the retry loop.
// Any field that is zero (or negative) is replaced by the matching
// package default when defaults is applied.
type RetryConfig struct {
	MaxRetries int
	BaseDelay  time.Duration
	MaxDelay   time.Duration
}

// defaults returns a copy of rc in which every non-positive field has
// been replaced by its package default. The receiver is not modified.
func (rc RetryConfig) defaults() RetryConfig {
	// Start from the package defaults and overlay whatever the caller
	// actually set.
	filled := RetryConfig{
		MaxRetries: DefaultMaxRetries,
		BaseDelay:  DefaultBaseDelay,
		MaxDelay:   DefaultMaxDelay,
	}
	if rc.MaxRetries > 0 {
		filled.MaxRetries = rc.MaxRetries
	}
	if rc.BaseDelay > 0 {
		filled.BaseDelay = rc.BaseDelay
	}
	if rc.MaxDelay > 0 {
		filled.MaxDelay = rc.MaxDelay
	}
	return filled
}

// backoff returns the jittered delay for the given 0-indexed attempt.
// The un-jittered delay is BaseDelay * backoffMultiplier^attempt,
// limited to MaxDelay; the result is then drawn uniformly from
// ±jitterFraction around that value, so it may exceed MaxDelay by up
// to jitterFraction.
func (rc RetryConfig) backoff(attempt int) time.Duration {
	growth := math.Pow(backoffMultiplier, float64(attempt))
	capped := min(float64(rc.BaseDelay)*growth, float64(rc.MaxDelay))

	// Jitter window: [capped*(1-j), capped*(1+j)].
	lower := capped * (1 - jitterFraction)
	upper := capped * (1 + jitterFraction)
	width := upper - lower

	offset := rand.Float64() * width //nolint:gosec // jitter does not need crypto/rand
	return time.Duration(lower + offset)
}

// deliverWithRetry calls fn and, when it returns an error, retries
// with exponential backoff for up to MaxRetries additional attempts.
// Every failure that is followed by a retry is logged as a warning.
// It returns nil on success, the context's error if ctx is done while
// waiting between attempts, or the last error from fn once all
// attempts are exhausted.
func (svc *Service) deliverWithRetry( ctx context.Context, endpoint string, fn func(context.Context) error, ) error { cfg := svc.retryConfig.defaults() var lastErr error // attempt 0 is the initial call; attempts 1..MaxRetries // are retries. for attempt := range cfg.MaxRetries + 1 { lastErr = fn(ctx) if lastErr == nil { if attempt > 0 { svc.log.Info( "notification delivered after retry", "endpoint", endpoint, "attempt", attempt+1, ) } return nil } // Last attempt — don't sleep, just return. if attempt == cfg.MaxRetries { break } delay := cfg.backoff(attempt) svc.log.Warn( "notification delivery failed, retrying", "endpoint", endpoint, "attempt", attempt+1, "maxAttempts", cfg.MaxRetries+1, "retryIn", delay, "error", lastErr, ) select { case <-ctx.Done(): return ctx.Err() case <-svc.sleepFunc(delay): } } return lastErr } // sleepFunc returns a channel that closes after d. // It is a field-level indirection so tests can override it. func (svc *Service) sleepFunc(d time.Duration) <-chan time.Time { if svc.sleepFn != nil { return svc.sleepFn(d) } return time.After(d) }