fix: retry on DNS timeout, distinguish authoritative negatives (closes #35)
Some checks failed
Check / check (pull_request) Failing after 6m4s
Some checks failed
Check / check (pull_request) Failing after 6m4s
- Add StatusTimeout constant for timeout responses - querySingleType now retries on timeout and SERVFAIL (3 attempts, exponential backoff starting at 100ms) - NXDOMAIN and NOERROR+empty are treated as authoritative negatives with no retry - classifyResponse sets structured error messages for timeout and SERVFAIL cases - Refactored into smaller functions to satisfy cyclomatic complexity limits
This commit is contained in:
@@ -459,9 +459,15 @@ func (r *Resolver) queryAllTypes(
|
||||
return resp, nil
|
||||
}
|
||||
|
||||
const (
|
||||
singleTypeMaxRetries = 3
|
||||
singleTypeInitialBackoff = 100 * time.Millisecond
|
||||
)
|
||||
|
||||
type queryState struct {
|
||||
gotNXDomain bool
|
||||
gotSERVFAIL bool
|
||||
gotTimeout bool
|
||||
hasRecords bool
|
||||
}
|
||||
|
||||
@@ -489,6 +495,21 @@ func (r *Resolver) queryEachType(
|
||||
return state
|
||||
}
|
||||
|
||||
// isTimeout checks whether an error represents a DNS timeout.
|
||||
func isTimeout(err error) bool {
|
||||
if err == nil {
|
||||
return false
|
||||
}
|
||||
|
||||
var netErr net.Error
|
||||
if errors.As(err, &netErr) && netErr.Timeout() {
|
||||
return true
|
||||
}
|
||||
|
||||
// Also catch i/o timeout strings from the dns library.
|
||||
return strings.Contains(err.Error(), "i/o timeout")
|
||||
}
|
||||
|
||||
func (r *Resolver) querySingleType(
|
||||
ctx context.Context,
|
||||
nsIP string,
|
||||
@@ -497,23 +518,99 @@ func (r *Resolver) querySingleType(
|
||||
resp *NameserverResponse,
|
||||
state *queryState,
|
||||
) {
|
||||
msg, err := r.queryDNS(ctx, nsIP, hostname, qtype)
|
||||
if err != nil {
|
||||
msg, lastErr := r.querySingleTypeWithRetry(
|
||||
ctx, nsIP, hostname, qtype,
|
||||
)
|
||||
if msg == nil {
|
||||
r.recordRetryFailure(lastErr, state)
|
||||
|
||||
return
|
||||
}
|
||||
|
||||
r.handleDNSResponse(msg, resp, state)
|
||||
}
|
||||
|
||||
func (r *Resolver) querySingleTypeWithRetry(
|
||||
ctx context.Context,
|
||||
nsIP string,
|
||||
hostname string,
|
||||
qtype uint16,
|
||||
) (*dns.Msg, error) {
|
||||
var lastErr error
|
||||
|
||||
backoff := singleTypeInitialBackoff
|
||||
|
||||
for attempt := range singleTypeMaxRetries {
|
||||
if checkCtx(ctx) != nil {
|
||||
return nil, ErrContextCanceled
|
||||
}
|
||||
|
||||
if attempt > 0 {
|
||||
if !waitBackoff(ctx, backoff) {
|
||||
return nil, ErrContextCanceled
|
||||
}
|
||||
|
||||
backoff *= timeoutMultiplier
|
||||
}
|
||||
|
||||
msg, err := r.queryDNS(ctx, nsIP, hostname, qtype)
|
||||
if err != nil {
|
||||
lastErr = err
|
||||
|
||||
if !isTimeout(err) {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
continue
|
||||
}
|
||||
|
||||
if msg.Rcode == dns.RcodeServerFailure {
|
||||
lastErr = ErrSERVFAIL
|
||||
|
||||
continue
|
||||
}
|
||||
|
||||
return msg, nil
|
||||
}
|
||||
|
||||
return nil, lastErr
|
||||
}
|
||||
|
||||
func waitBackoff(ctx context.Context, d time.Duration) bool {
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
return false
|
||||
case <-time.After(d):
|
||||
return true
|
||||
}
|
||||
}
|
||||
|
||||
func (r *Resolver) recordRetryFailure(
|
||||
lastErr error,
|
||||
state *queryState,
|
||||
) {
|
||||
if lastErr == nil {
|
||||
return
|
||||
}
|
||||
|
||||
if isTimeout(lastErr) {
|
||||
state.gotTimeout = true
|
||||
} else if errors.Is(lastErr, ErrSERVFAIL) {
|
||||
state.gotSERVFAIL = true
|
||||
}
|
||||
}
|
||||
|
||||
func (r *Resolver) handleDNSResponse(
|
||||
msg *dns.Msg,
|
||||
resp *NameserverResponse,
|
||||
state *queryState,
|
||||
) {
|
||||
if msg.Rcode == dns.RcodeNameError {
|
||||
state.gotNXDomain = true
|
||||
|
||||
return
|
||||
}
|
||||
|
||||
if msg.Rcode == dns.RcodeServerFailure {
|
||||
state.gotSERVFAIL = true
|
||||
|
||||
return
|
||||
}
|
||||
|
||||
collectAnswerRecords(msg, resp, state)
|
||||
}
|
||||
|
||||
@@ -540,8 +637,12 @@ func classifyResponse(resp *NameserverResponse, state queryState) {
|
||||
switch {
|
||||
case state.gotNXDomain && !state.hasRecords:
|
||||
resp.Status = StatusNXDomain
|
||||
case state.gotTimeout && !state.hasRecords:
|
||||
resp.Status = StatusTimeout
|
||||
resp.Error = "all queries timed out after retries"
|
||||
case state.gotSERVFAIL && !state.hasRecords:
|
||||
resp.Status = StatusError
|
||||
resp.Error = "server failure (SERVFAIL) after retries"
|
||||
case !state.hasRecords && !state.gotNXDomain:
|
||||
resp.Status = StatusNoData
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user