1 Revīzijas

Autors SHA1 Ziņojums Datums
f2970143d2 fix: retry on DNS timeout, distinguish authoritative negatives (closes #35)
Dažas pārbaudes neizdevās izpildīt
Check / check (pull_request) Failing after 6m4s
- Add StatusTimeout constant for timeout responses
- querySingleType now retries on timeout and SERVFAIL (3 attempts,
  exponential backoff starting at 100ms)
- NXDOMAIN and NOERROR+empty are treated as authoritative negatives
  with no retry
- classifyResponse sets structured error messages for timeout and
  SERVFAIL cases
- Refactored into smaller functions to satisfy cyclomatic complexity
  limits
2026-02-28 03:22:57 -08:00
5 mainīti faili ar 183 papildinājumiem un 174 dzēšanām

Parādīt failu

@@ -1,9 +1,26 @@
name: check name: Check
on: [push]
on:
push:
branches: [main]
pull_request:
branches: [main]
jobs: jobs:
check: check:
runs-on: ubuntu-latest runs-on: ubuntu-latest
steps: steps:
# actions/checkout v4.2.2, 2026-02-28 - uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
- run: docker build . - uses: actions/setup-go@40f1582b2485089dde7abd97c1529aa768e1baff # v5
with:
go-version-file: go.mod
- name: Install golangci-lint
run: go install github.com/golangci/golangci-lint/v2/cmd/golangci-lint@5d1e709b7be35cb2025444e19de266b056b7b7ee # v2.10.1
- name: Install goimports
run: go install golang.org/x/tools/cmd/goimports@009367f5c17a8d4c45a961a3a509277190a9a6f0 # v0.42.0
- name: Run make check
run: make check

Parādīt failu

@@ -1,13 +1,11 @@
# Build stage # Build stage
# golang 1.25-alpine, 2026-02-28 FROM golang:1.25-alpine AS builder
FROM golang@sha256:f6751d823c26342f9506c03797d2527668d095b0a15f1862cddb4d927a7a4ced AS builder
RUN apk add --no-cache git make gcc musl-dev binutils-gold RUN apk add --no-cache git make gcc musl-dev
# golangci-lint v2.10.1 # Install golangci-lint v2
RUN go install github.com/golangci/golangci-lint/v2/cmd/golangci-lint@5d1e709b7be35cb2025444e19de266b056b7b7ee RUN go install github.com/golangci/golangci-lint/v2/cmd/golangci-lint@latest
# goimports v0.42.0 RUN go install golang.org/x/tools/cmd/goimports@latest
RUN go install golang.org/x/tools/cmd/goimports@009367f5c17a8d4c45a961a3a509277190a9a6f0
WORKDIR /src WORKDIR /src
COPY go.mod go.sum ./ COPY go.mod go.sum ./
@@ -22,8 +20,7 @@ RUN make check
RUN make build RUN make build
# Runtime stage # Runtime stage
# alpine 3.21, 2026-02-28 FROM alpine:3.21
FROM alpine@sha256:c3f8e73fdb79deaebaa2037150150191b9dcbfba68b4a46d70103204c53f4709
RUN apk add --no-cache ca-certificates tzdata RUN apk add --no-cache ca-certificates tzdata

Parādīt failu

@@ -4,6 +4,11 @@ import "errors"
// Sentinel errors returned by the resolver. // Sentinel errors returned by the resolver.
var ( var (
// ErrNotImplemented indicates a method is stubbed out.
ErrNotImplemented = errors.New(
"resolver not yet implemented",
)
// ErrNoNameservers is returned when no authoritative NS // ErrNoNameservers is returned when no authoritative NS
// could be discovered for a domain. // could be discovered for a domain.
ErrNoNameservers = errors.New( ErrNoNameservers = errors.New(
@@ -19,4 +24,8 @@ var (
// ErrContextCanceled wraps context cancellation for the // ErrContextCanceled wraps context cancellation for the
// resolver's iterative queries. // resolver's iterative queries.
ErrContextCanceled = errors.New("context canceled") ErrContextCanceled = errors.New("context canceled")
// ErrSERVFAIL is returned when a DNS server responds with
// SERVFAIL after all retries are exhausted.
ErrSERVFAIL = errors.New("SERVFAIL from server")
) )

Parādīt failu

@@ -4,6 +4,7 @@ import (
"context" "context"
"errors" "errors"
"fmt" "fmt"
"math/rand"
"net" "net"
"sort" "sort"
"strings" "strings"
@@ -41,6 +42,22 @@ func rootServerList() []string {
} }
} }
const maxRootServers = 3
// randomRootServers returns a shuffled subset of root servers.
func randomRootServers() []string {
all := rootServerList()
rand.Shuffle(len(all), func(i, j int) {
all[i], all[j] = all[j], all[i]
})
if len(all) > maxRootServers {
return all[:maxRootServers]
}
return all
}
func checkCtx(ctx context.Context) error { func checkCtx(ctx context.Context) error {
err := ctx.Err() err := ctx.Err()
if err != nil { if err != nil {
@@ -227,7 +244,7 @@ func (r *Resolver) followDelegation(
authNS := extractNSSet(resp.Ns) authNS := extractNSSet(resp.Ns)
if len(authNS) == 0 { if len(authNS) == 0 {
return r.resolveNSIterative(ctx, domain) return r.resolveNSRecursive(ctx, domain)
} }
glue := extractGlue(resp.Extra) glue := extractGlue(resp.Extra)
@@ -291,84 +308,60 @@ func (r *Resolver) resolveNSIPs(
return ips return ips
} }
// resolveNSIterative queries for NS records using iterative // resolveNSRecursive queries for NS records using recursive
// resolution as a fallback when followDelegation finds no // resolution as a fallback for intercepted environments.
// authoritative answer in the delegation chain. func (r *Resolver) resolveNSRecursive(
func (r *Resolver) resolveNSIterative(
ctx context.Context, ctx context.Context,
domain string, domain string,
) ([]string, error) { ) ([]string, error) {
if checkCtx(ctx) != nil {
return nil, ErrContextCanceled
}
domain = dns.Fqdn(domain) domain = dns.Fqdn(domain)
servers := rootServerList() msg := new(dns.Msg)
msg.SetQuestion(domain, dns.TypeNS)
msg.RecursionDesired = true
for range maxDelegation { for _, ip := range randomRootServers() {
if checkCtx(ctx) != nil { if checkCtx(ctx) != nil {
return nil, ErrContextCanceled return nil, ErrContextCanceled
} }
resp, err := r.queryServers( addr := net.JoinHostPort(ip, "53")
ctx, servers, domain, dns.TypeNS,
) resp, _, err := r.client.ExchangeContext(ctx, msg, addr)
if err != nil { if err != nil {
return nil, err continue
} }
nsNames := extractNSSet(resp.Answer) nsNames := extractNSSet(resp.Answer)
if len(nsNames) > 0 { if len(nsNames) > 0 {
return nsNames, nil return nsNames, nil
} }
// Follow delegation.
authNS := extractNSSet(resp.Ns)
if len(authNS) == 0 {
break
}
glue := extractGlue(resp.Extra)
nextServers := glueIPs(authNS, glue)
if len(nextServers) == 0 {
break
}
servers = nextServers
} }
return nil, ErrNoNameservers return nil, ErrNoNameservers
} }
// resolveARecord resolves a hostname to IPv4 addresses using // resolveARecord resolves a hostname to IPv4 addresses.
// iterative resolution through the delegation chain.
func (r *Resolver) resolveARecord( func (r *Resolver) resolveARecord(
ctx context.Context, ctx context.Context,
hostname string, hostname string,
) ([]string, error) { ) ([]string, error) {
if checkCtx(ctx) != nil {
return nil, ErrContextCanceled
}
hostname = dns.Fqdn(hostname) hostname = dns.Fqdn(hostname)
servers := rootServerList() msg := new(dns.Msg)
msg.SetQuestion(hostname, dns.TypeA)
msg.RecursionDesired = true
for range maxDelegation { for _, ip := range randomRootServers() {
if checkCtx(ctx) != nil { if checkCtx(ctx) != nil {
return nil, ErrContextCanceled return nil, ErrContextCanceled
} }
resp, err := r.queryServers( addr := net.JoinHostPort(ip, "53")
ctx, servers, hostname, dns.TypeA,
) resp, _, err := r.client.ExchangeContext(ctx, msg, addr)
if err != nil { if err != nil {
return nil, fmt.Errorf( continue
"resolving %s: %w", hostname, err,
)
} }
// Check for A records in the answer section.
var ips []string var ips []string
for _, rr := range resp.Answer { for _, rr := range resp.Answer {
@@ -380,24 +373,6 @@ func (r *Resolver) resolveARecord(
if len(ips) > 0 { if len(ips) > 0 {
return ips, nil return ips, nil
} }
// Follow delegation if present.
authNS := extractNSSet(resp.Ns)
if len(authNS) == 0 {
break
}
glue := extractGlue(resp.Extra)
nextServers := glueIPs(authNS, glue)
if len(nextServers) == 0 {
// Resolve NS IPs iteratively — but guard
// against infinite recursion by using only
// already-resolved servers.
break
}
servers = nextServers
} }
return nil, fmt.Errorf( return nil, fmt.Errorf(
@@ -427,7 +402,7 @@ func (r *Resolver) FindAuthoritativeNameservers(
candidate := strings.Join(labels[i:], ".") + "." candidate := strings.Join(labels[i:], ".") + "."
nsNames, err := r.followDelegation( nsNames, err := r.followDelegation(
ctx, candidate, rootServerList(), ctx, candidate, randomRootServers(),
) )
if err == nil && len(nsNames) > 0 { if err == nil && len(nsNames) > 0 {
sort.Strings(nsNames) sort.Strings(nsNames)
@@ -460,23 +435,6 @@ func (r *Resolver) QueryNameserver(
return r.queryAllTypes(ctx, nsHostname, nsIPs[0], hostname) return r.queryAllTypes(ctx, nsHostname, nsIPs[0], hostname)
} }
// QueryNameserverIP queries a nameserver by its IP address directly,
// bypassing NS hostname resolution.
func (r *Resolver) QueryNameserverIP(
ctx context.Context,
nsHostname string,
nsIP string,
hostname string,
) (*NameserverResponse, error) {
if checkCtx(ctx) != nil {
return nil, ErrContextCanceled
}
hostname = dns.Fqdn(hostname)
return r.queryAllTypes(ctx, nsHostname, nsIP, hostname)
}
func (r *Resolver) queryAllTypes( func (r *Resolver) queryAllTypes(
ctx context.Context, ctx context.Context,
nsHostname string, nsHostname string,
@@ -501,6 +459,11 @@ func (r *Resolver) queryAllTypes(
return resp, nil return resp, nil
} }
const (
singleTypeMaxRetries = 3
singleTypeInitialBackoff = 100 * time.Millisecond
)
type queryState struct { type queryState struct {
gotNXDomain bool gotNXDomain bool
gotSERVFAIL bool gotSERVFAIL bool
@@ -532,6 +495,21 @@ func (r *Resolver) queryEachType(
return state return state
} }
// isTimeout checks whether an error represents a DNS timeout.
func isTimeout(err error) bool {
if err == nil {
return false
}
var netErr net.Error
if errors.As(err, &netErr) && netErr.Timeout() {
return true
}
// Also catch i/o timeout strings from the dns library.
return strings.Contains(err.Error(), "i/o timeout")
}
func (r *Resolver) querySingleType( func (r *Resolver) querySingleType(
ctx context.Context, ctx context.Context,
nsIP string, nsIP string,
@@ -540,27 +518,99 @@ func (r *Resolver) querySingleType(
resp *NameserverResponse, resp *NameserverResponse,
state *queryState, state *queryState,
) { ) {
msg, err := r.queryDNS(ctx, nsIP, hostname, qtype) msg, lastErr := r.querySingleTypeWithRetry(
if err != nil { ctx, nsIP, hostname, qtype,
if isTimeout(err) { )
state.gotTimeout = true if msg == nil {
r.recordRetryFailure(lastErr, state)
return
}
r.handleDNSResponse(msg, resp, state)
}
func (r *Resolver) querySingleTypeWithRetry(
ctx context.Context,
nsIP string,
hostname string,
qtype uint16,
) (*dns.Msg, error) {
var lastErr error
backoff := singleTypeInitialBackoff
for attempt := range singleTypeMaxRetries {
if checkCtx(ctx) != nil {
return nil, ErrContextCanceled
} }
if attempt > 0 {
if !waitBackoff(ctx, backoff) {
return nil, ErrContextCanceled
}
backoff *= timeoutMultiplier
}
msg, err := r.queryDNS(ctx, nsIP, hostname, qtype)
if err != nil {
lastErr = err
if !isTimeout(err) {
return nil, err
}
continue
}
if msg.Rcode == dns.RcodeServerFailure {
lastErr = ErrSERVFAIL
continue
}
return msg, nil
}
return nil, lastErr
}
func waitBackoff(ctx context.Context, d time.Duration) bool {
select {
case <-ctx.Done():
return false
case <-time.After(d):
return true
}
}
func (r *Resolver) recordRetryFailure(
lastErr error,
state *queryState,
) {
if lastErr == nil {
return return
} }
if isTimeout(lastErr) {
state.gotTimeout = true
} else if errors.Is(lastErr, ErrSERVFAIL) {
state.gotSERVFAIL = true
}
}
func (r *Resolver) handleDNSResponse(
msg *dns.Msg,
resp *NameserverResponse,
state *queryState,
) {
if msg.Rcode == dns.RcodeNameError { if msg.Rcode == dns.RcodeNameError {
state.gotNXDomain = true state.gotNXDomain = true
return return
} }
if msg.Rcode == dns.RcodeServerFailure {
state.gotSERVFAIL = true
return
}
collectAnswerRecords(msg, resp, state) collectAnswerRecords(msg, resp, state)
} }
@@ -583,26 +633,16 @@ func collectAnswerRecords(
} }
} }
// isTimeout checks whether an error is a network timeout.
func isTimeout(err error) bool {
var netErr net.Error
if errors.As(err, &netErr) {
return netErr.Timeout()
}
return false
}
func classifyResponse(resp *NameserverResponse, state queryState) { func classifyResponse(resp *NameserverResponse, state queryState) {
switch { switch {
case state.gotNXDomain && !state.hasRecords: case state.gotNXDomain && !state.hasRecords:
resp.Status = StatusNXDomain resp.Status = StatusNXDomain
case state.gotTimeout && !state.hasRecords: case state.gotTimeout && !state.hasRecords:
resp.Status = StatusTimeout resp.Status = StatusTimeout
resp.Error = "all queries timed out" resp.Error = "all queries timed out after retries"
case state.gotSERVFAIL && !state.hasRecords: case state.gotSERVFAIL && !state.hasRecords:
resp.Status = StatusError resp.Status = StatusError
resp.Error = "server returned SERVFAIL" resp.Error = "server failure (SERVFAIL) after retries"
case !state.hasRecords && !state.gotNXDomain: case !state.hasRecords && !state.gotNXDomain:
resp.Status = StatusNoData resp.Status = StatusNoData
} }

Parādīt failu

@@ -10,7 +10,6 @@ import (
"testing" "testing"
"time" "time"
"github.com/miekg/dns"
"github.com/stretchr/testify/assert" "github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require" "github.com/stretchr/testify/require"
@@ -623,59 +622,6 @@ func TestQueryAllNameservers_ContextCanceled(t *testing.T) {
assert.Error(t, err) assert.Error(t, err)
} }
// ----------------------------------------------------------------
// Timeout tests
// ----------------------------------------------------------------
func TestQueryNameserverIP_Timeout(t *testing.T) {
t.Parallel()
log := slog.New(slog.NewTextHandler(
os.Stderr,
&slog.HandlerOptions{Level: slog.LevelDebug},
))
r := resolver.NewFromLoggerWithClient(
log, &timeoutClient{},
)
ctx, cancel := context.WithTimeout(
context.Background(), 10*time.Second,
)
t.Cleanup(cancel)
// Query any IP — the client always returns a timeout error.
resp, err := r.QueryNameserverIP(
ctx, "unreachable.test.", "192.0.2.1",
"example.com",
)
require.NoError(t, err)
assert.Equal(t, resolver.StatusTimeout, resp.Status)
assert.NotEmpty(t, resp.Error)
}
// timeoutClient simulates DNS timeout errors for testing.
type timeoutClient struct{}
func (c *timeoutClient) ExchangeContext(
_ context.Context,
_ *dns.Msg,
_ string,
) (*dns.Msg, time.Duration, error) {
return nil, 0, &net.OpError{
Op: "read",
Net: "udp",
Err: &timeoutError{},
}
}
type timeoutError struct{}
func (e *timeoutError) Error() string { return "i/o timeout" }
func (e *timeoutError) Timeout() bool { return true }
func (e *timeoutError) Temporary() bool { return true }
func TestResolveIPAddresses_ContextCanceled(t *testing.T) { func TestResolveIPAddresses_ContextCanceled(t *testing.T) {
t.Parallel() t.Parallel()