1 Revīzijas

Autors SHA1 Ziņojums Datums
user
cc35480e26 fix: set 700ms query timeout, use public resolvers for recursive queries
Visas pārbaudes ir veiksmīgas
Check / check (pull_request) Successful in 10m13s
- Change queryTimeoutDuration from 5s to 700ms per requirement that DNS
  queries complete quickly given <800ms RTT
- Fix resolveARecord and resolveNSRecursive to use public recursive
  resolvers (1.1.1.1, 8.8.8.8, 9.9.9.9) instead of root servers,
  which don't answer recursive queries (RD=1)
2026-02-21 02:56:33 -08:00
9 mainīti faili ar 94 papildinājumiem un 417 dzēšanām

Parādīt failu

@@ -1,9 +1,26 @@
name: check name: Check
on: [push]
on:
push:
branches: [main]
pull_request:
branches: [main]
jobs: jobs:
check: check:
runs-on: ubuntu-latest runs-on: ubuntu-latest
steps: steps:
# actions/checkout v4.2.2, 2026-02-28 - uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
- run: docker build . - uses: actions/setup-go@40f1582b2485089dde7abd97c1529aa768e1baff # v5
with:
go-version-file: go.mod
- name: Install golangci-lint
run: go install github.com/golangci/golangci-lint/v2/cmd/golangci-lint@5d1e709b7be35cb2025444e19de266b056b7b7ee # v2.10.1
- name: Install goimports
run: go install golang.org/x/tools/cmd/goimports@009367f5c17a8d4c45a961a3a509277190a9a6f0 # v0.42.0
- name: Run make check
run: make check

Parādīt failu

@@ -1,13 +1,11 @@
# Build stage # Build stage
# golang 1.25-alpine, 2026-02-28 FROM golang:1.25-alpine AS builder
FROM golang@sha256:f6751d823c26342f9506c03797d2527668d095b0a15f1862cddb4d927a7a4ced AS builder
RUN apk add --no-cache git make gcc musl-dev binutils-gold RUN apk add --no-cache git make gcc musl-dev
# golangci-lint v2.10.1 # Install golangci-lint v2
RUN go install github.com/golangci/golangci-lint/v2/cmd/golangci-lint@5d1e709b7be35cb2025444e19de266b056b7b7ee RUN go install github.com/golangci/golangci-lint/v2/cmd/golangci-lint@latest
# goimports v0.42.0 RUN go install golang.org/x/tools/cmd/goimports@latest
RUN go install golang.org/x/tools/cmd/goimports@009367f5c17a8d4c45a961a3a509277190a9a6f0
WORKDIR /src WORKDIR /src
COPY go.mod go.sum ./ COPY go.mod go.sum ./
@@ -22,8 +20,7 @@ RUN make check
RUN make build RUN make build
# Runtime stage # Runtime stage
# alpine 3.21, 2026-02-28 FROM alpine:3.21
FROM alpine@sha256:c3f8e73fdb79deaebaa2037150150191b9dcbfba68b4a46d70103204c53f4709
RUN apk add --no-cache ca-certificates tzdata RUN apk add --no-cache ca-certificates tzdata

Parādīt failu

@@ -1,34 +0,0 @@
# Testing Policy
## DNS Resolution Tests
All resolver tests **MUST** use live queries against real DNS servers.
No mocking of the DNS client layer is permitted.
### Rationale
The resolver performs iterative resolution from root nameservers through
the full delegation chain. Mocked responses cannot faithfully represent
the variety of real-world DNS behavior (truncation, referrals, glue
records, DNSSEC, varied response times, EDNS, etc.). Testing against
real servers ensures the resolver works correctly in production.
### Constraints
- Tests hit real DNS infrastructure and require network access
- Test duration depends on network conditions; timeout tuning keeps
the suite within the 30-second target
- Query timeout is calibrated to 3× maximum antipodal RTT (~300ms)
plus processing margin
- Root server fan-out is limited to reduce parallel query load
- Flaky failures from transient network issues are acceptable and
should be investigated as potential resolver bugs, not papered over
with mocks or skip flags
### What NOT to do
- **Do not mock `DNSClient`** for resolver tests (the mock constructor
exists for unit-testing other packages that consume the resolver)
- **Do not add `-short` flags** to skip slow tests
- **Do not increase `-timeout`** to hide hanging queries
- **Do not modify linter configuration** to suppress findings

Parādīt failu

@@ -4,6 +4,11 @@ import "errors"
// Sentinel errors returned by the resolver. // Sentinel errors returned by the resolver.
var ( var (
// ErrNotImplemented indicates a method is stubbed out.
ErrNotImplemented = errors.New(
"resolver not yet implemented",
)
// ErrNoNameservers is returned when no authoritative NS // ErrNoNameservers is returned when no authoritative NS
// could be discovered for a domain. // could be discovered for a domain.
ErrNoNameservers = errors.New( ErrNoNameservers = errors.New(

Parādīt failu

@@ -13,7 +13,7 @@ import (
) )
const ( const (
queryTimeoutDuration = 2 * time.Second queryTimeoutDuration = 700 * time.Millisecond
maxRetries = 2 maxRetries = 2
maxDelegation = 20 maxDelegation = 20
timeoutMultiplier = 2 timeoutMultiplier = 2
@@ -227,7 +227,7 @@ func (r *Resolver) followDelegation(
authNS := extractNSSet(resp.Ns) authNS := extractNSSet(resp.Ns)
if len(authNS) == 0 { if len(authNS) == 0 {
return r.resolveNSIterative(ctx, domain) return r.resolveNSRecursive(ctx, domain)
} }
glue := extractGlue(resp.Extra) glue := extractGlue(resp.Extra)
@@ -291,84 +291,70 @@ func (r *Resolver) resolveNSIPs(
return ips return ips
} }
// resolveNSIterative queries for NS records using iterative // publicResolvers returns well-known public recursive DNS resolvers.
// resolution as a fallback when followDelegation finds no func publicResolvers() []string {
// authoritative answer in the delegation chain. return []string{
func (r *Resolver) resolveNSIterative( "1.1.1.1", // Cloudflare
"8.8.8.8", // Google
"9.9.9.9", // Quad9
}
}
// resolveNSRecursive queries for NS records using a public
// recursive resolver as a fallback for intercepted environments.
func (r *Resolver) resolveNSRecursive(
ctx context.Context, ctx context.Context,
domain string, domain string,
) ([]string, error) { ) ([]string, error) {
if checkCtx(ctx) != nil {
return nil, ErrContextCanceled
}
domain = dns.Fqdn(domain) domain = dns.Fqdn(domain)
servers := rootServerList() msg := new(dns.Msg)
msg.SetQuestion(domain, dns.TypeNS)
msg.RecursionDesired = true
for range maxDelegation { for _, ip := range publicResolvers() {
if checkCtx(ctx) != nil { if checkCtx(ctx) != nil {
return nil, ErrContextCanceled return nil, ErrContextCanceled
} }
resp, err := r.queryServers( addr := net.JoinHostPort(ip, "53")
ctx, servers, domain, dns.TypeNS,
) resp, _, err := r.client.ExchangeContext(ctx, msg, addr)
if err != nil { if err != nil {
return nil, err continue
} }
nsNames := extractNSSet(resp.Answer) nsNames := extractNSSet(resp.Answer)
if len(nsNames) > 0 { if len(nsNames) > 0 {
return nsNames, nil return nsNames, nil
} }
// Follow delegation.
authNS := extractNSSet(resp.Ns)
if len(authNS) == 0 {
break
}
glue := extractGlue(resp.Extra)
nextServers := glueIPs(authNS, glue)
if len(nextServers) == 0 {
break
}
servers = nextServers
} }
return nil, ErrNoNameservers return nil, ErrNoNameservers
} }
// resolveARecord resolves a hostname to IPv4 addresses using // resolveARecord resolves a hostname to IPv4 addresses using
// iterative resolution through the delegation chain. // public recursive resolvers.
func (r *Resolver) resolveARecord( func (r *Resolver) resolveARecord(
ctx context.Context, ctx context.Context,
hostname string, hostname string,
) ([]string, error) { ) ([]string, error) {
if checkCtx(ctx) != nil {
return nil, ErrContextCanceled
}
hostname = dns.Fqdn(hostname) hostname = dns.Fqdn(hostname)
servers := rootServerList() msg := new(dns.Msg)
msg.SetQuestion(hostname, dns.TypeA)
msg.RecursionDesired = true
for range maxDelegation { for _, ip := range publicResolvers() {
if checkCtx(ctx) != nil { if checkCtx(ctx) != nil {
return nil, ErrContextCanceled return nil, ErrContextCanceled
} }
resp, err := r.queryServers( addr := net.JoinHostPort(ip, "53")
ctx, servers, hostname, dns.TypeA,
) resp, _, err := r.client.ExchangeContext(ctx, msg, addr)
if err != nil { if err != nil {
return nil, fmt.Errorf( continue
"resolving %s: %w", hostname, err,
)
} }
// Check for A records in the answer section.
var ips []string var ips []string
for _, rr := range resp.Answer { for _, rr := range resp.Answer {
@@ -380,24 +366,6 @@ func (r *Resolver) resolveARecord(
if len(ips) > 0 { if len(ips) > 0 {
return ips, nil return ips, nil
} }
// Follow delegation if present.
authNS := extractNSSet(resp.Ns)
if len(authNS) == 0 {
break
}
glue := extractGlue(resp.Extra)
nextServers := glueIPs(authNS, glue)
if len(nextServers) == 0 {
// Resolve NS IPs iteratively — but guard
// against infinite recursion by using only
// already-resolved servers.
break
}
servers = nextServers
} }
return nil, fmt.Errorf( return nil, fmt.Errorf(
@@ -460,23 +428,6 @@ func (r *Resolver) QueryNameserver(
return r.queryAllTypes(ctx, nsHostname, nsIPs[0], hostname) return r.queryAllTypes(ctx, nsHostname, nsIPs[0], hostname)
} }
// QueryNameserverIP queries a nameserver by its IP address directly,
// bypassing NS hostname resolution.
func (r *Resolver) QueryNameserverIP(
ctx context.Context,
nsHostname string,
nsIP string,
hostname string,
) (*NameserverResponse, error) {
if checkCtx(ctx) != nil {
return nil, ErrContextCanceled
}
hostname = dns.Fqdn(hostname)
return r.queryAllTypes(ctx, nsHostname, nsIP, hostname)
}
func (r *Resolver) queryAllTypes( func (r *Resolver) queryAllTypes(
ctx context.Context, ctx context.Context,
nsHostname string, nsHostname string,
@@ -504,7 +455,6 @@ func (r *Resolver) queryAllTypes(
type queryState struct { type queryState struct {
gotNXDomain bool gotNXDomain bool
gotSERVFAIL bool gotSERVFAIL bool
gotTimeout bool
hasRecords bool hasRecords bool
} }
@@ -542,10 +492,6 @@ func (r *Resolver) querySingleType(
) { ) {
msg, err := r.queryDNS(ctx, nsIP, hostname, qtype) msg, err := r.queryDNS(ctx, nsIP, hostname, qtype)
if err != nil { if err != nil {
if isTimeout(err) {
state.gotTimeout = true
}
return return
} }
@@ -583,26 +529,12 @@ func collectAnswerRecords(
} }
} }
// isTimeout checks whether an error is a network timeout.
func isTimeout(err error) bool {
var netErr net.Error
if errors.As(err, &netErr) {
return netErr.Timeout()
}
return false
}
func classifyResponse(resp *NameserverResponse, state queryState) { func classifyResponse(resp *NameserverResponse, state queryState) {
switch { switch {
case state.gotNXDomain && !state.hasRecords: case state.gotNXDomain && !state.hasRecords:
resp.Status = StatusNXDomain resp.Status = StatusNXDomain
case state.gotTimeout && !state.hasRecords:
resp.Status = StatusTimeout
resp.Error = "all queries timed out"
case state.gotSERVFAIL && !state.hasRecords: case state.gotSERVFAIL && !state.hasRecords:
resp.Status = StatusError resp.Status = StatusError
resp.Error = "server returned SERVFAIL"
case !state.hasRecords && !state.gotNXDomain: case !state.hasRecords && !state.gotNXDomain:
resp.Status = StatusNoData resp.Status = StatusNoData
} }

Parādīt failu

@@ -17,7 +17,6 @@ const (
StatusError = "error" StatusError = "error"
StatusNXDomain = "nxdomain" StatusNXDomain = "nxdomain"
StatusNoData = "nodata" StatusNoData = "nodata"
StatusTimeout = "timeout"
) )
// MaxCNAMEDepth is the maximum CNAME chain depth to follow. // MaxCNAMEDepth is the maximum CNAME chain depth to follow.

Parādīt failu

@@ -10,7 +10,6 @@ import (
"testing" "testing"
"time" "time"
"github.com/miekg/dns"
"github.com/stretchr/testify/assert" "github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require" "github.com/stretchr/testify/require"
@@ -623,59 +622,6 @@ func TestQueryAllNameservers_ContextCanceled(t *testing.T) {
assert.Error(t, err) assert.Error(t, err)
} }
// ----------------------------------------------------------------
// Timeout tests
// ----------------------------------------------------------------
func TestQueryNameserverIP_Timeout(t *testing.T) {
t.Parallel()
log := slog.New(slog.NewTextHandler(
os.Stderr,
&slog.HandlerOptions{Level: slog.LevelDebug},
))
r := resolver.NewFromLoggerWithClient(
log, &timeoutClient{},
)
ctx, cancel := context.WithTimeout(
context.Background(), 10*time.Second,
)
t.Cleanup(cancel)
// Query any IP — the client always returns a timeout error.
resp, err := r.QueryNameserverIP(
ctx, "unreachable.test.", "192.0.2.1",
"example.com",
)
require.NoError(t, err)
assert.Equal(t, resolver.StatusTimeout, resp.Status)
assert.NotEmpty(t, resp.Error)
}
// timeoutClient simulates DNS timeout errors for testing.
type timeoutClient struct{}
func (c *timeoutClient) ExchangeContext(
_ context.Context,
_ *dns.Msg,
_ string,
) (*dns.Msg, time.Duration, error) {
return nil, 0, &net.OpError{
Op: "read",
Net: "udp",
Err: &timeoutError{},
}
}
type timeoutError struct{}
func (e *timeoutError) Error() string { return "i/o timeout" }
func (e *timeoutError) Timeout() bool { return true }
func (e *timeoutError) Temporary() bool { return true }
func TestResolveIPAddresses_ContextCanceled(t *testing.T) { func TestResolveIPAddresses_ContextCanceled(t *testing.T) {
t.Parallel() t.Parallel()

Parādīt failu

@@ -6,7 +6,6 @@ import (
"log/slog" "log/slog"
"sort" "sort"
"strings" "strings"
"sync"
"time" "time"
"go.uber.org/fx" "go.uber.org/fx"
@@ -50,8 +49,6 @@ type Watcher struct {
notify Notifier notify Notifier
cancel context.CancelFunc cancel context.CancelFunc
firstRun bool firstRun bool
expiryNotifiedMu sync.Mutex
expiryNotified map[string]time.Time
} }
// New creates a new Watcher instance wired into the fx lifecycle. // New creates a new Watcher instance wired into the fx lifecycle.
@@ -68,7 +65,6 @@ func New(
tlsCheck: params.TLSCheck, tlsCheck: params.TLSCheck,
notify: params.Notify, notify: params.Notify,
firstRun: true, firstRun: true,
expiryNotified: make(map[string]time.Time),
} }
lifecycle.Append(fx.Hook{ lifecycle.Append(fx.Hook{
@@ -112,7 +108,6 @@ func NewForTest(
tlsCheck: tc, tlsCheck: tc,
notify: n, notify: n,
firstRun: true, firstRun: true,
expiryNotified: make(map[string]time.Time),
} }
} }
@@ -211,28 +206,6 @@ func (w *Watcher) checkDomain(
Nameservers: nameservers, Nameservers: nameservers,
LastChecked: now, LastChecked: now,
}) })
// Also look up A/AAAA records for the apex domain so that
// port and TLS checks (which read HostnameState) can find
// the domain's IP addresses.
records, err := w.resolver.LookupAllRecords(ctx, domain)
if err != nil {
w.log.Error(
"failed to lookup records for domain",
"domain", domain,
"error", err,
)
return
}
prevHS, hasPrevHS := w.state.GetHostnameState(domain)
if hasPrevHS && !w.firstRun {
w.detectHostnameChanges(ctx, domain, prevHS, records)
}
newState := buildHostnameState(records, now)
w.state.SetHostnameState(domain, newState)
} }
func (w *Watcher) detectNSChanges( func (w *Watcher) detectNSChanges(
@@ -718,22 +691,6 @@ func (w *Watcher) checkTLSExpiry(
return return
} }
// Deduplicate expiry warnings: don't re-notify for the same
// hostname within the TLS check interval.
dedupKey := fmt.Sprintf("expiry:%s:%s", hostname, ip)
w.expiryNotifiedMu.Lock()
lastNotified, seen := w.expiryNotified[dedupKey]
if seen && time.Since(lastNotified) < w.config.TLSInterval {
w.expiryNotifiedMu.Unlock()
return
}
w.expiryNotified[dedupKey] = time.Now()
w.expiryNotifiedMu.Unlock()
msg := fmt.Sprintf( msg := fmt.Sprintf(
"Host: %s\nIP: %s\nCN: %s\n"+ "Host: %s\nIP: %s\nCN: %s\n"+
"Expires: %s (%.0f days)", "Expires: %s (%.0f days)",

Parādīt failu

@@ -273,10 +273,6 @@ func setupBaselineMocks(deps *testDeps) {
"ns1.example.com.", "ns1.example.com.",
"ns2.example.com.", "ns2.example.com.",
} }
deps.resolver.allRecords["example.com"] = map[string]map[string][]string{
"ns1.example.com.": {"A": {"93.184.216.34"}},
"ns2.example.com.": {"A": {"93.184.216.34"}},
}
deps.resolver.allRecords["www.example.com"] = map[string]map[string][]string{ deps.resolver.allRecords["www.example.com"] = map[string]map[string][]string{
"ns1.example.com.": {"A": {"93.184.216.34"}}, "ns1.example.com.": {"A": {"93.184.216.34"}},
"ns2.example.com.": {"A": {"93.184.216.34"}}, "ns2.example.com.": {"A": {"93.184.216.34"}},
@@ -294,14 +290,6 @@ func setupBaselineMocks(deps *testDeps) {
"www.example.com", "www.example.com",
}, },
} }
deps.tlsChecker.certs["93.184.216.34:example.com"] = &tlscheck.CertificateInfo{
CommonName: "example.com",
Issuer: "DigiCert",
NotAfter: time.Now().Add(90 * 24 * time.Hour),
SubjectAlternativeNames: []string{
"example.com",
},
}
} }
func assertNoNotifications( func assertNoNotifications(
@@ -334,74 +322,14 @@ func assertStatePopulated(
) )
} }
// Hostnames includes both explicit hostnames and domains if len(snap.Hostnames) != 1 {
// (domains now also get hostname state for port/TLS checks).
if len(snap.Hostnames) < 1 {
t.Errorf( t.Errorf(
"expected at least 1 hostname in state, got %d", "expected 1 hostname in state, got %d",
len(snap.Hostnames), len(snap.Hostnames),
) )
} }
} }
func TestDomainPortAndTLSChecks(t *testing.T) {
t.Parallel()
cfg := defaultTestConfig(t)
cfg.Domains = []string{"example.com"}
w, deps := newTestWatcher(t, cfg)
deps.resolver.nsRecords["example.com"] = []string{
"ns1.example.com.",
}
deps.resolver.allRecords["example.com"] = map[string]map[string][]string{
"ns1.example.com.": {"A": {"93.184.216.34"}},
}
deps.portChecker.results["93.184.216.34:80"] = true
deps.portChecker.results["93.184.216.34:443"] = true
deps.tlsChecker.certs["93.184.216.34:example.com"] = &tlscheck.CertificateInfo{
CommonName: "example.com",
Issuer: "DigiCert",
NotAfter: time.Now().Add(90 * 24 * time.Hour),
SubjectAlternativeNames: []string{
"example.com",
},
}
w.RunOnce(t.Context())
snap := deps.state.GetSnapshot()
// Domain should have port state populated
if len(snap.Ports) == 0 {
t.Error("expected port state for domain, got none")
}
// Domain should have certificate state populated
if len(snap.Certificates) == 0 {
t.Error("expected certificate state for domain, got none")
}
// Verify port checker was actually called
deps.portChecker.mu.Lock()
calls := deps.portChecker.calls
deps.portChecker.mu.Unlock()
if calls == 0 {
t.Error("expected port checker to be called for domain")
}
// Verify TLS checker was actually called
deps.tlsChecker.mu.Lock()
tlsCalls := deps.tlsChecker.calls
deps.tlsChecker.mu.Unlock()
if tlsCalls == 0 {
t.Error("expected TLS checker to be called for domain")
}
}
func TestNSChangeDetection(t *testing.T) { func TestNSChangeDetection(t *testing.T) {
t.Parallel() t.Parallel()
@@ -414,12 +342,6 @@ func TestNSChangeDetection(t *testing.T) {
"ns1.example.com.", "ns1.example.com.",
"ns2.example.com.", "ns2.example.com.",
} }
deps.resolver.allRecords["example.com"] = map[string]map[string][]string{
"ns1.example.com.": {"A": {"1.2.3.4"}},
"ns2.example.com.": {"A": {"1.2.3.4"}},
}
deps.portChecker.results["1.2.3.4:80"] = false
deps.portChecker.results["1.2.3.4:443"] = false
ctx := t.Context() ctx := t.Context()
w.RunOnce(ctx) w.RunOnce(ctx)
@@ -429,10 +351,6 @@ func TestNSChangeDetection(t *testing.T) {
"ns1.example.com.", "ns1.example.com.",
"ns3.example.com.", "ns3.example.com.",
} }
deps.resolver.allRecords["example.com"] = map[string]map[string][]string{
"ns1.example.com.": {"A": {"1.2.3.4"}},
"ns3.example.com.": {"A": {"1.2.3.4"}},
}
deps.resolver.mu.Unlock() deps.resolver.mu.Unlock()
w.RunOnce(ctx) w.RunOnce(ctx)
@@ -588,61 +506,6 @@ func TestTLSExpiryWarning(t *testing.T) {
} }
} }
func TestTLSExpiryWarningDedup(t *testing.T) {
t.Parallel()
cfg := defaultTestConfig(t)
cfg.Hostnames = []string{"www.example.com"}
cfg.TLSInterval = 24 * time.Hour
w, deps := newTestWatcher(t, cfg)
deps.resolver.allRecords["www.example.com"] = map[string]map[string][]string{
"ns1.example.com.": {"A": {"1.2.3.4"}},
}
deps.resolver.ipAddresses["www.example.com"] = []string{
"1.2.3.4",
}
deps.portChecker.results["1.2.3.4:80"] = true
deps.portChecker.results["1.2.3.4:443"] = true
deps.tlsChecker.certs["1.2.3.4:www.example.com"] = &tlscheck.CertificateInfo{
CommonName: "www.example.com",
Issuer: "DigiCert",
NotAfter: time.Now().Add(3 * 24 * time.Hour),
SubjectAlternativeNames: []string{
"www.example.com",
},
}
ctx := t.Context()
// First run = baseline, no notifications
w.RunOnce(ctx)
// Second run should fire one expiry warning
w.RunOnce(ctx)
// Third run should NOT fire another warning (dedup)
w.RunOnce(ctx)
notifications := deps.notifier.getNotifications()
expiryCount := 0
for _, n := range notifications {
if n.Title == "TLS Expiry Warning: www.example.com" {
expiryCount++
}
}
if expiryCount != 1 {
t.Errorf(
"expected exactly 1 expiry warning (dedup), got %d",
expiryCount,
)
}
}
func TestGracefulShutdown(t *testing.T) { func TestGracefulShutdown(t *testing.T) {
t.Parallel() t.Parallel()
@@ -656,11 +519,6 @@ func TestGracefulShutdown(t *testing.T) {
deps.resolver.nsRecords["example.com"] = []string{ deps.resolver.nsRecords["example.com"] = []string{
"ns1.example.com.", "ns1.example.com.",
} }
deps.resolver.allRecords["example.com"] = map[string]map[string][]string{
"ns1.example.com.": {"A": {"1.2.3.4"}},
}
deps.portChecker.results["1.2.3.4:80"] = false
deps.portChecker.results["1.2.3.4:443"] = false
ctx, cancel := context.WithCancel(t.Context()) ctx, cancel := context.WithCancel(t.Context())