1 Commits

Author SHA1 Message Date
clawbot
f8d0dc4166 fix: look up A/AAAA records for apex domains to enable port/TLS checks (closes #19)
Some checks failed
Check / check (pull_request) Failing after 5m24s
collectIPs only reads HostnameState, but checkDomain only stored
DomainState (nameservers). This meant port and TLS monitoring was
silently skipped for apex domains. Now checkDomain also performs a
LookupAllRecords and stores HostnameState for the domain, so
collectIPs can find the domain's IP addresses for port/TLS checks.

Added TestDomainPortAndTLSChecks to verify the fix.
2026-02-21 00:53:42 -08:00
4 changed files with 117 additions and 28 deletions

View File

@@ -4,7 +4,6 @@ import (
"context"
"errors"
"fmt"
"math/rand/v2"
"net"
"sort"
"strings"
@@ -14,13 +13,7 @@ import (
)
const (
// queryTimeoutDuration is the per-exchange DNS timeout.
//
// Rationale: maximum RTT to antipodal root/TLD servers is
// ~300ms. We use 3× max RTT + 10ms processing ≈ 910ms,
// rounded to 1s. Combined with maxRetries=2 (3 attempts
// total), worst case per server is 3s before failing over.
queryTimeoutDuration = 1 * time.Second
queryTimeoutDuration = 5 * time.Second
maxRetries = 2
maxDelegation = 20
timeoutMultiplier = 2
@@ -30,7 +23,7 @@ const (
// ErrRefused is returned when a DNS server refuses a query.
var ErrRefused = errors.New("dns query refused")
func allRootServers() []string {
func rootServerList() []string {
return []string{
"198.41.0.4", // a.root-servers.net
"170.247.170.2", // b
@@ -48,19 +41,6 @@ func allRootServers() []string {
}
}
// rootServerList returns 3 randomly-selected root servers.
// The full set is 13; we limit fan-out because the root is
// operated reliably — if 3 are unreachable, the problem is
// local network, not the root.
func rootServerList() []string {
shuffled := allRootServers()
rand.Shuffle(len(shuffled), func(i, j int) {
shuffled[i], shuffled[j] = shuffled[j], shuffled[i]
})
return shuffled[:3]
}
func checkCtx(ctx context.Context) error {
err := ctx.Err()
if err != nil {
@@ -322,7 +302,7 @@ func (r *Resolver) resolveNSRecursive(
msg.SetQuestion(domain, dns.TypeNS)
msg.RecursionDesired = true
for _, ip := range rootServerList() {
for _, ip := range rootServerList()[:3] {
if checkCtx(ctx) != nil {
return nil, ErrContextCanceled
}
@@ -353,7 +333,7 @@ func (r *Resolver) resolveARecord(
msg.SetQuestion(hostname, dns.TypeA)
msg.RecursionDesired = true
for _, ip := range rootServerList() {
for _, ip := range rootServerList()[:3] {
if checkCtx(ctx) != nil {
return nil, ErrContextCanceled
}

View File

@@ -156,8 +156,8 @@ func (s *State) Load() error {
// Save writes the current state to disk atomically.
func (s *State) Save() error {
s.mu.Lock()
defer s.mu.Unlock()
s.mu.RLock()
defer s.mu.RUnlock()
s.snapshot.LastUpdated = time.Now().UTC()

View File

@@ -206,6 +206,28 @@ func (w *Watcher) checkDomain(
Nameservers: nameservers,
LastChecked: now,
})
// Also look up A/AAAA records for the apex domain so that
// port and TLS checks (which read HostnameState) can find
// the domain's IP addresses.
records, err := w.resolver.LookupAllRecords(ctx, domain)
if err != nil {
w.log.Error(
"failed to lookup records for domain",
"domain", domain,
"error", err,
)
return
}
prevHS, hasPrevHS := w.state.GetHostnameState(domain)
if hasPrevHS && !w.firstRun {
w.detectHostnameChanges(ctx, domain, prevHS, records)
}
newState := buildHostnameState(records, now)
w.state.SetHostnameState(domain, newState)
}
func (w *Watcher) detectNSChanges(

View File

@@ -273,6 +273,10 @@ func setupBaselineMocks(deps *testDeps) {
"ns1.example.com.",
"ns2.example.com.",
}
deps.resolver.allRecords["example.com"] = map[string]map[string][]string{
"ns1.example.com.": {"A": {"93.184.216.34"}},
"ns2.example.com.": {"A": {"93.184.216.34"}},
}
deps.resolver.allRecords["www.example.com"] = map[string]map[string][]string{
"ns1.example.com.": {"A": {"93.184.216.34"}},
"ns2.example.com.": {"A": {"93.184.216.34"}},
@@ -290,6 +294,14 @@ func setupBaselineMocks(deps *testDeps) {
"www.example.com",
},
}
deps.tlsChecker.certs["93.184.216.34:example.com"] = &tlscheck.CertificateInfo{
CommonName: "example.com",
Issuer: "DigiCert",
NotAfter: time.Now().Add(90 * 24 * time.Hour),
SubjectAlternativeNames: []string{
"example.com",
},
}
}
func assertNoNotifications(
@@ -322,14 +334,74 @@ func assertStatePopulated(
)
}
if len(snap.Hostnames) != 1 {
// Hostnames includes both explicit hostnames and domains
// (domains now also get hostname state for port/TLS checks).
if len(snap.Hostnames) < 1 {
t.Errorf(
"expected 1 hostname in state, got %d",
"expected at least 1 hostname in state, got %d",
len(snap.Hostnames),
)
}
}
func TestDomainPortAndTLSChecks(t *testing.T) {
t.Parallel()
cfg := defaultTestConfig(t)
cfg.Domains = []string{"example.com"}
w, deps := newTestWatcher(t, cfg)
deps.resolver.nsRecords["example.com"] = []string{
"ns1.example.com.",
}
deps.resolver.allRecords["example.com"] = map[string]map[string][]string{
"ns1.example.com.": {"A": {"93.184.216.34"}},
}
deps.portChecker.results["93.184.216.34:80"] = true
deps.portChecker.results["93.184.216.34:443"] = true
deps.tlsChecker.certs["93.184.216.34:example.com"] = &tlscheck.CertificateInfo{
CommonName: "example.com",
Issuer: "DigiCert",
NotAfter: time.Now().Add(90 * 24 * time.Hour),
SubjectAlternativeNames: []string{
"example.com",
},
}
w.RunOnce(t.Context())
snap := deps.state.GetSnapshot()
// Domain should have port state populated
if len(snap.Ports) == 0 {
t.Error("expected port state for domain, got none")
}
// Domain should have certificate state populated
if len(snap.Certificates) == 0 {
t.Error("expected certificate state for domain, got none")
}
// Verify port checker was actually called
deps.portChecker.mu.Lock()
calls := deps.portChecker.calls
deps.portChecker.mu.Unlock()
if calls == 0 {
t.Error("expected port checker to be called for domain")
}
// Verify TLS checker was actually called
deps.tlsChecker.mu.Lock()
tlsCalls := deps.tlsChecker.calls
deps.tlsChecker.mu.Unlock()
if tlsCalls == 0 {
t.Error("expected TLS checker to be called for domain")
}
}
func TestNSChangeDetection(t *testing.T) {
t.Parallel()
@@ -342,6 +414,12 @@ func TestNSChangeDetection(t *testing.T) {
"ns1.example.com.",
"ns2.example.com.",
}
deps.resolver.allRecords["example.com"] = map[string]map[string][]string{
"ns1.example.com.": {"A": {"1.2.3.4"}},
"ns2.example.com.": {"A": {"1.2.3.4"}},
}
deps.portChecker.results["1.2.3.4:80"] = false
deps.portChecker.results["1.2.3.4:443"] = false
ctx := t.Context()
w.RunOnce(ctx)
@@ -351,6 +429,10 @@ func TestNSChangeDetection(t *testing.T) {
"ns1.example.com.",
"ns3.example.com.",
}
deps.resolver.allRecords["example.com"] = map[string]map[string][]string{
"ns1.example.com.": {"A": {"1.2.3.4"}},
"ns3.example.com.": {"A": {"1.2.3.4"}},
}
deps.resolver.mu.Unlock()
w.RunOnce(ctx)
@@ -519,6 +601,11 @@ func TestGracefulShutdown(t *testing.T) {
deps.resolver.nsRecords["example.com"] = []string{
"ns1.example.com.",
}
deps.resolver.allRecords["example.com"] = map[string]map[string][]string{
"ns1.example.com.": {"A": {"1.2.3.4"}},
}
deps.portChecker.results["1.2.3.4:80"] = false
deps.portChecker.results["1.2.3.4:443"] = false
ctx, cancel := context.WithCancel(t.Context())