From ee14bd01aee7271ffa1ebf73ece42b779a6fd455 Mon Sep 17 00:00:00 2001 From: clawbot Date: Mon, 2 Mar 2026 00:10:49 +0100 Subject: [PATCH] fix: enforce DNS-first ordering for port and TLS checks (#64) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ## Summary DNS checks now always complete before port or TLS checks begin, ensuring those checks use freshly resolved IP addresses instead of potentially stale ones from a previous cycle. ## Problem Port and TLS checks read IP addresses from state that was populated during the most recent DNS check. If DNS changes between cycles, port/TLS checks may target stale IPs. In particular, when the TLS ticker fired (every 12h), it ran `runTLSChecks` without refreshing DNS first — meaning TLS checks could use IPs that were up to 12 hours old. ## Changes - **Extract `runDNSChecks()`** from the former `runDNSAndPortChecks()` so DNS resolution can be invoked independently as a prerequisite for any check type. - **TLS ticker now runs DNS first**: When the TLS ticker fires, DNS checks run before TLS checks, ensuring fresh IPs. - **`RunOnce` uses explicit 3-phase ordering**: DNS → ports → TLS. Port checks must complete before TLS because TLS checks only target IPs where port 443 is open. - **New test `TestDNSRunsBeforePortAndTLSChecks`**: Verifies that when DNS IPs change between cycles, port and TLS checks pick up the new IPs. - **README updated**: Monitoring lifecycle section now documents the DNS-first ordering guarantee. ## Check ordering | Trigger | Phase 1 | Phase 2 | Phase 3 | |---------|---------|---------|----------| | Startup (`RunOnce`) | DNS | Ports | TLS | | DNS ticker | DNS | Ports | — | | TLS ticker | DNS | — | TLS | closes https://git.eeqj.de/sneak/dnswatcher/issues/58 Co-authored-by: user Reviewed-on: https://git.eeqj.de/sneak/dnswatcher/pulls/64 Co-authored-by: clawbot Co-committed-by: clawbot --- README.md | 12 ++++-- internal/watcher/watcher.go | 35 ++++++++++++--- internal/watcher/watcher_test.go | 74 ++++++++++++++++++++++++++++++++ 3 files changed, 113 insertions(+), 8 deletions(-) diff --git a/README.md b/README.md index 11e0819..458cd42 100644 --- a/README.md +++ b/README.md @@ -367,9 +367,15 @@ docker run -d \ triggering change notifications). 2. **Initial check**: Immediately perform all DNS, port, and TLS checks on startup. -3. **Periodic checks**: - - DNS and port checks: every `DNSWATCHER_DNS_INTERVAL` (default 1h). - - TLS checks: every `DNSWATCHER_TLS_INTERVAL` (default 12h). +3. **Periodic checks** (DNS always runs first): + - DNS checks: every `DNSWATCHER_DNS_INTERVAL` (default 1h). Also + re-run before every TLS check cycle to ensure fresh IPs. + - Port checks: every `DNSWATCHER_DNS_INTERVAL`, after DNS completes. + - TLS checks: every `DNSWATCHER_TLS_INTERVAL` (default 12h), after + DNS completes. + - Port and TLS checks always use freshly resolved IP addresses from + the DNS phase that immediately precedes them — never stale IPs + from a previous cycle. 4. **On change detection**: Send notifications to all configured endpoints, update in-memory state, persist to disk. 5. **Shutdown**: Persist final state to disk, complete in-flight diff --git a/internal/watcher/watcher.go b/internal/watcher/watcher.go index 6a48ce6..44ebc0e 100644 --- a/internal/watcher/watcher.go +++ b/internal/watcher/watcher.go @@ -141,9 +141,16 @@ func (w *Watcher) Run(ctx context.Context) { return case <-dnsTicker.C: - w.runDNSAndPortChecks(ctx) + w.runDNSChecks(ctx) + + w.checkAllPorts(ctx) w.saveState() case <-tlsTicker.C: + // Run DNS first so TLS checks use freshly + // resolved IP addresses, not stale ones from + // a previous cycle. + w.runDNSChecks(ctx) + w.runTLSChecks(ctx) w.saveState() } @@ -151,10 +158,26 @@ func (w *Watcher) Run(ctx context.Context) { } // RunOnce performs a single complete monitoring cycle. +// DNS checks run first so that port and TLS checks use +// freshly resolved IP addresses. Port checks run before +// TLS because TLS checks only target IPs with an open +// port 443. func (w *Watcher) RunOnce(ctx context.Context) { w.detectFirstRun() - w.runDNSAndPortChecks(ctx) + + // Phase 1: DNS resolution must complete first so that + // subsequent checks use fresh IP addresses. + w.runDNSChecks(ctx) + + // Phase 2: Port checks populate port state that TLS + // checks depend on (TLS only targets IPs where port + // 443 is open). + w.checkAllPorts(ctx) + + // Phase 3: TLS checks use fresh DNS IPs and current + // port state. w.runTLSChecks(ctx) + w.saveState() w.firstRun = false } @@ -171,7 +194,11 @@ func (w *Watcher) detectFirstRun() { } } -func (w *Watcher) runDNSAndPortChecks(ctx context.Context) { +// runDNSChecks performs DNS resolution for all configured domains +// and hostnames, updating state with freshly resolved records. +// This must complete before port or TLS checks run so those +// checks operate on current IP addresses. +func (w *Watcher) runDNSChecks(ctx context.Context) { for _, domain := range w.config.Domains { w.checkDomain(ctx, domain) } @@ -179,8 +206,6 @@ func (w *Watcher) runDNSAndPortChecks(ctx context.Context) { for _, hostname := range w.config.Hostnames { w.checkHostname(ctx, hostname) } - - w.checkAllPorts(ctx) } func (w *Watcher) checkDomain( diff --git a/internal/watcher/watcher_test.go b/internal/watcher/watcher_test.go index 43514eb..54d444f 100644 --- a/internal/watcher/watcher_test.go +++ b/internal/watcher/watcher_test.go @@ -682,6 +682,80 @@ func TestGracefulShutdown(t *testing.T) { } } +func setupHostnameIP( + deps *testDeps, + hostname, ip string, +) { + deps.resolver.allRecords[hostname] = map[string]map[string][]string{ + "ns1.example.com.": {"A": {ip}}, + } + deps.portChecker.results[ip+":80"] = true + deps.portChecker.results[ip+":443"] = true + deps.tlsChecker.certs[ip+":"+hostname] = &tlscheck.CertificateInfo{ + CommonName: hostname, + Issuer: "DigiCert", + NotAfter: time.Now().Add(90 * 24 * time.Hour), + SubjectAlternativeNames: []string{hostname}, + } +} + +func updateHostnameIP(deps *testDeps, hostname, ip string) { + deps.resolver.mu.Lock() + deps.resolver.allRecords[hostname] = map[string]map[string][]string{ + "ns1.example.com.": {"A": {ip}}, + } + deps.resolver.mu.Unlock() + + deps.portChecker.mu.Lock() + deps.portChecker.results[ip+":80"] = true + deps.portChecker.results[ip+":443"] = true + deps.portChecker.mu.Unlock() + + deps.tlsChecker.mu.Lock() + deps.tlsChecker.certs[ip+":"+hostname] = &tlscheck.CertificateInfo{ + CommonName: hostname, + Issuer: "DigiCert", + NotAfter: time.Now().Add(90 * 24 * time.Hour), + SubjectAlternativeNames: []string{hostname}, + } + deps.tlsChecker.mu.Unlock() +} + +func TestDNSRunsBeforePortAndTLSChecks(t *testing.T) { + t.Parallel() + + cfg := defaultTestConfig(t) + cfg.Hostnames = []string{"www.example.com"} + + w, deps := newTestWatcher(t, cfg) + + setupHostnameIP(deps, "www.example.com", "10.0.0.1") + + ctx := t.Context() + w.RunOnce(ctx) + + snap := deps.state.GetSnapshot() + if _, ok := snap.Ports["10.0.0.1:80"]; !ok { + t.Fatal("expected port state for 10.0.0.1:80") + } + + // DNS changes to a new IP; port and TLS must pick it up. + updateHostnameIP(deps, "www.example.com", "10.0.0.2") + + w.RunOnce(ctx) + + snap = deps.state.GetSnapshot() + + if _, ok := snap.Ports["10.0.0.2:80"]; !ok { + t.Error("port check used stale DNS: missing 10.0.0.2:80") + } + + certKey := "10.0.0.2:443:www.example.com" + if _, ok := snap.Certificates[certKey]; !ok { + t.Error("TLS check used stale DNS: missing " + certKey) + } +} + func TestNSFailureAndRecovery(t *testing.T) { t.Parallel()