From 461ab7e06c61ba42068471f5ea68527c6716a8b4 Mon Sep 17 00:00:00 2001 From: user Date: Sun, 1 Mar 2026 15:03:02 -0800 Subject: [PATCH] fix: enforce DNS-first ordering for port and TLS checks MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit DNS checks now always complete before port or TLS checks begin, ensuring those checks use freshly resolved IP addresses instead of potentially stale ones from a previous cycle. Changes: - Extract runDNSChecks() from runDNSAndPortChecks() so DNS resolution can be invoked independently - Run DNS before TLS on the TLS ticker (previously TLS ran alone with whatever IPs were in state from the last DNS cycle, up to 12h stale) - RunOnce uses explicit 3-phase ordering: DNS → ports → TLS - Add TestDNSRunsBeforePortAndTLSChecks to verify fresh IPs propagate - Update README monitoring lifecycle to document DNS-first ordering closes #58 --- README.md | 12 ++++-- internal/watcher/watcher.go | 35 ++++++++++++--- internal/watcher/watcher_test.go | 74 ++++++++++++++++++++++++++++++++ 3 files changed, 113 insertions(+), 8 deletions(-) diff --git a/README.md b/README.md index 11e0819..458cd42 100644 --- a/README.md +++ b/README.md @@ -367,9 +367,15 @@ docker run -d \ triggering change notifications). 2. **Initial check**: Immediately perform all DNS, port, and TLS checks on startup. -3. **Periodic checks**: - - DNS and port checks: every `DNSWATCHER_DNS_INTERVAL` (default 1h). - - TLS checks: every `DNSWATCHER_TLS_INTERVAL` (default 12h). +3. **Periodic checks** (DNS always runs first): + - DNS checks: every `DNSWATCHER_DNS_INTERVAL` (default 1h). Also + re-run before every TLS check cycle to ensure fresh IPs. + - Port checks: every `DNSWATCHER_DNS_INTERVAL`, after DNS completes. + - TLS checks: every `DNSWATCHER_TLS_INTERVAL` (default 12h), after + DNS completes. + - Port and TLS checks always use freshly resolved IP addresses from + the DNS phase that immediately precedes them — never stale IPs + from a previous cycle. 4. **On change detection**: Send notifications to all configured endpoints, update in-memory state, persist to disk. 5. **Shutdown**: Persist final state to disk, complete in-flight diff --git a/internal/watcher/watcher.go b/internal/watcher/watcher.go index 6a48ce6..44ebc0e 100644 --- a/internal/watcher/watcher.go +++ b/internal/watcher/watcher.go @@ -141,9 +141,16 @@ func (w *Watcher) Run(ctx context.Context) { return case <-dnsTicker.C: - w.runDNSAndPortChecks(ctx) + w.runDNSChecks(ctx) + + w.checkAllPorts(ctx) w.saveState() case <-tlsTicker.C: + // Run DNS first so TLS checks use freshly + // resolved IP addresses, not stale ones from + // a previous cycle. + w.runDNSChecks(ctx) + w.runTLSChecks(ctx) w.saveState() } @@ -151,10 +158,26 @@ func (w *Watcher) Run(ctx context.Context) { } // RunOnce performs a single complete monitoring cycle. +// DNS checks run first so that port and TLS checks use +// freshly resolved IP addresses. Port checks run before +// TLS because TLS checks only target IPs with an open +// port 443. func (w *Watcher) RunOnce(ctx context.Context) { w.detectFirstRun() - w.runDNSAndPortChecks(ctx) + + // Phase 1: DNS resolution must complete first so that + // subsequent checks use fresh IP addresses. + w.runDNSChecks(ctx) + + // Phase 2: Port checks populate port state that TLS + // checks depend on (TLS only targets IPs where port + // 443 is open). + w.checkAllPorts(ctx) + + // Phase 3: TLS checks use fresh DNS IPs and current + // port state. w.runTLSChecks(ctx) + w.saveState() w.firstRun = false } @@ -171,7 +194,11 @@ func (w *Watcher) detectFirstRun() { } } -func (w *Watcher) runDNSAndPortChecks(ctx context.Context) { +// runDNSChecks performs DNS resolution for all configured domains +// and hostnames, updating state with freshly resolved records. +// This must complete before port or TLS checks run so those +// checks operate on current IP addresses. +func (w *Watcher) runDNSChecks(ctx context.Context) { for _, domain := range w.config.Domains { w.checkDomain(ctx, domain) } @@ -179,8 +206,6 @@ func (w *Watcher) runDNSAndPortChecks(ctx context.Context) { for _, hostname := range w.config.Hostnames { w.checkHostname(ctx, hostname) } - - w.checkAllPorts(ctx) } func (w *Watcher) checkDomain( diff --git a/internal/watcher/watcher_test.go b/internal/watcher/watcher_test.go index 43514eb..54d444f 100644 --- a/internal/watcher/watcher_test.go +++ b/internal/watcher/watcher_test.go @@ -682,6 +682,80 @@ func TestGracefulShutdown(t *testing.T) { } } +func setupHostnameIP( + deps *testDeps, + hostname, ip string, +) { + deps.resolver.allRecords[hostname] = map[string]map[string][]string{ + "ns1.example.com.": {"A": {ip}}, + } + deps.portChecker.results[ip+":80"] = true + deps.portChecker.results[ip+":443"] = true + deps.tlsChecker.certs[ip+":"+hostname] = &tlscheck.CertificateInfo{ + CommonName: hostname, + Issuer: "DigiCert", + NotAfter: time.Now().Add(90 * 24 * time.Hour), + SubjectAlternativeNames: []string{hostname}, + } +} + +func updateHostnameIP(deps *testDeps, hostname, ip string) { + deps.resolver.mu.Lock() + deps.resolver.allRecords[hostname] = map[string]map[string][]string{ + "ns1.example.com.": {"A": {ip}}, + } + deps.resolver.mu.Unlock() + + deps.portChecker.mu.Lock() + deps.portChecker.results[ip+":80"] = true + deps.portChecker.results[ip+":443"] = true + deps.portChecker.mu.Unlock() + + deps.tlsChecker.mu.Lock() + deps.tlsChecker.certs[ip+":"+hostname] = &tlscheck.CertificateInfo{ + CommonName: hostname, + Issuer: "DigiCert", + NotAfter: time.Now().Add(90 * 24 * time.Hour), + SubjectAlternativeNames: []string{hostname}, + } + deps.tlsChecker.mu.Unlock() +} + +func TestDNSRunsBeforePortAndTLSChecks(t *testing.T) { + t.Parallel() + + cfg := defaultTestConfig(t) + cfg.Hostnames = []string{"www.example.com"} + + w, deps := newTestWatcher(t, cfg) + + setupHostnameIP(deps, "www.example.com", "10.0.0.1") + + ctx := t.Context() + w.RunOnce(ctx) + + snap := deps.state.GetSnapshot() + if _, ok := snap.Ports["10.0.0.1:80"]; !ok { + t.Fatal("expected port state for 10.0.0.1:80") + } + + // DNS changes to a new IP; port and TLS must pick it up. + updateHostnameIP(deps, "www.example.com", "10.0.0.2") + + w.RunOnce(ctx) + + snap = deps.state.GetSnapshot() + + if _, ok := snap.Ports["10.0.0.2:80"]; !ok { + t.Error("port check used stale DNS: missing 10.0.0.2:80") + } + + certKey := "10.0.0.2:443:www.example.com" + if _, ok := snap.Certificates[certKey]; !ok { + t.Error("TLS check used stale DNS: missing " + certKey) + } +} + func TestNSFailureAndRecovery(t *testing.T) { t.Parallel()