fix: enforce DNS-first ordering for port and TLS checks
All checks were successful
check / check (push) Successful in 46s

DNS checks now always complete before port or TLS checks begin,
ensuring those checks use freshly resolved IP addresses instead of
potentially stale ones from a previous cycle.

Changes:
- Extract runDNSChecks() from runDNSAndPortChecks() so DNS resolution
  can be invoked independently
- Run DNS before TLS on the TLS ticker (previously TLS ran alone with
  whatever IPs were in state from the last DNS cycle, up to 12h stale)
- RunOnce uses explicit 3-phase ordering: DNS → ports → TLS
- Add TestDNSRunsBeforePortAndTLSChecks to verify fresh IPs propagate
- Update README monitoring lifecycle to document DNS-first ordering

closes #58
This commit is contained in:
user
2026-03-01 15:03:02 -08:00
parent 2835c2dc43
commit 461ab7e06c
3 changed files with 113 additions and 8 deletions

View File

@@ -367,9 +367,15 @@ docker run -d \
triggering change notifications).
2. **Initial check**: Immediately perform all DNS, port, and TLS checks
on startup.
3. **Periodic checks**:
- DNS and port checks: every `DNSWATCHER_DNS_INTERVAL` (default 1h).
- TLS checks: every `DNSWATCHER_TLS_INTERVAL` (default 12h).
3. **Periodic checks** (DNS always runs first):
- DNS checks: every `DNSWATCHER_DNS_INTERVAL` (default 1h). Also
re-run before every TLS check cycle to ensure fresh IPs.
- Port checks: every `DNSWATCHER_DNS_INTERVAL`, after DNS completes.
- TLS checks: every `DNSWATCHER_TLS_INTERVAL` (default 12h), after
DNS completes.
- Port and TLS checks always use freshly resolved IP addresses from
the DNS phase that immediately precedes them — never stale IPs
from a previous cycle.
4. **On change detection**: Send notifications to all configured
endpoints, update in-memory state, persist to disk.
5. **Shutdown**: Persist final state to disk, complete in-flight

View File

@@ -141,9 +141,16 @@ func (w *Watcher) Run(ctx context.Context) {
return
case <-dnsTicker.C:
w.runDNSAndPortChecks(ctx)
w.runDNSChecks(ctx)
w.checkAllPorts(ctx)
w.saveState()
case <-tlsTicker.C:
// Run DNS first so TLS checks use freshly
// resolved IP addresses, not stale ones from
// a previous cycle.
w.runDNSChecks(ctx)
w.runTLSChecks(ctx)
w.saveState()
}
@@ -151,10 +158,26 @@ func (w *Watcher) Run(ctx context.Context) {
}
// RunOnce performs a single complete monitoring cycle.
// DNS checks run first so that port and TLS checks use
// freshly resolved IP addresses. Port checks run before
// TLS because TLS checks only target IPs with an open
// port 443.
func (w *Watcher) RunOnce(ctx context.Context) {
w.detectFirstRun()
w.runDNSAndPortChecks(ctx)
// Phase 1: DNS resolution must complete first so that
// subsequent checks use fresh IP addresses.
w.runDNSChecks(ctx)
// Phase 2: Port checks populate port state that TLS
// checks depend on (TLS only targets IPs where port
// 443 is open).
w.checkAllPorts(ctx)
// Phase 3: TLS checks use fresh DNS IPs and current
// port state.
w.runTLSChecks(ctx)
w.saveState()
w.firstRun = false
}
@@ -171,7 +194,11 @@ func (w *Watcher) detectFirstRun() {
}
}
func (w *Watcher) runDNSAndPortChecks(ctx context.Context) {
// runDNSChecks performs DNS resolution for all configured domains
// and hostnames, updating state with freshly resolved records.
// This must complete before port or TLS checks run so those
// checks operate on current IP addresses.
func (w *Watcher) runDNSChecks(ctx context.Context) {
for _, domain := range w.config.Domains {
w.checkDomain(ctx, domain)
}
@@ -179,8 +206,6 @@ func (w *Watcher) runDNSAndPortChecks(ctx context.Context) {
for _, hostname := range w.config.Hostnames {
w.checkHostname(ctx, hostname)
}
w.checkAllPorts(ctx)
}
func (w *Watcher) checkDomain(

View File

@@ -682,6 +682,80 @@ func TestGracefulShutdown(t *testing.T) {
}
}
func setupHostnameIP(
deps *testDeps,
hostname, ip string,
) {
deps.resolver.allRecords[hostname] = map[string]map[string][]string{
"ns1.example.com.": {"A": {ip}},
}
deps.portChecker.results[ip+":80"] = true
deps.portChecker.results[ip+":443"] = true
deps.tlsChecker.certs[ip+":"+hostname] = &tlscheck.CertificateInfo{
CommonName: hostname,
Issuer: "DigiCert",
NotAfter: time.Now().Add(90 * 24 * time.Hour),
SubjectAlternativeNames: []string{hostname},
}
}
func updateHostnameIP(deps *testDeps, hostname, ip string) {
deps.resolver.mu.Lock()
deps.resolver.allRecords[hostname] = map[string]map[string][]string{
"ns1.example.com.": {"A": {ip}},
}
deps.resolver.mu.Unlock()
deps.portChecker.mu.Lock()
deps.portChecker.results[ip+":80"] = true
deps.portChecker.results[ip+":443"] = true
deps.portChecker.mu.Unlock()
deps.tlsChecker.mu.Lock()
deps.tlsChecker.certs[ip+":"+hostname] = &tlscheck.CertificateInfo{
CommonName: hostname,
Issuer: "DigiCert",
NotAfter: time.Now().Add(90 * 24 * time.Hour),
SubjectAlternativeNames: []string{hostname},
}
deps.tlsChecker.mu.Unlock()
}
func TestDNSRunsBeforePortAndTLSChecks(t *testing.T) {
t.Parallel()
cfg := defaultTestConfig(t)
cfg.Hostnames = []string{"www.example.com"}
w, deps := newTestWatcher(t, cfg)
setupHostnameIP(deps, "www.example.com", "10.0.0.1")
ctx := t.Context()
w.RunOnce(ctx)
snap := deps.state.GetSnapshot()
if _, ok := snap.Ports["10.0.0.1:80"]; !ok {
t.Fatal("expected port state for 10.0.0.1:80")
}
// DNS changes to a new IP; port and TLS must pick it up.
updateHostnameIP(deps, "www.example.com", "10.0.0.2")
w.RunOnce(ctx)
snap = deps.state.GetSnapshot()
if _, ok := snap.Ports["10.0.0.2:80"]; !ok {
t.Error("port check used stale DNS: missing 10.0.0.2:80")
}
certKey := "10.0.0.2:443:www.example.com"
if _, ok := snap.Certificates[certKey]; !ok {
t.Error("TLS check used stale DNS: missing " + certKey)
}
}
func TestNSFailureAndRecovery(t *testing.T) {
t.Parallel()