fix: enforce DNS-first ordering for port and TLS checks (#64)
All checks were successful
check / check (push) Successful in 8s
All checks were successful
check / check (push) Successful in 8s
## Summary DNS checks now always complete before port or TLS checks begin, ensuring those checks use freshly resolved IP addresses instead of potentially stale ones from a previous cycle. ## Problem Port and TLS checks read IP addresses from state that was populated during the most recent DNS check. If DNS changes between cycles, port/TLS checks may target stale IPs. In particular, when the TLS ticker fired (every 12h), it ran `runTLSChecks` without refreshing DNS first — meaning TLS checks could use IPs that were up to 12 hours old. ## Changes - **Extract `runDNSChecks()`** from the former `runDNSAndPortChecks()` so DNS resolution can be invoked independently as a prerequisite for any check type. - **TLS ticker now runs DNS first**: When the TLS ticker fires, DNS checks run before TLS checks, ensuring fresh IPs. - **`RunOnce` uses explicit 3-phase ordering**: DNS → ports → TLS. Port checks must complete before TLS because TLS checks only target IPs where port 443 is open. - **New test `TestDNSRunsBeforePortAndTLSChecks`**: Verifies that when DNS IPs change between cycles, port and TLS checks pick up the new IPs. - **README updated**: Monitoring lifecycle section now documents the DNS-first ordering guarantee. ## Check ordering | Trigger | Phase 1 | Phase 2 | Phase 3 | |---------|---------|---------|----------| | Startup (`RunOnce`) | DNS | Ports | TLS | | DNS ticker | DNS | Ports | — | | TLS ticker | DNS | — | TLS | closes #58 Co-authored-by: user <user@Mac.lan guest wan> Reviewed-on: #64 Co-authored-by: clawbot <clawbot@noreply.example.org> Co-committed-by: clawbot <clawbot@noreply.example.org>
This commit was merged in pull request #64.
This commit is contained in:
12
README.md
12
README.md
@@ -367,9 +367,15 @@ docker run -d \
|
||||
triggering change notifications).
|
||||
2. **Initial check**: Immediately perform all DNS, port, and TLS checks
|
||||
on startup.
|
||||
3. **Periodic checks**:
|
||||
- DNS and port checks: every `DNSWATCHER_DNS_INTERVAL` (default 1h).
|
||||
- TLS checks: every `DNSWATCHER_TLS_INTERVAL` (default 12h).
|
||||
3. **Periodic checks** (DNS always runs first):
|
||||
- DNS checks: every `DNSWATCHER_DNS_INTERVAL` (default 1h). Also
|
||||
re-run before every TLS check cycle to ensure fresh IPs.
|
||||
- Port checks: every `DNSWATCHER_DNS_INTERVAL`, after DNS completes.
|
||||
- TLS checks: every `DNSWATCHER_TLS_INTERVAL` (default 12h), after
|
||||
DNS completes.
|
||||
- Port and TLS checks always use freshly resolved IP addresses from
|
||||
the DNS phase that immediately precedes them — never stale IPs
|
||||
from a previous cycle.
|
||||
4. **On change detection**: Send notifications to all configured
|
||||
endpoints, update in-memory state, persist to disk.
|
||||
5. **Shutdown**: Persist final state to disk, complete in-flight
|
||||
|
||||
@@ -141,9 +141,16 @@ func (w *Watcher) Run(ctx context.Context) {
|
||||
|
||||
return
|
||||
case <-dnsTicker.C:
|
||||
w.runDNSAndPortChecks(ctx)
|
||||
w.runDNSChecks(ctx)
|
||||
|
||||
w.checkAllPorts(ctx)
|
||||
w.saveState()
|
||||
case <-tlsTicker.C:
|
||||
// Run DNS first so TLS checks use freshly
|
||||
// resolved IP addresses, not stale ones from
|
||||
// a previous cycle.
|
||||
w.runDNSChecks(ctx)
|
||||
|
||||
w.runTLSChecks(ctx)
|
||||
w.saveState()
|
||||
}
|
||||
@@ -151,10 +158,26 @@ func (w *Watcher) Run(ctx context.Context) {
|
||||
}
|
||||
|
||||
// RunOnce performs a single complete monitoring cycle.
|
||||
// DNS checks run first so that port and TLS checks use
|
||||
// freshly resolved IP addresses. Port checks run before
|
||||
// TLS because TLS checks only target IPs with an open
|
||||
// port 443.
|
||||
func (w *Watcher) RunOnce(ctx context.Context) {
|
||||
w.detectFirstRun()
|
||||
w.runDNSAndPortChecks(ctx)
|
||||
|
||||
// Phase 1: DNS resolution must complete first so that
|
||||
// subsequent checks use fresh IP addresses.
|
||||
w.runDNSChecks(ctx)
|
||||
|
||||
// Phase 2: Port checks populate port state that TLS
|
||||
// checks depend on (TLS only targets IPs where port
|
||||
// 443 is open).
|
||||
w.checkAllPorts(ctx)
|
||||
|
||||
// Phase 3: TLS checks use fresh DNS IPs and current
|
||||
// port state.
|
||||
w.runTLSChecks(ctx)
|
||||
|
||||
w.saveState()
|
||||
w.firstRun = false
|
||||
}
|
||||
@@ -171,7 +194,11 @@ func (w *Watcher) detectFirstRun() {
|
||||
}
|
||||
}
|
||||
|
||||
func (w *Watcher) runDNSAndPortChecks(ctx context.Context) {
|
||||
// runDNSChecks performs DNS resolution for all configured domains
|
||||
// and hostnames, updating state with freshly resolved records.
|
||||
// This must complete before port or TLS checks run so those
|
||||
// checks operate on current IP addresses.
|
||||
func (w *Watcher) runDNSChecks(ctx context.Context) {
|
||||
for _, domain := range w.config.Domains {
|
||||
w.checkDomain(ctx, domain)
|
||||
}
|
||||
@@ -179,8 +206,6 @@ func (w *Watcher) runDNSAndPortChecks(ctx context.Context) {
|
||||
for _, hostname := range w.config.Hostnames {
|
||||
w.checkHostname(ctx, hostname)
|
||||
}
|
||||
|
||||
w.checkAllPorts(ctx)
|
||||
}
|
||||
|
||||
func (w *Watcher) checkDomain(
|
||||
|
||||
@@ -682,6 +682,80 @@ func TestGracefulShutdown(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func setupHostnameIP(
|
||||
deps *testDeps,
|
||||
hostname, ip string,
|
||||
) {
|
||||
deps.resolver.allRecords[hostname] = map[string]map[string][]string{
|
||||
"ns1.example.com.": {"A": {ip}},
|
||||
}
|
||||
deps.portChecker.results[ip+":80"] = true
|
||||
deps.portChecker.results[ip+":443"] = true
|
||||
deps.tlsChecker.certs[ip+":"+hostname] = &tlscheck.CertificateInfo{
|
||||
CommonName: hostname,
|
||||
Issuer: "DigiCert",
|
||||
NotAfter: time.Now().Add(90 * 24 * time.Hour),
|
||||
SubjectAlternativeNames: []string{hostname},
|
||||
}
|
||||
}
|
||||
|
||||
func updateHostnameIP(deps *testDeps, hostname, ip string) {
|
||||
deps.resolver.mu.Lock()
|
||||
deps.resolver.allRecords[hostname] = map[string]map[string][]string{
|
||||
"ns1.example.com.": {"A": {ip}},
|
||||
}
|
||||
deps.resolver.mu.Unlock()
|
||||
|
||||
deps.portChecker.mu.Lock()
|
||||
deps.portChecker.results[ip+":80"] = true
|
||||
deps.portChecker.results[ip+":443"] = true
|
||||
deps.portChecker.mu.Unlock()
|
||||
|
||||
deps.tlsChecker.mu.Lock()
|
||||
deps.tlsChecker.certs[ip+":"+hostname] = &tlscheck.CertificateInfo{
|
||||
CommonName: hostname,
|
||||
Issuer: "DigiCert",
|
||||
NotAfter: time.Now().Add(90 * 24 * time.Hour),
|
||||
SubjectAlternativeNames: []string{hostname},
|
||||
}
|
||||
deps.tlsChecker.mu.Unlock()
|
||||
}
|
||||
|
||||
func TestDNSRunsBeforePortAndTLSChecks(t *testing.T) {
|
||||
t.Parallel()
|
||||
|
||||
cfg := defaultTestConfig(t)
|
||||
cfg.Hostnames = []string{"www.example.com"}
|
||||
|
||||
w, deps := newTestWatcher(t, cfg)
|
||||
|
||||
setupHostnameIP(deps, "www.example.com", "10.0.0.1")
|
||||
|
||||
ctx := t.Context()
|
||||
w.RunOnce(ctx)
|
||||
|
||||
snap := deps.state.GetSnapshot()
|
||||
if _, ok := snap.Ports["10.0.0.1:80"]; !ok {
|
||||
t.Fatal("expected port state for 10.0.0.1:80")
|
||||
}
|
||||
|
||||
// DNS changes to a new IP; port and TLS must pick it up.
|
||||
updateHostnameIP(deps, "www.example.com", "10.0.0.2")
|
||||
|
||||
w.RunOnce(ctx)
|
||||
|
||||
snap = deps.state.GetSnapshot()
|
||||
|
||||
if _, ok := snap.Ports["10.0.0.2:80"]; !ok {
|
||||
t.Error("port check used stale DNS: missing 10.0.0.2:80")
|
||||
}
|
||||
|
||||
certKey := "10.0.0.2:443:www.example.com"
|
||||
if _, ok := snap.Certificates[certKey]; !ok {
|
||||
t.Error("TLS check used stale DNS: missing " + certKey)
|
||||
}
|
||||
}
|
||||
|
||||
func TestNSFailureAndRecovery(t *testing.T) {
|
||||
t.Parallel()
|
||||
|
||||
|
||||
Reference in New Issue
Block a user