fix: enforce DNS-first ordering for port and TLS checks
All checks were successful
check / check (push) Successful in 46s
All checks were successful
check / check (push) Successful in 46s
DNS checks now always complete before port or TLS checks begin, ensuring those checks use freshly resolved IP addresses instead of potentially stale ones from a previous cycle. Changes: - Extract runDNSChecks() from runDNSAndPortChecks() so DNS resolution can be invoked independently - Run DNS before TLS on the TLS ticker (previously TLS ran alone with whatever IPs were in state from the last DNS cycle, up to 12h stale) - RunOnce uses explicit 3-phase ordering: DNS → ports → TLS - Add TestDNSRunsBeforePortAndTLSChecks to verify fresh IPs propagate - Update README monitoring lifecycle to document DNS-first ordering closes #58
This commit is contained in:
12
README.md
12
README.md
@@ -367,9 +367,15 @@ docker run -d \
|
||||
triggering change notifications).
|
||||
2. **Initial check**: Immediately perform all DNS, port, and TLS checks
|
||||
on startup.
|
||||
3. **Periodic checks**:
|
||||
- DNS and port checks: every `DNSWATCHER_DNS_INTERVAL` (default 1h).
|
||||
- TLS checks: every `DNSWATCHER_TLS_INTERVAL` (default 12h).
|
||||
3. **Periodic checks** (DNS always runs first):
|
||||
- DNS checks: every `DNSWATCHER_DNS_INTERVAL` (default 1h). Also
|
||||
re-run before every TLS check cycle to ensure fresh IPs.
|
||||
- Port checks: every `DNSWATCHER_DNS_INTERVAL`, after DNS completes.
|
||||
- TLS checks: every `DNSWATCHER_TLS_INTERVAL` (default 12h), after
|
||||
DNS completes.
|
||||
- Port and TLS checks always use freshly resolved IP addresses from
|
||||
the DNS phase that immediately precedes them — never stale IPs
|
||||
from a previous cycle.
|
||||
4. **On change detection**: Send notifications to all configured
|
||||
endpoints, update in-memory state, persist to disk.
|
||||
5. **Shutdown**: Persist final state to disk, complete in-flight
|
||||
|
||||
@@ -141,9 +141,16 @@ func (w *Watcher) Run(ctx context.Context) {
|
||||
|
||||
return
|
||||
case <-dnsTicker.C:
|
||||
w.runDNSAndPortChecks(ctx)
|
||||
w.runDNSChecks(ctx)
|
||||
|
||||
w.checkAllPorts(ctx)
|
||||
w.saveState()
|
||||
case <-tlsTicker.C:
|
||||
// Run DNS first so TLS checks use freshly
|
||||
// resolved IP addresses, not stale ones from
|
||||
// a previous cycle.
|
||||
w.runDNSChecks(ctx)
|
||||
|
||||
w.runTLSChecks(ctx)
|
||||
w.saveState()
|
||||
}
|
||||
@@ -151,10 +158,26 @@ func (w *Watcher) Run(ctx context.Context) {
|
||||
}
|
||||
|
||||
// RunOnce performs a single complete monitoring cycle.
|
||||
// DNS checks run first so that port and TLS checks use
|
||||
// freshly resolved IP addresses. Port checks run before
|
||||
// TLS because TLS checks only target IPs with an open
|
||||
// port 443.
|
||||
func (w *Watcher) RunOnce(ctx context.Context) {
|
||||
w.detectFirstRun()
|
||||
w.runDNSAndPortChecks(ctx)
|
||||
|
||||
// Phase 1: DNS resolution must complete first so that
|
||||
// subsequent checks use fresh IP addresses.
|
||||
w.runDNSChecks(ctx)
|
||||
|
||||
// Phase 2: Port checks populate port state that TLS
|
||||
// checks depend on (TLS only targets IPs where port
|
||||
// 443 is open).
|
||||
w.checkAllPorts(ctx)
|
||||
|
||||
// Phase 3: TLS checks use fresh DNS IPs and current
|
||||
// port state.
|
||||
w.runTLSChecks(ctx)
|
||||
|
||||
w.saveState()
|
||||
w.firstRun = false
|
||||
}
|
||||
@@ -171,7 +194,11 @@ func (w *Watcher) detectFirstRun() {
|
||||
}
|
||||
}
|
||||
|
||||
func (w *Watcher) runDNSAndPortChecks(ctx context.Context) {
|
||||
// runDNSChecks performs DNS resolution for all configured domains
|
||||
// and hostnames, updating state with freshly resolved records.
|
||||
// This must complete before port or TLS checks run so those
|
||||
// checks operate on current IP addresses.
|
||||
func (w *Watcher) runDNSChecks(ctx context.Context) {
|
||||
for _, domain := range w.config.Domains {
|
||||
w.checkDomain(ctx, domain)
|
||||
}
|
||||
@@ -179,8 +206,6 @@ func (w *Watcher) runDNSAndPortChecks(ctx context.Context) {
|
||||
for _, hostname := range w.config.Hostnames {
|
||||
w.checkHostname(ctx, hostname)
|
||||
}
|
||||
|
||||
w.checkAllPorts(ctx)
|
||||
}
|
||||
|
||||
func (w *Watcher) checkDomain(
|
||||
|
||||
@@ -682,6 +682,80 @@ func TestGracefulShutdown(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func setupHostnameIP(
|
||||
deps *testDeps,
|
||||
hostname, ip string,
|
||||
) {
|
||||
deps.resolver.allRecords[hostname] = map[string]map[string][]string{
|
||||
"ns1.example.com.": {"A": {ip}},
|
||||
}
|
||||
deps.portChecker.results[ip+":80"] = true
|
||||
deps.portChecker.results[ip+":443"] = true
|
||||
deps.tlsChecker.certs[ip+":"+hostname] = &tlscheck.CertificateInfo{
|
||||
CommonName: hostname,
|
||||
Issuer: "DigiCert",
|
||||
NotAfter: time.Now().Add(90 * 24 * time.Hour),
|
||||
SubjectAlternativeNames: []string{hostname},
|
||||
}
|
||||
}
|
||||
|
||||
func updateHostnameIP(deps *testDeps, hostname, ip string) {
|
||||
deps.resolver.mu.Lock()
|
||||
deps.resolver.allRecords[hostname] = map[string]map[string][]string{
|
||||
"ns1.example.com.": {"A": {ip}},
|
||||
}
|
||||
deps.resolver.mu.Unlock()
|
||||
|
||||
deps.portChecker.mu.Lock()
|
||||
deps.portChecker.results[ip+":80"] = true
|
||||
deps.portChecker.results[ip+":443"] = true
|
||||
deps.portChecker.mu.Unlock()
|
||||
|
||||
deps.tlsChecker.mu.Lock()
|
||||
deps.tlsChecker.certs[ip+":"+hostname] = &tlscheck.CertificateInfo{
|
||||
CommonName: hostname,
|
||||
Issuer: "DigiCert",
|
||||
NotAfter: time.Now().Add(90 * 24 * time.Hour),
|
||||
SubjectAlternativeNames: []string{hostname},
|
||||
}
|
||||
deps.tlsChecker.mu.Unlock()
|
||||
}
|
||||
|
||||
func TestDNSRunsBeforePortAndTLSChecks(t *testing.T) {
|
||||
t.Parallel()
|
||||
|
||||
cfg := defaultTestConfig(t)
|
||||
cfg.Hostnames = []string{"www.example.com"}
|
||||
|
||||
w, deps := newTestWatcher(t, cfg)
|
||||
|
||||
setupHostnameIP(deps, "www.example.com", "10.0.0.1")
|
||||
|
||||
ctx := t.Context()
|
||||
w.RunOnce(ctx)
|
||||
|
||||
snap := deps.state.GetSnapshot()
|
||||
if _, ok := snap.Ports["10.0.0.1:80"]; !ok {
|
||||
t.Fatal("expected port state for 10.0.0.1:80")
|
||||
}
|
||||
|
||||
// DNS changes to a new IP; port and TLS must pick it up.
|
||||
updateHostnameIP(deps, "www.example.com", "10.0.0.2")
|
||||
|
||||
w.RunOnce(ctx)
|
||||
|
||||
snap = deps.state.GetSnapshot()
|
||||
|
||||
if _, ok := snap.Ports["10.0.0.2:80"]; !ok {
|
||||
t.Error("port check used stale DNS: missing 10.0.0.2:80")
|
||||
}
|
||||
|
||||
certKey := "10.0.0.2:443:www.example.com"
|
||||
if _, ok := snap.Certificates[certKey]; !ok {
|
||||
t.Error("TLS check used stale DNS: missing " + certKey)
|
||||
}
|
||||
}
|
||||
|
||||
func TestNSFailureAndRecovery(t *testing.T) {
|
||||
t.Parallel()
|
||||
|
||||
|
||||
Reference in New Issue
Block a user