From ee14bd01aee7271ffa1ebf73ece42b779a6fd455 Mon Sep 17 00:00:00 2001
From: clawbot
Date: Mon, 2 Mar 2026 00:10:49 +0100
Subject: [PATCH 1/2] fix: enforce DNS-first ordering for port and TLS checks (#64)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

## Summary

DNS checks now always complete before port or TLS checks begin, ensuring those checks use freshly resolved IP addresses instead of potentially stale ones from a previous cycle.

## Problem

Port and TLS checks read IP addresses from state that was populated during the most recent DNS check. If DNS changes between cycles, port/TLS checks may target stale IPs. In particular, when the TLS ticker fired (every 12h), it ran `runTLSChecks` without refreshing DNS first — meaning TLS checks could use IPs that were up to 12 hours old.

## Changes

- **Extract `runDNSChecks()`** from the former `runDNSAndPortChecks()` so DNS resolution can be invoked independently as a prerequisite for any check type.
- **TLS ticker now runs DNS first**: When the TLS ticker fires, DNS checks run before TLS checks, ensuring fresh IPs.
- **`RunOnce` uses explicit 3-phase ordering**: DNS → ports → TLS. Port checks must complete before TLS because TLS checks only target IPs where port 443 is open.
- **New test `TestDNSRunsBeforePortAndTLSChecks`**: Verifies that when DNS IPs change between cycles, port and TLS checks pick up the new IPs.
- **README updated**: Monitoring lifecycle section now documents the DNS-first ordering guarantee.

## Check ordering | Trigger | Phase 1 | Phase 2 | Phase 3 | |---------|---------|---------|----------| | Startup (`RunOnce`) | DNS | Ports | TLS | | DNS ticker | DNS | Ports | — | | TLS ticker | DNS | — | TLS | closes https://git.eeqj.de/sneak/dnswatcher/issues/58 Co-authored-by: user Reviewed-on: https://git.eeqj.de/sneak/dnswatcher/pulls/64 Co-authored-by: clawbot Co-committed-by: clawbot --- README.md | 12 ++++-- internal/watcher/watcher.go | 35 ++++++++++++--- internal/watcher/watcher_test.go | 74 ++++++++++++++++++++++++++++++++ 3 files changed, 113 insertions(+), 8 deletions(-) diff --git a/README.md b/README.md index 11e0819..458cd42 100644 --- a/README.md +++ b/README.md @@ -367,9 +367,15 @@ docker run -d \ triggering change notifications). 2. **Initial check**: Immediately perform all DNS, port, and TLS checks on startup. -3. **Periodic checks**: - - DNS and port checks: every `DNSWATCHER_DNS_INTERVAL` (default 1h). - - TLS checks: every `DNSWATCHER_TLS_INTERVAL` (default 12h). +3. **Periodic checks** (DNS always runs first): + - DNS checks: every `DNSWATCHER_DNS_INTERVAL` (default 1h). Also + re-run before every TLS check cycle to ensure fresh IPs. + - Port checks: every `DNSWATCHER_DNS_INTERVAL`, after DNS completes. + - TLS checks: every `DNSWATCHER_TLS_INTERVAL` (default 12h), after + DNS completes. + - Port and TLS checks always use freshly resolved IP addresses from + the DNS phase that immediately precedes them — never stale IPs + from a previous cycle. 4. **On change detection**: Send notifications to all configured endpoints, update in-memory state, persist to disk. 5. 
**Shutdown**: Persist final state to disk, complete in-flight diff --git a/internal/watcher/watcher.go b/internal/watcher/watcher.go index 6a48ce6..44ebc0e 100644 --- a/internal/watcher/watcher.go +++ b/internal/watcher/watcher.go @@ -141,9 +141,16 @@ func (w *Watcher) Run(ctx context.Context) { return case <-dnsTicker.C: - w.runDNSAndPortChecks(ctx) + w.runDNSChecks(ctx) + + w.checkAllPorts(ctx) w.saveState() case <-tlsTicker.C: + // Run DNS first so TLS checks use freshly + // resolved IP addresses, not stale ones from + // a previous cycle. + w.runDNSChecks(ctx) + w.runTLSChecks(ctx) w.saveState() } @@ -151,10 +158,26 @@ func (w *Watcher) Run(ctx context.Context) { } // RunOnce performs a single complete monitoring cycle. +// DNS checks run first so that port and TLS checks use +// freshly resolved IP addresses. Port checks run before +// TLS because TLS checks only target IPs with an open +// port 443. func (w *Watcher) RunOnce(ctx context.Context) { w.detectFirstRun() - w.runDNSAndPortChecks(ctx) + + // Phase 1: DNS resolution must complete first so that + // subsequent checks use fresh IP addresses. + w.runDNSChecks(ctx) + + // Phase 2: Port checks populate port state that TLS + // checks depend on (TLS only targets IPs where port + // 443 is open). + w.checkAllPorts(ctx) + + // Phase 3: TLS checks use fresh DNS IPs and current + // port state. w.runTLSChecks(ctx) + w.saveState() w.firstRun = false } @@ -171,7 +194,11 @@ func (w *Watcher) detectFirstRun() { } } -func (w *Watcher) runDNSAndPortChecks(ctx context.Context) { +// runDNSChecks performs DNS resolution for all configured domains +// and hostnames, updating state with freshly resolved records. +// This must complete before port or TLS checks run so those +// checks operate on current IP addresses. 
+func (w *Watcher) runDNSChecks(ctx context.Context) { for _, domain := range w.config.Domains { w.checkDomain(ctx, domain) } @@ -179,8 +206,6 @@ func (w *Watcher) runDNSAndPortChecks(ctx context.Context) { for _, hostname := range w.config.Hostnames { w.checkHostname(ctx, hostname) } - - w.checkAllPorts(ctx) } func (w *Watcher) checkDomain( diff --git a/internal/watcher/watcher_test.go b/internal/watcher/watcher_test.go index 43514eb..54d444f 100644 --- a/internal/watcher/watcher_test.go +++ b/internal/watcher/watcher_test.go @@ -682,6 +682,80 @@ func TestGracefulShutdown(t *testing.T) { } } +func setupHostnameIP( + deps *testDeps, + hostname, ip string, +) { + deps.resolver.allRecords[hostname] = map[string]map[string][]string{ + "ns1.example.com.": {"A": {ip}}, + } + deps.portChecker.results[ip+":80"] = true + deps.portChecker.results[ip+":443"] = true + deps.tlsChecker.certs[ip+":"+hostname] = &tlscheck.CertificateInfo{ + CommonName: hostname, + Issuer: "DigiCert", + NotAfter: time.Now().Add(90 * 24 * time.Hour), + SubjectAlternativeNames: []string{hostname}, + } +} + +func updateHostnameIP(deps *testDeps, hostname, ip string) { + deps.resolver.mu.Lock() + deps.resolver.allRecords[hostname] = map[string]map[string][]string{ + "ns1.example.com.": {"A": {ip}}, + } + deps.resolver.mu.Unlock() + + deps.portChecker.mu.Lock() + deps.portChecker.results[ip+":80"] = true + deps.portChecker.results[ip+":443"] = true + deps.portChecker.mu.Unlock() + + deps.tlsChecker.mu.Lock() + deps.tlsChecker.certs[ip+":"+hostname] = &tlscheck.CertificateInfo{ + CommonName: hostname, + Issuer: "DigiCert", + NotAfter: time.Now().Add(90 * 24 * time.Hour), + SubjectAlternativeNames: []string{hostname}, + } + deps.tlsChecker.mu.Unlock() +} + +func TestDNSRunsBeforePortAndTLSChecks(t *testing.T) { + t.Parallel() + + cfg := defaultTestConfig(t) + cfg.Hostnames = []string{"www.example.com"} + + w, deps := newTestWatcher(t, cfg) + + setupHostnameIP(deps, "www.example.com", "10.0.0.1") + + ctx 
:= t.Context() + w.RunOnce(ctx) + + snap := deps.state.GetSnapshot() + if _, ok := snap.Ports["10.0.0.1:80"]; !ok { + t.Fatal("expected port state for 10.0.0.1:80") + } + + // DNS changes to a new IP; port and TLS must pick it up. + updateHostnameIP(deps, "www.example.com", "10.0.0.2") + + w.RunOnce(ctx) + + snap = deps.state.GetSnapshot() + + if _, ok := snap.Ports["10.0.0.2:80"]; !ok { + t.Error("port check used stale DNS: missing 10.0.0.2:80") + } + + certKey := "10.0.0.2:443:www.example.com" + if _, ok := snap.Certificates[certKey]; !ok { + t.Error("TLS check used stale DNS: missing " + certKey) + } +} + func TestNSFailureAndRecovery(t *testing.T) { t.Parallel() From b20e75459f0f49f6bca83a6a16c59d5a3c9325bd Mon Sep 17 00:00:00 2001 From: clawbot Date: Mon, 2 Mar 2026 00:32:27 +0100 Subject: [PATCH 2/2] fix: track multiple hostnames per IP:port in port state (#65) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ## Summary Port state keys are `ip:port` with a single `hostname` field. When multiple hostnames resolve to the same IP (shared hosting, CDN), only one hostname was associated. This caused orphaned port state when that hostname removed the IP from DNS while the IP remained valid for other hostnames. ## Changes ### State (`internal/state/state.go`) - `PortState.Hostname` (string) → `PortState.Hostnames` ([]string) - Custom `UnmarshalJSON` for backward compatibility: reads old single `hostname` field and migrates to a single-element `hostnames` slice - Added `DeletePortState` and `GetAllPortKeys` methods for cleanup ### Watcher (`internal/watcher/watcher.go`) - Refactored `checkAllPorts` into three phases: 1. Build IP:port → hostname associations from current DNS data 2. Check each unique IP:port once with all associated hostnames 3. 
Clean up stale port state entries with no hostname references - Port change notifications now list all associated hostnames (`Hosts:` instead of `Host:`) - Added `buildPortAssociations`, `parsePortKey`, and `cleanupStalePorts` helper functions ### README - Updated state file format example: `hostname` → `hostnames` (array) - Updated notification description to reflect multiple hostnames ## Backward Compatibility Existing state files with the old single `hostname` string are handled gracefully via custom JSON unmarshaling — they are read as single-element `hostnames` slices. Closes https://git.eeqj.de/sneak/dnswatcher/issues/55 Co-authored-by: clawbot Reviewed-on: https://git.eeqj.de/sneak/dnswatcher/pulls/65 Co-authored-by: clawbot Co-committed-by: clawbot --- README.md | 8 +-- internal/state/state.go | 62 ++++++++++++++++++++- internal/watcher/watcher.go | 104 ++++++++++++++++++++++++++++++------ 3 files changed, 152 insertions(+), 22 deletions(-) diff --git a/README.md b/README.md index 458cd42..0b30a9b 100644 --- a/README.md +++ b/README.md @@ -110,8 +110,8 @@ includes: - **NS recoveries**: Which nameserver recovered, which hostname/domain. - **NS inconsistencies**: Which nameservers disagree, what each one returned, which hostname affected. -- **Port changes**: Which IP:port, old state, new state, associated - hostname. +- **Port changes**: Which IP:port, old state, new state, all associated + hostnames. - **TLS expiry warnings**: Which certificate, days remaining, CN, issuer, associated hostname and IP. - **TLS certificate changes**: Old and new CN/issuer/SANs, associated @@ -290,12 +290,12 @@ not as a merged view, to enable inconsistency detection. 
"ports": { "93.184.216.34:80": { "open": true, - "hostname": "www.example.com", + "hostnames": ["www.example.com"], "lastChecked": "2026-02-19T12:00:00Z" }, "93.184.216.34:443": { "open": true, - "hostname": "www.example.com", + "hostnames": ["www.example.com"], "lastChecked": "2026-02-19T12:00:00Z" } }, diff --git a/internal/state/state.go b/internal/state/state.go index dd4561d..efe681c 100644 --- a/internal/state/state.go +++ b/internal/state/state.go @@ -57,10 +57,49 @@ type HostnameState struct { // PortState holds the monitoring state for a port. type PortState struct { Open bool `json:"open"` - Hostname string `json:"hostname"` + Hostnames []string `json:"hostnames"` LastChecked time.Time `json:"lastChecked"` } +// UnmarshalJSON implements custom unmarshaling to handle both +// the old single-hostname format and the new multi-hostname +// format for backward compatibility with existing state files. +func (ps *PortState) UnmarshalJSON(data []byte) error { + // Use an alias to prevent infinite recursion. + type portStateAlias struct { + Open bool `json:"open"` + Hostnames []string `json:"hostnames"` + LastChecked time.Time `json:"lastChecked"` + } + + var alias portStateAlias + + err := json.Unmarshal(data, &alias) + if err != nil { + return fmt.Errorf("unmarshaling port state: %w", err) + } + + ps.Open = alias.Open + ps.Hostnames = alias.Hostnames + ps.LastChecked = alias.LastChecked + + // If Hostnames is empty, try reading the old single-hostname + // format for backward compatibility. + if len(ps.Hostnames) == 0 { + var old struct { + Hostname string `json:"hostname"` + } + + // Best-effort: ignore errors since the main unmarshal + // already succeeded. + if json.Unmarshal(data, &old) == nil && old.Hostname != "" { + ps.Hostnames = []string{old.Hostname} + } + } + + return nil +} + // CertificateState holds TLS certificate monitoring state. 
type CertificateState struct { CommonName string `json:"commonName"` @@ -263,6 +302,27 @@ func (s *State) GetPortState(key string) (*PortState, bool) { return ps, ok } +// DeletePortState removes a port state entry. +func (s *State) DeletePortState(key string) { + s.mu.Lock() + defer s.mu.Unlock() + + delete(s.snapshot.Ports, key) +} + +// GetAllPortKeys returns all port state keys. +func (s *State) GetAllPortKeys() []string { + s.mu.RLock() + defer s.mu.RUnlock() + + keys := make([]string, 0, len(s.snapshot.Ports)) + for k := range s.snapshot.Ports { + keys = append(keys, k) + } + + return keys +} + // SetCertificateState updates the state for a certificate. func (s *State) SetCertificateState( key string, diff --git a/internal/watcher/watcher.go b/internal/watcher/watcher.go index 44ebc0e..f641b70 100644 --- a/internal/watcher/watcher.go +++ b/internal/watcher/watcher.go @@ -473,24 +473,94 @@ func (w *Watcher) detectInconsistencies( } func (w *Watcher) checkAllPorts(ctx context.Context) { - for _, hostname := range w.config.Hostnames { - w.checkPortsForHostname(ctx, hostname) + // Phase 1: Build current IP:port → hostname associations + // from fresh DNS data. + associations := w.buildPortAssociations() + + // Phase 2: Check each unique IP:port and update state + // with the full set of associated hostnames. + for key, hostnames := range associations { + ip, port := parsePortKey(key) + if port == 0 { + continue + } + + w.checkSinglePort(ctx, ip, port, hostnames) } - for _, domain := range w.config.Domains { - w.checkPortsForHostname(ctx, domain) - } + // Phase 3: Remove port state entries that no longer have + // any hostname referencing them. + w.cleanupStalePorts(associations) } -func (w *Watcher) checkPortsForHostname( - ctx context.Context, - hostname string, -) { - ips := w.collectIPs(hostname) +// buildPortAssociations constructs a map from IP:port keys to +// the sorted set of hostnames currently resolving to that IP. 
+func (w *Watcher) buildPortAssociations() map[string][]string { + assoc := make(map[string]map[string]bool) - for _, ip := range ips { - for _, port := range monitoredPorts { - w.checkSinglePort(ctx, ip, port, hostname) + allNames := make( + []string, 0, + len(w.config.Hostnames)+len(w.config.Domains), + ) + allNames = append(allNames, w.config.Hostnames...) + allNames = append(allNames, w.config.Domains...) + + for _, name := range allNames { + ips := w.collectIPs(name) + for _, ip := range ips { + for _, port := range monitoredPorts { + key := fmt.Sprintf("%s:%d", ip, port) + if assoc[key] == nil { + assoc[key] = make(map[string]bool) + } + + assoc[key][name] = true + } + } + } + + result := make(map[string][]string, len(assoc)) + for key, set := range assoc { + hostnames := make([]string, 0, len(set)) + for h := range set { + hostnames = append(hostnames, h) + } + + sort.Strings(hostnames) + + result[key] = hostnames + } + + return result +} + +// parsePortKey splits an "ip:port" key into its components. +func parsePortKey(key string) (string, int) { + lastColon := strings.LastIndex(key, ":") + if lastColon < 0 { + return key, 0 + } + + ip := key[:lastColon] + + var p int + + _, err := fmt.Sscanf(key[lastColon+1:], "%d", &p) + if err != nil { + return ip, 0 + } + + return ip, p +} + +// cleanupStalePorts removes port state entries that are no +// longer referenced by any hostname in the current DNS data. 
+func (w *Watcher) cleanupStalePorts( + currentAssociations map[string][]string, +) { + for _, key := range w.state.GetAllPortKeys() { + if _, exists := currentAssociations[key]; !exists { + w.state.DeletePortState(key) } } } @@ -527,7 +597,7 @@ func (w *Watcher) checkSinglePort( ctx context.Context, ip string, port int, - hostname string, + hostnames []string, ) { result, err := w.portCheck.CheckPort(ctx, ip, port) if err != nil { @@ -552,8 +622,8 @@ func (w *Watcher) checkSinglePort( } msg := fmt.Sprintf( - "Host: %s\nAddress: %s\nPort now %s", - hostname, key, stateStr, + "Hosts: %s\nAddress: %s\nPort now %s", + strings.Join(hostnames, ", "), key, stateStr, ) w.notify.SendNotification( @@ -566,7 +636,7 @@ func (w *Watcher) checkSinglePort( w.state.SetPortState(key, &state.PortState{ Open: result.Open, - Hostname: hostname, + Hostnames: hostnames, LastChecked: now, }) }