diff --git a/README.md b/README.md index 11e0819..0b30a9b 100644 --- a/README.md +++ b/README.md @@ -110,8 +110,8 @@ includes: - **NS recoveries**: Which nameserver recovered, which hostname/domain. - **NS inconsistencies**: Which nameservers disagree, what each one returned, which hostname affected. -- **Port changes**: Which IP:port, old state, new state, associated - hostname. +- **Port changes**: Which IP:port, old state, new state, all associated + hostnames. - **TLS expiry warnings**: Which certificate, days remaining, CN, issuer, associated hostname and IP. - **TLS certificate changes**: Old and new CN/issuer/SANs, associated @@ -290,12 +290,12 @@ not as a merged view, to enable inconsistency detection. "ports": { "93.184.216.34:80": { "open": true, - "hostname": "www.example.com", + "hostnames": ["www.example.com"], "lastChecked": "2026-02-19T12:00:00Z" }, "93.184.216.34:443": { "open": true, - "hostname": "www.example.com", + "hostnames": ["www.example.com"], "lastChecked": "2026-02-19T12:00:00Z" } }, @@ -367,9 +367,15 @@ docker run -d \ triggering change notifications). 2. **Initial check**: Immediately perform all DNS, port, and TLS checks on startup. -3. **Periodic checks**: - - DNS and port checks: every `DNSWATCHER_DNS_INTERVAL` (default 1h). - - TLS checks: every `DNSWATCHER_TLS_INTERVAL` (default 12h). +3. **Periodic checks** (DNS always runs first): + - DNS checks: every `DNSWATCHER_DNS_INTERVAL` (default 1h). Also + re-run before every TLS check cycle to ensure fresh IPs. + - Port checks: every `DNSWATCHER_DNS_INTERVAL`, after DNS completes. + - TLS checks: every `DNSWATCHER_TLS_INTERVAL` (default 12h), after + DNS completes. + - Port and TLS checks always use freshly resolved IP addresses from + the DNS phase that immediately precedes them — never stale IPs + from a previous cycle. 4. **On change detection**: Send notifications to all configured endpoints, update in-memory state, persist to disk. 5. **Shutdown**: Persist final state to disk, complete in-flight diff --git a/internal/state/state.go b/internal/state/state.go index dd4561d..efe681c 100644 --- a/internal/state/state.go +++ b/internal/state/state.go @@ -57,10 +57,49 @@ type HostnameState struct { // PortState holds the monitoring state for a port. type PortState struct { Open bool `json:"open"` - Hostname string `json:"hostname"` + Hostnames []string `json:"hostnames"` LastChecked time.Time `json:"lastChecked"` } +// UnmarshalJSON implements custom unmarshaling to handle both +// the old single-hostname format and the new multi-hostname +// format for backward compatibility with existing state files. +func (ps *PortState) UnmarshalJSON(data []byte) error { + // Use an alias to prevent infinite recursion. + type portStateAlias struct { + Open bool `json:"open"` + Hostnames []string `json:"hostnames"` + LastChecked time.Time `json:"lastChecked"` + } + + var alias portStateAlias + + err := json.Unmarshal(data, &alias) + if err != nil { + return fmt.Errorf("unmarshaling port state: %w", err) + } + + ps.Open = alias.Open + ps.Hostnames = alias.Hostnames + ps.LastChecked = alias.LastChecked + + // If Hostnames is empty, try reading the old single-hostname + // format for backward compatibility. + if len(ps.Hostnames) == 0 { + var old struct { + Hostname string `json:"hostname"` + } + + // Best-effort: ignore errors since the main unmarshal + // already succeeded. + if json.Unmarshal(data, &old) == nil && old.Hostname != "" { + ps.Hostnames = []string{old.Hostname} + } + } + + return nil +} + // CertificateState holds TLS certificate monitoring state. type CertificateState struct { CommonName string `json:"commonName"` @@ -263,6 +302,27 @@ func (s *State) GetPortState(key string) (*PortState, bool) { return ps, ok } +// DeletePortState removes a port state entry. +func (s *State) DeletePortState(key string) { + s.mu.Lock() + defer s.mu.Unlock() + + delete(s.snapshot.Ports, key) +} + +// GetAllPortKeys returns all port state keys. +func (s *State) GetAllPortKeys() []string { + s.mu.RLock() + defer s.mu.RUnlock() + + keys := make([]string, 0, len(s.snapshot.Ports)) + for k := range s.snapshot.Ports { + keys = append(keys, k) + } + + return keys +} + // SetCertificateState updates the state for a certificate. func (s *State) SetCertificateState( key string, diff --git a/internal/watcher/watcher.go b/internal/watcher/watcher.go index 6a9ae64..60b70ba 100644 --- a/internal/watcher/watcher.go +++ b/internal/watcher/watcher.go @@ -143,9 +143,16 @@ func (w *Watcher) Run(ctx context.Context) { return case <-dnsTicker.C: - w.runDNSAndPortChecks(ctx) + w.runDNSChecks(ctx) + + w.checkAllPorts(ctx) w.saveState() case <-tlsTicker.C: + // Run DNS first so TLS checks use freshly + // resolved IP addresses, not stale ones from + // a previous cycle. + w.runDNSChecks(ctx) + w.runTLSChecks(ctx) w.saveState() } @@ -153,10 +160,26 @@ func (w *Watcher) Run(ctx context.Context) { } // RunOnce performs a single complete monitoring cycle. +// DNS checks run first so that port and TLS checks use +// freshly resolved IP addresses. Port checks run before +// TLS because TLS checks only target IPs with an open +// port 443. func (w *Watcher) RunOnce(ctx context.Context) { w.detectFirstRun() - w.runDNSAndPortChecks(ctx) + + // Phase 1: DNS resolution must complete first so that + // subsequent checks use fresh IP addresses. + w.runDNSChecks(ctx) + + // Phase 2: Port checks populate port state that TLS + // checks depend on (TLS only targets IPs where port + // 443 is open). + w.checkAllPorts(ctx) + + // Phase 3: TLS checks use fresh DNS IPs and current + // port state. w.runTLSChecks(ctx) + w.saveState() w.firstRun = false } @@ -173,7 +196,11 @@ func (w *Watcher) detectFirstRun() { } } -func (w *Watcher) runDNSAndPortChecks(ctx context.Context) { +// runDNSChecks performs DNS resolution for all configured domains +// and hostnames, updating state with freshly resolved records. +// This must complete before port or TLS checks run so those +// checks operate on current IP addresses. +func (w *Watcher) runDNSChecks(ctx context.Context) { for _, domain := range w.config.Domains { w.checkDomain(ctx, domain) } @@ -181,8 +208,6 @@ func (w *Watcher) runDNSAndPortChecks(ctx context.Context) { for _, hostname := range w.config.Hostnames { w.checkHostname(ctx, hostname) } - - w.checkAllPorts(ctx) } func (w *Watcher) checkDomain( @@ -450,24 +475,94 @@ func (w *Watcher) detectInconsistencies( } func (w *Watcher) checkAllPorts(ctx context.Context) { - for _, hostname := range w.config.Hostnames { - w.checkPortsForHostname(ctx, hostname) + // Phase 1: Build current IP:port → hostname associations + // from fresh DNS data. + associations := w.buildPortAssociations() + + // Phase 2: Check each unique IP:port and update state + // with the full set of associated hostnames. + for key, hostnames := range associations { + ip, port := parsePortKey(key) + if port == 0 { + continue + } + + w.checkSinglePort(ctx, ip, port, hostnames) } - for _, domain := range w.config.Domains { - w.checkPortsForHostname(ctx, domain) - } + // Phase 3: Remove port state entries that no longer have + // any hostname referencing them. + w.cleanupStalePorts(associations) } -func (w *Watcher) checkPortsForHostname( - ctx context.Context, - hostname string, -) { - ips := w.collectIPs(hostname) +// buildPortAssociations constructs a map from IP:port keys to +// the sorted set of hostnames currently resolving to that IP. +func (w *Watcher) buildPortAssociations() map[string][]string { + assoc := make(map[string]map[string]bool) - for _, ip := range ips { - for _, port := range monitoredPorts { - w.checkSinglePort(ctx, ip, port, hostname) + allNames := make( + []string, 0, + len(w.config.Hostnames)+len(w.config.Domains), + ) + allNames = append(allNames, w.config.Hostnames...) + allNames = append(allNames, w.config.Domains...) + + for _, name := range allNames { + ips := w.collectIPs(name) + for _, ip := range ips { + for _, port := range monitoredPorts { + key := fmt.Sprintf("%s:%d", ip, port) + if assoc[key] == nil { + assoc[key] = make(map[string]bool) + } + + assoc[key][name] = true + } + } + } + + result := make(map[string][]string, len(assoc)) + for key, set := range assoc { + hostnames := make([]string, 0, len(set)) + for h := range set { + hostnames = append(hostnames, h) + } + + sort.Strings(hostnames) + + result[key] = hostnames + } + + return result +} + +// parsePortKey splits an "ip:port" key into its components. +func parsePortKey(key string) (string, int) { + lastColon := strings.LastIndex(key, ":") + if lastColon < 0 { + return key, 0 + } + + ip := key[:lastColon] + + var p int + + _, err := fmt.Sscanf(key[lastColon+1:], "%d", &p) + if err != nil { + return ip, 0 + } + + return ip, p +} + +// cleanupStalePorts removes port state entries that are no +// longer referenced by any hostname in the current DNS data. +func (w *Watcher) cleanupStalePorts( + currentAssociations map[string][]string, +) { + for _, key := range w.state.GetAllPortKeys() { + if _, exists := currentAssociations[key]; !exists { + w.state.DeletePortState(key) } } } @@ -504,7 +599,7 @@ func (w *Watcher) checkSinglePort( ctx context.Context, ip string, port int, - hostname string, + hostnames []string, ) { result, err := w.portCheck.CheckPort(ctx, ip, port) if err != nil { @@ -529,8 +624,8 @@ func (w *Watcher) checkSinglePort( } msg := fmt.Sprintf( - "Host: %s\nAddress: %s\nPort now %s", - hostname, key, stateStr, + "Hosts: %s\nAddress: %s\nPort now %s", + strings.Join(hostnames, ", "), key, stateStr, ) w.notify.SendNotification( @@ -543,7 +638,7 @@ func (w *Watcher) checkSinglePort( w.state.SetPortState(key, &state.PortState{ Open: result.Open, - Hostname: hostname, + Hostnames: hostnames, LastChecked: now, }) } diff --git a/internal/watcher/watcher_test.go b/internal/watcher/watcher_test.go index 43514eb..54d444f 100644 --- a/internal/watcher/watcher_test.go +++ b/internal/watcher/watcher_test.go @@ -682,6 +682,80 @@ func TestGracefulShutdown(t *testing.T) { } } +func setupHostnameIP( + deps *testDeps, + hostname, ip string, +) { + deps.resolver.allRecords[hostname] = map[string]map[string][]string{ + "ns1.example.com.": {"A": {ip}}, + } + deps.portChecker.results[ip+":80"] = true + deps.portChecker.results[ip+":443"] = true + deps.tlsChecker.certs[ip+":"+hostname] = &tlscheck.CertificateInfo{ + CommonName: hostname, + Issuer: "DigiCert", + NotAfter: time.Now().Add(90 * 24 * time.Hour), + SubjectAlternativeNames: []string{hostname}, + } +} + +func updateHostnameIP(deps *testDeps, hostname, ip string) { + deps.resolver.mu.Lock() + deps.resolver.allRecords[hostname] = map[string]map[string][]string{ + "ns1.example.com.": {"A": {ip}}, + } + deps.resolver.mu.Unlock() + + deps.portChecker.mu.Lock() + deps.portChecker.results[ip+":80"] = true + deps.portChecker.results[ip+":443"] = true + deps.portChecker.mu.Unlock() + + deps.tlsChecker.mu.Lock() + deps.tlsChecker.certs[ip+":"+hostname] = &tlscheck.CertificateInfo{ + CommonName: hostname, + Issuer: "DigiCert", + NotAfter: time.Now().Add(90 * 24 * time.Hour), + SubjectAlternativeNames: []string{hostname}, + } + deps.tlsChecker.mu.Unlock() +} + +func TestDNSRunsBeforePortAndTLSChecks(t *testing.T) { + t.Parallel() + + cfg := defaultTestConfig(t) + cfg.Hostnames = []string{"www.example.com"} + + w, deps := newTestWatcher(t, cfg) + + setupHostnameIP(deps, "www.example.com", "10.0.0.1") + + ctx := t.Context() + w.RunOnce(ctx) + + snap := deps.state.GetSnapshot() + if _, ok := snap.Ports["10.0.0.1:80"]; !ok { + t.Fatal("expected port state for 10.0.0.1:80") + } + + // DNS changes to a new IP; port and TLS must pick it up. + updateHostnameIP(deps, "www.example.com", "10.0.0.2") + + w.RunOnce(ctx) + + snap = deps.state.GetSnapshot() + + if _, ok := snap.Ports["10.0.0.2:80"]; !ok { + t.Error("port check used stale DNS: missing 10.0.0.2:80") + } + + certKey := "10.0.0.2:443:www.example.com" + if _, ok := snap.Certificates[certKey]; !ok { + t.Error("TLS check used stale DNS: missing " + certKey) + } +} + func TestNSFailureAndRecovery(t *testing.T) { t.Parallel()