fix: use context.Background() for watcher goroutine lifetime #63
20
README.md
20
README.md
@@ -110,8 +110,8 @@ includes:
|
|||||||
- **NS recoveries**: Which nameserver recovered, which hostname/domain.
|
- **NS recoveries**: Which nameserver recovered, which hostname/domain.
|
||||||
- **NS inconsistencies**: Which nameservers disagree, what each one
|
- **NS inconsistencies**: Which nameservers disagree, what each one
|
||||||
returned, which hostname affected.
|
returned, which hostname affected.
|
||||||
- **Port changes**: Which IP:port, old state, new state, associated
|
- **Port changes**: Which IP:port, old state, new state, all associated
|
||||||
hostname.
|
hostnames.
|
||||||
- **TLS expiry warnings**: Which certificate, days remaining, CN,
|
- **TLS expiry warnings**: Which certificate, days remaining, CN,
|
||||||
issuer, associated hostname and IP.
|
issuer, associated hostname and IP.
|
||||||
- **TLS certificate changes**: Old and new CN/issuer/SANs, associated
|
- **TLS certificate changes**: Old and new CN/issuer/SANs, associated
|
||||||
@@ -290,12 +290,12 @@ not as a merged view, to enable inconsistency detection.
|
|||||||
"ports": {
|
"ports": {
|
||||||
"93.184.216.34:80": {
|
"93.184.216.34:80": {
|
||||||
"open": true,
|
"open": true,
|
||||||
"hostname": "www.example.com",
|
"hostnames": ["www.example.com"],
|
||||||
"lastChecked": "2026-02-19T12:00:00Z"
|
"lastChecked": "2026-02-19T12:00:00Z"
|
||||||
},
|
},
|
||||||
"93.184.216.34:443": {
|
"93.184.216.34:443": {
|
||||||
"open": true,
|
"open": true,
|
||||||
"hostname": "www.example.com",
|
"hostnames": ["www.example.com"],
|
||||||
"lastChecked": "2026-02-19T12:00:00Z"
|
"lastChecked": "2026-02-19T12:00:00Z"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
@@ -367,9 +367,15 @@ docker run -d \
|
|||||||
triggering change notifications).
|
triggering change notifications).
|
||||||
2. **Initial check**: Immediately perform all DNS, port, and TLS checks
|
2. **Initial check**: Immediately perform all DNS, port, and TLS checks
|
||||||
on startup.
|
on startup.
|
||||||
3. **Periodic checks**:
|
3. **Periodic checks** (DNS always runs first):
|
||||||
- DNS and port checks: every `DNSWATCHER_DNS_INTERVAL` (default 1h).
|
- DNS checks: every `DNSWATCHER_DNS_INTERVAL` (default 1h). Also
|
||||||
- TLS checks: every `DNSWATCHER_TLS_INTERVAL` (default 12h).
|
re-run before every TLS check cycle to ensure fresh IPs.
|
||||||
|
- Port checks: every `DNSWATCHER_DNS_INTERVAL`, after DNS completes.
|
||||||
|
- TLS checks: every `DNSWATCHER_TLS_INTERVAL` (default 12h), after
|
||||||
|
DNS completes.
|
||||||
|
- Port and TLS checks always use freshly resolved IP addresses from
|
||||||
|
the DNS phase that immediately precedes them — never stale IPs
|
||||||
|
from a previous cycle.
|
||||||
4. **On change detection**: Send notifications to all configured
|
4. **On change detection**: Send notifications to all configured
|
||||||
endpoints, update in-memory state, persist to disk.
|
endpoints, update in-memory state, persist to disk.
|
||||||
5. **Shutdown**: Persist final state to disk, complete in-flight
|
5. **Shutdown**: Persist final state to disk, complete in-flight
|
||||||
|
|||||||
@@ -57,10 +57,49 @@ type HostnameState struct {
|
|||||||
// PortState holds the monitoring state for a port.
|
// PortState holds the monitoring state for a port.
|
||||||
type PortState struct {
|
type PortState struct {
|
||||||
Open bool `json:"open"`
|
Open bool `json:"open"`
|
||||||
Hostname string `json:"hostname"`
|
Hostnames []string `json:"hostnames"`
|
||||||
LastChecked time.Time `json:"lastChecked"`
|
LastChecked time.Time `json:"lastChecked"`
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// UnmarshalJSON implements custom unmarshaling to handle both
|
||||||
|
// the old single-hostname format and the new multi-hostname
|
||||||
|
// format for backward compatibility with existing state files.
|
||||||
|
func (ps *PortState) UnmarshalJSON(data []byte) error {
|
||||||
|
// Use an alias to prevent infinite recursion.
|
||||||
|
type portStateAlias struct {
|
||||||
|
Open bool `json:"open"`
|
||||||
|
Hostnames []string `json:"hostnames"`
|
||||||
|
LastChecked time.Time `json:"lastChecked"`
|
||||||
|
}
|
||||||
|
|
||||||
|
var alias portStateAlias
|
||||||
|
|
||||||
|
err := json.Unmarshal(data, &alias)
|
||||||
|
if err != nil {
|
||||||
|
return fmt.Errorf("unmarshaling port state: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
ps.Open = alias.Open
|
||||||
|
ps.Hostnames = alias.Hostnames
|
||||||
|
ps.LastChecked = alias.LastChecked
|
||||||
|
|
||||||
|
// If Hostnames is empty, try reading the old single-hostname
|
||||||
|
// format for backward compatibility.
|
||||||
|
if len(ps.Hostnames) == 0 {
|
||||||
|
var old struct {
|
||||||
|
Hostname string `json:"hostname"`
|
||||||
|
}
|
||||||
|
|
||||||
|
// Best-effort: ignore errors since the main unmarshal
|
||||||
|
// already succeeded.
|
||||||
|
if json.Unmarshal(data, &old) == nil && old.Hostname != "" {
|
||||||
|
ps.Hostnames = []string{old.Hostname}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
// CertificateState holds TLS certificate monitoring state.
|
// CertificateState holds TLS certificate monitoring state.
|
||||||
type CertificateState struct {
|
type CertificateState struct {
|
||||||
CommonName string `json:"commonName"`
|
CommonName string `json:"commonName"`
|
||||||
@@ -263,6 +302,27 @@ func (s *State) GetPortState(key string) (*PortState, bool) {
|
|||||||
return ps, ok
|
return ps, ok
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// DeletePortState removes a port state entry.
|
||||||
|
func (s *State) DeletePortState(key string) {
|
||||||
|
s.mu.Lock()
|
||||||
|
defer s.mu.Unlock()
|
||||||
|
|
||||||
|
delete(s.snapshot.Ports, key)
|
||||||
|
}
|
||||||
|
|
||||||
|
// GetAllPortKeys returns all port state keys.
|
||||||
|
func (s *State) GetAllPortKeys() []string {
|
||||||
|
s.mu.RLock()
|
||||||
|
defer s.mu.RUnlock()
|
||||||
|
|
||||||
|
keys := make([]string, 0, len(s.snapshot.Ports))
|
||||||
|
for k := range s.snapshot.Ports {
|
||||||
|
keys = append(keys, k)
|
||||||
|
}
|
||||||
|
|
||||||
|
return keys
|
||||||
|
}
|
||||||
|
|
||||||
// SetCertificateState updates the state for a certificate.
|
// SetCertificateState updates the state for a certificate.
|
||||||
func (s *State) SetCertificateState(
|
func (s *State) SetCertificateState(
|
||||||
key string,
|
key string,
|
||||||
|
|||||||
@@ -143,9 +143,16 @@ func (w *Watcher) Run(ctx context.Context) {
|
|||||||
|
|
||||||
return
|
return
|
||||||
case <-dnsTicker.C:
|
case <-dnsTicker.C:
|
||||||
w.runDNSAndPortChecks(ctx)
|
w.runDNSChecks(ctx)
|
||||||
|
|
||||||
|
w.checkAllPorts(ctx)
|
||||||
w.saveState()
|
w.saveState()
|
||||||
case <-tlsTicker.C:
|
case <-tlsTicker.C:
|
||||||
|
// Run DNS first so TLS checks use freshly
|
||||||
|
// resolved IP addresses, not stale ones from
|
||||||
|
// a previous cycle.
|
||||||
|
w.runDNSChecks(ctx)
|
||||||
|
|
||||||
w.runTLSChecks(ctx)
|
w.runTLSChecks(ctx)
|
||||||
w.saveState()
|
w.saveState()
|
||||||
}
|
}
|
||||||
@@ -153,10 +160,26 @@ func (w *Watcher) Run(ctx context.Context) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// RunOnce performs a single complete monitoring cycle.
|
// RunOnce performs a single complete monitoring cycle.
|
||||||
|
// DNS checks run first so that port and TLS checks use
|
||||||
|
// freshly resolved IP addresses. Port checks run before
|
||||||
|
// TLS because TLS checks only target IPs with an open
|
||||||
|
// port 443.
|
||||||
func (w *Watcher) RunOnce(ctx context.Context) {
|
func (w *Watcher) RunOnce(ctx context.Context) {
|
||||||
w.detectFirstRun()
|
w.detectFirstRun()
|
||||||
w.runDNSAndPortChecks(ctx)
|
|
||||||
|
// Phase 1: DNS resolution must complete first so that
|
||||||
|
// subsequent checks use fresh IP addresses.
|
||||||
|
w.runDNSChecks(ctx)
|
||||||
|
|
||||||
|
// Phase 2: Port checks populate port state that TLS
|
||||||
|
// checks depend on (TLS only targets IPs where port
|
||||||
|
// 443 is open).
|
||||||
|
w.checkAllPorts(ctx)
|
||||||
|
|
||||||
|
// Phase 3: TLS checks use fresh DNS IPs and current
|
||||||
|
// port state.
|
||||||
w.runTLSChecks(ctx)
|
w.runTLSChecks(ctx)
|
||||||
|
|
||||||
w.saveState()
|
w.saveState()
|
||||||
w.firstRun = false
|
w.firstRun = false
|
||||||
}
|
}
|
||||||
@@ -173,7 +196,11 @@ func (w *Watcher) detectFirstRun() {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func (w *Watcher) runDNSAndPortChecks(ctx context.Context) {
|
// runDNSChecks performs DNS resolution for all configured domains
|
||||||
|
// and hostnames, updating state with freshly resolved records.
|
||||||
|
// This must complete before port or TLS checks run so those
|
||||||
|
// checks operate on current IP addresses.
|
||||||
|
func (w *Watcher) runDNSChecks(ctx context.Context) {
|
||||||
for _, domain := range w.config.Domains {
|
for _, domain := range w.config.Domains {
|
||||||
w.checkDomain(ctx, domain)
|
w.checkDomain(ctx, domain)
|
||||||
}
|
}
|
||||||
@@ -181,8 +208,6 @@ func (w *Watcher) runDNSAndPortChecks(ctx context.Context) {
|
|||||||
for _, hostname := range w.config.Hostnames {
|
for _, hostname := range w.config.Hostnames {
|
||||||
w.checkHostname(ctx, hostname)
|
w.checkHostname(ctx, hostname)
|
||||||
}
|
}
|
||||||
|
|
||||||
w.checkAllPorts(ctx)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
func (w *Watcher) checkDomain(
|
func (w *Watcher) checkDomain(
|
||||||
@@ -450,24 +475,94 @@ func (w *Watcher) detectInconsistencies(
|
|||||||
}
|
}
|
||||||
|
|
||||||
func (w *Watcher) checkAllPorts(ctx context.Context) {
|
func (w *Watcher) checkAllPorts(ctx context.Context) {
|
||||||
for _, hostname := range w.config.Hostnames {
|
// Phase 1: Build current IP:port → hostname associations
|
||||||
w.checkPortsForHostname(ctx, hostname)
|
// from fresh DNS data.
|
||||||
|
associations := w.buildPortAssociations()
|
||||||
|
|
||||||
|
// Phase 2: Check each unique IP:port and update state
|
||||||
|
// with the full set of associated hostnames.
|
||||||
|
for key, hostnames := range associations {
|
||||||
|
ip, port := parsePortKey(key)
|
||||||
|
if port == 0 {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
|
w.checkSinglePort(ctx, ip, port, hostnames)
|
||||||
}
|
}
|
||||||
|
|
||||||
for _, domain := range w.config.Domains {
|
// Phase 3: Remove port state entries that no longer have
|
||||||
w.checkPortsForHostname(ctx, domain)
|
// any hostname referencing them.
|
||||||
}
|
w.cleanupStalePorts(associations)
|
||||||
}
|
}
|
||||||
|
|
||||||
func (w *Watcher) checkPortsForHostname(
|
// buildPortAssociations constructs a map from IP:port keys to
|
||||||
ctx context.Context,
|
// the sorted set of hostnames currently resolving to that IP.
|
||||||
hostname string,
|
func (w *Watcher) buildPortAssociations() map[string][]string {
|
||||||
) {
|
assoc := make(map[string]map[string]bool)
|
||||||
ips := w.collectIPs(hostname)
|
|
||||||
|
|
||||||
for _, ip := range ips {
|
allNames := make(
|
||||||
for _, port := range monitoredPorts {
|
[]string, 0,
|
||||||
w.checkSinglePort(ctx, ip, port, hostname)
|
len(w.config.Hostnames)+len(w.config.Domains),
|
||||||
|
)
|
||||||
|
allNames = append(allNames, w.config.Hostnames...)
|
||||||
|
allNames = append(allNames, w.config.Domains...)
|
||||||
|
|
||||||
|
for _, name := range allNames {
|
||||||
|
ips := w.collectIPs(name)
|
||||||
|
for _, ip := range ips {
|
||||||
|
for _, port := range monitoredPorts {
|
||||||
|
key := fmt.Sprintf("%s:%d", ip, port)
|
||||||
|
if assoc[key] == nil {
|
||||||
|
assoc[key] = make(map[string]bool)
|
||||||
|
}
|
||||||
|
|
||||||
|
assoc[key][name] = true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
result := make(map[string][]string, len(assoc))
|
||||||
|
for key, set := range assoc {
|
||||||
|
hostnames := make([]string, 0, len(set))
|
||||||
|
for h := range set {
|
||||||
|
hostnames = append(hostnames, h)
|
||||||
|
}
|
||||||
|
|
||||||
|
sort.Strings(hostnames)
|
||||||
|
|
||||||
|
result[key] = hostnames
|
||||||
|
}
|
||||||
|
|
||||||
|
return result
|
||||||
|
}
|
||||||
|
|
||||||
|
// parsePortKey splits an "ip:port" key at its last colon and returns
// the address part and the numeric port. Splitting at the last colon
// keeps addresses that themselves contain colons intact. The port is
// reported as 0 when the key has no colon or when the text after the
// last colon cannot be scanned as an integer.
func parsePortKey(key string) (string, int) {
	sep := strings.LastIndex(key, ":")
	if sep == -1 {
		return key, 0
	}

	host, portText := key[:sep], key[sep+1:]

	var port int
	if _, err := fmt.Sscanf(portText, "%d", &port); err != nil {
		return host, 0
	}

	return host, port
}
|
||||||
|
|
||||||
|
// cleanupStalePorts removes port state entries that are no
|
||||||
|
// longer referenced by any hostname in the current DNS data.
|
||||||
|
func (w *Watcher) cleanupStalePorts(
|
||||||
|
currentAssociations map[string][]string,
|
||||||
|
) {
|
||||||
|
for _, key := range w.state.GetAllPortKeys() {
|
||||||
|
if _, exists := currentAssociations[key]; !exists {
|
||||||
|
w.state.DeletePortState(key)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -504,7 +599,7 @@ func (w *Watcher) checkSinglePort(
|
|||||||
ctx context.Context,
|
ctx context.Context,
|
||||||
ip string,
|
ip string,
|
||||||
port int,
|
port int,
|
||||||
hostname string,
|
hostnames []string,
|
||||||
) {
|
) {
|
||||||
result, err := w.portCheck.CheckPort(ctx, ip, port)
|
result, err := w.portCheck.CheckPort(ctx, ip, port)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
@@ -529,8 +624,8 @@ func (w *Watcher) checkSinglePort(
|
|||||||
}
|
}
|
||||||
|
|
||||||
msg := fmt.Sprintf(
|
msg := fmt.Sprintf(
|
||||||
"Host: %s\nAddress: %s\nPort now %s",
|
"Hosts: %s\nAddress: %s\nPort now %s",
|
||||||
hostname, key, stateStr,
|
strings.Join(hostnames, ", "), key, stateStr,
|
||||||
)
|
)
|
||||||
|
|
||||||
w.notify.SendNotification(
|
w.notify.SendNotification(
|
||||||
@@ -543,7 +638,7 @@ func (w *Watcher) checkSinglePort(
|
|||||||
|
|
||||||
w.state.SetPortState(key, &state.PortState{
|
w.state.SetPortState(key, &state.PortState{
|
||||||
Open: result.Open,
|
Open: result.Open,
|
||||||
Hostname: hostname,
|
Hostnames: hostnames,
|
||||||
LastChecked: now,
|
LastChecked: now,
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -682,6 +682,80 @@ func TestGracefulShutdown(t *testing.T) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func setupHostnameIP(
|
||||||
|
deps *testDeps,
|
||||||
|
hostname, ip string,
|
||||||
|
) {
|
||||||
|
deps.resolver.allRecords[hostname] = map[string]map[string][]string{
|
||||||
|
"ns1.example.com.": {"A": {ip}},
|
||||||
|
}
|
||||||
|
deps.portChecker.results[ip+":80"] = true
|
||||||
|
deps.portChecker.results[ip+":443"] = true
|
||||||
|
deps.tlsChecker.certs[ip+":"+hostname] = &tlscheck.CertificateInfo{
|
||||||
|
CommonName: hostname,
|
||||||
|
Issuer: "DigiCert",
|
||||||
|
NotAfter: time.Now().Add(90 * 24 * time.Hour),
|
||||||
|
SubjectAlternativeNames: []string{hostname},
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func updateHostnameIP(deps *testDeps, hostname, ip string) {
|
||||||
|
deps.resolver.mu.Lock()
|
||||||
|
deps.resolver.allRecords[hostname] = map[string]map[string][]string{
|
||||||
|
"ns1.example.com.": {"A": {ip}},
|
||||||
|
}
|
||||||
|
deps.resolver.mu.Unlock()
|
||||||
|
|
||||||
|
deps.portChecker.mu.Lock()
|
||||||
|
deps.portChecker.results[ip+":80"] = true
|
||||||
|
deps.portChecker.results[ip+":443"] = true
|
||||||
|
deps.portChecker.mu.Unlock()
|
||||||
|
|
||||||
|
deps.tlsChecker.mu.Lock()
|
||||||
|
deps.tlsChecker.certs[ip+":"+hostname] = &tlscheck.CertificateInfo{
|
||||||
|
CommonName: hostname,
|
||||||
|
Issuer: "DigiCert",
|
||||||
|
NotAfter: time.Now().Add(90 * 24 * time.Hour),
|
||||||
|
SubjectAlternativeNames: []string{hostname},
|
||||||
|
}
|
||||||
|
deps.tlsChecker.mu.Unlock()
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestDNSRunsBeforePortAndTLSChecks(t *testing.T) {
|
||||||
|
t.Parallel()
|
||||||
|
|
||||||
|
cfg := defaultTestConfig(t)
|
||||||
|
cfg.Hostnames = []string{"www.example.com"}
|
||||||
|
|
||||||
|
w, deps := newTestWatcher(t, cfg)
|
||||||
|
|
||||||
|
setupHostnameIP(deps, "www.example.com", "10.0.0.1")
|
||||||
|
|
||||||
|
ctx := t.Context()
|
||||||
|
w.RunOnce(ctx)
|
||||||
|
|
||||||
|
snap := deps.state.GetSnapshot()
|
||||||
|
if _, ok := snap.Ports["10.0.0.1:80"]; !ok {
|
||||||
|
t.Fatal("expected port state for 10.0.0.1:80")
|
||||||
|
}
|
||||||
|
|
||||||
|
// DNS changes to a new IP; port and TLS must pick it up.
|
||||||
|
updateHostnameIP(deps, "www.example.com", "10.0.0.2")
|
||||||
|
|
||||||
|
w.RunOnce(ctx)
|
||||||
|
|
||||||
|
snap = deps.state.GetSnapshot()
|
||||||
|
|
||||||
|
if _, ok := snap.Ports["10.0.0.2:80"]; !ok {
|
||||||
|
t.Error("port check used stale DNS: missing 10.0.0.2:80")
|
||||||
|
}
|
||||||
|
|
||||||
|
certKey := "10.0.0.2:443:www.example.com"
|
||||||
|
if _, ok := snap.Certificates[certKey]; !ok {
|
||||||
|
t.Error("TLS check used stale DNS: missing " + certKey)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
func TestNSFailureAndRecovery(t *testing.T) {
|
func TestNSFailureAndRecovery(t *testing.T) {
|
||||||
t.Parallel()
|
t.Parallel()
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user