Merge branch 'main' into fix/issue-53-startup-context
All checks were successful
check / check (push) Successful in 46s
All checks were successful
check / check (push) Successful in 46s
This commit is contained in:
20
README.md
20
README.md
@@ -110,8 +110,8 @@ includes:
|
||||
- **NS recoveries**: Which nameserver recovered, which hostname/domain.
|
||||
- **NS inconsistencies**: Which nameservers disagree, what each one
|
||||
returned, which hostname affected.
|
||||
- **Port changes**: Which IP:port, old state, new state, associated
|
||||
hostname.
|
||||
- **Port changes**: Which IP:port, old state, new state, all associated
|
||||
hostnames.
|
||||
- **TLS expiry warnings**: Which certificate, days remaining, CN,
|
||||
issuer, associated hostname and IP.
|
||||
- **TLS certificate changes**: Old and new CN/issuer/SANs, associated
|
||||
@@ -290,12 +290,12 @@ not as a merged view, to enable inconsistency detection.
|
||||
"ports": {
|
||||
"93.184.216.34:80": {
|
||||
"open": true,
|
||||
"hostname": "www.example.com",
|
||||
"hostnames": ["www.example.com"],
|
||||
"lastChecked": "2026-02-19T12:00:00Z"
|
||||
},
|
||||
"93.184.216.34:443": {
|
||||
"open": true,
|
||||
"hostname": "www.example.com",
|
||||
"hostnames": ["www.example.com"],
|
||||
"lastChecked": "2026-02-19T12:00:00Z"
|
||||
}
|
||||
},
|
||||
@@ -367,9 +367,15 @@ docker run -d \
|
||||
triggering change notifications).
|
||||
2. **Initial check**: Immediately perform all DNS, port, and TLS checks
|
||||
on startup.
|
||||
3. **Periodic checks**:
|
||||
- DNS and port checks: every `DNSWATCHER_DNS_INTERVAL` (default 1h).
|
||||
- TLS checks: every `DNSWATCHER_TLS_INTERVAL` (default 12h).
|
||||
3. **Periodic checks** (DNS always runs first):
|
||||
- DNS checks: every `DNSWATCHER_DNS_INTERVAL` (default 1h). Also
|
||||
re-run before every TLS check cycle to ensure fresh IPs.
|
||||
- Port checks: every `DNSWATCHER_DNS_INTERVAL`, after DNS completes.
|
||||
- TLS checks: every `DNSWATCHER_TLS_INTERVAL` (default 12h), after
|
||||
DNS completes.
|
||||
- Port and TLS checks always use freshly resolved IP addresses from
|
||||
the DNS phase that immediately precedes them — never stale IPs
|
||||
from a previous cycle.
|
||||
4. **On change detection**: Send notifications to all configured
|
||||
endpoints, update in-memory state, persist to disk.
|
||||
5. **Shutdown**: Persist final state to disk, complete in-flight
|
||||
|
||||
@@ -57,10 +57,49 @@ type HostnameState struct {
|
||||
// PortState holds the monitoring state for a port.
|
||||
type PortState struct {
|
||||
Open bool `json:"open"`
|
||||
Hostname string `json:"hostname"`
|
||||
Hostnames []string `json:"hostnames"`
|
||||
LastChecked time.Time `json:"lastChecked"`
|
||||
}
|
||||
|
||||
// UnmarshalJSON implements custom unmarshaling to handle both
|
||||
// the old single-hostname format and the new multi-hostname
|
||||
// format for backward compatibility with existing state files.
|
||||
func (ps *PortState) UnmarshalJSON(data []byte) error {
|
||||
// Use an alias to prevent infinite recursion.
|
||||
type portStateAlias struct {
|
||||
Open bool `json:"open"`
|
||||
Hostnames []string `json:"hostnames"`
|
||||
LastChecked time.Time `json:"lastChecked"`
|
||||
}
|
||||
|
||||
var alias portStateAlias
|
||||
|
||||
err := json.Unmarshal(data, &alias)
|
||||
if err != nil {
|
||||
return fmt.Errorf("unmarshaling port state: %w", err)
|
||||
}
|
||||
|
||||
ps.Open = alias.Open
|
||||
ps.Hostnames = alias.Hostnames
|
||||
ps.LastChecked = alias.LastChecked
|
||||
|
||||
// If Hostnames is empty, try reading the old single-hostname
|
||||
// format for backward compatibility.
|
||||
if len(ps.Hostnames) == 0 {
|
||||
var old struct {
|
||||
Hostname string `json:"hostname"`
|
||||
}
|
||||
|
||||
// Best-effort: ignore errors since the main unmarshal
|
||||
// already succeeded.
|
||||
if json.Unmarshal(data, &old) == nil && old.Hostname != "" {
|
||||
ps.Hostnames = []string{old.Hostname}
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// CertificateState holds TLS certificate monitoring state.
|
||||
type CertificateState struct {
|
||||
CommonName string `json:"commonName"`
|
||||
@@ -263,6 +302,27 @@ func (s *State) GetPortState(key string) (*PortState, bool) {
|
||||
return ps, ok
|
||||
}
|
||||
|
||||
// DeletePortState removes a port state entry.
|
||||
func (s *State) DeletePortState(key string) {
|
||||
s.mu.Lock()
|
||||
defer s.mu.Unlock()
|
||||
|
||||
delete(s.snapshot.Ports, key)
|
||||
}
|
||||
|
||||
// GetAllPortKeys returns all port state keys.
|
||||
func (s *State) GetAllPortKeys() []string {
|
||||
s.mu.RLock()
|
||||
defer s.mu.RUnlock()
|
||||
|
||||
keys := make([]string, 0, len(s.snapshot.Ports))
|
||||
for k := range s.snapshot.Ports {
|
||||
keys = append(keys, k)
|
||||
}
|
||||
|
||||
return keys
|
||||
}
|
||||
|
||||
// SetCertificateState updates the state for a certificate.
|
||||
func (s *State) SetCertificateState(
|
||||
key string,
|
||||
|
||||
@@ -143,9 +143,16 @@ func (w *Watcher) Run(ctx context.Context) {
|
||||
|
||||
return
|
||||
case <-dnsTicker.C:
|
||||
w.runDNSAndPortChecks(ctx)
|
||||
w.runDNSChecks(ctx)
|
||||
|
||||
w.checkAllPorts(ctx)
|
||||
w.saveState()
|
||||
case <-tlsTicker.C:
|
||||
// Run DNS first so TLS checks use freshly
|
||||
// resolved IP addresses, not stale ones from
|
||||
// a previous cycle.
|
||||
w.runDNSChecks(ctx)
|
||||
|
||||
w.runTLSChecks(ctx)
|
||||
w.saveState()
|
||||
}
|
||||
@@ -153,10 +160,26 @@ func (w *Watcher) Run(ctx context.Context) {
|
||||
}
|
||||
|
||||
// RunOnce performs a single complete monitoring cycle.
|
||||
// DNS checks run first so that port and TLS checks use
|
||||
// freshly resolved IP addresses. Port checks run before
|
||||
// TLS because TLS checks only target IPs with an open
|
||||
// port 443.
|
||||
func (w *Watcher) RunOnce(ctx context.Context) {
|
||||
w.detectFirstRun()
|
||||
w.runDNSAndPortChecks(ctx)
|
||||
|
||||
// Phase 1: DNS resolution must complete first so that
|
||||
// subsequent checks use fresh IP addresses.
|
||||
w.runDNSChecks(ctx)
|
||||
|
||||
// Phase 2: Port checks populate port state that TLS
|
||||
// checks depend on (TLS only targets IPs where port
|
||||
// 443 is open).
|
||||
w.checkAllPorts(ctx)
|
||||
|
||||
// Phase 3: TLS checks use fresh DNS IPs and current
|
||||
// port state.
|
||||
w.runTLSChecks(ctx)
|
||||
|
||||
w.saveState()
|
||||
w.firstRun = false
|
||||
}
|
||||
@@ -173,7 +196,11 @@ func (w *Watcher) detectFirstRun() {
|
||||
}
|
||||
}
|
||||
|
||||
func (w *Watcher) runDNSAndPortChecks(ctx context.Context) {
|
||||
// runDNSChecks performs DNS resolution for all configured domains
|
||||
// and hostnames, updating state with freshly resolved records.
|
||||
// This must complete before port or TLS checks run so those
|
||||
// checks operate on current IP addresses.
|
||||
func (w *Watcher) runDNSChecks(ctx context.Context) {
|
||||
for _, domain := range w.config.Domains {
|
||||
w.checkDomain(ctx, domain)
|
||||
}
|
||||
@@ -181,8 +208,6 @@ func (w *Watcher) runDNSAndPortChecks(ctx context.Context) {
|
||||
for _, hostname := range w.config.Hostnames {
|
||||
w.checkHostname(ctx, hostname)
|
||||
}
|
||||
|
||||
w.checkAllPorts(ctx)
|
||||
}
|
||||
|
||||
func (w *Watcher) checkDomain(
|
||||
@@ -450,24 +475,94 @@ func (w *Watcher) detectInconsistencies(
|
||||
}
|
||||
|
||||
func (w *Watcher) checkAllPorts(ctx context.Context) {
|
||||
for _, hostname := range w.config.Hostnames {
|
||||
w.checkPortsForHostname(ctx, hostname)
|
||||
// Phase 1: Build current IP:port → hostname associations
|
||||
// from fresh DNS data.
|
||||
associations := w.buildPortAssociations()
|
||||
|
||||
// Phase 2: Check each unique IP:port and update state
|
||||
// with the full set of associated hostnames.
|
||||
for key, hostnames := range associations {
|
||||
ip, port := parsePortKey(key)
|
||||
if port == 0 {
|
||||
continue
|
||||
}
|
||||
|
||||
w.checkSinglePort(ctx, ip, port, hostnames)
|
||||
}
|
||||
|
||||
for _, domain := range w.config.Domains {
|
||||
w.checkPortsForHostname(ctx, domain)
|
||||
}
|
||||
// Phase 3: Remove port state entries that no longer have
|
||||
// any hostname referencing them.
|
||||
w.cleanupStalePorts(associations)
|
||||
}
|
||||
|
||||
func (w *Watcher) checkPortsForHostname(
|
||||
ctx context.Context,
|
||||
hostname string,
|
||||
) {
|
||||
ips := w.collectIPs(hostname)
|
||||
// buildPortAssociations constructs a map from IP:port keys to
|
||||
// the sorted set of hostnames currently resolving to that IP.
|
||||
func (w *Watcher) buildPortAssociations() map[string][]string {
|
||||
assoc := make(map[string]map[string]bool)
|
||||
|
||||
for _, ip := range ips {
|
||||
for _, port := range monitoredPorts {
|
||||
w.checkSinglePort(ctx, ip, port, hostname)
|
||||
allNames := make(
|
||||
[]string, 0,
|
||||
len(w.config.Hostnames)+len(w.config.Domains),
|
||||
)
|
||||
allNames = append(allNames, w.config.Hostnames...)
|
||||
allNames = append(allNames, w.config.Domains...)
|
||||
|
||||
for _, name := range allNames {
|
||||
ips := w.collectIPs(name)
|
||||
for _, ip := range ips {
|
||||
for _, port := range monitoredPorts {
|
||||
key := fmt.Sprintf("%s:%d", ip, port)
|
||||
if assoc[key] == nil {
|
||||
assoc[key] = make(map[string]bool)
|
||||
}
|
||||
|
||||
assoc[key][name] = true
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
result := make(map[string][]string, len(assoc))
|
||||
for key, set := range assoc {
|
||||
hostnames := make([]string, 0, len(set))
|
||||
for h := range set {
|
||||
hostnames = append(hostnames, h)
|
||||
}
|
||||
|
||||
sort.Strings(hostnames)
|
||||
|
||||
result[key] = hostnames
|
||||
}
|
||||
|
||||
return result
|
||||
}
|
||||
|
||||
// parsePortKey splits an "ip:port" key into its components.
|
||||
func parsePortKey(key string) (string, int) {
|
||||
lastColon := strings.LastIndex(key, ":")
|
||||
if lastColon < 0 {
|
||||
return key, 0
|
||||
}
|
||||
|
||||
ip := key[:lastColon]
|
||||
|
||||
var p int
|
||||
|
||||
_, err := fmt.Sscanf(key[lastColon+1:], "%d", &p)
|
||||
if err != nil {
|
||||
return ip, 0
|
||||
}
|
||||
|
||||
return ip, p
|
||||
}
|
||||
|
||||
// cleanupStalePorts removes port state entries that are no
|
||||
// longer referenced by any hostname in the current DNS data.
|
||||
func (w *Watcher) cleanupStalePorts(
|
||||
currentAssociations map[string][]string,
|
||||
) {
|
||||
for _, key := range w.state.GetAllPortKeys() {
|
||||
if _, exists := currentAssociations[key]; !exists {
|
||||
w.state.DeletePortState(key)
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -504,7 +599,7 @@ func (w *Watcher) checkSinglePort(
|
||||
ctx context.Context,
|
||||
ip string,
|
||||
port int,
|
||||
hostname string,
|
||||
hostnames []string,
|
||||
) {
|
||||
result, err := w.portCheck.CheckPort(ctx, ip, port)
|
||||
if err != nil {
|
||||
@@ -529,8 +624,8 @@ func (w *Watcher) checkSinglePort(
|
||||
}
|
||||
|
||||
msg := fmt.Sprintf(
|
||||
"Host: %s\nAddress: %s\nPort now %s",
|
||||
hostname, key, stateStr,
|
||||
"Hosts: %s\nAddress: %s\nPort now %s",
|
||||
strings.Join(hostnames, ", "), key, stateStr,
|
||||
)
|
||||
|
||||
w.notify.SendNotification(
|
||||
@@ -543,7 +638,7 @@ func (w *Watcher) checkSinglePort(
|
||||
|
||||
w.state.SetPortState(key, &state.PortState{
|
||||
Open: result.Open,
|
||||
Hostname: hostname,
|
||||
Hostnames: hostnames,
|
||||
LastChecked: now,
|
||||
})
|
||||
}
|
||||
|
||||
@@ -682,6 +682,80 @@ func TestGracefulShutdown(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func setupHostnameIP(
|
||||
deps *testDeps,
|
||||
hostname, ip string,
|
||||
) {
|
||||
deps.resolver.allRecords[hostname] = map[string]map[string][]string{
|
||||
"ns1.example.com.": {"A": {ip}},
|
||||
}
|
||||
deps.portChecker.results[ip+":80"] = true
|
||||
deps.portChecker.results[ip+":443"] = true
|
||||
deps.tlsChecker.certs[ip+":"+hostname] = &tlscheck.CertificateInfo{
|
||||
CommonName: hostname,
|
||||
Issuer: "DigiCert",
|
||||
NotAfter: time.Now().Add(90 * 24 * time.Hour),
|
||||
SubjectAlternativeNames: []string{hostname},
|
||||
}
|
||||
}
|
||||
|
||||
func updateHostnameIP(deps *testDeps, hostname, ip string) {
|
||||
deps.resolver.mu.Lock()
|
||||
deps.resolver.allRecords[hostname] = map[string]map[string][]string{
|
||||
"ns1.example.com.": {"A": {ip}},
|
||||
}
|
||||
deps.resolver.mu.Unlock()
|
||||
|
||||
deps.portChecker.mu.Lock()
|
||||
deps.portChecker.results[ip+":80"] = true
|
||||
deps.portChecker.results[ip+":443"] = true
|
||||
deps.portChecker.mu.Unlock()
|
||||
|
||||
deps.tlsChecker.mu.Lock()
|
||||
deps.tlsChecker.certs[ip+":"+hostname] = &tlscheck.CertificateInfo{
|
||||
CommonName: hostname,
|
||||
Issuer: "DigiCert",
|
||||
NotAfter: time.Now().Add(90 * 24 * time.Hour),
|
||||
SubjectAlternativeNames: []string{hostname},
|
||||
}
|
||||
deps.tlsChecker.mu.Unlock()
|
||||
}
|
||||
|
||||
func TestDNSRunsBeforePortAndTLSChecks(t *testing.T) {
|
||||
t.Parallel()
|
||||
|
||||
cfg := defaultTestConfig(t)
|
||||
cfg.Hostnames = []string{"www.example.com"}
|
||||
|
||||
w, deps := newTestWatcher(t, cfg)
|
||||
|
||||
setupHostnameIP(deps, "www.example.com", "10.0.0.1")
|
||||
|
||||
ctx := t.Context()
|
||||
w.RunOnce(ctx)
|
||||
|
||||
snap := deps.state.GetSnapshot()
|
||||
if _, ok := snap.Ports["10.0.0.1:80"]; !ok {
|
||||
t.Fatal("expected port state for 10.0.0.1:80")
|
||||
}
|
||||
|
||||
// DNS changes to a new IP; port and TLS must pick it up.
|
||||
updateHostnameIP(deps, "www.example.com", "10.0.0.2")
|
||||
|
||||
w.RunOnce(ctx)
|
||||
|
||||
snap = deps.state.GetSnapshot()
|
||||
|
||||
if _, ok := snap.Ports["10.0.0.2:80"]; !ok {
|
||||
t.Error("port check used stale DNS: missing 10.0.0.2:80")
|
||||
}
|
||||
|
||||
certKey := "10.0.0.2:443:www.example.com"
|
||||
if _, ok := snap.Certificates[certKey]; !ok {
|
||||
t.Error("TLS check used stale DNS: missing " + certKey)
|
||||
}
|
||||
}
|
||||
|
||||
func TestNSFailureAndRecovery(t *testing.T) {
|
||||
t.Parallel()
|
||||
|
||||
|
||||
Reference in New Issue
Block a user