Compare commits
6 Commits
fix/issue-
...
fix/67-rea
| Author | SHA1 | Date | |
|---|---|---|---|
| 83643f84ab | |||
| 6ebc4ffa04 | |||
| b20e75459f | |||
| ee14bd01ae | |||
| 2835c2dc43 | |||
| 299a36660f |
@@ -1,6 +1,6 @@
|
||||
.git
|
||||
bin
|
||||
data
|
||||
.env
|
||||
.DS_Store
|
||||
*.exe
|
||||
.git/
|
||||
bin/
|
||||
*.md
|
||||
LICENSE
|
||||
.editorconfig
|
||||
.gitignore
|
||||
|
||||
@@ -8,8 +8,5 @@ charset = utf-8
|
||||
trim_trailing_whitespace = true
|
||||
insert_final_newline = true
|
||||
|
||||
[*.go]
|
||||
indent_style = tab
|
||||
|
||||
[Makefile]
|
||||
indent_style = tab
|
||||
|
||||
21
LICENSE
21
LICENSE
@@ -1,21 +0,0 @@
|
||||
MIT License
|
||||
|
||||
Copyright (c) 2026 sneak
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in all
|
||||
copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
SOFTWARE.
|
||||
25
Makefile
25
Makefile
@@ -1,4 +1,4 @@
|
||||
.PHONY: all build lint fmt fmt-check test check clean docker hooks
|
||||
.PHONY: all build lint fmt fmt-check test check clean hooks docker
|
||||
|
||||
BINARY := dnswatcher
|
||||
VERSION := $(shell git describe --tags --always --dirty 2>/dev/null || echo "dev")
|
||||
@@ -18,25 +18,32 @@ fmt:
|
||||
goimports -w .
|
||||
|
||||
fmt-check:
|
||||
@test -z "$$(gofmt -l .)" || (echo "Files not formatted:" && gofmt -l . && exit 1)
|
||||
@test -z "$$(gofmt -l .)" || (echo "gofmt: files not formatted:" && gofmt -l . && exit 1)
|
||||
|
||||
test:
|
||||
go test -v -race -cover -timeout 30s ./...
|
||||
go test -v -race -timeout 30s -cover ./...
|
||||
|
||||
# Check runs all validation without making changes
|
||||
# Used by CI and Docker build - fails if anything is wrong
|
||||
check: fmt-check lint test
|
||||
check:
|
||||
@echo "==> Checking formatting..."
|
||||
@test -z "$$(gofmt -l .)" || (echo "Files not formatted:" && gofmt -l . && exit 1)
|
||||
@echo "==> Running linter..."
|
||||
golangci-lint run --config .golangci.yml ./...
|
||||
@echo "==> Running tests..."
|
||||
go test -v -race -timeout 30s ./...
|
||||
@echo "==> Building..."
|
||||
go build -ldflags "$(LDFLAGS)" -o /dev/null ./cmd/dnswatcher
|
||||
@echo "==> All checks passed!"
|
||||
|
||||
docker:
|
||||
docker build .
|
||||
clean:
|
||||
rm -rf bin/
|
||||
|
||||
hooks:
|
||||
@printf '#!/bin/sh\nset -e\nmake check\n' > .git/hooks/pre-commit
|
||||
@echo '#!/bin/sh' > .git/hooks/pre-commit
|
||||
@echo 'make check' >> .git/hooks/pre-commit
|
||||
@chmod +x .git/hooks/pre-commit
|
||||
@echo "Pre-commit hook installed."
|
||||
|
||||
clean:
|
||||
rm -rf bin/
|
||||
docker:
|
||||
docker build .
|
||||
|
||||
37
README.md
37
README.md
@@ -1,9 +1,10 @@
|
||||
# dnswatcher
|
||||
|
||||
dnswatcher is a pre-1.0 Go daemon by [@sneak](https://sneak.berlin) that monitors DNS records, TCP port availability, and TLS certificates, delivering real-time change notifications via Slack, Mattermost, and ntfy webhooks.
|
||||
|
||||
> ⚠️ Pre-1.0 software. APIs, configuration, and behavior may change without notice.
|
||||
|
||||
dnswatcher is a production DNS and infrastructure monitoring daemon written in
|
||||
Go. It watches configured DNS domains and hostnames for changes, monitors TCP
|
||||
dnswatcher watches configured DNS domains and hostnames for changes, monitors TCP
|
||||
port availability, tracks TLS certificate expiry, and delivers real-time
|
||||
notifications via Slack, Mattermost, and/or ntfy webhooks.
|
||||
|
||||
@@ -109,8 +110,8 @@ includes:
|
||||
- **NS recoveries**: Which nameserver recovered, which hostname/domain.
|
||||
- **NS inconsistencies**: Which nameservers disagree, what each one
|
||||
returned, which hostname affected.
|
||||
- **Port changes**: Which IP:port, old state, new state, associated
|
||||
hostname.
|
||||
- **Port changes**: Which IP:port, old state, new state, all associated
|
||||
hostnames.
|
||||
- **TLS expiry warnings**: Which certificate, days remaining, CN,
|
||||
issuer, associated hostname and IP.
|
||||
- **TLS certificate changes**: Old and new CN/issuer/SANs, associated
|
||||
@@ -289,12 +290,12 @@ not as a merged view, to enable inconsistency detection.
|
||||
"ports": {
|
||||
"93.184.216.34:80": {
|
||||
"open": true,
|
||||
"hostname": "www.example.com",
|
||||
"hostnames": ["www.example.com"],
|
||||
"lastChecked": "2026-02-19T12:00:00Z"
|
||||
},
|
||||
"93.184.216.34:443": {
|
||||
"open": true,
|
||||
"hostname": "www.example.com",
|
||||
"hostnames": ["www.example.com"],
|
||||
"lastChecked": "2026-02-19T12:00:00Z"
|
||||
}
|
||||
},
|
||||
@@ -327,13 +328,10 @@ tracks reachability:
|
||||
|
||||
```sh
|
||||
make build # Build binary to bin/dnswatcher
|
||||
make test # Run tests with race detector and 30s timeout
|
||||
make test # Run tests with race detector
|
||||
make lint # Run golangci-lint
|
||||
make fmt # Format code (writes)
|
||||
make fmt-check # Read-only format check
|
||||
make check # Run all checks (fmt-check, lint, test, build)
|
||||
make docker # Build Docker image
|
||||
make hooks # Install pre-commit hook
|
||||
make fmt # Format code
|
||||
make check # Run all checks (format, lint, test, build)
|
||||
make clean # Remove build artifacts
|
||||
```
|
||||
|
||||
@@ -369,9 +367,15 @@ docker run -d \
|
||||
triggering change notifications).
|
||||
2. **Initial check**: Immediately perform all DNS, port, and TLS checks
|
||||
on startup.
|
||||
3. **Periodic checks**:
|
||||
- DNS and port checks: every `DNSWATCHER_DNS_INTERVAL` (default 1h).
|
||||
- TLS checks: every `DNSWATCHER_TLS_INTERVAL` (default 12h).
|
||||
3. **Periodic checks** (DNS always runs first):
|
||||
- DNS checks: every `DNSWATCHER_DNS_INTERVAL` (default 1h). Also
|
||||
re-run before every TLS check cycle to ensure fresh IPs.
|
||||
- Port checks: every `DNSWATCHER_DNS_INTERVAL`, after DNS completes.
|
||||
- TLS checks: every `DNSWATCHER_TLS_INTERVAL` (default 12h), after
|
||||
DNS completes.
|
||||
- Port and TLS checks always use freshly resolved IP addresses from
|
||||
the DNS phase that immediately precedes them — never stale IPs
|
||||
from a previous cycle.
|
||||
4. **On change detection**: Send notifications to all configured
|
||||
endpoints, update in-memory state, persist to disk.
|
||||
5. **Shutdown**: Persist final state to disk, complete in-flight
|
||||
@@ -397,7 +401,8 @@ Viper for configuration.
|
||||
|
||||
## License
|
||||
|
||||
MIT — see [LICENSE](LICENSE).
|
||||
License has not yet been chosen for this project. Pending decision by the
|
||||
author (MIT, GPL, or WTFPL).
|
||||
|
||||
## Author
|
||||
|
||||
|
||||
60
internal/handlers/domains.go
Normal file
60
internal/handlers/domains.go
Normal file
@@ -0,0 +1,60 @@
|
||||
package handlers
|
||||
|
||||
import (
|
||||
"net/http"
|
||||
"time"
|
||||
)
|
||||
|
||||
// domainResponse is the JSON representation of a single configured
// domain in the GET /api/v1/domains response.
type domainResponse struct {
	// Domain is the domain name exactly as it appears in the configuration.
	Domain string `json:"domain"`
	// Nameservers is copied from the domain's state entry; omitted from
	// the JSON output when empty.
	Nameservers []string `json:"nameservers,omitempty"`
	// LastChecked is an RFC 3339 timestamp; left empty (and omitted) when
	// the state entry has a zero check time or no entry exists yet.
	LastChecked string `json:"lastChecked,omitempty"`
	// Status is "pending" until the state snapshot contains an entry for
	// the domain, and "ok" afterwards.
	Status string `json:"status"`
}
|
||||
|
||||
// domainsResponse is the top-level JSON body returned by
// GET /api/v1/domains.
type domainsResponse struct {
	// Domains holds one entry per configured domain. The handler always
	// allocates this slice, so it encodes as [] rather than null when no
	// domains are configured.
	Domains []domainResponse `json:"domains"`
}
|
||||
|
||||
// HandleDomains returns the configured domains and their status.
|
||||
func (h *Handlers) HandleDomains() http.HandlerFunc {
|
||||
return func(
|
||||
writer http.ResponseWriter,
|
||||
request *http.Request,
|
||||
) {
|
||||
configured := h.config.Domains
|
||||
snapshot := h.state.GetSnapshot()
|
||||
|
||||
domains := make(
|
||||
[]domainResponse, 0, len(configured),
|
||||
)
|
||||
|
||||
for _, domain := range configured {
|
||||
dr := domainResponse{
|
||||
Domain: domain,
|
||||
Status: "pending",
|
||||
}
|
||||
|
||||
ds, ok := snapshot.Domains[domain]
|
||||
if ok {
|
||||
dr.Nameservers = ds.Nameservers
|
||||
dr.Status = "ok"
|
||||
|
||||
if !ds.LastChecked.IsZero() {
|
||||
dr.LastChecked = ds.LastChecked.
|
||||
Format(time.RFC3339)
|
||||
}
|
||||
}
|
||||
|
||||
domains = append(domains, dr)
|
||||
}
|
||||
|
||||
h.respondJSON(
|
||||
writer, request,
|
||||
&domainsResponse{Domains: domains},
|
||||
http.StatusOK,
|
||||
)
|
||||
}
|
||||
}
|
||||
@@ -8,9 +8,11 @@ import (
|
||||
|
||||
"go.uber.org/fx"
|
||||
|
||||
"sneak.berlin/go/dnswatcher/internal/config"
|
||||
"sneak.berlin/go/dnswatcher/internal/globals"
|
||||
"sneak.berlin/go/dnswatcher/internal/healthcheck"
|
||||
"sneak.berlin/go/dnswatcher/internal/logger"
|
||||
"sneak.berlin/go/dnswatcher/internal/state"
|
||||
)
|
||||
|
||||
// Params contains dependencies for Handlers.
|
||||
@@ -20,6 +22,8 @@ type Params struct {
|
||||
Logger *logger.Logger
|
||||
Globals *globals.Globals
|
||||
Healthcheck *healthcheck.Healthcheck
|
||||
State *state.State
|
||||
Config *config.Config
|
||||
}
|
||||
|
||||
// Handlers provides HTTP request handlers.
|
||||
@@ -28,6 +32,8 @@ type Handlers struct {
|
||||
params *Params
|
||||
globals *globals.Globals
|
||||
hc *healthcheck.Healthcheck
|
||||
state *state.State
|
||||
config *config.Config
|
||||
}
|
||||
|
||||
// New creates a new Handlers instance.
|
||||
@@ -37,6 +43,8 @@ func New(_ fx.Lifecycle, params Params) (*Handlers, error) {
|
||||
params: ¶ms,
|
||||
globals: params.Globals,
|
||||
hc: params.Healthcheck,
|
||||
state: params.State,
|
||||
config: params.Config,
|
||||
}, nil
|
||||
}
|
||||
|
||||
@@ -44,7 +52,7 @@ func (h *Handlers) respondJSON(
|
||||
writer http.ResponseWriter,
|
||||
_ *http.Request,
|
||||
data any,
|
||||
status int,
|
||||
status int, //nolint:unparam // general-purpose utility; status varies in future use
|
||||
) {
|
||||
writer.Header().Set("Content-Type", "application/json")
|
||||
writer.WriteHeader(status)
|
||||
|
||||
120
internal/handlers/hostnames.go
Normal file
120
internal/handlers/hostnames.go
Normal file
@@ -0,0 +1,120 @@
|
||||
package handlers
|
||||
|
||||
import (
|
||||
"net/http"
|
||||
"sort"
|
||||
"time"
|
||||
|
||||
"sneak.berlin/go/dnswatcher/internal/state"
|
||||
)
|
||||
|
||||
// nameserverRecordResponse is the JSON representation of what a single
// nameserver returned for a hostname.
type nameserverRecordResponse struct {
	// Nameserver is the key under which the records are stored in the
	// hostname's per-nameserver state map.
	Nameserver string `json:"nameserver"`
	// Records is copied verbatim from the state entry.
	// NOTE(review): presumably keyed by DNS record type (e.g. "A") with
	// the record values as the slice — confirm against the state package.
	Records map[string][]string `json:"records"`
	// Status is copied verbatim from the state entry.
	Status string `json:"status"`
	// Error is copied verbatim from the state entry; omitted when empty.
	Error string `json:"error,omitempty"`
	// LastChecked is an RFC 3339 timestamp; omitted when the state entry
	// has a zero check time.
	LastChecked string `json:"lastChecked,omitempty"`
}
|
||||
|
||||
// hostnameResponse is the JSON representation of a single configured
// hostname in the GET /api/v1/hostnames response.
type hostnameResponse struct {
	// Hostname is the hostname exactly as it appears in the configuration.
	Hostname string `json:"hostname"`
	// Nameservers lists the per-nameserver records sorted by nameserver
	// name; omitted from the JSON output when empty.
	Nameservers []nameserverRecordResponse `json:"nameservers,omitempty"`
	// LastChecked is an RFC 3339 timestamp; left empty (and omitted) when
	// the state entry has a zero check time or no entry exists yet.
	LastChecked string `json:"lastChecked,omitempty"`
	// Status is "pending" until the state snapshot contains an entry for
	// the hostname, and "ok" afterwards.
	Status string `json:"status"`
}
|
||||
|
||||
// hostnamesResponse is the top-level JSON body returned by
// GET /api/v1/hostnames.
type hostnamesResponse struct {
	// Hostnames holds one entry per configured hostname. The handler
	// always allocates this slice, so it encodes as [] rather than null
	// when no hostnames are configured.
	Hostnames []hostnameResponse `json:"hostnames"`
}
|
||||
|
||||
// HandleHostnames returns the configured hostnames and their status.
|
||||
func (h *Handlers) HandleHostnames() http.HandlerFunc {
|
||||
return func(
|
||||
writer http.ResponseWriter,
|
||||
request *http.Request,
|
||||
) {
|
||||
configured := h.config.Hostnames
|
||||
snapshot := h.state.GetSnapshot()
|
||||
|
||||
hostnames := make(
|
||||
[]hostnameResponse, 0, len(configured),
|
||||
)
|
||||
|
||||
for _, hostname := range configured {
|
||||
hr := hostnameResponse{
|
||||
Hostname: hostname,
|
||||
Status: "pending",
|
||||
}
|
||||
|
||||
hs, ok := snapshot.Hostnames[hostname]
|
||||
if ok {
|
||||
hr.Status = "ok"
|
||||
|
||||
if !hs.LastChecked.IsZero() {
|
||||
hr.LastChecked = hs.LastChecked.
|
||||
Format(time.RFC3339)
|
||||
}
|
||||
|
||||
hr.Nameservers = buildNameserverRecords(
|
||||
hs,
|
||||
)
|
||||
}
|
||||
|
||||
hostnames = append(hostnames, hr)
|
||||
}
|
||||
|
||||
h.respondJSON(
|
||||
writer, request,
|
||||
&hostnamesResponse{Hostnames: hostnames},
|
||||
http.StatusOK,
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
// buildNameserverRecords converts the per-nameserver state map
|
||||
// into a sorted slice for deterministic JSON output.
|
||||
func buildNameserverRecords(
|
||||
hs *state.HostnameState,
|
||||
) []nameserverRecordResponse {
|
||||
if hs.RecordsByNameserver == nil {
|
||||
return nil
|
||||
}
|
||||
|
||||
nsNames := make(
|
||||
[]string, 0, len(hs.RecordsByNameserver),
|
||||
)
|
||||
for ns := range hs.RecordsByNameserver {
|
||||
nsNames = append(nsNames, ns)
|
||||
}
|
||||
|
||||
sort.Strings(nsNames)
|
||||
|
||||
records := make(
|
||||
[]nameserverRecordResponse, 0, len(nsNames),
|
||||
)
|
||||
|
||||
for _, ns := range nsNames {
|
||||
nsr := hs.RecordsByNameserver[ns]
|
||||
|
||||
entry := nameserverRecordResponse{
|
||||
Nameserver: ns,
|
||||
Records: nsr.Records,
|
||||
Status: nsr.Status,
|
||||
Error: nsr.Error,
|
||||
}
|
||||
|
||||
if !nsr.LastChecked.IsZero() {
|
||||
entry.LastChecked = nsr.LastChecked.
|
||||
Format(time.RFC3339)
|
||||
}
|
||||
|
||||
records = append(records, entry)
|
||||
}
|
||||
|
||||
return records
|
||||
}
|
||||
@@ -4,7 +4,6 @@ import (
|
||||
"context"
|
||||
"errors"
|
||||
"fmt"
|
||||
"math/rand"
|
||||
"net"
|
||||
"sort"
|
||||
"strings"
|
||||
@@ -42,22 +41,6 @@ func rootServerList() []string {
|
||||
}
|
||||
}
|
||||
|
||||
const maxRootServers = 3
|
||||
|
||||
// randomRootServers returns a shuffled subset of root servers.
|
||||
func randomRootServers() []string {
|
||||
all := rootServerList()
|
||||
rand.Shuffle(len(all), func(i, j int) {
|
||||
all[i], all[j] = all[j], all[i]
|
||||
})
|
||||
|
||||
if len(all) > maxRootServers {
|
||||
return all[:maxRootServers]
|
||||
}
|
||||
|
||||
return all
|
||||
}
|
||||
|
||||
func checkCtx(ctx context.Context) error {
|
||||
err := ctx.Err()
|
||||
if err != nil {
|
||||
@@ -244,7 +227,7 @@ func (r *Resolver) followDelegation(
|
||||
|
||||
authNS := extractNSSet(resp.Ns)
|
||||
if len(authNS) == 0 {
|
||||
return r.resolveNSRecursive(ctx, domain)
|
||||
return r.resolveNSIterative(ctx, domain)
|
||||
}
|
||||
|
||||
glue := extractGlue(resp.Extra)
|
||||
@@ -308,60 +291,84 @@ func (r *Resolver) resolveNSIPs(
|
||||
return ips
|
||||
}
|
||||
|
||||
// resolveNSRecursive queries for NS records using recursive
|
||||
// resolution as a fallback for intercepted environments.
|
||||
func (r *Resolver) resolveNSRecursive(
|
||||
// resolveNSIterative queries for NS records using iterative
|
||||
// resolution as a fallback when followDelegation finds no
|
||||
// authoritative answer in the delegation chain.
|
||||
func (r *Resolver) resolveNSIterative(
|
||||
ctx context.Context,
|
||||
domain string,
|
||||
) ([]string, error) {
|
||||
domain = dns.Fqdn(domain)
|
||||
msg := new(dns.Msg)
|
||||
msg.SetQuestion(domain, dns.TypeNS)
|
||||
msg.RecursionDesired = true
|
||||
if checkCtx(ctx) != nil {
|
||||
return nil, ErrContextCanceled
|
||||
}
|
||||
|
||||
for _, ip := range randomRootServers() {
|
||||
domain = dns.Fqdn(domain)
|
||||
servers := rootServerList()
|
||||
|
||||
for range maxDelegation {
|
||||
if checkCtx(ctx) != nil {
|
||||
return nil, ErrContextCanceled
|
||||
}
|
||||
|
||||
addr := net.JoinHostPort(ip, "53")
|
||||
|
||||
resp, _, err := r.client.ExchangeContext(ctx, msg, addr)
|
||||
resp, err := r.queryServers(
|
||||
ctx, servers, domain, dns.TypeNS,
|
||||
)
|
||||
if err != nil {
|
||||
continue
|
||||
return nil, err
|
||||
}
|
||||
|
||||
nsNames := extractNSSet(resp.Answer)
|
||||
if len(nsNames) > 0 {
|
||||
return nsNames, nil
|
||||
}
|
||||
|
||||
// Follow delegation.
|
||||
authNS := extractNSSet(resp.Ns)
|
||||
if len(authNS) == 0 {
|
||||
break
|
||||
}
|
||||
|
||||
glue := extractGlue(resp.Extra)
|
||||
nextServers := glueIPs(authNS, glue)
|
||||
|
||||
if len(nextServers) == 0 {
|
||||
break
|
||||
}
|
||||
|
||||
servers = nextServers
|
||||
}
|
||||
|
||||
return nil, ErrNoNameservers
|
||||
}
|
||||
|
||||
// resolveARecord resolves a hostname to IPv4 addresses.
|
||||
// resolveARecord resolves a hostname to IPv4 addresses using
|
||||
// iterative resolution through the delegation chain.
|
||||
func (r *Resolver) resolveARecord(
|
||||
ctx context.Context,
|
||||
hostname string,
|
||||
) ([]string, error) {
|
||||
hostname = dns.Fqdn(hostname)
|
||||
msg := new(dns.Msg)
|
||||
msg.SetQuestion(hostname, dns.TypeA)
|
||||
msg.RecursionDesired = true
|
||||
if checkCtx(ctx) != nil {
|
||||
return nil, ErrContextCanceled
|
||||
}
|
||||
|
||||
for _, ip := range randomRootServers() {
|
||||
hostname = dns.Fqdn(hostname)
|
||||
servers := rootServerList()
|
||||
|
||||
for range maxDelegation {
|
||||
if checkCtx(ctx) != nil {
|
||||
return nil, ErrContextCanceled
|
||||
}
|
||||
|
||||
addr := net.JoinHostPort(ip, "53")
|
||||
|
||||
resp, _, err := r.client.ExchangeContext(ctx, msg, addr)
|
||||
resp, err := r.queryServers(
|
||||
ctx, servers, hostname, dns.TypeA,
|
||||
)
|
||||
if err != nil {
|
||||
continue
|
||||
return nil, fmt.Errorf(
|
||||
"resolving %s: %w", hostname, err,
|
||||
)
|
||||
}
|
||||
|
||||
// Check for A records in the answer section.
|
||||
var ips []string
|
||||
|
||||
for _, rr := range resp.Answer {
|
||||
@@ -373,6 +380,24 @@ func (r *Resolver) resolveARecord(
|
||||
if len(ips) > 0 {
|
||||
return ips, nil
|
||||
}
|
||||
|
||||
// Follow delegation if present.
|
||||
authNS := extractNSSet(resp.Ns)
|
||||
if len(authNS) == 0 {
|
||||
break
|
||||
}
|
||||
|
||||
glue := extractGlue(resp.Extra)
|
||||
nextServers := glueIPs(authNS, glue)
|
||||
|
||||
if len(nextServers) == 0 {
|
||||
// No usable server addresses came from the glue
|
||||
// records. Resolving the NS names here could recurse
|
||||
// without bound, so stop following this delegation.
|
||||
break
|
||||
}
|
||||
|
||||
servers = nextServers
|
||||
}
|
||||
|
||||
return nil, fmt.Errorf(
|
||||
@@ -402,7 +427,7 @@ func (r *Resolver) FindAuthoritativeNameservers(
|
||||
candidate := strings.Join(labels[i:], ".") + "."
|
||||
|
||||
nsNames, err := r.followDelegation(
|
||||
ctx, candidate, randomRootServers(),
|
||||
ctx, candidate, rootServerList(),
|
||||
)
|
||||
if err == nil && len(nsNames) > 0 {
|
||||
sort.Strings(nsNames)
|
||||
|
||||
@@ -28,6 +28,8 @@ func (s *Server) SetupRoutes() {
|
||||
// API v1 routes
|
||||
s.router.Route("/api/v1", func(r chi.Router) {
|
||||
r.Get("/status", s.handlers.HandleStatus())
|
||||
r.Get("/domains", s.handlers.HandleDomains())
|
||||
r.Get("/hostnames", s.handlers.HandleHostnames())
|
||||
})
|
||||
|
||||
// Metrics endpoint (optional, with basic auth)
|
||||
|
||||
@@ -57,10 +57,49 @@ type HostnameState struct {
|
||||
// PortState holds the monitoring state for a port.
|
||||
type PortState struct {
|
||||
Open bool `json:"open"`
|
||||
Hostname string `json:"hostname"`
|
||||
Hostnames []string `json:"hostnames"`
|
||||
LastChecked time.Time `json:"lastChecked"`
|
||||
}
|
||||
|
||||
// UnmarshalJSON implements custom unmarshaling to handle both
|
||||
// the old single-hostname format and the new multi-hostname
|
||||
// format for backward compatibility with existing state files.
|
||||
func (ps *PortState) UnmarshalJSON(data []byte) error {
|
||||
// Use an alias to prevent infinite recursion.
|
||||
type portStateAlias struct {
|
||||
Open bool `json:"open"`
|
||||
Hostnames []string `json:"hostnames"`
|
||||
LastChecked time.Time `json:"lastChecked"`
|
||||
}
|
||||
|
||||
var alias portStateAlias
|
||||
|
||||
err := json.Unmarshal(data, &alias)
|
||||
if err != nil {
|
||||
return fmt.Errorf("unmarshaling port state: %w", err)
|
||||
}
|
||||
|
||||
ps.Open = alias.Open
|
||||
ps.Hostnames = alias.Hostnames
|
||||
ps.LastChecked = alias.LastChecked
|
||||
|
||||
// If Hostnames is empty, try reading the old single-hostname
|
||||
// format for backward compatibility.
|
||||
if len(ps.Hostnames) == 0 {
|
||||
var old struct {
|
||||
Hostname string `json:"hostname"`
|
||||
}
|
||||
|
||||
// Best-effort: ignore errors since the main unmarshal
|
||||
// already succeeded.
|
||||
if json.Unmarshal(data, &old) == nil && old.Hostname != "" {
|
||||
ps.Hostnames = []string{old.Hostname}
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// CertificateState holds TLS certificate monitoring state.
|
||||
type CertificateState struct {
|
||||
CommonName string `json:"commonName"`
|
||||
@@ -263,6 +302,27 @@ func (s *State) GetPortState(key string) (*PortState, bool) {
|
||||
return ps, ok
|
||||
}
|
||||
|
||||
// DeletePortState removes the port state entry stored under key. The
// state's lock is held exclusively for the duration of the call.
// Deleting a key that is not present is a no-op.
func (s *State) DeletePortState(key string) {
	s.mu.Lock()
	defer s.mu.Unlock()

	delete(s.snapshot.Ports, key)
}
|
||||
|
||||
// GetAllPortKeys returns all port state keys.
|
||||
func (s *State) GetAllPortKeys() []string {
|
||||
s.mu.RLock()
|
||||
defer s.mu.RUnlock()
|
||||
|
||||
keys := make([]string, 0, len(s.snapshot.Ports))
|
||||
for k := range s.snapshot.Ports {
|
||||
keys = append(keys, k)
|
||||
}
|
||||
|
||||
return keys
|
||||
}
|
||||
|
||||
// SetCertificateState updates the state for a certificate.
|
||||
func (s *State) SetCertificateState(
|
||||
key string,
|
||||
|
||||
@@ -72,13 +72,15 @@ func New(
|
||||
}
|
||||
|
||||
lifecycle.Append(fx.Hook{
|
||||
OnStart: func(startCtx context.Context) error {
|
||||
ctx, cancel := context.WithCancel(
|
||||
context.WithoutCancel(startCtx),
|
||||
)
|
||||
OnStart: func(_ context.Context) error {
|
||||
// Use context.Background() — the fx startup context
|
||||
// expires after startup completes, so deriving from it
|
||||
// would cancel the watcher immediately. The watcher's
|
||||
// lifetime is controlled by w.cancel in OnStop.
|
||||
ctx, cancel := context.WithCancel(context.Background())
|
||||
w.cancel = cancel
|
||||
|
||||
go w.Run(ctx)
|
||||
go w.Run(ctx) //nolint:contextcheck // intentionally not derived from startCtx
|
||||
|
||||
return nil
|
||||
},
|
||||
@@ -141,9 +143,16 @@ func (w *Watcher) Run(ctx context.Context) {
|
||||
|
||||
return
|
||||
case <-dnsTicker.C:
|
||||
w.runDNSAndPortChecks(ctx)
|
||||
w.runDNSChecks(ctx)
|
||||
|
||||
w.checkAllPorts(ctx)
|
||||
w.saveState()
|
||||
case <-tlsTicker.C:
|
||||
// Run DNS first so TLS checks use freshly
|
||||
// resolved IP addresses, not stale ones from
|
||||
// a previous cycle.
|
||||
w.runDNSChecks(ctx)
|
||||
|
||||
w.runTLSChecks(ctx)
|
||||
w.saveState()
|
||||
}
|
||||
@@ -151,10 +160,26 @@ func (w *Watcher) Run(ctx context.Context) {
|
||||
}
|
||||
|
||||
// RunOnce performs a single complete monitoring cycle.
|
||||
// DNS checks run first so that port and TLS checks use
|
||||
// freshly resolved IP addresses. Port checks run before
|
||||
// TLS because TLS checks only target IPs with an open
|
||||
// port 443.
|
||||
func (w *Watcher) RunOnce(ctx context.Context) {
|
||||
w.detectFirstRun()
|
||||
w.runDNSAndPortChecks(ctx)
|
||||
|
||||
// Phase 1: DNS resolution must complete first so that
|
||||
// subsequent checks use fresh IP addresses.
|
||||
w.runDNSChecks(ctx)
|
||||
|
||||
// Phase 2: Port checks populate port state that TLS
|
||||
// checks depend on (TLS only targets IPs where port
|
||||
// 443 is open).
|
||||
w.checkAllPorts(ctx)
|
||||
|
||||
// Phase 3: TLS checks use fresh DNS IPs and current
|
||||
// port state.
|
||||
w.runTLSChecks(ctx)
|
||||
|
||||
w.saveState()
|
||||
w.firstRun = false
|
||||
}
|
||||
@@ -171,7 +196,11 @@ func (w *Watcher) detectFirstRun() {
|
||||
}
|
||||
}
|
||||
|
||||
func (w *Watcher) runDNSAndPortChecks(ctx context.Context) {
|
||||
// runDNSChecks performs DNS resolution for all configured domains
|
||||
// and hostnames, updating state with freshly resolved records.
|
||||
// This must complete before port or TLS checks run so those
|
||||
// checks operate on current IP addresses.
|
||||
func (w *Watcher) runDNSChecks(ctx context.Context) {
|
||||
for _, domain := range w.config.Domains {
|
||||
w.checkDomain(ctx, domain)
|
||||
}
|
||||
@@ -179,8 +208,6 @@ func (w *Watcher) runDNSAndPortChecks(ctx context.Context) {
|
||||
for _, hostname := range w.config.Hostnames {
|
||||
w.checkHostname(ctx, hostname)
|
||||
}
|
||||
|
||||
w.checkAllPorts(ctx)
|
||||
}
|
||||
|
||||
func (w *Watcher) checkDomain(
|
||||
@@ -448,24 +475,94 @@ func (w *Watcher) detectInconsistencies(
|
||||
}
|
||||
|
||||
func (w *Watcher) checkAllPorts(ctx context.Context) {
|
||||
for _, hostname := range w.config.Hostnames {
|
||||
w.checkPortsForHostname(ctx, hostname)
|
||||
// Phase 1: Build current IP:port → hostname associations
|
||||
// from fresh DNS data.
|
||||
associations := w.buildPortAssociations()
|
||||
|
||||
// Phase 2: Check each unique IP:port and update state
|
||||
// with the full set of associated hostnames.
|
||||
for key, hostnames := range associations {
|
||||
ip, port := parsePortKey(key)
|
||||
if port == 0 {
|
||||
continue
|
||||
}
|
||||
|
||||
w.checkSinglePort(ctx, ip, port, hostnames)
|
||||
}
|
||||
|
||||
for _, domain := range w.config.Domains {
|
||||
w.checkPortsForHostname(ctx, domain)
|
||||
}
|
||||
// Phase 3: Remove port state entries that no longer have
|
||||
// any hostname referencing them.
|
||||
w.cleanupStalePorts(associations)
|
||||
}
|
||||
|
||||
func (w *Watcher) checkPortsForHostname(
|
||||
ctx context.Context,
|
||||
hostname string,
|
||||
) {
|
||||
ips := w.collectIPs(hostname)
|
||||
// buildPortAssociations constructs a map from IP:port keys to
|
||||
// the sorted set of hostnames currently resolving to that IP.
|
||||
func (w *Watcher) buildPortAssociations() map[string][]string {
|
||||
assoc := make(map[string]map[string]bool)
|
||||
|
||||
for _, ip := range ips {
|
||||
for _, port := range monitoredPorts {
|
||||
w.checkSinglePort(ctx, ip, port, hostname)
|
||||
allNames := make(
|
||||
[]string, 0,
|
||||
len(w.config.Hostnames)+len(w.config.Domains),
|
||||
)
|
||||
allNames = append(allNames, w.config.Hostnames...)
|
||||
allNames = append(allNames, w.config.Domains...)
|
||||
|
||||
for _, name := range allNames {
|
||||
ips := w.collectIPs(name)
|
||||
for _, ip := range ips {
|
||||
for _, port := range monitoredPorts {
|
||||
key := fmt.Sprintf("%s:%d", ip, port)
|
||||
if assoc[key] == nil {
|
||||
assoc[key] = make(map[string]bool)
|
||||
}
|
||||
|
||||
assoc[key][name] = true
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
result := make(map[string][]string, len(assoc))
|
||||
for key, set := range assoc {
|
||||
hostnames := make([]string, 0, len(set))
|
||||
for h := range set {
|
||||
hostnames = append(hostnames, h)
|
||||
}
|
||||
|
||||
sort.Strings(hostnames)
|
||||
|
||||
result[key] = hostnames
|
||||
}
|
||||
|
||||
return result
|
||||
}
|
||||
|
||||
// parsePortKey splits an "ip:port" key into its address and port.
//
// The split happens at the last colon, so an address that itself
// contains colons (IPv6) stays intact. The returned port is 0 whenever
// the key has no colon or the text after the last colon is not a
// canonical decimal TCP port in the range 1-65535; callers treat 0 as
// "invalid key". In those cases the first return value is still the
// text before the last colon (or the whole key when there is no colon).
func parsePortKey(key string) (string, int) {
	sep := strings.LastIndex(key, ":")
	if sep < 0 {
		return key, 0
	}

	ip, portText := key[:sep], key[sep+1:]

	var port int

	_, err := fmt.Sscanf(portText, "%d", &port)
	if err != nil {
		return ip, 0
	}

	// Sscanf stops at the first character that cannot belong to the
	// number and reports success, so "80abc" would scan as 80; it also
	// accepts signs. Require the text to be exactly the canonical
	// rendering of an in-range port to reject all of those.
	if port < 1 || port > 65535 || fmt.Sprint(port) != portText {
		return ip, 0
	}

	return ip, port
}
|
||||
|
||||
// cleanupStalePorts removes port state entries that are no
|
||||
// longer referenced by any hostname in the current DNS data.
|
||||
func (w *Watcher) cleanupStalePorts(
|
||||
currentAssociations map[string][]string,
|
||||
) {
|
||||
for _, key := range w.state.GetAllPortKeys() {
|
||||
if _, exists := currentAssociations[key]; !exists {
|
||||
w.state.DeletePortState(key)
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -502,7 +599,7 @@ func (w *Watcher) checkSinglePort(
|
||||
ctx context.Context,
|
||||
ip string,
|
||||
port int,
|
||||
hostname string,
|
||||
hostnames []string,
|
||||
) {
|
||||
result, err := w.portCheck.CheckPort(ctx, ip, port)
|
||||
if err != nil {
|
||||
@@ -527,8 +624,8 @@ func (w *Watcher) checkSinglePort(
|
||||
}
|
||||
|
||||
msg := fmt.Sprintf(
|
||||
"Host: %s\nAddress: %s\nPort now %s",
|
||||
hostname, key, stateStr,
|
||||
"Hosts: %s\nAddress: %s\nPort now %s",
|
||||
strings.Join(hostnames, ", "), key, stateStr,
|
||||
)
|
||||
|
||||
w.notify.SendNotification(
|
||||
@@ -541,7 +638,7 @@ func (w *Watcher) checkSinglePort(
|
||||
|
||||
w.state.SetPortState(key, &state.PortState{
|
||||
Open: result.Open,
|
||||
Hostname: hostname,
|
||||
Hostnames: hostnames,
|
||||
LastChecked: now,
|
||||
})
|
||||
}
|
||||
|
||||
@@ -682,6 +682,80 @@ func TestGracefulShutdown(t *testing.T) {
|
||||
}
|
||||
}
|
||||
|
||||
func setupHostnameIP(
|
||||
deps *testDeps,
|
||||
hostname, ip string,
|
||||
) {
|
||||
deps.resolver.allRecords[hostname] = map[string]map[string][]string{
|
||||
"ns1.example.com.": {"A": {ip}},
|
||||
}
|
||||
deps.portChecker.results[ip+":80"] = true
|
||||
deps.portChecker.results[ip+":443"] = true
|
||||
deps.tlsChecker.certs[ip+":"+hostname] = &tlscheck.CertificateInfo{
|
||||
CommonName: hostname,
|
||||
Issuer: "DigiCert",
|
||||
NotAfter: time.Now().Add(90 * 24 * time.Hour),
|
||||
SubjectAlternativeNames: []string{hostname},
|
||||
}
|
||||
}
|
||||
|
||||
func updateHostnameIP(deps *testDeps, hostname, ip string) {
|
||||
deps.resolver.mu.Lock()
|
||||
deps.resolver.allRecords[hostname] = map[string]map[string][]string{
|
||||
"ns1.example.com.": {"A": {ip}},
|
||||
}
|
||||
deps.resolver.mu.Unlock()
|
||||
|
||||
deps.portChecker.mu.Lock()
|
||||
deps.portChecker.results[ip+":80"] = true
|
||||
deps.portChecker.results[ip+":443"] = true
|
||||
deps.portChecker.mu.Unlock()
|
||||
|
||||
deps.tlsChecker.mu.Lock()
|
||||
deps.tlsChecker.certs[ip+":"+hostname] = &tlscheck.CertificateInfo{
|
||||
CommonName: hostname,
|
||||
Issuer: "DigiCert",
|
||||
NotAfter: time.Now().Add(90 * 24 * time.Hour),
|
||||
SubjectAlternativeNames: []string{hostname},
|
||||
}
|
||||
deps.tlsChecker.mu.Unlock()
|
||||
}
|
||||
|
||||
func TestDNSRunsBeforePortAndTLSChecks(t *testing.T) {
|
||||
t.Parallel()
|
||||
|
||||
cfg := defaultTestConfig(t)
|
||||
cfg.Hostnames = []string{"www.example.com"}
|
||||
|
||||
w, deps := newTestWatcher(t, cfg)
|
||||
|
||||
setupHostnameIP(deps, "www.example.com", "10.0.0.1")
|
||||
|
||||
ctx := t.Context()
|
||||
w.RunOnce(ctx)
|
||||
|
||||
snap := deps.state.GetSnapshot()
|
||||
if _, ok := snap.Ports["10.0.0.1:80"]; !ok {
|
||||
t.Fatal("expected port state for 10.0.0.1:80")
|
||||
}
|
||||
|
||||
// DNS changes to a new IP; port and TLS must pick it up.
|
||||
updateHostnameIP(deps, "www.example.com", "10.0.0.2")
|
||||
|
||||
w.RunOnce(ctx)
|
||||
|
||||
snap = deps.state.GetSnapshot()
|
||||
|
||||
if _, ok := snap.Ports["10.0.0.2:80"]; !ok {
|
||||
t.Error("port check used stale DNS: missing 10.0.0.2:80")
|
||||
}
|
||||
|
||||
certKey := "10.0.0.2:443:www.example.com"
|
||||
if _, ok := snap.Certificates[certKey]; !ok {
|
||||
t.Error("TLS check used stale DNS: missing " + certKey)
|
||||
}
|
||||
}
|
||||
|
||||
func TestNSFailureAndRecovery(t *testing.T) {
|
||||
t.Parallel()
|
||||
|
||||
|
||||
Reference in New Issue
Block a user