4 Commits

Author SHA1 Message Date
clawbot
c7e5c03239 Update README for REPO_POLICIES compliance
All checks were successful
check / check (push) Successful in 34s
- Add project description first line with name, purpose, category, author
- Replace CONVENTIONS.md reference with REPO_POLICIES.md
- Add License section (pending author choice)
- Add Author section
2026-03-01 07:51:39 -08:00
clawbot
b6f3ed314e Remove CLAUDE.md and CONVENTIONS.md
Both files are superseded by REPO_POLICIES.md which is the
authoritative standard from sneak/prompts.
2026-03-01 07:51:35 -08:00
clawbot
da312f5c7b Add fmt-check, hooks, docker targets and test timeout to Makefile
- Add fmt-check target for read-only format checking
- Add hooks target to install pre-commit hook running make check
- Add docker target to run docker build
- Add 30-second timeout to test and check targets
- Add new targets to .PHONY list
2026-03-01 07:51:32 -08:00
clawbot
80c6236cf1 Add REPO_POLICIES.md, .editorconfig, and .dockerignore
- REPO_POLICIES.md fetched from sneak/prompts (last_modified: 2026-02-22)
- .editorconfig fetched from sneak/prompts
- .dockerignore with standard Go exclusions
2026-03-01 07:51:27 -08:00
9 changed files with 75 additions and 527 deletions

View File

@@ -110,8 +110,8 @@ includes:
- **NS recoveries**: Which nameserver recovered, which hostname/domain.
- **NS inconsistencies**: Which nameservers disagree, what each one
returned, which hostname affected.
- **Port changes**: Which IP:port, old state, new state, all associated
hostnames.
- **Port changes**: Which IP:port, old state, new state, associated
hostname.
- **TLS expiry warnings**: Which certificate, days remaining, CN,
issuer, associated hostname and IP.
- **TLS certificate changes**: Old and new CN/issuer/SANs, associated
@@ -290,12 +290,12 @@ not as a merged view, to enable inconsistency detection.
"ports": {
"93.184.216.34:80": {
"open": true,
"hostnames": ["www.example.com"],
"hostname": "www.example.com",
"lastChecked": "2026-02-19T12:00:00Z"
},
"93.184.216.34:443": {
"open": true,
"hostnames": ["www.example.com"],
"hostname": "www.example.com",
"lastChecked": "2026-02-19T12:00:00Z"
}
},
@@ -367,15 +367,9 @@ docker run -d \
triggering change notifications).
2. **Initial check**: Immediately perform all DNS, port, and TLS checks
on startup.
3. **Periodic checks** (DNS always runs first):
- DNS checks: every `DNSWATCHER_DNS_INTERVAL` (default 1h). Also
re-run before every TLS check cycle to ensure fresh IPs.
- Port checks: every `DNSWATCHER_DNS_INTERVAL`, after DNS completes.
- TLS checks: every `DNSWATCHER_TLS_INTERVAL` (default 12h), after
DNS completes.
- Port and TLS checks always use freshly resolved IP addresses from
the DNS phase that immediately precedes them — never stale IPs
from a previous cycle.
3. **Periodic checks**:
- DNS and port checks: every `DNSWATCHER_DNS_INTERVAL` (default 1h).
- TLS checks: every `DNSWATCHER_TLS_INTERVAL` (default 12h).
4. **On change detection**: Send notifications to all configured
endpoints, update in-memory state, persist to disk.
5. **Shutdown**: Persist final state to disk, complete in-flight

View File

@@ -1,60 +0,0 @@
package handlers
import (
"net/http"
"time"
)
// domainResponse is one element of the "domains" array returned by
// HandleDomains. It describes a single configured domain.
type domainResponse struct {
// Domain is the configured domain name.
Domain string `json:"domain"`
// Nameservers is copied from the state entry; omitted when empty.
Nameservers []string `json:"nameservers,omitempty"`
// LastChecked is an RFC 3339 timestamp; omitted when the state
// entry has a zero check time or the domain has no state entry.
LastChecked string `json:"lastChecked,omitempty"`
// Status is "pending" when the domain has no state entry and
// "ok" when it has one.
Status string `json:"status"`
}
// domainsResponse is the top-level JSON body for
// GET /api/v1/domains. It wraps the per-domain entries under the
// "domains" key.
type domainsResponse struct {
Domains []domainResponse `json:"domains"`
}
// HandleDomains builds the handler that lists every configured
// domain together with whatever the current state snapshot holds
// for it.
//
// A domain without a snapshot entry is reported with status
// "pending". A domain with an entry is reported as "ok" along with
// its nameservers and, when the check time is non-zero, that time
// formatted as RFC 3339. The response is always sent with
// http.StatusOK.
func (h *Handlers) HandleDomains() http.HandlerFunc {
	return func(
		writer http.ResponseWriter,
		request *http.Request,
	) {
		names := h.config.Domains
		snap := h.state.GetSnapshot()

		out := make([]domainResponse, 0, len(names))

		for _, name := range names {
			entry := domainResponse{Domain: name, Status: "pending"}

			if known, found := snap.Domains[name]; found {
				entry.Nameservers = known.Nameservers
				entry.Status = "ok"

				if checked := known.LastChecked; !checked.IsZero() {
					entry.LastChecked = checked.Format(time.RFC3339)
				}
			}

			out = append(out, entry)
		}

		body := &domainsResponse{Domains: out}
		h.respondJSON(writer, request, body, http.StatusOK)
	}
}

View File

@@ -8,11 +8,9 @@ import (
"go.uber.org/fx"
"sneak.berlin/go/dnswatcher/internal/config"
"sneak.berlin/go/dnswatcher/internal/globals"
"sneak.berlin/go/dnswatcher/internal/healthcheck"
"sneak.berlin/go/dnswatcher/internal/logger"
"sneak.berlin/go/dnswatcher/internal/state"
)
// Params contains dependencies for Handlers.
@@ -22,8 +20,6 @@ type Params struct {
Logger *logger.Logger
Globals *globals.Globals
Healthcheck *healthcheck.Healthcheck
State *state.State
Config *config.Config
}
// Handlers provides HTTP request handlers.
@@ -32,8 +28,6 @@ type Handlers struct {
params *Params
globals *globals.Globals
hc *healthcheck.Healthcheck
state *state.State
config *config.Config
}
// New creates a new Handlers instance.
@@ -43,8 +37,6 @@ func New(_ fx.Lifecycle, params Params) (*Handlers, error) {
params: &params,
globals: params.Globals,
hc: params.Healthcheck,
state: params.State,
config: params.Config,
}, nil
}
@@ -52,7 +44,7 @@ func (h *Handlers) respondJSON(
writer http.ResponseWriter,
_ *http.Request,
data any,
status int, //nolint:unparam // general-purpose utility; status varies in future use
status int,
) {
writer.Header().Set("Content-Type", "application/json")
writer.WriteHeader(status)

View File

@@ -1,120 +0,0 @@
package handlers
import (
"net/http"
"sort"
"time"
"sneak.berlin/go/dnswatcher/internal/state"
)
// nameserverRecordResponse is one nameserver's view of a hostname
// in the API response. buildNameserverRecords fills it from the
// per-nameserver state entry.
type nameserverRecordResponse struct {
// Nameserver is the key of the per-nameserver state map.
Nameserver string `json:"nameserver"`
// Records is passed through unchanged from state.
// NOTE(review): presumably keyed by record type — confirm.
Records map[string][]string `json:"records"`
// Status and Error are copied verbatim from the state entry;
// Error is omitted from the JSON when empty.
Status string `json:"status"`
Error string `json:"error,omitempty"`
// LastChecked is an RFC 3339 timestamp; omitted when the state
// entry has a zero check time.
LastChecked string `json:"lastChecked,omitempty"`
}
// hostnameResponse is one element of the "hostnames" array returned
// by HandleHostnames. It describes a single configured hostname.
type hostnameResponse struct {
// Hostname is the configured hostname.
Hostname string `json:"hostname"`
// Nameservers holds the per-nameserver records sorted by
// nameserver name; omitted when empty.
Nameservers []nameserverRecordResponse `json:"nameservers,omitempty"`
// LastChecked is an RFC 3339 timestamp; omitted when the state
// entry has a zero check time or the hostname has no state entry.
LastChecked string `json:"lastChecked,omitempty"`
// Status is "pending" when the hostname has no state entry and
// "ok" when it has one.
Status string `json:"status"`
}
// hostnamesResponse is the top-level JSON body for
// GET /api/v1/hostnames. It wraps the per-hostname entries under
// the "hostnames" key.
type hostnamesResponse struct {
Hostnames []hostnameResponse `json:"hostnames"`
}
// HandleHostnames builds the handler that lists every configured
// hostname together with whatever the current state snapshot holds
// for it.
//
// A hostname without a snapshot entry is reported with status
// "pending". A hostname with an entry is reported as "ok" along
// with its per-nameserver records and, when the check time is
// non-zero, that time formatted as RFC 3339. The response is always
// sent with http.StatusOK.
func (h *Handlers) HandleHostnames() http.HandlerFunc {
	return func(
		writer http.ResponseWriter,
		request *http.Request,
	) {
		names := h.config.Hostnames
		snap := h.state.GetSnapshot()

		out := make([]hostnameResponse, 0, len(names))

		for _, name := range names {
			entry := hostnameResponse{Hostname: name, Status: "pending"}

			if known, found := snap.Hostnames[name]; found {
				entry.Status = "ok"

				if checked := known.LastChecked; !checked.IsZero() {
					entry.LastChecked = checked.Format(time.RFC3339)
				}

				entry.Nameservers = buildNameserverRecords(known)
			}

			out = append(out, entry)
		}

		body := &hostnamesResponse{Hostnames: out}
		h.respondJSON(writer, request, body, http.StatusOK)
	}
}
// buildNameserverRecords flattens the per-nameserver map held in hs
// into a slice ordered by nameserver name, so that the JSON output
// does not depend on map iteration order.
//
// It returns nil when hs has no per-nameserver map. Each entry's
// LastChecked is left empty when the stored check time is zero.
func buildNameserverRecords(
	hs *state.HostnameState,
) []nameserverRecordResponse {
	byNS := hs.RecordsByNameserver
	if byNS == nil {
		return nil
	}

	ordered := make([]string, 0, len(byNS))
	for name := range byNS {
		ordered = append(ordered, name)
	}

	sort.Strings(ordered)

	out := make([]nameserverRecordResponse, 0, len(ordered))

	for _, name := range ordered {
		rec := byNS[name]

		var checked string
		if !rec.LastChecked.IsZero() {
			checked = rec.LastChecked.Format(time.RFC3339)
		}

		out = append(out, nameserverRecordResponse{
			Nameserver:  name,
			Records:     rec.Records,
			Status:      rec.Status,
			Error:       rec.Error,
			LastChecked: checked,
		})
	}

	return out
}

View File

@@ -4,6 +4,7 @@ import (
"context"
"errors"
"fmt"
"math/rand"
"net"
"sort"
"strings"
@@ -41,6 +42,22 @@ func rootServerList() []string {
}
}
// maxRootServers caps how many root servers randomRootServers
// returns.
const maxRootServers = 3

// randomRootServers shuffles the list returned by rootServerList
// and returns at most maxRootServers entries from the front of the
// shuffled list.
func randomRootServers() []string {
	servers := rootServerList()

	rand.Shuffle(len(servers), func(a, b int) {
		servers[a], servers[b] = servers[b], servers[a]
	})

	if len(servers) <= maxRootServers {
		return servers
	}

	return servers[:maxRootServers]
}
func checkCtx(ctx context.Context) error {
err := ctx.Err()
if err != nil {
@@ -227,7 +244,7 @@ func (r *Resolver) followDelegation(
authNS := extractNSSet(resp.Ns)
if len(authNS) == 0 {
return r.resolveNSIterative(ctx, domain)
return r.resolveNSRecursive(ctx, domain)
}
glue := extractGlue(resp.Extra)
@@ -291,84 +308,60 @@ func (r *Resolver) resolveNSIPs(
return ips
}
// resolveNSIterative queries for NS records using iterative
// resolution as a fallback when followDelegation finds no
// authoritative answer in the delegation chain.
func (r *Resolver) resolveNSIterative(
// resolveNSRecursive queries for NS records using recursive
// resolution as a fallback for intercepted environments.
func (r *Resolver) resolveNSRecursive(
ctx context.Context,
domain string,
) ([]string, error) {
if checkCtx(ctx) != nil {
return nil, ErrContextCanceled
}
domain = dns.Fqdn(domain)
servers := rootServerList()
msg := new(dns.Msg)
msg.SetQuestion(domain, dns.TypeNS)
msg.RecursionDesired = true
for range maxDelegation {
for _, ip := range randomRootServers() {
if checkCtx(ctx) != nil {
return nil, ErrContextCanceled
}
resp, err := r.queryServers(
ctx, servers, domain, dns.TypeNS,
)
addr := net.JoinHostPort(ip, "53")
resp, _, err := r.client.ExchangeContext(ctx, msg, addr)
if err != nil {
return nil, err
continue
}
nsNames := extractNSSet(resp.Answer)
if len(nsNames) > 0 {
return nsNames, nil
}
// Follow delegation.
authNS := extractNSSet(resp.Ns)
if len(authNS) == 0 {
break
}
glue := extractGlue(resp.Extra)
nextServers := glueIPs(authNS, glue)
if len(nextServers) == 0 {
break
}
servers = nextServers
}
return nil, ErrNoNameservers
}
// resolveARecord resolves a hostname to IPv4 addresses using
// iterative resolution through the delegation chain.
// resolveARecord resolves a hostname to IPv4 addresses.
func (r *Resolver) resolveARecord(
ctx context.Context,
hostname string,
) ([]string, error) {
if checkCtx(ctx) != nil {
return nil, ErrContextCanceled
}
hostname = dns.Fqdn(hostname)
servers := rootServerList()
msg := new(dns.Msg)
msg.SetQuestion(hostname, dns.TypeA)
msg.RecursionDesired = true
for range maxDelegation {
for _, ip := range randomRootServers() {
if checkCtx(ctx) != nil {
return nil, ErrContextCanceled
}
resp, err := r.queryServers(
ctx, servers, hostname, dns.TypeA,
)
addr := net.JoinHostPort(ip, "53")
resp, _, err := r.client.ExchangeContext(ctx, msg, addr)
if err != nil {
return nil, fmt.Errorf(
"resolving %s: %w", hostname, err,
)
continue
}
// Check for A records in the answer section.
var ips []string
for _, rr := range resp.Answer {
@@ -380,24 +373,6 @@ func (r *Resolver) resolveARecord(
if len(ips) > 0 {
return ips, nil
}
// Follow delegation if present.
authNS := extractNSSet(resp.Ns)
if len(authNS) == 0 {
break
}
glue := extractGlue(resp.Extra)
nextServers := glueIPs(authNS, glue)
if len(nextServers) == 0 {
// Resolve NS IPs iteratively — but guard
// against infinite recursion by using only
// already-resolved servers.
break
}
servers = nextServers
}
return nil, fmt.Errorf(
@@ -427,7 +402,7 @@ func (r *Resolver) FindAuthoritativeNameservers(
candidate := strings.Join(labels[i:], ".") + "."
nsNames, err := r.followDelegation(
ctx, candidate, rootServerList(),
ctx, candidate, randomRootServers(),
)
if err == nil && len(nsNames) > 0 {
sort.Strings(nsNames)

View File

@@ -28,8 +28,6 @@ func (s *Server) SetupRoutes() {
// API v1 routes
s.router.Route("/api/v1", func(r chi.Router) {
r.Get("/status", s.handlers.HandleStatus())
r.Get("/domains", s.handlers.HandleDomains())
r.Get("/hostnames", s.handlers.HandleHostnames())
})
// Metrics endpoint (optional, with basic auth)

View File

@@ -57,49 +57,10 @@ type HostnameState struct {
// PortState holds the monitoring state for a port.
type PortState struct {
Open bool `json:"open"`
Hostnames []string `json:"hostnames"`
Hostname string `json:"hostname"`
LastChecked time.Time `json:"lastChecked"`
}
// UnmarshalJSON decodes a PortState from either state-file layout:
// the multi-hostname form ("hostnames": [...]) or the older
// single-hostname form ("hostname": "..."). The older form is only
// consulted when the decoded "hostnames" list is empty.
//
// It returns an error only when the primary decode fails.
func (ps *PortState) UnmarshalJSON(data []byte) error {
	// Decode into an anonymous struct rather than PortState itself;
	// decoding into PortState would call this method again.
	var current struct {
		Open        bool      `json:"open"`
		Hostnames   []string  `json:"hostnames"`
		LastChecked time.Time `json:"lastChecked"`
	}

	if err := json.Unmarshal(data, &current); err != nil {
		return fmt.Errorf("unmarshaling port state: %w", err)
	}

	ps.Open = current.Open
	ps.Hostnames = current.Hostnames
	ps.LastChecked = current.LastChecked

	if len(ps.Hostnames) > 0 {
		return nil
	}

	// Legacy fallback. This is best-effort: a failure here is
	// deliberately ignored because the primary decode succeeded.
	var legacy struct {
		Hostname string `json:"hostname"`
	}

	err := json.Unmarshal(data, &legacy)
	if err == nil && legacy.Hostname != "" {
		ps.Hostnames = []string{legacy.Hostname}
	}

	return nil
}
// CertificateState holds TLS certificate monitoring state.
type CertificateState struct {
CommonName string `json:"commonName"`
@@ -302,27 +263,6 @@ func (s *State) GetPortState(key string) (*PortState, bool) {
return ps, ok
}
// DeletePortState drops the entry stored under key from the
// snapshot's port map while holding the write lock. Deleting a key
// that is not present is a no-op.
func (s *State) DeletePortState(key string) {
	s.mu.Lock()
	delete(s.snapshot.Ports, key)
	s.mu.Unlock()
}
// GetAllPortKeys returns a freshly allocated slice holding every
// key of the snapshot's port map, collected under the read lock.
// The order follows map iteration and is therefore unspecified.
func (s *State) GetAllPortKeys() []string {
	s.mu.RLock()
	defer s.mu.RUnlock()

	ports := s.snapshot.Ports
	out := make([]string, 0, len(ports))

	for name := range ports {
		out = append(out, name)
	}

	return out
}
// SetCertificateState updates the state for a certificate.
func (s *State) SetCertificateState(
key string,

View File

@@ -72,15 +72,13 @@ func New(
}
lifecycle.Append(fx.Hook{
OnStart: func(_ context.Context) error {
// Use context.Background() — the fx startup context
// expires after startup completes, so deriving from it
// would cancel the watcher immediately. The watcher's
// lifetime is controlled by w.cancel in OnStop.
ctx, cancel := context.WithCancel(context.Background())
OnStart: func(startCtx context.Context) error {
ctx, cancel := context.WithCancel(
context.WithoutCancel(startCtx),
)
w.cancel = cancel
go w.Run(ctx) //nolint:contextcheck // intentionally not derived from startCtx
go w.Run(ctx)
return nil
},
@@ -143,16 +141,9 @@ func (w *Watcher) Run(ctx context.Context) {
return
case <-dnsTicker.C:
w.runDNSChecks(ctx)
w.checkAllPorts(ctx)
w.runDNSAndPortChecks(ctx)
w.saveState()
case <-tlsTicker.C:
// Run DNS first so TLS checks use freshly
// resolved IP addresses, not stale ones from
// a previous cycle.
w.runDNSChecks(ctx)
w.runTLSChecks(ctx)
w.saveState()
}
@@ -160,26 +151,10 @@ func (w *Watcher) Run(ctx context.Context) {
}
// RunOnce performs a single complete monitoring cycle.
// DNS checks run first so that port and TLS checks use
// freshly resolved IP addresses. Port checks run before
// TLS because TLS checks only target IPs with an open
// port 443.
func (w *Watcher) RunOnce(ctx context.Context) {
w.detectFirstRun()
// Phase 1: DNS resolution must complete first so that
// subsequent checks use fresh IP addresses.
w.runDNSChecks(ctx)
// Phase 2: Port checks populate port state that TLS
// checks depend on (TLS only targets IPs where port
// 443 is open).
w.checkAllPorts(ctx)
// Phase 3: TLS checks use fresh DNS IPs and current
// port state.
w.runDNSAndPortChecks(ctx)
w.runTLSChecks(ctx)
w.saveState()
w.firstRun = false
}
@@ -196,11 +171,7 @@ func (w *Watcher) detectFirstRun() {
}
}
// runDNSChecks performs DNS resolution for all configured domains
// and hostnames, updating state with freshly resolved records.
// This must complete before port or TLS checks run so those
// checks operate on current IP addresses.
func (w *Watcher) runDNSChecks(ctx context.Context) {
func (w *Watcher) runDNSAndPortChecks(ctx context.Context) {
for _, domain := range w.config.Domains {
w.checkDomain(ctx, domain)
}
@@ -208,6 +179,8 @@ func (w *Watcher) runDNSChecks(ctx context.Context) {
for _, hostname := range w.config.Hostnames {
w.checkHostname(ctx, hostname)
}
w.checkAllPorts(ctx)
}
func (w *Watcher) checkDomain(
@@ -475,94 +448,24 @@ func (w *Watcher) detectInconsistencies(
}
func (w *Watcher) checkAllPorts(ctx context.Context) {
// Phase 1: Build current IP:port → hostname associations
// from fresh DNS data.
associations := w.buildPortAssociations()
// Phase 2: Check each unique IP:port and update state
// with the full set of associated hostnames.
for key, hostnames := range associations {
ip, port := parsePortKey(key)
if port == 0 {
continue
}
w.checkSinglePort(ctx, ip, port, hostnames)
for _, hostname := range w.config.Hostnames {
w.checkPortsForHostname(ctx, hostname)
}
// Phase 3: Remove port state entries that no longer have
// any hostname referencing them.
w.cleanupStalePorts(associations)
for _, domain := range w.config.Domains {
w.checkPortsForHostname(ctx, domain)
}
}
// buildPortAssociations maps every "ip:port" key to the sorted,
// de-duplicated list of configured names (hostnames first, then
// domains) whose collected IPs include that address. One key is
// produced per IP for each entry in monitoredPorts.
func (w *Watcher) buildPortAssociations() map[string][]string {
	names := make(
		[]string, 0,
		len(w.config.Hostnames)+len(w.config.Domains),
	)
	names = append(names, w.config.Hostnames...)
	names = append(names, w.config.Domains...)

	// First pass: gather a set of names per key so that a name
	// reaching the same key twice is recorded once.
	sets := make(map[string]map[string]bool)

	for _, name := range names {
		for _, ip := range w.collectIPs(name) {
			for _, port := range monitoredPorts {
				key := fmt.Sprintf("%s:%d", ip, port)

				members, ok := sets[key]
				if !ok {
					members = make(map[string]bool)
					sets[key] = members
				}

				members[name] = true
			}
		}
	}

	// Second pass: turn each set into a sorted slice.
	out := make(map[string][]string, len(sets))

	for key, members := range sets {
		list := make([]string, 0, len(members))
		for name := range members {
			list = append(list, name)
		}

		sort.Strings(list)
		out[key] = list
	}

	return out
}
// parsePortKey splits an "ip:port" key at its last colon and
// returns the address part and the numeric port.
//
// The port is returned as 0 when the key contains no colon (the
// whole key is then returned as the address) or when the text after
// the last colon does not scan as a decimal integer.
func parsePortKey(key string) (string, int) {
	idx := strings.LastIndex(key, ":")
	if idx < 0 {
		return key, 0
	}

	host, portText := key[:idx], key[idx+1:]

	var port int
	if _, err := fmt.Sscanf(portText, "%d", &port); err != nil {
		return host, 0
	}

	return host, port
}
// cleanupStalePorts removes port state entries that are no
// longer referenced by any hostname in the current DNS data.
func (w *Watcher) cleanupStalePorts(
currentAssociations map[string][]string,
func (w *Watcher) checkPortsForHostname(
ctx context.Context,
hostname string,
) {
for _, key := range w.state.GetAllPortKeys() {
if _, exists := currentAssociations[key]; !exists {
w.state.DeletePortState(key)
ips := w.collectIPs(hostname)
for _, ip := range ips {
for _, port := range monitoredPorts {
w.checkSinglePort(ctx, ip, port, hostname)
}
}
}
@@ -599,7 +502,7 @@ func (w *Watcher) checkSinglePort(
ctx context.Context,
ip string,
port int,
hostnames []string,
hostname string,
) {
result, err := w.portCheck.CheckPort(ctx, ip, port)
if err != nil {
@@ -624,8 +527,8 @@ func (w *Watcher) checkSinglePort(
}
msg := fmt.Sprintf(
"Hosts: %s\nAddress: %s\nPort now %s",
strings.Join(hostnames, ", "), key, stateStr,
"Host: %s\nAddress: %s\nPort now %s",
hostname, key, stateStr,
)
w.notify.SendNotification(
@@ -638,7 +541,7 @@ func (w *Watcher) checkSinglePort(
w.state.SetPortState(key, &state.PortState{
Open: result.Open,
Hostnames: hostnames,
Hostname: hostname,
LastChecked: now,
})
}

View File

@@ -682,80 +682,6 @@ func TestGracefulShutdown(t *testing.T) {
}
}
// setupHostnameIP seeds the test doubles so that hostname resolves
// to ip via a single nameserver, ports 80 and 443 on ip report
// open, and a certificate for hostname expiring in 90 days is
// served at ip. It writes the doubles' maps directly without
// taking their mutexes.
func setupHostnameIP(
	deps *testDeps,
	hostname, ip string,
) {
	deps.resolver.allRecords[hostname] = map[string]map[string][]string{
		"ns1.example.com.": {"A": {ip}},
	}

	for _, suffix := range []string{":80", ":443"} {
		deps.portChecker.results[ip+suffix] = true
	}

	cert := &tlscheck.CertificateInfo{
		CommonName:              hostname,
		Issuer:                  "DigiCert",
		NotAfter:                time.Now().Add(90 * 24 * time.Hour),
		SubjectAlternativeNames: []string{hostname},
	}
	deps.tlsChecker.certs[ip+":"+hostname] = cert
}
// updateHostnameIP repoints hostname at ip in the test doubles:
// the resolver answers with ip, ports 80 and 443 on ip report open,
// and a certificate for hostname expiring in 90 days is served at
// ip. Each double is modified while holding its own mutex.
func updateHostnameIP(deps *testDeps, hostname, ip string) {
	records := map[string]map[string][]string{
		"ns1.example.com.": {"A": {ip}},
	}

	deps.resolver.mu.Lock()
	deps.resolver.allRecords[hostname] = records
	deps.resolver.mu.Unlock()

	deps.portChecker.mu.Lock()
	for _, suffix := range []string{":80", ":443"} {
		deps.portChecker.results[ip+suffix] = true
	}
	deps.portChecker.mu.Unlock()

	deps.tlsChecker.mu.Lock()
	deps.tlsChecker.certs[ip+":"+hostname] = &tlscheck.CertificateInfo{
		CommonName:              hostname,
		Issuer:                  "DigiCert",
		NotAfter:                time.Now().Add(90 * 24 * time.Hour),
		SubjectAlternativeNames: []string{hostname},
	}
	deps.tlsChecker.mu.Unlock()
}
// TestDNSRunsBeforePortAndTLSChecks verifies that a monitoring
// cycle feeds its port and TLS checks from the DNS data resolved in
// that same cycle: after the hostname's A record moves to a new IP,
// the next RunOnce must record port and certificate state for the
// new address.
func TestDNSRunsBeforePortAndTLSChecks(t *testing.T) {
	t.Parallel()

	const host = "www.example.com"

	cfg := defaultTestConfig(t)
	cfg.Hostnames = []string{host}

	watcher, deps := newTestWatcher(t, cfg)
	setupHostnameIP(deps, host, "10.0.0.1")

	ctx := t.Context()
	watcher.RunOnce(ctx)

	before := deps.state.GetSnapshot()
	if _, found := before.Ports["10.0.0.1:80"]; !found {
		t.Fatal("expected port state for 10.0.0.1:80")
	}

	// Move the hostname to a new address; the next cycle must
	// check that address rather than the previous one.
	updateHostnameIP(deps, host, "10.0.0.2")
	watcher.RunOnce(ctx)

	after := deps.state.GetSnapshot()
	if _, found := after.Ports["10.0.0.2:80"]; !found {
		t.Error("port check used stale DNS: missing 10.0.0.2:80")
	}

	const wantCert = "10.0.0.2:443:www.example.com"
	if _, found := after.Certificates[wantCert]; !found {
		t.Error("TLS check used stale DNS: missing " + wantCert)
	}
}
func TestNSFailureAndRecovery(t *testing.T) {
t.Parallel()