5 Commits

Author SHA1 Message Date
a66d17ad1b Merge branch 'main' into repo-policies-compliance
All checks were successful
check / check (push) Successful in 44s
2026-03-01 21:10:47 +01:00
clawbot
c7e5c03239 Update README for REPO_POLICIES compliance
All checks were successful
check / check (push) Successful in 34s
- Add project description first line with name, purpose, category, author
- Replace CONVENTIONS.md reference with REPO_POLICIES.md
- Add License section (pending author choice)
- Add Author section
2026-03-01 07:51:39 -08:00
clawbot
b6f3ed314e Remove CLAUDE.md and CONVENTIONS.md
Both files are superseded by REPO_POLICIES.md which is the
authoritative standard from sneak/prompts.
2026-03-01 07:51:35 -08:00
clawbot
da312f5c7b Add fmt-check, hooks, docker targets and test timeout to Makefile
- Add fmt-check target for read-only format checking
- Add hooks target to install pre-commit hook running make check
- Add docker target to run docker build
- Add 30-second timeout to test and check targets
- Add new targets to .PHONY list
2026-03-01 07:51:32 -08:00
clawbot
80c6236cf1 Add REPO_POLICIES.md, .editorconfig, and .dockerignore
- REPO_POLICIES.md fetched from sneak/prompts (last_modified: 2026-02-22)
- .editorconfig fetched from sneak/prompts
- .dockerignore with standard Go exclusions
2026-03-01 07:51:27 -08:00
6 changed files with 55 additions and 395 deletions

View File

@@ -110,8 +110,8 @@ includes:
- **NS recoveries**: Which nameserver recovered, which hostname/domain.
- **NS inconsistencies**: Which nameservers disagree, what each one
returned, which hostname affected.
- **Port changes**: Which IP:port, old state, new state, all associated
hostnames.
- **Port changes**: Which IP:port, old state, new state, associated
hostname.
- **TLS expiry warnings**: Which certificate, days remaining, CN,
issuer, associated hostname and IP.
- **TLS certificate changes**: Old and new CN/issuer/SANs, associated
@@ -290,12 +290,12 @@ not as a merged view, to enable inconsistency detection.
"ports": {
"93.184.216.34:80": {
"open": true,
"hostnames": ["www.example.com"],
"hostname": "www.example.com",
"lastChecked": "2026-02-19T12:00:00Z"
},
"93.184.216.34:443": {
"open": true,
"hostnames": ["www.example.com"],
"hostname": "www.example.com",
"lastChecked": "2026-02-19T12:00:00Z"
}
},
@@ -367,15 +367,9 @@ docker run -d \
triggering change notifications).
2. **Initial check**: Immediately perform all DNS, port, and TLS checks
on startup.
3. **Periodic checks** (DNS always runs first):
- DNS checks: every `DNSWATCHER_DNS_INTERVAL` (default 1h). Also
re-run before every TLS check cycle to ensure fresh IPs.
- Port checks: every `DNSWATCHER_DNS_INTERVAL`, after DNS completes.
- TLS checks: every `DNSWATCHER_TLS_INTERVAL` (default 12h), after
DNS completes.
- Port and TLS checks always use freshly resolved IP addresses from
the DNS phase that immediately precedes them — never stale IPs
from a previous cycle.
3. **Periodic checks**:
- DNS and port checks: every `DNSWATCHER_DNS_INTERVAL` (default 1h).
- TLS checks: every `DNSWATCHER_TLS_INTERVAL` (default 12h).
4. **On change detection**: Send notifications to all configured
endpoints, update in-memory state, persist to disk.
5. **Shutdown**: Persist final state to disk, complete in-flight

View File

@@ -15,12 +15,6 @@ import (
"sneak.berlin/go/dnswatcher/internal/logger"
)
// ErrNoTargets is returned when DNSWATCHER_TARGETS is empty or unset.
var ErrNoTargets = errors.New(
"no targets configured: set DNSWATCHER_TARGETS to a comma-separated " +
"list of DNS names to monitor",
)
// Default configuration values.
const (
defaultPort = 8080
@@ -124,9 +118,25 @@ func buildConfig(
}
}
domains, hostnames, err := classifyAndValidateTargets()
dnsInterval, err := time.ParseDuration(
viper.GetString("DNS_INTERVAL"),
)
if err != nil {
return nil, err
dnsInterval = defaultDNSInterval
}
tlsInterval, err := time.ParseDuration(
viper.GetString("TLS_INTERVAL"),
)
if err != nil {
tlsInterval = defaultTLSInterval
}
domains, hostnames, err := ClassifyTargets(
parseCSV(viper.GetString("TARGETS")),
)
if err != nil {
return nil, fmt.Errorf("invalid targets configuration: %w", err)
}
cfg := &Config{
@@ -138,8 +148,8 @@ func buildConfig(
SlackWebhook: viper.GetString("SLACK_WEBHOOK"),
MattermostWebhook: viper.GetString("MATTERMOST_WEBHOOK"),
NtfyTopic: viper.GetString("NTFY_TOPIC"),
DNSInterval: parseDurationOrDefault("DNS_INTERVAL", defaultDNSInterval),
TLSInterval: parseDurationOrDefault("TLS_INTERVAL", defaultTLSInterval),
DNSInterval: dnsInterval,
TLSInterval: tlsInterval,
TLSExpiryWarning: viper.GetInt("TLS_EXPIRY_WARNING"),
SentryDSN: viper.GetString("SENTRY_DSN"),
MaintenanceMode: viper.GetBool("MAINTENANCE_MODE"),
@@ -152,32 +162,6 @@ func buildConfig(
return cfg, nil
}
func classifyAndValidateTargets() ([]string, []string, error) {
domains, hostnames, err := ClassifyTargets(
parseCSV(viper.GetString("TARGETS")),
)
if err != nil {
return nil, nil, fmt.Errorf(
"invalid targets configuration: %w", err,
)
}
if len(domains) == 0 && len(hostnames) == 0 {
return nil, nil, ErrNoTargets
}
return domains, hostnames, nil
}
func parseDurationOrDefault(key string, fallback time.Duration) time.Duration {
d, err := time.ParseDuration(viper.GetString(key))
if err != nil {
return fallback
}
return d
}
func parseCSV(input string) []string {
if input == "" {
return nil

View File

@@ -1,87 +0,0 @@
package config_test
import (
"errors"
"testing"
"go.uber.org/fx"
"sneak.berlin/go/dnswatcher/internal/config"
"sneak.berlin/go/dnswatcher/internal/globals"
"sneak.berlin/go/dnswatcher/internal/logger"
)
func TestNewReturnsErrNoTargetsWhenEmpty(t *testing.T) {
// Cannot use t.Parallel() because t.Setenv modifies the process
// environment.
t.Setenv("DNSWATCHER_TARGETS", "")
t.Setenv("DNSWATCHER_DATA_DIR", t.TempDir())
var cfg *config.Config
app := fx.New(
fx.Provide(
func() *globals.Globals {
return &globals.Globals{
Appname: "dnswatcher-test-empty",
}
},
logger.New,
config.New,
),
fx.Populate(&cfg),
fx.NopLogger,
)
err := app.Err()
if err == nil {
t.Fatal(
"expected error when DNSWATCHER_TARGETS is empty, got nil",
)
}
if !errors.Is(err, config.ErrNoTargets) {
t.Errorf("expected ErrNoTargets, got: %v", err)
}
}
func TestNewSucceedsWithTargets(t *testing.T) {
// Cannot use t.Parallel() because t.Setenv modifies the process
// environment.
t.Setenv("DNSWATCHER_TARGETS", "example.com")
t.Setenv("DNSWATCHER_DATA_DIR", t.TempDir())
// Prevent loading a local config file by changing to a temp dir.
t.Chdir(t.TempDir())
var cfg *config.Config
app := fx.New(
fx.Provide(
func() *globals.Globals {
return &globals.Globals{
Appname: "dnswatcher-test-ok",
}
},
logger.New,
config.New,
),
fx.Populate(&cfg),
fx.NopLogger,
)
err := app.Err()
if err != nil {
t.Fatalf(
"expected no error with valid targets, got: %v",
err,
)
}
if len(cfg.Domains) != 1 || cfg.Domains[0] != "example.com" {
t.Errorf(
"expected [example.com], got domains=%v",
cfg.Domains,
)
}
}

View File

@@ -57,47 +57,8 @@ type HostnameState struct {
// PortState holds the monitoring state for a port.
type PortState struct {
Open bool `json:"open"`
Hostnames []string `json:"hostnames"`
LastChecked time.Time `json:"lastChecked"`
}
// UnmarshalJSON implements custom unmarshaling to handle both
// the old single-hostname format and the new multi-hostname
// format for backward compatibility with existing state files.
func (ps *PortState) UnmarshalJSON(data []byte) error {
// Use an alias to prevent infinite recursion.
type portStateAlias struct {
Open bool `json:"open"`
Hostnames []string `json:"hostnames"`
LastChecked time.Time `json:"lastChecked"`
}
var alias portStateAlias
err := json.Unmarshal(data, &alias)
if err != nil {
return fmt.Errorf("unmarshaling port state: %w", err)
}
ps.Open = alias.Open
ps.Hostnames = alias.Hostnames
ps.LastChecked = alias.LastChecked
// If Hostnames is empty, try reading the old single-hostname
// format for backward compatibility.
if len(ps.Hostnames) == 0 {
var old struct {
Hostname string `json:"hostname"`
}
// Best-effort: ignore errors since the main unmarshal
// already succeeded.
if json.Unmarshal(data, &old) == nil && old.Hostname != "" {
ps.Hostnames = []string{old.Hostname}
}
}
return nil
LastChecked time.Time `json:"lastChecked"`
}
// CertificateState holds TLS certificate monitoring state.
@@ -302,27 +263,6 @@ func (s *State) GetPortState(key string) (*PortState, bool) {
return ps, ok
}
// DeletePortState removes a port state entry.
func (s *State) DeletePortState(key string) {
s.mu.Lock()
defer s.mu.Unlock()
delete(s.snapshot.Ports, key)
}
// GetAllPortKeys returns all port state keys.
func (s *State) GetAllPortKeys() []string {
s.mu.RLock()
defer s.mu.RUnlock()
keys := make([]string, 0, len(s.snapshot.Ports))
for k := range s.snapshot.Ports {
keys = append(keys, k)
}
return keys
}
// SetCertificateState updates the state for a certificate.
func (s *State) SetCertificateState(
key string,

View File

@@ -72,15 +72,13 @@ func New(
}
lifecycle.Append(fx.Hook{
OnStart: func(_ context.Context) error {
// Use context.Background() — the fx startup context
// expires after startup completes, so deriving from it
// would cancel the watcher immediately. The watcher's
// lifetime is controlled by w.cancel in OnStop.
ctx, cancel := context.WithCancel(context.Background())
OnStart: func(startCtx context.Context) error {
ctx, cancel := context.WithCancel(
context.WithoutCancel(startCtx),
)
w.cancel = cancel
go w.Run(ctx) //nolint:contextcheck // intentionally not derived from startCtx
go w.Run(ctx)
return nil
},
@@ -143,16 +141,9 @@ func (w *Watcher) Run(ctx context.Context) {
return
case <-dnsTicker.C:
w.runDNSChecks(ctx)
w.checkAllPorts(ctx)
w.runDNSAndPortChecks(ctx)
w.saveState()
case <-tlsTicker.C:
// Run DNS first so TLS checks use freshly
// resolved IP addresses, not stale ones from
// a previous cycle.
w.runDNSChecks(ctx)
w.runTLSChecks(ctx)
w.saveState()
}
@@ -160,26 +151,10 @@ func (w *Watcher) Run(ctx context.Context) {
}
// RunOnce performs a single complete monitoring cycle.
// DNS checks run first so that port and TLS checks use
// freshly resolved IP addresses. Port checks run before
// TLS because TLS checks only target IPs with an open
// port 443.
func (w *Watcher) RunOnce(ctx context.Context) {
w.detectFirstRun()
// Phase 1: DNS resolution must complete first so that
// subsequent checks use fresh IP addresses.
w.runDNSChecks(ctx)
// Phase 2: Port checks populate port state that TLS
// checks depend on (TLS only targets IPs where port
// 443 is open).
w.checkAllPorts(ctx)
// Phase 3: TLS checks use fresh DNS IPs and current
// port state.
w.runDNSAndPortChecks(ctx)
w.runTLSChecks(ctx)
w.saveState()
w.firstRun = false
}
@@ -196,11 +171,7 @@ func (w *Watcher) detectFirstRun() {
}
}
// runDNSChecks performs DNS resolution for all configured domains
// and hostnames, updating state with freshly resolved records.
// This must complete before port or TLS checks run so those
// checks operate on current IP addresses.
func (w *Watcher) runDNSChecks(ctx context.Context) {
func (w *Watcher) runDNSAndPortChecks(ctx context.Context) {
for _, domain := range w.config.Domains {
w.checkDomain(ctx, domain)
}
@@ -208,6 +179,8 @@ func (w *Watcher) runDNSChecks(ctx context.Context) {
for _, hostname := range w.config.Hostnames {
w.checkHostname(ctx, hostname)
}
w.checkAllPorts(ctx)
}
func (w *Watcher) checkDomain(
@@ -475,94 +448,24 @@ func (w *Watcher) detectInconsistencies(
}
func (w *Watcher) checkAllPorts(ctx context.Context) {
// Phase 1: Build current IP:port → hostname associations
// from fresh DNS data.
associations := w.buildPortAssociations()
// Phase 2: Check each unique IP:port and update state
// with the full set of associated hostnames.
for key, hostnames := range associations {
ip, port := parsePortKey(key)
if port == 0 {
continue
for _, hostname := range w.config.Hostnames {
w.checkPortsForHostname(ctx, hostname)
}
w.checkSinglePort(ctx, ip, port, hostnames)
for _, domain := range w.config.Domains {
w.checkPortsForHostname(ctx, domain)
}
// Phase 3: Remove port state entries that no longer have
// any hostname referencing them.
w.cleanupStalePorts(associations)
}
// buildPortAssociations constructs a map from IP:port keys to
// the sorted set of hostnames currently resolving to that IP.
func (w *Watcher) buildPortAssociations() map[string][]string {
assoc := make(map[string]map[string]bool)
func (w *Watcher) checkPortsForHostname(
ctx context.Context,
hostname string,
) {
ips := w.collectIPs(hostname)
allNames := make(
[]string, 0,
len(w.config.Hostnames)+len(w.config.Domains),
)
allNames = append(allNames, w.config.Hostnames...)
allNames = append(allNames, w.config.Domains...)
for _, name := range allNames {
ips := w.collectIPs(name)
for _, ip := range ips {
for _, port := range monitoredPorts {
key := fmt.Sprintf("%s:%d", ip, port)
if assoc[key] == nil {
assoc[key] = make(map[string]bool)
}
assoc[key][name] = true
}
}
}
result := make(map[string][]string, len(assoc))
for key, set := range assoc {
hostnames := make([]string, 0, len(set))
for h := range set {
hostnames = append(hostnames, h)
}
sort.Strings(hostnames)
result[key] = hostnames
}
return result
}
// parsePortKey splits an "ip:port" key into its components.
func parsePortKey(key string) (string, int) {
lastColon := strings.LastIndex(key, ":")
if lastColon < 0 {
return key, 0
}
ip := key[:lastColon]
var p int
_, err := fmt.Sscanf(key[lastColon+1:], "%d", &p)
if err != nil {
return ip, 0
}
return ip, p
}
// cleanupStalePorts removes port state entries that are no
// longer referenced by any hostname in the current DNS data.
func (w *Watcher) cleanupStalePorts(
currentAssociations map[string][]string,
) {
for _, key := range w.state.GetAllPortKeys() {
if _, exists := currentAssociations[key]; !exists {
w.state.DeletePortState(key)
w.checkSinglePort(ctx, ip, port, hostname)
}
}
}
@@ -599,7 +502,7 @@ func (w *Watcher) checkSinglePort(
ctx context.Context,
ip string,
port int,
hostnames []string,
hostname string,
) {
result, err := w.portCheck.CheckPort(ctx, ip, port)
if err != nil {
@@ -624,8 +527,8 @@ func (w *Watcher) checkSinglePort(
}
msg := fmt.Sprintf(
"Hosts: %s\nAddress: %s\nPort now %s",
strings.Join(hostnames, ", "), key, stateStr,
"Host: %s\nAddress: %s\nPort now %s",
hostname, key, stateStr,
)
w.notify.SendNotification(
@@ -638,7 +541,7 @@ func (w *Watcher) checkSinglePort(
w.state.SetPortState(key, &state.PortState{
Open: result.Open,
Hostnames: hostnames,
Hostname: hostname,
LastChecked: now,
})
}

View File

@@ -682,80 +682,6 @@ func TestGracefulShutdown(t *testing.T) {
}
}
func setupHostnameIP(
deps *testDeps,
hostname, ip string,
) {
deps.resolver.allRecords[hostname] = map[string]map[string][]string{
"ns1.example.com.": {"A": {ip}},
}
deps.portChecker.results[ip+":80"] = true
deps.portChecker.results[ip+":443"] = true
deps.tlsChecker.certs[ip+":"+hostname] = &tlscheck.CertificateInfo{
CommonName: hostname,
Issuer: "DigiCert",
NotAfter: time.Now().Add(90 * 24 * time.Hour),
SubjectAlternativeNames: []string{hostname},
}
}
func updateHostnameIP(deps *testDeps, hostname, ip string) {
deps.resolver.mu.Lock()
deps.resolver.allRecords[hostname] = map[string]map[string][]string{
"ns1.example.com.": {"A": {ip}},
}
deps.resolver.mu.Unlock()
deps.portChecker.mu.Lock()
deps.portChecker.results[ip+":80"] = true
deps.portChecker.results[ip+":443"] = true
deps.portChecker.mu.Unlock()
deps.tlsChecker.mu.Lock()
deps.tlsChecker.certs[ip+":"+hostname] = &tlscheck.CertificateInfo{
CommonName: hostname,
Issuer: "DigiCert",
NotAfter: time.Now().Add(90 * 24 * time.Hour),
SubjectAlternativeNames: []string{hostname},
}
deps.tlsChecker.mu.Unlock()
}
func TestDNSRunsBeforePortAndTLSChecks(t *testing.T) {
t.Parallel()
cfg := defaultTestConfig(t)
cfg.Hostnames = []string{"www.example.com"}
w, deps := newTestWatcher(t, cfg)
setupHostnameIP(deps, "www.example.com", "10.0.0.1")
ctx := t.Context()
w.RunOnce(ctx)
snap := deps.state.GetSnapshot()
if _, ok := snap.Ports["10.0.0.1:80"]; !ok {
t.Fatal("expected port state for 10.0.0.1:80")
}
// DNS changes to a new IP; port and TLS must pick it up.
updateHostnameIP(deps, "www.example.com", "10.0.0.2")
w.RunOnce(ctx)
snap = deps.state.GetSnapshot()
if _, ok := snap.Ports["10.0.0.2:80"]; !ok {
t.Error("port check used stale DNS: missing 10.0.0.2:80")
}
certKey := "10.0.0.2:443:www.example.com"
if _, ok := snap.Certificates[certKey]; !ok {
t.Error("TLS check used stale DNS: missing " + certKey)
}
}
func TestNSFailureAndRecovery(t *testing.T) {
t.Parallel()