1 Commits

Author SHA1 Message Date
user
cc35480e26 fix: set 700ms query timeout, use public resolvers for recursive queries
All checks were successful
Check / check (pull_request) Successful in 10m13s
- Change queryTimeoutDuration from 5s to 700ms per requirement that DNS
  queries complete quickly given <800ms RTT
- Fix resolveARecord and resolveNSRecursive to use public recursive
  resolvers (1.1.1.1, 8.8.8.8, 9.9.9.9) instead of root servers,
  which don't answer recursive queries (RD=1)
2026-02-21 02:56:33 -08:00
19 changed files with 1448 additions and 1048 deletions

View File

@@ -1,6 +0,0 @@
.git/
bin/
*.md
LICENSE
.editorconfig
.gitignore

View File

@@ -1,12 +0,0 @@
root = true
[*]
indent_style = space
indent_size = 4
end_of_line = lf
charset = utf-8
trim_trailing_whitespace = true
insert_final_newline = true
[Makefile]
indent_style = tab

View File

@@ -1,9 +1,26 @@
name: check
on: [push]
name: Check
on:
push:
branches: [main]
pull_request:
branches: [main]
jobs:
check:
runs-on: ubuntu-latest
steps:
# actions/checkout v4.2.2, 2026-02-28
- uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
- run: docker build .
check:
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4
- uses: actions/setup-go@40f1582b2485089dde7abd97c1529aa768e1baff # v5
with:
go-version-file: go.mod
- name: Install golangci-lint
run: go install github.com/golangci/golangci-lint/v2/cmd/golangci-lint@5d1e709b7be35cb2025444e19de266b056b7b7ee # v2.10.1
- name: Install goimports
run: go install golang.org/x/tools/cmd/goimports@009367f5c17a8d4c45a961a3a509277190a9a6f0 # v0.42.0
- name: Run make check
run: make check

68
CLAUDE.md Normal file
View File

@@ -0,0 +1,68 @@
# Repository Rules
Last Updated 2026-01-08
These rules MUST be followed at all times, it is very important.
* Never use `git add -A` - add specific changes to a deliberate commit. A
commit should contain one change. After each change, make a commit with a
good one-line summary.
* NEVER modify the linter config without asking first.
* NEVER modify tests to exclude special cases or otherwise get them to pass
without asking first. In almost all cases, the code should be changed,
NOT the tests. If you think the test needs to be changed, make your case
for that and ask for permission to proceed, then stop. You need explicit
user approval to modify existing tests. (You do not need user approval
for writing NEW tests.)
* When linting, assume the linter config is CORRECT, and that each item
output by the linter is something that legitimately needs fixing in the
code.
* When running tests, use `make test`.
* Before commits, run `make check`. This runs `make lint` and `make test`
and `make check-fmt`. Any issues discovered MUST be resolved before
committing unless explicitly told otherwise.
* When fixing a bug, write a failing test for the bug FIRST. Add
appropriate logging to the test to ensure it is written correctly. Commit
that. Then go about fixing the bug until the test passes (without
modifying the test further). Then commit that.
* When adding a new feature, do the same - implement a test first (TDD). It
doesn't have to be super complex. Commit the test, then commit the
feature.
* When adding a new feature, use a feature branch. When the feature is
completely finished and the code is up to standards (passes `make check`)
then and only then can the feature branch be merged into `main` and the
branch deleted.
* Write godoc documentation comments for all exported types and functions as
you go along.
* ALWAYS be consistent in naming. If you name something one thing in one
place, name it the EXACT SAME THING in another place.
* Be descriptive and specific in naming. `wl` is bad;
`SourceHostWhitelist` is good. `ConnsPerHost` is bad;
`MaxConnectionsPerHost` is good.
* This is not prototype or teaching code - this is designed for production.
Any security issues (such as denial of service) or other web
vulnerabilities are P1 bugs and must be added to TODO.md at the top.
* As this is production code, no stubbing of implementations unless
specifically instructed. We need working implementations.
* Avoid vendoring deps unless specifically instructed to. NEVER commit
the vendor directory, NEVER commit compiled binaries. If these
directories or files exist, add them to .gitignore (and commit the
.gitignore) if they are not already in there. Keep the entire git
repository (with history) small - under 20MiB, unless you specifically
must commit larger files (e.g. test fixture example media files). Only
OUR source code and immediately supporting files (such as test examples)
goes into the repo/history.

1225
CONVENTIONS.md Normal file

File diff suppressed because it is too large Load Diff

View File

@@ -1,13 +1,11 @@
# Build stage
# golang 1.25-alpine, 2026-02-28
FROM golang@sha256:f6751d823c26342f9506c03797d2527668d095b0a15f1862cddb4d927a7a4ced AS builder
FROM golang:1.25-alpine AS builder
RUN apk add --no-cache git make gcc musl-dev binutils-gold
RUN apk add --no-cache git make gcc musl-dev
# golangci-lint v2.10.1
RUN go install github.com/golangci/golangci-lint/v2/cmd/golangci-lint@5d1e709b7be35cb2025444e19de266b056b7b7ee
# goimports v0.42.0
RUN go install golang.org/x/tools/cmd/goimports@009367f5c17a8d4c45a961a3a509277190a9a6f0
# Install golangci-lint v2
RUN go install github.com/golangci/golangci-lint/v2/cmd/golangci-lint@latest
RUN go install golang.org/x/tools/cmd/goimports@latest
WORKDIR /src
COPY go.mod go.sum ./
@@ -22,8 +20,7 @@ RUN make check
RUN make build
# Runtime stage
# alpine 3.21, 2026-02-28
FROM alpine@sha256:c3f8e73fdb79deaebaa2037150150191b9dcbfba68b4a46d70103204c53f4709
FROM alpine:3.21
RUN apk add --no-cache ca-certificates tzdata

View File

@@ -1,4 +1,4 @@
.PHONY: all build lint fmt fmt-check test check clean hooks docker
.PHONY: all build lint fmt test check clean
BINARY := dnswatcher
VERSION := $(shell git describe --tags --always --dirty 2>/dev/null || echo "dev")
@@ -17,11 +17,8 @@ fmt:
gofmt -s -w .
goimports -w .
fmt-check:
@test -z "$$(gofmt -l .)" || (echo "gofmt: files not formatted:" && gofmt -l . && exit 1)
test:
go test -v -race -timeout 30s -cover ./...
go test -v -race -cover ./...
# Check runs all validation without making changes
# Used by CI and Docker build - fails if anything is wrong
@@ -31,19 +28,10 @@ check:
@echo "==> Running linter..."
golangci-lint run --config .golangci.yml ./...
@echo "==> Running tests..."
go test -v -race -timeout 30s ./...
go test -v -race ./...
@echo "==> Building..."
go build -ldflags "$(LDFLAGS)" -o /dev/null ./cmd/dnswatcher
@echo "==> All checks passed!"
clean:
rm -rf bin/
hooks:
@echo '#!/bin/sh' > .git/hooks/pre-commit
@echo 'make check' >> .git/hooks/pre-commit
@chmod +x .git/hooks/pre-commit
@echo "Pre-commit hook installed."
docker:
docker build .

View File

@@ -1,10 +1,9 @@
# dnswatcher
dnswatcher is a pre-1.0 Go daemon by [@sneak](https://sneak.berlin) that monitors DNS records, TCP port availability, and TLS certificates, delivering real-time change notifications via Slack, Mattermost, and ntfy webhooks.
> ⚠️ Pre-1.0 software. APIs, configuration, and behavior may change without notice.
dnswatcher watches configured DNS domains and hostnames for changes, monitors TCP
dnswatcher is a production DNS and infrastructure monitoring daemon written in
Go. It watches configured DNS domains and hostnames for changes, monitors TCP
port availability, tracks TLS certificate expiry, and delivers real-time
notifications via Slack, Mattermost, and/or ntfy webhooks.
@@ -110,8 +109,8 @@ includes:
- **NS recoveries**: Which nameserver recovered, which hostname/domain.
- **NS inconsistencies**: Which nameservers disagree, what each one
returned, which hostname affected.
- **Port changes**: Which IP:port, old state, new state, all associated
hostnames.
- **Port changes**: Which IP:port, old state, new state, associated
hostname.
- **TLS expiry warnings**: Which certificate, days remaining, CN,
issuer, associated hostname and IP.
- **TLS certificate changes**: Old and new CN/issuer/SANs, associated
@@ -290,12 +289,12 @@ not as a merged view, to enable inconsistency detection.
"ports": {
"93.184.216.34:80": {
"open": true,
"hostnames": ["www.example.com"],
"hostname": "www.example.com",
"lastChecked": "2026-02-19T12:00:00Z"
},
"93.184.216.34:443": {
"open": true,
"hostnames": ["www.example.com"],
"hostname": "www.example.com",
"lastChecked": "2026-02-19T12:00:00Z"
}
},
@@ -367,15 +366,9 @@ docker run -d \
triggering change notifications).
2. **Initial check**: Immediately perform all DNS, port, and TLS checks
on startup.
3. **Periodic checks** (DNS always runs first):
- DNS checks: every `DNSWATCHER_DNS_INTERVAL` (default 1h). Also
re-run before every TLS check cycle to ensure fresh IPs.
- Port checks: every `DNSWATCHER_DNS_INTERVAL`, after DNS completes.
- TLS checks: every `DNSWATCHER_TLS_INTERVAL` (default 12h), after
DNS completes.
- Port and TLS checks always use freshly resolved IP addresses from
the DNS phase that immediately precedes them — never stale IPs
from a previous cycle.
3. **Periodic checks**:
- DNS and port checks: every `DNSWATCHER_DNS_INTERVAL` (default 1h).
- TLS checks: every `DNSWATCHER_TLS_INTERVAL` (default 12h).
4. **On change detection**: Send notifications to all configured
endpoints, update in-memory state, persist to disk.
5. **Shutdown**: Persist final state to disk, complete in-flight
@@ -392,18 +385,7 @@ docker run -d \
## Project Structure
Follows the conventions defined in `REPO_POLICIES.md`, adapted from the
Follows the conventions defined in `CONVENTIONS.md`, adapted from the
[upaas](https://git.eeqj.de/sneak/upaas) project template. Uses uber/fx
for dependency injection, go-chi for HTTP routing, slog for logging, and
Viper for configuration.
---
## License
License has not yet been chosen for this project. Pending decision by the
author (MIT, GPL, or WTFPL).
## Author
[@sneak](https://sneak.berlin)

View File

@@ -1,188 +0,0 @@
---
title: Repository Policies
last_modified: 2026-02-22
---
This document covers repository structure, tooling, and workflow standards. Code
style conventions are in separate documents:
- [Code Styleguide](https://git.eeqj.de/sneak/prompts/raw/branch/main/prompts/CODE_STYLEGUIDE.md)
(general, bash, Docker)
- [Go](https://git.eeqj.de/sneak/prompts/raw/branch/main/prompts/CODE_STYLEGUIDE_GO.md)
- [JavaScript](https://git.eeqj.de/sneak/prompts/raw/branch/main/prompts/CODE_STYLEGUIDE_JS.md)
- [Python](https://git.eeqj.de/sneak/prompts/raw/branch/main/prompts/CODE_STYLEGUIDE_PYTHON.md)
- [Go HTTP Server Conventions](https://git.eeqj.de/sneak/prompts/raw/branch/main/prompts/GO_HTTP_SERVER_CONVENTIONS.md)
---
- Cross-project documentation (such as this file) must include
`last_modified: YYYY-MM-DD` in the YAML front matter so it can be kept in sync
with the authoritative source as policies evolve.
- **ALL external references must be pinned by cryptographic hash.** This
includes Docker base images, Go modules, npm packages, GitHub Actions, and
anything else fetched from a remote source. Version tags (`@v4`, `@latest`,
`:3.21`, etc.) are server-mutable and therefore remote code execution
vulnerabilities. The ONLY acceptable way to reference an external dependency
is by its content hash (Docker `@sha256:...`, Go module hash in `go.sum`, npm
integrity hash in lockfile, GitHub Actions `@<commit-sha>`). No exceptions.
This also means never `curl | bash` to install tools like pyenv, nvm, rustup,
etc. Instead, download a specific release archive from GitHub, verify its hash
(hardcoded in the Dockerfile or script), and only then install. Unverified
install scripts are arbitrary remote code execution. This is the single most
important rule in this document. Double-check every external reference in
every file before committing. There are zero exceptions to this rule.
- Every repo with software must have a root `Makefile` with these targets:
`make test`, `make lint`, `make fmt` (writes), `make fmt-check` (read-only),
`make check` (prereqs: `test`, `lint`, `fmt-check`), `make docker`, and
`make hooks` (installs pre-commit hook). A model Makefile is at
`https://git.eeqj.de/sneak/prompts/raw/branch/main/Makefile`.
- Always use Makefile targets (`make fmt`, `make test`, `make lint`, etc.)
instead of invoking the underlying tools directly. The Makefile is the single
source of truth for how these operations are run.
- The Makefile is authoritative documentation for how the repo is used. Beyond
the required targets above, it should have targets for every common operation:
running a local development server (`make run`, `make dev`), re-initializing
or migrating the database (`make db-reset`, `make migrate`), building
artifacts (`make build`), generating code, seeding data, or anything else a
developer would do regularly. If someone checks out the repo and types
`make<tab>`, they should see every meaningful operation available. A new
contributor should be able to understand the entire development workflow by
reading the Makefile.
- Every repo should have a `Dockerfile`. All Dockerfiles must run `make check`
as a build step so the build fails if the branch is not green. For non-server
repos, the Dockerfile should bring up a development environment and run
`make check`. For server repos, `make check` should run as an early build
stage before the final image is assembled.
- Every repo should have a Gitea Actions workflow (`.gitea/workflows/`) that
runs `docker build .` on push. Since the Dockerfile already runs `make check`,
a successful build implies all checks pass.
- Use platform-standard formatters: `black` for Python, `prettier` for
JS/CSS/Markdown/HTML, `go fmt` for Go. Always use default configuration with
two exceptions: four-space indents (except Go), and `proseWrap: always` for
Markdown (hard-wrap at 80 columns). Documentation and writing repos (Markdown,
HTML, CSS) should also have `.prettierrc` and `.prettierignore`.
- Pre-commit hook: `make check` if local testing is possible, otherwise
`make lint && make fmt-check`. The Makefile should provide a `make hooks`
target to install the pre-commit hook.
- All repos with software must have tests that run via the platform-standard
test framework (`go test`, `pytest`, `jest`/`vitest`, etc.). If no meaningful
tests exist yet, add the most minimal test possible — e.g. importing the
module under test to verify it compiles/parses. There is no excuse for
`make test` to be a no-op.
- `make test` must complete in under 20 seconds. Add a 30-second timeout in the
Makefile.
- Docker builds must complete in under 5 minutes.
- `make check` must not modify any files in the repo. Tests may use temporary
directories.
- `main` must always pass `make check`, no exceptions.
- Never commit secrets. `.env` files, credentials, API keys, and private keys
must be in `.gitignore`. No exceptions.
- `.gitignore` should be comprehensive from the start: OS files (`.DS_Store`),
editor files (`.swp`, `*~`), language build artifacts, and `node_modules/`.
Fetch the standard `.gitignore` from
`https://git.eeqj.de/sneak/prompts/raw/branch/main/.gitignore` when setting up
a new repo.
- Never use `git add -A` or `git add .`. Always stage files explicitly by name.
- Never force-push to `main`.
- Make all changes on a feature branch. You can do whatever you want on a
feature branch.
- `.golangci.yml` is standardized and must _NEVER_ be modified by an agent, only
manually by the user. Fetch from
`https://git.eeqj.de/sneak/prompts/raw/branch/main/.golangci.yml`.
- When pinning images or packages by hash, add a comment above the reference
with the version and date (YYYY-MM-DD).
- Use `yarn`, not `npm`.
- Write all dates as YYYY-MM-DD (ISO 8601).
- Simple projects should be configured with environment variables.
- Dockerized web services listen on port 8080 by default, overridable with
`PORT`.
- `README.md` is the primary documentation. Required sections:
- **Description**: First line must include the project name, purpose,
category (web server, SPA, CLI tool, etc.), license, and author. Example:
"µPaaS is an MIT-licensed Go web application by @sneak that receives
git-frontend webhooks and deploys applications via Docker in realtime."
- **Getting Started**: Copy-pasteable install/usage code block.
- **Rationale**: Why does this exist?
- **Design**: How is the program structured?
- **TODO**: Update meticulously, even between commits. When planning, put
the todo list in the README so a new agent can pick up where the last one
left off.
- **License**: MIT, GPL, or WTFPL. Ask the user for new projects. Include a
`LICENSE` file in the repo root and a License section in the README.
- **Author**: [@sneak](https://sneak.berlin).
- First commit of a new repo should contain only `README.md`.
- Go module root: `sneak.berlin/go/<name>`. Always run `go mod tidy` before
committing.
- Use SemVer.
- Database migrations live in `internal/db/migrations/` and must be embedded in
the binary.
- `000_migration.sql` — contains ONLY the creation of the migrations tracking
table itself. Nothing else.
- `001_schema.sql` — the full application schema.
- **Pre-1.0.0:** never add additional migration files (002, 003, etc.). There
is no installed base to migrate. Edit `001_schema.sql` directly.
- **Post-1.0.0:** add new numbered migration files for each schema change.
Never edit existing migrations after release.
- All repos should have an `.editorconfig` enforcing the project's indentation
settings.
- Avoid putting files in the repo root unless necessary. Root should contain
only project-level config files (`README.md`, `Makefile`, `Dockerfile`,
`LICENSE`, `.gitignore`, `.editorconfig`, `REPO_POLICIES.md`, and
language-specific config). Everything else goes in a subdirectory. Canonical
subdirectory names:
- `bin/` — executable scripts and tools
- `cmd/` — Go command entrypoints
- `configs/` — configuration templates and examples
- `deploy/` — deployment manifests (k8s, compose, terraform)
- `docs/` — documentation and markdown (README.md stays in root)
- `internal/` — Go internal packages
- `internal/db/migrations/` — database migrations
- `pkg/` — Go library packages
- `share/` — systemd units, data files
- `static/` — static assets (images, fonts, etc.)
- `web/` — web frontend source
- When setting up a new repo, files from the `prompts` repo may be used as
templates. Fetch them from
`https://git.eeqj.de/sneak/prompts/raw/branch/main/<path>`.
- New repos must contain at minimum:
- `README.md`, `.git`, `.gitignore`, `.editorconfig`
- `LICENSE`, `REPO_POLICIES.md` (copy from the `prompts` repo)
- `Makefile`
- `Dockerfile`, `.dockerignore`
- `.gitea/workflows/check.yml`
- Go: `go.mod`, `go.sum`, `.golangci.yml`
- JS: `package.json`, `yarn.lock`, `.prettierrc`, `.prettierignore`
- Python: `pyproject.toml`

View File

@@ -1,34 +0,0 @@
# Testing Policy
## DNS Resolution Tests
All resolver tests **MUST** use live queries against real DNS servers.
No mocking of the DNS client layer is permitted.
### Rationale
The resolver performs iterative resolution from root nameservers through
the full delegation chain. Mocked responses cannot faithfully represent
the variety of real-world DNS behavior (truncation, referrals, glue
records, DNSSEC, varied response times, EDNS, etc.). Testing against
real servers ensures the resolver works correctly in production.
### Constraints
- Tests hit real DNS infrastructure and require network access
- Test duration depends on network conditions; timeout tuning keeps
the suite within the 30-second target
- Query timeout is calibrated to 3× maximum antipodal RTT (~300ms)
plus processing margin
- Root server fan-out is limited to reduce parallel query load
- Flaky failures from transient network issues are acceptable and
should be investigated as potential resolver bugs, not papered over
with mocks or skip flags
### What NOT to do
- **Do not mock `DNSClient`** for resolver tests (the mock constructor
exists for unit-testing other packages that consume the resolver)
- **Do not add `-short` flags** to skip slow tests
- **Do not increase `-timeout`** to hide hanging queries
- **Do not modify linter configuration** to suppress findings

View File

@@ -15,12 +15,6 @@ import (
"sneak.berlin/go/dnswatcher/internal/logger"
)
// ErrNoTargets is returned when DNSWATCHER_TARGETS is empty or unset.
var ErrNoTargets = errors.New(
"no targets configured: set DNSWATCHER_TARGETS to a comma-separated " +
"list of DNS names to monitor",
)
// Default configuration values.
const (
defaultPort = 8080
@@ -124,9 +118,25 @@ func buildConfig(
}
}
domains, hostnames, err := classifyAndValidateTargets()
dnsInterval, err := time.ParseDuration(
viper.GetString("DNS_INTERVAL"),
)
if err != nil {
return nil, err
dnsInterval = defaultDNSInterval
}
tlsInterval, err := time.ParseDuration(
viper.GetString("TLS_INTERVAL"),
)
if err != nil {
tlsInterval = defaultTLSInterval
}
domains, hostnames, err := ClassifyTargets(
parseCSV(viper.GetString("TARGETS")),
)
if err != nil {
return nil, fmt.Errorf("invalid targets configuration: %w", err)
}
cfg := &Config{
@@ -138,8 +148,8 @@ func buildConfig(
SlackWebhook: viper.GetString("SLACK_WEBHOOK"),
MattermostWebhook: viper.GetString("MATTERMOST_WEBHOOK"),
NtfyTopic: viper.GetString("NTFY_TOPIC"),
DNSInterval: parseDurationOrDefault("DNS_INTERVAL", defaultDNSInterval),
TLSInterval: parseDurationOrDefault("TLS_INTERVAL", defaultTLSInterval),
DNSInterval: dnsInterval,
TLSInterval: tlsInterval,
TLSExpiryWarning: viper.GetInt("TLS_EXPIRY_WARNING"),
SentryDSN: viper.GetString("SENTRY_DSN"),
MaintenanceMode: viper.GetBool("MAINTENANCE_MODE"),
@@ -152,32 +162,6 @@ func buildConfig(
return cfg, nil
}
func classifyAndValidateTargets() ([]string, []string, error) {
domains, hostnames, err := ClassifyTargets(
parseCSV(viper.GetString("TARGETS")),
)
if err != nil {
return nil, nil, fmt.Errorf(
"invalid targets configuration: %w", err,
)
}
if len(domains) == 0 && len(hostnames) == 0 {
return nil, nil, ErrNoTargets
}
return domains, hostnames, nil
}
func parseDurationOrDefault(key string, fallback time.Duration) time.Duration {
d, err := time.ParseDuration(viper.GetString(key))
if err != nil {
return fallback
}
return d
}
func parseCSV(input string) []string {
if input == "" {
return nil

View File

@@ -1,87 +0,0 @@
package config_test
import (
"errors"
"testing"
"go.uber.org/fx"
"sneak.berlin/go/dnswatcher/internal/config"
"sneak.berlin/go/dnswatcher/internal/globals"
"sneak.berlin/go/dnswatcher/internal/logger"
)
func TestNewReturnsErrNoTargetsWhenEmpty(t *testing.T) {
// Cannot use t.Parallel() because t.Setenv modifies the process
// environment.
t.Setenv("DNSWATCHER_TARGETS", "")
t.Setenv("DNSWATCHER_DATA_DIR", t.TempDir())
var cfg *config.Config
app := fx.New(
fx.Provide(
func() *globals.Globals {
return &globals.Globals{
Appname: "dnswatcher-test-empty",
}
},
logger.New,
config.New,
),
fx.Populate(&cfg),
fx.NopLogger,
)
err := app.Err()
if err == nil {
t.Fatal(
"expected error when DNSWATCHER_TARGETS is empty, got nil",
)
}
if !errors.Is(err, config.ErrNoTargets) {
t.Errorf("expected ErrNoTargets, got: %v", err)
}
}
func TestNewSucceedsWithTargets(t *testing.T) {
// Cannot use t.Parallel() because t.Setenv modifies the process
// environment.
t.Setenv("DNSWATCHER_TARGETS", "example.com")
t.Setenv("DNSWATCHER_DATA_DIR", t.TempDir())
// Prevent loading a local config file by changing to a temp dir.
t.Chdir(t.TempDir())
var cfg *config.Config
app := fx.New(
fx.Provide(
func() *globals.Globals {
return &globals.Globals{
Appname: "dnswatcher-test-ok",
}
},
logger.New,
config.New,
),
fx.Populate(&cfg),
fx.NopLogger,
)
err := app.Err()
if err != nil {
t.Fatalf(
"expected no error with valid targets, got: %v",
err,
)
}
if len(cfg.Domains) != 1 || cfg.Domains[0] != "example.com" {
t.Errorf(
"expected [example.com], got domains=%v",
cfg.Domains,
)
}
}

View File

@@ -4,6 +4,11 @@ import "errors"
// Sentinel errors returned by the resolver.
var (
// ErrNotImplemented indicates a method is stubbed out.
ErrNotImplemented = errors.New(
"resolver not yet implemented",
)
// ErrNoNameservers is returned when no authoritative NS
// could be discovered for a domain.
ErrNoNameservers = errors.New(

View File

@@ -13,7 +13,7 @@ import (
)
const (
queryTimeoutDuration = 2 * time.Second
queryTimeoutDuration = 700 * time.Millisecond
maxRetries = 2
maxDelegation = 20
timeoutMultiplier = 2
@@ -227,7 +227,7 @@ func (r *Resolver) followDelegation(
authNS := extractNSSet(resp.Ns)
if len(authNS) == 0 {
return r.resolveNSIterative(ctx, domain)
return r.resolveNSRecursive(ctx, domain)
}
glue := extractGlue(resp.Extra)
@@ -291,84 +291,70 @@ func (r *Resolver) resolveNSIPs(
return ips
}
// resolveNSIterative queries for NS records using iterative
// resolution as a fallback when followDelegation finds no
// authoritative answer in the delegation chain.
func (r *Resolver) resolveNSIterative(
// publicResolvers returns well-known public recursive DNS resolvers.
func publicResolvers() []string {
return []string{
"1.1.1.1", // Cloudflare
"8.8.8.8", // Google
"9.9.9.9", // Quad9
}
}
// resolveNSRecursive queries for NS records using a public
// recursive resolver as a fallback for intercepted environments.
func (r *Resolver) resolveNSRecursive(
ctx context.Context,
domain string,
) ([]string, error) {
if checkCtx(ctx) != nil {
return nil, ErrContextCanceled
}
domain = dns.Fqdn(domain)
servers := rootServerList()
msg := new(dns.Msg)
msg.SetQuestion(domain, dns.TypeNS)
msg.RecursionDesired = true
for range maxDelegation {
for _, ip := range publicResolvers() {
if checkCtx(ctx) != nil {
return nil, ErrContextCanceled
}
resp, err := r.queryServers(
ctx, servers, domain, dns.TypeNS,
)
addr := net.JoinHostPort(ip, "53")
resp, _, err := r.client.ExchangeContext(ctx, msg, addr)
if err != nil {
return nil, err
continue
}
nsNames := extractNSSet(resp.Answer)
if len(nsNames) > 0 {
return nsNames, nil
}
// Follow delegation.
authNS := extractNSSet(resp.Ns)
if len(authNS) == 0 {
break
}
glue := extractGlue(resp.Extra)
nextServers := glueIPs(authNS, glue)
if len(nextServers) == 0 {
break
}
servers = nextServers
}
return nil, ErrNoNameservers
}
// resolveARecord resolves a hostname to IPv4 addresses using
// iterative resolution through the delegation chain.
// public recursive resolvers.
func (r *Resolver) resolveARecord(
ctx context.Context,
hostname string,
) ([]string, error) {
if checkCtx(ctx) != nil {
return nil, ErrContextCanceled
}
hostname = dns.Fqdn(hostname)
servers := rootServerList()
msg := new(dns.Msg)
msg.SetQuestion(hostname, dns.TypeA)
msg.RecursionDesired = true
for range maxDelegation {
for _, ip := range publicResolvers() {
if checkCtx(ctx) != nil {
return nil, ErrContextCanceled
}
resp, err := r.queryServers(
ctx, servers, hostname, dns.TypeA,
)
addr := net.JoinHostPort(ip, "53")
resp, _, err := r.client.ExchangeContext(ctx, msg, addr)
if err != nil {
return nil, fmt.Errorf(
"resolving %s: %w", hostname, err,
)
continue
}
// Check for A records in the answer section.
var ips []string
for _, rr := range resp.Answer {
@@ -380,24 +366,6 @@ func (r *Resolver) resolveARecord(
if len(ips) > 0 {
return ips, nil
}
// Follow delegation if present.
authNS := extractNSSet(resp.Ns)
if len(authNS) == 0 {
break
}
glue := extractGlue(resp.Extra)
nextServers := glueIPs(authNS, glue)
if len(nextServers) == 0 {
// Resolve NS IPs iteratively — but guard
// against infinite recursion by using only
// already-resolved servers.
break
}
servers = nextServers
}
return nil, fmt.Errorf(
@@ -460,23 +428,6 @@ func (r *Resolver) QueryNameserver(
return r.queryAllTypes(ctx, nsHostname, nsIPs[0], hostname)
}
// QueryNameserverIP queries a nameserver by its IP address directly,
// bypassing NS hostname resolution.
func (r *Resolver) QueryNameserverIP(
ctx context.Context,
nsHostname string,
nsIP string,
hostname string,
) (*NameserverResponse, error) {
if checkCtx(ctx) != nil {
return nil, ErrContextCanceled
}
hostname = dns.Fqdn(hostname)
return r.queryAllTypes(ctx, nsHostname, nsIP, hostname)
}
func (r *Resolver) queryAllTypes(
ctx context.Context,
nsHostname string,
@@ -504,7 +455,6 @@ func (r *Resolver) queryAllTypes(
type queryState struct {
gotNXDomain bool
gotSERVFAIL bool
gotTimeout bool
hasRecords bool
}
@@ -542,10 +492,6 @@ func (r *Resolver) querySingleType(
) {
msg, err := r.queryDNS(ctx, nsIP, hostname, qtype)
if err != nil {
if isTimeout(err) {
state.gotTimeout = true
}
return
}
@@ -583,26 +529,12 @@ func collectAnswerRecords(
}
}
// isTimeout checks whether an error is a network timeout.
func isTimeout(err error) bool {
var netErr net.Error
if errors.As(err, &netErr) {
return netErr.Timeout()
}
return false
}
func classifyResponse(resp *NameserverResponse, state queryState) {
switch {
case state.gotNXDomain && !state.hasRecords:
resp.Status = StatusNXDomain
case state.gotTimeout && !state.hasRecords:
resp.Status = StatusTimeout
resp.Error = "all queries timed out"
case state.gotSERVFAIL && !state.hasRecords:
resp.Status = StatusError
resp.Error = "server returned SERVFAIL"
case !state.hasRecords && !state.gotNXDomain:
resp.Status = StatusNoData
}

View File

@@ -17,7 +17,6 @@ const (
StatusError = "error"
StatusNXDomain = "nxdomain"
StatusNoData = "nodata"
StatusTimeout = "timeout"
)
// MaxCNAMEDepth is the maximum CNAME chain depth to follow.

View File

@@ -10,7 +10,6 @@ import (
"testing"
"time"
"github.com/miekg/dns"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
@@ -623,59 +622,6 @@ func TestQueryAllNameservers_ContextCanceled(t *testing.T) {
assert.Error(t, err)
}
// ----------------------------------------------------------------
// Timeout tests
// ----------------------------------------------------------------
func TestQueryNameserverIP_Timeout(t *testing.T) {
t.Parallel()
log := slog.New(slog.NewTextHandler(
os.Stderr,
&slog.HandlerOptions{Level: slog.LevelDebug},
))
r := resolver.NewFromLoggerWithClient(
log, &timeoutClient{},
)
ctx, cancel := context.WithTimeout(
context.Background(), 10*time.Second,
)
t.Cleanup(cancel)
// Query any IP — the client always returns a timeout error.
resp, err := r.QueryNameserverIP(
ctx, "unreachable.test.", "192.0.2.1",
"example.com",
)
require.NoError(t, err)
assert.Equal(t, resolver.StatusTimeout, resp.Status)
assert.NotEmpty(t, resp.Error)
}
// timeoutClient simulates DNS timeout errors for testing.
type timeoutClient struct{}
func (c *timeoutClient) ExchangeContext(
_ context.Context,
_ *dns.Msg,
_ string,
) (*dns.Msg, time.Duration, error) {
return nil, 0, &net.OpError{
Op: "read",
Net: "udp",
Err: &timeoutError{},
}
}
type timeoutError struct{}
func (e *timeoutError) Error() string { return "i/o timeout" }
func (e *timeoutError) Timeout() bool { return true }
func (e *timeoutError) Temporary() bool { return true }
func TestResolveIPAddresses_ContextCanceled(t *testing.T) {
t.Parallel()

View File

@@ -57,49 +57,10 @@ type HostnameState struct {
// PortState holds the monitoring state for a port.
type PortState struct {
Open bool `json:"open"`
Hostnames []string `json:"hostnames"`
Hostname string `json:"hostname"`
LastChecked time.Time `json:"lastChecked"`
}
// UnmarshalJSON implements custom unmarshaling to handle both
// the old single-hostname format and the new multi-hostname
// format for backward compatibility with existing state files.
func (ps *PortState) UnmarshalJSON(data []byte) error {
// Use an alias to prevent infinite recursion.
type portStateAlias struct {
Open bool `json:"open"`
Hostnames []string `json:"hostnames"`
LastChecked time.Time `json:"lastChecked"`
}
var alias portStateAlias
err := json.Unmarshal(data, &alias)
if err != nil {
return fmt.Errorf("unmarshaling port state: %w", err)
}
ps.Open = alias.Open
ps.Hostnames = alias.Hostnames
ps.LastChecked = alias.LastChecked
// If Hostnames is empty, try reading the old single-hostname
// format for backward compatibility.
if len(ps.Hostnames) == 0 {
var old struct {
Hostname string `json:"hostname"`
}
// Best-effort: ignore errors since the main unmarshal
// already succeeded.
if json.Unmarshal(data, &old) == nil && old.Hostname != "" {
ps.Hostnames = []string{old.Hostname}
}
}
return nil
}
// CertificateState holds TLS certificate monitoring state.
type CertificateState struct {
CommonName string `json:"commonName"`
@@ -302,27 +263,6 @@ func (s *State) GetPortState(key string) (*PortState, bool) {
return ps, ok
}
// DeletePortState removes a port state entry.
func (s *State) DeletePortState(key string) {
s.mu.Lock()
defer s.mu.Unlock()
delete(s.snapshot.Ports, key)
}
// GetAllPortKeys returns all port state keys.
func (s *State) GetAllPortKeys() []string {
s.mu.RLock()
defer s.mu.RUnlock()
keys := make([]string, 0, len(s.snapshot.Ports))
for k := range s.snapshot.Ports {
keys = append(keys, k)
}
return keys
}
// SetCertificateState updates the state for a certificate.
func (s *State) SetCertificateState(
key string,

View File

@@ -6,7 +6,6 @@ import (
"log/slog"
"sort"
"strings"
"sync"
"time"
"go.uber.org/fx"
@@ -41,17 +40,15 @@ type Params struct {
// Watcher orchestrates all monitoring checks on a schedule.
type Watcher struct {
log *slog.Logger
config *config.Config
state *state.State
resolver DNSResolver
portCheck PortChecker
tlsCheck TLSChecker
notify Notifier
cancel context.CancelFunc
firstRun bool
expiryNotifiedMu sync.Mutex
expiryNotified map[string]time.Time
log *slog.Logger
config *config.Config
state *state.State
resolver DNSResolver
portCheck PortChecker
tlsCheck TLSChecker
notify Notifier
cancel context.CancelFunc
firstRun bool
}
// New creates a new Watcher instance wired into the fx lifecycle.
@@ -60,27 +57,24 @@ func New(
params Params,
) (*Watcher, error) {
w := &Watcher{
log: params.Logger.Get(),
config: params.Config,
state: params.State,
resolver: params.Resolver,
portCheck: params.PortCheck,
tlsCheck: params.TLSCheck,
notify: params.Notify,
firstRun: true,
expiryNotified: make(map[string]time.Time),
log: params.Logger.Get(),
config: params.Config,
state: params.State,
resolver: params.Resolver,
portCheck: params.PortCheck,
tlsCheck: params.TLSCheck,
notify: params.Notify,
firstRun: true,
}
lifecycle.Append(fx.Hook{
OnStart: func(_ context.Context) error {
// Use context.Background() — the fx startup context
// expires after startup completes, so deriving from it
// would cancel the watcher immediately. The watcher's
// lifetime is controlled by w.cancel in OnStop.
ctx, cancel := context.WithCancel(context.Background())
OnStart: func(startCtx context.Context) error {
ctx, cancel := context.WithCancel(
context.WithoutCancel(startCtx),
)
w.cancel = cancel
go w.Run(ctx) //nolint:contextcheck // intentionally not derived from startCtx
go w.Run(ctx)
return nil
},
@@ -106,15 +100,14 @@ func NewForTest(
n Notifier,
) *Watcher {
return &Watcher{
log: slog.Default(),
config: cfg,
state: st,
resolver: res,
portCheck: pc,
tlsCheck: tc,
notify: n,
firstRun: true,
expiryNotified: make(map[string]time.Time),
log: slog.Default(),
config: cfg,
state: st,
resolver: res,
portCheck: pc,
tlsCheck: tc,
notify: n,
firstRun: true,
}
}
@@ -143,16 +136,9 @@ func (w *Watcher) Run(ctx context.Context) {
return
case <-dnsTicker.C:
w.runDNSChecks(ctx)
w.checkAllPorts(ctx)
w.runDNSAndPortChecks(ctx)
w.saveState()
case <-tlsTicker.C:
// Run DNS first so TLS checks use freshly
// resolved IP addresses, not stale ones from
// a previous cycle.
w.runDNSChecks(ctx)
w.runTLSChecks(ctx)
w.saveState()
}
@@ -160,26 +146,10 @@ func (w *Watcher) Run(ctx context.Context) {
}
// RunOnce performs a single complete monitoring cycle.
// DNS checks run first so that port and TLS checks use
// freshly resolved IP addresses. Port checks run before
// TLS because TLS checks only target IPs with an open
// port 443.
func (w *Watcher) RunOnce(ctx context.Context) {
w.detectFirstRun()
// Phase 1: DNS resolution must complete first so that
// subsequent checks use fresh IP addresses.
w.runDNSChecks(ctx)
// Phase 2: Port checks populate port state that TLS
// checks depend on (TLS only targets IPs where port
// 443 is open).
w.checkAllPorts(ctx)
// Phase 3: TLS checks use fresh DNS IPs and current
// port state.
w.runDNSAndPortChecks(ctx)
w.runTLSChecks(ctx)
w.saveState()
w.firstRun = false
}
@@ -196,11 +166,7 @@ func (w *Watcher) detectFirstRun() {
}
}
// runDNSChecks performs DNS resolution for all configured domains
// and hostnames, updating state with freshly resolved records.
// This must complete before port or TLS checks run so those
// checks operate on current IP addresses.
func (w *Watcher) runDNSChecks(ctx context.Context) {
func (w *Watcher) runDNSAndPortChecks(ctx context.Context) {
for _, domain := range w.config.Domains {
w.checkDomain(ctx, domain)
}
@@ -208,6 +174,8 @@ func (w *Watcher) runDNSChecks(ctx context.Context) {
for _, hostname := range w.config.Hostnames {
w.checkHostname(ctx, hostname)
}
w.checkAllPorts(ctx)
}
func (w *Watcher) checkDomain(
@@ -238,28 +206,6 @@ func (w *Watcher) checkDomain(
Nameservers: nameservers,
LastChecked: now,
})
// Also look up A/AAAA records for the apex domain so that
// port and TLS checks (which read HostnameState) can find
// the domain's IP addresses.
records, err := w.resolver.LookupAllRecords(ctx, domain)
if err != nil {
w.log.Error(
"failed to lookup records for domain",
"domain", domain,
"error", err,
)
return
}
prevHS, hasPrevHS := w.state.GetHostnameState(domain)
if hasPrevHS && !w.firstRun {
w.detectHostnameChanges(ctx, domain, prevHS, records)
}
newState := buildHostnameState(records, now)
w.state.SetHostnameState(domain, newState)
}
func (w *Watcher) detectNSChanges(
@@ -475,94 +421,24 @@ func (w *Watcher) detectInconsistencies(
}
func (w *Watcher) checkAllPorts(ctx context.Context) {
// Phase 1: Build current IP:port → hostname associations
// from fresh DNS data.
associations := w.buildPortAssociations()
// Phase 2: Check each unique IP:port and update state
// with the full set of associated hostnames.
for key, hostnames := range associations {
ip, port := parsePortKey(key)
if port == 0 {
continue
}
w.checkSinglePort(ctx, ip, port, hostnames)
for _, hostname := range w.config.Hostnames {
w.checkPortsForHostname(ctx, hostname)
}
// Phase 3: Remove port state entries that no longer have
// any hostname referencing them.
w.cleanupStalePorts(associations)
for _, domain := range w.config.Domains {
w.checkPortsForHostname(ctx, domain)
}
}
// buildPortAssociations constructs a map from IP:port keys to
// the sorted set of hostnames currently resolving to that IP.
func (w *Watcher) buildPortAssociations() map[string][]string {
assoc := make(map[string]map[string]bool)
allNames := make(
[]string, 0,
len(w.config.Hostnames)+len(w.config.Domains),
)
allNames = append(allNames, w.config.Hostnames...)
allNames = append(allNames, w.config.Domains...)
for _, name := range allNames {
ips := w.collectIPs(name)
for _, ip := range ips {
for _, port := range monitoredPorts {
key := fmt.Sprintf("%s:%d", ip, port)
if assoc[key] == nil {
assoc[key] = make(map[string]bool)
}
assoc[key][name] = true
}
}
}
result := make(map[string][]string, len(assoc))
for key, set := range assoc {
hostnames := make([]string, 0, len(set))
for h := range set {
hostnames = append(hostnames, h)
}
sort.Strings(hostnames)
result[key] = hostnames
}
return result
}
// parsePortKey splits an "ip:port" key into its components.
func parsePortKey(key string) (string, int) {
lastColon := strings.LastIndex(key, ":")
if lastColon < 0 {
return key, 0
}
ip := key[:lastColon]
var p int
_, err := fmt.Sscanf(key[lastColon+1:], "%d", &p)
if err != nil {
return ip, 0
}
return ip, p
}
// cleanupStalePorts removes port state entries that are no
// longer referenced by any hostname in the current DNS data.
func (w *Watcher) cleanupStalePorts(
currentAssociations map[string][]string,
func (w *Watcher) checkPortsForHostname(
ctx context.Context,
hostname string,
) {
for _, key := range w.state.GetAllPortKeys() {
if _, exists := currentAssociations[key]; !exists {
w.state.DeletePortState(key)
ips := w.collectIPs(hostname)
for _, ip := range ips {
for _, port := range monitoredPorts {
w.checkSinglePort(ctx, ip, port, hostname)
}
}
}
@@ -599,7 +475,7 @@ func (w *Watcher) checkSinglePort(
ctx context.Context,
ip string,
port int,
hostnames []string,
hostname string,
) {
result, err := w.portCheck.CheckPort(ctx, ip, port)
if err != nil {
@@ -624,8 +500,8 @@ func (w *Watcher) checkSinglePort(
}
msg := fmt.Sprintf(
"Hosts: %s\nAddress: %s\nPort now %s",
strings.Join(hostnames, ", "), key, stateStr,
"Host: %s\nAddress: %s\nPort now %s",
hostname, key, stateStr,
)
w.notify.SendNotification(
@@ -638,7 +514,7 @@ func (w *Watcher) checkSinglePort(
w.state.SetPortState(key, &state.PortState{
Open: result.Open,
Hostnames: hostnames,
Hostname: hostname,
LastChecked: now,
})
}
@@ -815,22 +691,6 @@ func (w *Watcher) checkTLSExpiry(
return
}
// Deduplicate expiry warnings: don't re-notify for the same
// hostname within the TLS check interval.
dedupKey := fmt.Sprintf("expiry:%s:%s", hostname, ip)
w.expiryNotifiedMu.Lock()
lastNotified, seen := w.expiryNotified[dedupKey]
if seen && time.Since(lastNotified) < w.config.TLSInterval {
w.expiryNotifiedMu.Unlock()
return
}
w.expiryNotified[dedupKey] = time.Now()
w.expiryNotifiedMu.Unlock()
msg := fmt.Sprintf(
"Host: %s\nIP: %s\nCN: %s\n"+
"Expires: %s (%.0f days)",

View File

@@ -273,10 +273,6 @@ func setupBaselineMocks(deps *testDeps) {
"ns1.example.com.",
"ns2.example.com.",
}
deps.resolver.allRecords["example.com"] = map[string]map[string][]string{
"ns1.example.com.": {"A": {"93.184.216.34"}},
"ns2.example.com.": {"A": {"93.184.216.34"}},
}
deps.resolver.allRecords["www.example.com"] = map[string]map[string][]string{
"ns1.example.com.": {"A": {"93.184.216.34"}},
"ns2.example.com.": {"A": {"93.184.216.34"}},
@@ -294,14 +290,6 @@ func setupBaselineMocks(deps *testDeps) {
"www.example.com",
},
}
deps.tlsChecker.certs["93.184.216.34:example.com"] = &tlscheck.CertificateInfo{
CommonName: "example.com",
Issuer: "DigiCert",
NotAfter: time.Now().Add(90 * 24 * time.Hour),
SubjectAlternativeNames: []string{
"example.com",
},
}
}
func assertNoNotifications(
@@ -334,74 +322,14 @@ func assertStatePopulated(
)
}
// Hostnames includes both explicit hostnames and domains
// (domains now also get hostname state for port/TLS checks).
if len(snap.Hostnames) < 1 {
if len(snap.Hostnames) != 1 {
t.Errorf(
"expected at least 1 hostname in state, got %d",
"expected 1 hostname in state, got %d",
len(snap.Hostnames),
)
}
}
func TestDomainPortAndTLSChecks(t *testing.T) {
t.Parallel()
cfg := defaultTestConfig(t)
cfg.Domains = []string{"example.com"}
w, deps := newTestWatcher(t, cfg)
deps.resolver.nsRecords["example.com"] = []string{
"ns1.example.com.",
}
deps.resolver.allRecords["example.com"] = map[string]map[string][]string{
"ns1.example.com.": {"A": {"93.184.216.34"}},
}
deps.portChecker.results["93.184.216.34:80"] = true
deps.portChecker.results["93.184.216.34:443"] = true
deps.tlsChecker.certs["93.184.216.34:example.com"] = &tlscheck.CertificateInfo{
CommonName: "example.com",
Issuer: "DigiCert",
NotAfter: time.Now().Add(90 * 24 * time.Hour),
SubjectAlternativeNames: []string{
"example.com",
},
}
w.RunOnce(t.Context())
snap := deps.state.GetSnapshot()
// Domain should have port state populated
if len(snap.Ports) == 0 {
t.Error("expected port state for domain, got none")
}
// Domain should have certificate state populated
if len(snap.Certificates) == 0 {
t.Error("expected certificate state for domain, got none")
}
// Verify port checker was actually called
deps.portChecker.mu.Lock()
calls := deps.portChecker.calls
deps.portChecker.mu.Unlock()
if calls == 0 {
t.Error("expected port checker to be called for domain")
}
// Verify TLS checker was actually called
deps.tlsChecker.mu.Lock()
tlsCalls := deps.tlsChecker.calls
deps.tlsChecker.mu.Unlock()
if tlsCalls == 0 {
t.Error("expected TLS checker to be called for domain")
}
}
func TestNSChangeDetection(t *testing.T) {
t.Parallel()
@@ -414,12 +342,6 @@ func TestNSChangeDetection(t *testing.T) {
"ns1.example.com.",
"ns2.example.com.",
}
deps.resolver.allRecords["example.com"] = map[string]map[string][]string{
"ns1.example.com.": {"A": {"1.2.3.4"}},
"ns2.example.com.": {"A": {"1.2.3.4"}},
}
deps.portChecker.results["1.2.3.4:80"] = false
deps.portChecker.results["1.2.3.4:443"] = false
ctx := t.Context()
w.RunOnce(ctx)
@@ -429,10 +351,6 @@ func TestNSChangeDetection(t *testing.T) {
"ns1.example.com.",
"ns3.example.com.",
}
deps.resolver.allRecords["example.com"] = map[string]map[string][]string{
"ns1.example.com.": {"A": {"1.2.3.4"}},
"ns3.example.com.": {"A": {"1.2.3.4"}},
}
deps.resolver.mu.Unlock()
w.RunOnce(ctx)
@@ -588,61 +506,6 @@ func TestTLSExpiryWarning(t *testing.T) {
}
}
func TestTLSExpiryWarningDedup(t *testing.T) {
t.Parallel()
cfg := defaultTestConfig(t)
cfg.Hostnames = []string{"www.example.com"}
cfg.TLSInterval = 24 * time.Hour
w, deps := newTestWatcher(t, cfg)
deps.resolver.allRecords["www.example.com"] = map[string]map[string][]string{
"ns1.example.com.": {"A": {"1.2.3.4"}},
}
deps.resolver.ipAddresses["www.example.com"] = []string{
"1.2.3.4",
}
deps.portChecker.results["1.2.3.4:80"] = true
deps.portChecker.results["1.2.3.4:443"] = true
deps.tlsChecker.certs["1.2.3.4:www.example.com"] = &tlscheck.CertificateInfo{
CommonName: "www.example.com",
Issuer: "DigiCert",
NotAfter: time.Now().Add(3 * 24 * time.Hour),
SubjectAlternativeNames: []string{
"www.example.com",
},
}
ctx := t.Context()
// First run = baseline, no notifications
w.RunOnce(ctx)
// Second run should fire one expiry warning
w.RunOnce(ctx)
// Third run should NOT fire another warning (dedup)
w.RunOnce(ctx)
notifications := deps.notifier.getNotifications()
expiryCount := 0
for _, n := range notifications {
if n.Title == "TLS Expiry Warning: www.example.com" {
expiryCount++
}
}
if expiryCount != 1 {
t.Errorf(
"expected exactly 1 expiry warning (dedup), got %d",
expiryCount,
)
}
}
func TestGracefulShutdown(t *testing.T) {
t.Parallel()
@@ -656,11 +519,6 @@ func TestGracefulShutdown(t *testing.T) {
deps.resolver.nsRecords["example.com"] = []string{
"ns1.example.com.",
}
deps.resolver.allRecords["example.com"] = map[string]map[string][]string{
"ns1.example.com.": {"A": {"1.2.3.4"}},
}
deps.portChecker.results["1.2.3.4:80"] = false
deps.portChecker.results["1.2.3.4:443"] = false
ctx, cancel := context.WithCancel(t.Context())
@@ -682,80 +540,6 @@ func TestGracefulShutdown(t *testing.T) {
}
}
func setupHostnameIP(
deps *testDeps,
hostname, ip string,
) {
deps.resolver.allRecords[hostname] = map[string]map[string][]string{
"ns1.example.com.": {"A": {ip}},
}
deps.portChecker.results[ip+":80"] = true
deps.portChecker.results[ip+":443"] = true
deps.tlsChecker.certs[ip+":"+hostname] = &tlscheck.CertificateInfo{
CommonName: hostname,
Issuer: "DigiCert",
NotAfter: time.Now().Add(90 * 24 * time.Hour),
SubjectAlternativeNames: []string{hostname},
}
}
func updateHostnameIP(deps *testDeps, hostname, ip string) {
deps.resolver.mu.Lock()
deps.resolver.allRecords[hostname] = map[string]map[string][]string{
"ns1.example.com.": {"A": {ip}},
}
deps.resolver.mu.Unlock()
deps.portChecker.mu.Lock()
deps.portChecker.results[ip+":80"] = true
deps.portChecker.results[ip+":443"] = true
deps.portChecker.mu.Unlock()
deps.tlsChecker.mu.Lock()
deps.tlsChecker.certs[ip+":"+hostname] = &tlscheck.CertificateInfo{
CommonName: hostname,
Issuer: "DigiCert",
NotAfter: time.Now().Add(90 * 24 * time.Hour),
SubjectAlternativeNames: []string{hostname},
}
deps.tlsChecker.mu.Unlock()
}
func TestDNSRunsBeforePortAndTLSChecks(t *testing.T) {
t.Parallel()
cfg := defaultTestConfig(t)
cfg.Hostnames = []string{"www.example.com"}
w, deps := newTestWatcher(t, cfg)
setupHostnameIP(deps, "www.example.com", "10.0.0.1")
ctx := t.Context()
w.RunOnce(ctx)
snap := deps.state.GetSnapshot()
if _, ok := snap.Ports["10.0.0.1:80"]; !ok {
t.Fatal("expected port state for 10.0.0.1:80")
}
// DNS changes to a new IP; port and TLS must pick it up.
updateHostnameIP(deps, "www.example.com", "10.0.0.2")
w.RunOnce(ctx)
snap = deps.state.GetSnapshot()
if _, ok := snap.Ports["10.0.0.2:80"]; !ok {
t.Error("port check used stale DNS: missing 10.0.0.2:80")
}
certKey := "10.0.0.2:443:www.example.com"
if _, ok := snap.Certificates[certKey]; !ok {
t.Error("TLS check used stale DNS: missing " + certKey)
}
}
func TestNSFailureAndRecovery(t *testing.T) {
t.Parallel()