From 144a2df66514fb95c700316461dada4ddb34c72e Mon Sep 17 00:00:00 2001 From: sneak Date: Thu, 19 Feb 2026 21:05:39 +0100 Subject: [PATCH] Initial scaffold with per-nameserver DNS monitoring model Full project structure following upaas conventions: uber/fx DI, go-chi routing, slog logging, Viper config. State persisted as JSON file with per-nameserver record tracking for inconsistency detection. Stub implementations for resolver, portcheck, tlscheck, and watcher. --- .gitignore | 6 + .golangci.yml | 32 + CLAUDE.md | 68 ++ CONVENTIONS.md | 1225 +++++++++++++++++++++++++++ Dockerfile | 38 + Makefile | 37 + README.md | 385 +++++++++ cmd/dnswatcher/main.go | 56 ++ go.mod | 39 + go.sum | 87 ++ internal/config/config.go | 187 ++++ internal/globals/globals.go | 62 ++ internal/handlers/handlers.go | 58 ++ internal/handlers/healthcheck.go | 17 + internal/handlers/status.go | 23 + internal/healthcheck/healthcheck.go | 79 ++ internal/logger/logger.go | 83 ++ internal/middleware/middleware.go | 205 +++++ internal/notify/notify.go | 261 ++++++ internal/portcheck/portcheck.go | 48 ++ internal/resolver/resolver.go | 64 ++ internal/server/routes.go | 43 + internal/server/server.go | 129 +++ internal/state/state.go | 287 +++++++ internal/tlscheck/tlscheck.go | 58 ++ internal/watcher/watcher.go | 94 ++ 26 files changed, 3671 insertions(+) create mode 100644 .gitignore create mode 100644 .golangci.yml create mode 100644 CLAUDE.md create mode 100644 CONVENTIONS.md create mode 100644 Dockerfile create mode 100644 Makefile create mode 100644 README.md create mode 100644 cmd/dnswatcher/main.go create mode 100644 go.mod create mode 100644 go.sum create mode 100644 internal/config/config.go create mode 100644 internal/globals/globals.go create mode 100644 internal/handlers/handlers.go create mode 100644 internal/handlers/healthcheck.go create mode 100644 internal/handlers/status.go create mode 100644 internal/healthcheck/healthcheck.go create mode 100644 internal/logger/logger.go create 
mode 100644 internal/middleware/middleware.go create mode 100644 internal/notify/notify.go create mode 100644 internal/portcheck/portcheck.go create mode 100644 internal/resolver/resolver.go create mode 100644 internal/server/routes.go create mode 100644 internal/server/server.go create mode 100644 internal/state/state.go create mode 100644 internal/tlscheck/tlscheck.go create mode 100644 internal/watcher/watcher.go diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..9c22cd8 --- /dev/null +++ b/.gitignore @@ -0,0 +1,6 @@ +bin/ +vendor/ +data/ +.env +*.exe +/dnswatcher diff --git a/.golangci.yml b/.golangci.yml new file mode 100644 index 0000000..34a8e31 --- /dev/null +++ b/.golangci.yml @@ -0,0 +1,32 @@ +version: "2" + +run: + timeout: 5m + modules-download-mode: readonly + +linters: + default: all + disable: + # Genuinely incompatible with project patterns + - exhaustruct # Requires all struct fields + - depguard # Dependency allow/block lists + - godot # Requires comments to end with periods + - wsl # Deprecated, replaced by wsl_v5 + - wrapcheck # Too verbose for internal packages + - varnamelen # Short names like db, id are idiomatic Go + +linters-settings: + lll: + line-length: 88 + funlen: + lines: 80 + statements: 50 + cyclop: + max-complexity: 15 + dupl: + threshold: 100 + +issues: + exclude-use-default: false + max-issues-per-linter: 0 + max-same-issues: 0 diff --git a/CLAUDE.md b/CLAUDE.md new file mode 100644 index 0000000..9c9ebe5 --- /dev/null +++ b/CLAUDE.md @@ -0,0 +1,68 @@ +# Repository Rules + +Last Updated 2026-01-08 + +These rules MUST be followed at all times, it is very important. + +* Never use `git add -A` - add specific changes to a deliberate commit. A + commit should contain one change. After each change, make a commit with a + good one-line summary. + +* NEVER modify the linter config without asking first. + +* NEVER modify tests to exclude special cases or otherwise get them to pass + without asking first. 
In almost all cases, the code should be changed, + NOT the tests. If you think the test needs to be changed, make your case + for that and ask for permission to proceed, then stop. You need explicit + user approval to modify existing tests. (You do not need user approval + for writing NEW tests.) + +* When linting, assume the linter config is CORRECT, and that each item + output by the linter is something that legitimately needs fixing in the + code. + +* When running tests, use `make test`. + +* Before commits, run `make check`. This runs `make lint` and `make test` + and `make check-fmt`. Any issues discovered MUST be resolved before + committing unless explicitly told otherwise. + +* When fixing a bug, write a failing test for the bug FIRST. Add + appropriate logging to the test to ensure it is written correctly. Commit + that. Then go about fixing the bug until the test passes (without + modifying the test further). Then commit that. + +* When adding a new feature, do the same - implement a test first (TDD). It + doesn't have to be super complex. Commit the test, then commit the + feature. + +* When adding a new feature, use a feature branch. When the feature is + completely finished and the code is up to standards (passes `make check`) + then and only then can the feature branch be merged into `main` and the + branch deleted. + +* Write godoc documentation comments for all exported types and functions as + you go along. + +* ALWAYS be consistent in naming. If you name something one thing in one + place, name it the EXACT SAME THING in another place. + +* Be descriptive and specific in naming. `wl` is bad; + `SourceHostWhitelist` is good. `ConnsPerHost` is bad; + `MaxConnectionsPerHost` is good. + +* This is not prototype or teaching code - this is designed for production. + Any security issues (such as denial of service) or other web + vulnerabilities are P1 bugs and must be added to TODO.md at the top. 
+ +* As this is production code, no stubbing of implementations unless + specifically instructed. We need working implementations. + +* Avoid vendoring deps unless specifically instructed to. NEVER commit + the vendor directory, NEVER commit compiled binaries. If these + directories or files exist, add them to .gitignore (and commit the + .gitignore) if they are not already in there. Keep the entire git + repository (with history) small - under 20MiB, unless you specifically + must commit larger files (e.g. test fixture example media files). Only + OUR source code and immediately supporting files (such as test examples) + goes into the repo/history. diff --git a/CONVENTIONS.md b/CONVENTIONS.md new file mode 100644 index 0000000..6f7c217 --- /dev/null +++ b/CONVENTIONS.md @@ -0,0 +1,1225 @@ +# Go HTTP Server Conventions + +This document defines the architectural patterns, design decisions, and conventions for building Go HTTP servers. All new projects must follow these standards. + +## Table of Contents + +1. [Required Libraries](#1-required-libraries) +2. [Project Structure](#2-project-structure) +3. [Dependency Injection (Uber fx)](#3-dependency-injection-uber-fx) +4. [Server Architecture](#4-server-architecture) +5. [Routing (go-chi)](#5-routing-go-chi) +6. [Handler Conventions](#6-handler-conventions) +7. [Middleware Conventions](#7-middleware-conventions) +8. [Configuration (Viper)](#8-configuration-viper) +9. [Logging (slog)](#9-logging-slog) +10. [Database Wrapper](#10-database-wrapper) +11. [Globals Package](#11-globals-package) +12. [Static Assets & Templates](#12-static-assets--templates) +13. [Health Check](#13-health-check) +14. [External Integrations](#14-external-integrations) + +--- + +## 1. 
Required Libraries + +These libraries are **mandatory** for all new projects: + +| Purpose | Library | Import Path | +|---------|---------|-------------| +| Dependency Injection | Uber fx | `go.uber.org/fx` | +| HTTP Router | go-chi | `github.com/go-chi/chi` | +| Logging | slog (stdlib) | `log/slog` | +| Configuration | Viper | `github.com/spf13/viper` | +| Environment Loading | godotenv | `github.com/joho/godotenv/autoload` | +| CORS | go-chi/cors | `github.com/go-chi/cors` | +| Error Reporting | Sentry | `github.com/getsentry/sentry-go` | +| Metrics | Prometheus | `github.com/prometheus/client_golang` | +| Metrics Middleware | go-http-metrics | `github.com/slok/go-http-metrics` | +| Basic Auth | basicauth-go | `github.com/99designs/basicauth-go` | + +--- + +## 2. Project Structure + +``` +project-root/ +├── cmd/ +│ └── {appname}/ +│ └── main.go # Entry point +├── internal/ +│ ├── config/ +│ │ └── config.go # Configuration loading +│ ├── database/ +│ │ └── database.go # Database wrapper +│ ├── globals/ +│ │ └── globals.go # Build-time variables +│ ├── handlers/ +│ │ ├── handlers.go # Base handler struct and helpers +│ │ ├── index.go # Individual handlers... +│ │ ├── healthcheck.go +│ │ └── {feature}.go +│ ├── healthcheck/ +│ │ └── healthcheck.go # Health check service +│ ├── logger/ +│ │ └── logger.go # Logger setup +│ ├── middleware/ +│ │ └── middleware.go # All middleware definitions +│ └── server/ +│ ├── server.go # Server struct and lifecycle +│ ├── http.go # HTTP server setup +│ └── routes.go # Route definitions +├── static/ +│ ├── static.go # Embed directive +│ ├── css/ +│ └── js/ +├── templates/ +│ ├── templates.go # Embed and parse +│ └── *.html +├── go.mod +├── go.sum +├── Makefile +└── Dockerfile +``` + +### Key Principles + +- **`cmd/{appname}/`**: Only the entry point. Minimal logic, just bootstrapping. +- **`internal/`**: All application packages. Not importable by external projects. 
+- **One package per concern**: config, database, handlers, middleware, etc. +- **Flat handler files**: One file per handler or logical group of handlers. + +--- + +## 3. Dependency Injection (Uber fx) + +### Entry Point Pattern + +```go +// cmd/httpd/main.go +package main + +import ( + "yourproject/internal/config" + "yourproject/internal/database" + "yourproject/internal/globals" + "yourproject/internal/handlers" + "yourproject/internal/healthcheck" + "yourproject/internal/logger" + "yourproject/internal/middleware" + "yourproject/internal/server" + "go.uber.org/fx" +) + +var ( + Appname string = "CHANGEME" + Version string + Buildarch string +) + +func main() { + globals.Appname = Appname + globals.Version = Version + globals.Buildarch = Buildarch + + fx.New( + fx.Provide( + config.New, + database.New, + globals.New, + handlers.New, + logger.New, + server.New, + middleware.New, + healthcheck.New, + ), + fx.Invoke(func(*server.Server) {}), + ).Run() +} +``` + +### Params Struct Pattern + +Every component that receives dependencies uses a params struct with `fx.In`: + +```go +type HandlersParams struct { + fx.In + Logger *logger.Logger + Globals *globals.Globals + Database *database.Database + Healthcheck *healthcheck.Healthcheck +} + +type Handlers struct { + params *HandlersParams + log *slog.Logger + hc *healthcheck.Healthcheck +} +``` + +### Factory Function Pattern + +All components expose a `New` function with this signature: + +```go +func New(lc fx.Lifecycle, params SomeParams) (*Something, error) { + s := new(Something) + s.params = &params + s.log = params.Logger.Get() + + lc.Append(fx.Hook{ + OnStart: func(ctx context.Context) error { + // Initialize resources + return nil + }, + OnStop: func(ctx context.Context) error { + // Cleanup resources + return nil + }, + }) + return s, nil +} +``` + +### Dependency Order + +Providers are resolved automatically by fx, but conceptually follow this order: + +1. 
`globals.New` - Build-time variables (no dependencies) +2. `logger.New` - Logger (depends on Globals) +3. `config.New` - Configuration (depends on Globals, Logger) +4. `database.New` - Database (depends on Logger, Config) +5. `healthcheck.New` - Health check (depends on Globals, Config, Logger, Database) +6. `middleware.New` - Middleware (depends on Logger, Globals, Config) +7. `handlers.New` - Handlers (depends on Logger, Globals, Database, Healthcheck) +8. `server.New` - Server (depends on all above) + +--- + +## 4. Server Architecture + +### Server Struct + +The Server struct is the central orchestrator: + +```go +// internal/server/server.go +type ServerParams struct { + fx.In + Logger *logger.Logger + Globals *globals.Globals + Config *config.Config + Middleware *middleware.Middleware + Handlers *handlers.Handlers +} + +type Server struct { + startupTime time.Time + port int + exitCode int + sentryEnabled bool + log *slog.Logger + ctx context.Context + cancelFunc context.CancelFunc + httpServer *http.Server + router *chi.Mux + params ServerParams + mw *middleware.Middleware + h *handlers.Handlers +} +``` + +### Server Factory + +```go +func New(lc fx.Lifecycle, params ServerParams) (*Server, error) { + s := new(Server) + s.params = params + s.mw = params.Middleware + s.h = params.Handlers + s.log = params.Logger.Get() + + lc.Append(fx.Hook{ + OnStart: func(ctx context.Context) error { + s.startupTime = time.Now() + go s.Run() + return nil + }, + OnStop: func(ctx context.Context) error { + // Server shutdown logic + return nil + }, + }) + return s, nil +} +``` + +### HTTP Server Setup + +```go +// internal/server/http.go +func (s *Server) serveUntilShutdown() { + listenAddr := fmt.Sprintf(":%d", s.params.Config.Port) + s.httpServer = &http.Server{ + Addr: listenAddr, + ReadTimeout: 10 * time.Second, + WriteTimeout: 10 * time.Second, + MaxHeaderBytes: 1 << 20, + Handler: s, + } + + s.SetupRoutes() + + s.log.Info("http begin listen", "listenaddr", listenAddr) + 
if err := s.httpServer.ListenAndServe(); err != nil && err != http.ErrServerClosed { + s.log.Error("listen error", "error", err) + if s.cancelFunc != nil { + s.cancelFunc() + } + } +} + +func (s *Server) ServeHTTP(w http.ResponseWriter, r *http.Request) { + s.router.ServeHTTP(w, r) +} +``` + +### Signal Handling and Graceful Shutdown + +```go +func (s *Server) serve() int { + s.ctx, s.cancelFunc = context.WithCancel(context.Background()) + + // Signal watcher + go func() { + c := make(chan os.Signal, 1) + signal.Ignore(syscall.SIGPIPE) + signal.Notify(c, os.Interrupt, syscall.SIGTERM) + sig := <-c + s.log.Info("signal received", "signal", sig) + if s.cancelFunc != nil { + s.cancelFunc() + } + }() + + go s.serveUntilShutdown() + + for range s.ctx.Done() { + } + s.cleanShutdown() + return s.exitCode +} + +func (s *Server) cleanShutdown() { + s.exitCode = 0 + ctxShutdown, shutdownCancel := context.WithTimeout(context.Background(), 5*time.Second) + if err := s.httpServer.Shutdown(ctxShutdown); err != nil { + s.log.Error("server clean shutdown failed", "error", err) + } + if shutdownCancel != nil { + shutdownCancel() + } + s.cleanupForExit() + if s.sentryEnabled { + sentry.Flush(2 * time.Second) + } +} +``` + +--- + +## 5. 
Routing (go-chi) + +### Route Setup Pattern + +```go +// internal/server/routes.go +func (s *Server) SetupRoutes() { + s.router = chi.NewRouter() + + // Global middleware (applied to all routes) + s.router.Use(middleware.Recoverer) + s.router.Use(middleware.RequestID) + s.router.Use(s.mw.Logging()) + + // Conditional middleware + if viper.GetString("METRICS_USERNAME") != "" { + s.router.Use(s.mw.Metrics()) + } + + s.router.Use(s.mw.CORS()) + s.router.Use(middleware.Timeout(60 * time.Second)) + + if s.sentryEnabled { + sentryHandler := sentryhttp.New(sentryhttp.Options{ + Repanic: true, + }) + s.router.Use(sentryHandler.Handle) + } + + // Routes + s.router.Get("/", s.h.HandleIndex()) + + // Static files + s.router.Mount("/s", http.StripPrefix("/s", http.FileServer(http.FS(static.Static)))) + + // API versioning + s.router.Route("/api/v1", func(r chi.Router) { + r.Get("/now", s.h.HandleNow()) + }) + + // Routes with specific middleware + auth := s.mw.Auth() + s.router.Get("/login", auth(s.h.HandleLoginGET()).ServeHTTP) + + // Health check (standard path) + s.router.Get("/.well-known/healthcheck.json", s.h.HandleHealthCheck()) + + // Protected route groups + if viper.GetString("METRICS_USERNAME") != "" { + s.router.Group(func(r chi.Router) { + r.Use(s.mw.MetricsAuth()) + r.Get("/metrics", http.HandlerFunc(promhttp.Handler().ServeHTTP)) + }) + } +} +``` + +### Middleware Ordering (Critical) + +1. `middleware.Recoverer` - Panic recovery (must be first) +2. `middleware.RequestID` - Generate request IDs +3. `s.mw.Logging()` - Request logging +4. `s.mw.Metrics()` - Prometheus metrics (if enabled) +5. `s.mw.CORS()` - CORS headers +6. `middleware.Timeout(60s)` - Request timeout +7. 
`sentryhttp.Handler` - Sentry error reporting (if enabled) + +### API Versioning + +Use route groups for API versioning: + +```go +s.router.Route("/api/v1", func(r chi.Router) { + r.Get("/resource", s.h.HandleResource()) +}) +``` + +### Static File Serving + +Static files are served at `/s/` prefix: + +```go +s.router.Mount("/s", http.StripPrefix("/s", http.FileServer(http.FS(static.Static)))) +``` + +--- + +## 6. Handler Conventions + +### Handler Base Struct + +```go +// internal/handlers/handlers.go +type HandlersParams struct { + fx.In + Logger *logger.Logger + Globals *globals.Globals + Database *database.Database + Healthcheck *healthcheck.Healthcheck +} + +type Handlers struct { + params *HandlersParams + log *slog.Logger + hc *healthcheck.Healthcheck +} + +func New(lc fx.Lifecycle, params HandlersParams) (*Handlers, error) { + s := new(Handlers) + s.params = &params + s.log = params.Logger.Get() + s.hc = params.Healthcheck + lc.Append(fx.Hook{ + OnStart: func(ctx context.Context) error { + // Compile templates or other initialization + return nil + }, + }) + return s, nil +} +``` + +### Closure-Based Handler Pattern + +All handlers return `http.HandlerFunc` using the closure pattern. 
This allows initialization logic to run once when the handler is created: + +```go +// internal/handlers/index.go +func (s *Handlers) HandleIndex() http.HandlerFunc { + // Initialization runs once + t := templates.GetParsed() + + // Handler runs per-request + return func(w http.ResponseWriter, r *http.Request) { + err := t.ExecuteTemplate(w, "index.html", nil) + if err != nil { + s.log.Error("template execution failed", "error", err) + http.Error(w, http.StatusText(500), 500) + } + } +} +``` + +### JSON Handler Pattern + +```go +// internal/handlers/now.go +func (s *Handlers) HandleNow() http.HandlerFunc { + // Response struct defined in closure scope + type response struct { + Now time.Time `json:"now"` + } + return func(w http.ResponseWriter, r *http.Request) { + s.respondJSON(w, r, &response{Now: time.Now()}, 200) + } +} +``` + +### Response Helpers + +```go +// internal/handlers/handlers.go +func (s *Handlers) respondJSON(w http.ResponseWriter, r *http.Request, data interface{}, status int) { + w.WriteHeader(status) + w.Header().Set("Content-Type", "application/json") + if data != nil { + err := json.NewEncoder(w).Encode(data) + if err != nil { + s.log.Error("json encode error", "error", err) + } + } +} + +func (s *Handlers) decodeJSON(w http.ResponseWriter, r *http.Request, v interface{}) error { + return json.NewDecoder(r.Body).Decode(v) +} +``` + +### Handler Naming Convention + +- `HandleIndex()` - Main page +- `HandleLoginGET()` / `HandleLoginPOST()` - Form handlers with HTTP method suffix +- `HandleNow()` - API endpoints +- `HandleHealthCheck()` - System endpoints + +--- + +## 7. 
Middleware Conventions + +### Middleware Struct + +```go +// internal/middleware/middleware.go +type MiddlewareParams struct { + fx.In + Logger *logger.Logger + Globals *globals.Globals + Config *config.Config +} + +type Middleware struct { + log *slog.Logger + params *MiddlewareParams +} + +func New(lc fx.Lifecycle, params MiddlewareParams) (*Middleware, error) { + s := new(Middleware) + s.params = &params + s.log = params.Logger.Get() + return s, nil +} +``` + +### Middleware Signature + +All custom middleware methods return `func(http.Handler) http.Handler`: + +```go +func (s *Middleware) Auth() func(http.Handler) http.Handler { + return func(next http.Handler) http.Handler { + return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + // Before request + s.log.Info("AUTH: before request") + + next.ServeHTTP(w, r) + + // After request (optional) + }) + } +} +``` + +### Logging Middleware with Status Capture + +```go +type loggingResponseWriter struct { + http.ResponseWriter + statusCode int +} + +func NewLoggingResponseWriter(w http.ResponseWriter) *loggingResponseWriter { + return &loggingResponseWriter{w, http.StatusOK} +} + +func (lrw *loggingResponseWriter) WriteHeader(code int) { + lrw.statusCode = code + lrw.ResponseWriter.WriteHeader(code) +} + +func (s *Middleware) Logging() func(http.Handler) http.Handler { + return func(next http.Handler) http.Handler { + return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + start := time.Now() + lrw := NewLoggingResponseWriter(w) + ctx := r.Context() + defer func() { + latency := time.Since(start) + s.log.InfoContext(ctx, "request", + "request_start", start, + "method", r.Method, + "url", r.URL.String(), + "useragent", r.UserAgent(), + "request_id", ctx.Value(middleware.RequestIDKey).(string), + "referer", r.Referer(), + "proto", r.Proto, + "remoteIP", ipFromHostPort(r.RemoteAddr), + "status", lrw.statusCode, + "latency_ms", latency.Milliseconds(), + ) + }() + next.ServeHTTP(lrw, r) + }) + 
} +} +``` + +### CORS Middleware + +```go +func (s *Middleware) CORS() func(http.Handler) http.Handler { + return cors.Handler(cors.Options{ + AllowedOrigins: []string{"*"}, + AllowedMethods: []string{"GET", "POST", "PUT", "DELETE", "OPTIONS"}, + AllowedHeaders: []string{"Accept", "Authorization", "Content-Type", "X-CSRF-Token"}, + ExposedHeaders: []string{"Link"}, + AllowCredentials: false, + MaxAge: 300, + }) +} +``` + +### Metrics Middleware + +```go +func (s *Middleware) Metrics() func(http.Handler) http.Handler { + mdlw := ghmm.New(ghmm.Config{ + Recorder: metrics.NewRecorder(metrics.Config{}), + }) + return func(next http.Handler) http.Handler { + return std.Handler("", mdlw, next) + } +} + +func (s *Middleware) MetricsAuth() func(http.Handler) http.Handler { + return basicauth.New( + "metrics", + map[string][]string{ + viper.GetString("METRICS_USERNAME"): { + viper.GetString("METRICS_PASSWORD"), + }, + }, + ) +} +``` + +--- + +## 8. Configuration (Viper) + +### Config Struct + +```go +// internal/config/config.go +type ConfigParams struct { + fx.In + Globals *globals.Globals + Logger *logger.Logger +} + +type Config struct { + DBURL string + Debug bool + MaintenanceMode bool + MetricsPassword string + MetricsUsername string + Port int + SentryDSN string + params *ConfigParams + log *slog.Logger +} +``` + +### Configuration Loading + +```go +func New(lc fx.Lifecycle, params ConfigParams) (*Config, error) { + log := params.Logger.Get() + name := params.Globals.Appname + + // Config file settings + viper.SetConfigName(name) + viper.SetConfigType("yaml") + viper.AddConfigPath(fmt.Sprintf("/etc/%s", name)) + viper.AddConfigPath(fmt.Sprintf("$HOME/.config/%s", name)) + + // Environment variables override everything + viper.AutomaticEnv() + + // Defaults + viper.SetDefault("DEBUG", "false") + viper.SetDefault("MAINTENANCE_MODE", "false") + viper.SetDefault("PORT", "8080") + viper.SetDefault("DBURL", "") + viper.SetDefault("SENTRY_DSN", "") + 
+ viper.SetDefault("METRICS_USERNAME", "") + viper.SetDefault("METRICS_PASSWORD", "") + + // Read config file (optional) + if err := viper.ReadInConfig(); err != nil { + if _, ok := err.(viper.ConfigFileNotFoundError); ok { + // Config file not found is OK + } else { + log.Error("config file malformed", "error", err) + panic(err) + } + } + + // Build config struct + s := &Config{ + DBURL: viper.GetString("DBURL"), + Debug: viper.GetBool("debug"), + Port: viper.GetInt("PORT"), + SentryDSN: viper.GetString("SENTRY_DSN"), + MaintenanceMode: viper.GetBool("MAINTENANCE_MODE"), + MetricsUsername: viper.GetString("METRICS_USERNAME"), + MetricsPassword: viper.GetString("METRICS_PASSWORD"), + log: log, + params: &params, + } + + // Enable debug logging if configured + if s.Debug { + params.Logger.EnableDebugLogging() + s.log = params.Logger.Get() + } + + return s, nil +} +``` + +### Configuration Precedence + +1. **Environment variables** (highest priority via `AutomaticEnv()`) +2. **`.env` file** (loaded via `godotenv/autoload` import) +3. **Config files**: `/etc/{appname}/{appname}.yaml`, `~/.config/{appname}/{appname}.yaml` +4. **Defaults** (lowest priority) + +### Environment Loading + +Import godotenv with autoload to automatically load `.env` files: + +```go +import ( + _ "github.com/joho/godotenv/autoload" +) +``` + +--- + +## 9. 
Logging (slog) + +### Logger Struct + +```go +// internal/logger/logger.go +type LoggerParams struct { + fx.In + Globals *globals.Globals +} + +type Logger struct { + log *slog.Logger + level *slog.LevelVar + params LoggerParams +} +``` + +### Logger Setup with TTY Detection + +```go +func New(lc fx.Lifecycle, params LoggerParams) (*Logger, error) { + l := new(Logger) + l.level = new(slog.LevelVar) + l.level.Set(slog.LevelInfo) + + // TTY detection for dev vs prod output + tty := false + if fileInfo, _ := os.Stdout.Stat(); (fileInfo.Mode() & os.ModeCharDevice) != 0 { + tty = true + } + + var handler slog.Handler + if tty { + // Text output for development + handler = slog.NewTextHandler(os.Stdout, &slog.HandlerOptions{ + Level: l.level, + AddSource: true, + }) + } else { + // JSON output for production + handler = slog.NewJSONHandler(os.Stdout, &slog.HandlerOptions{ + Level: l.level, + AddSource: true, + }) + } + + l.log = slog.New(handler) + return l, nil +} +``` + +### Logger Methods + +```go +func (l *Logger) EnableDebugLogging() { + l.level.Set(slog.LevelDebug) + l.log.Debug("debug logging enabled", "debug", true) +} + +func (l *Logger) Get() *slog.Logger { + return l.log +} + +func (l *Logger) Identify() { + l.log.Info("starting", + "appname", l.params.Globals.Appname, + "version", l.params.Globals.Version, + "buildarch", l.params.Globals.Buildarch, + ) +} +``` + +### Logging Patterns + +```go +// Info with fields +s.log.Info("message", "key", "value") + +// Error with error object +s.log.Error("operation failed", "error", err) + +// With context +s.log.InfoContext(ctx, "processing request", "request_id", reqID) + +// Structured request logging +s.log.Info("request completed", + "request_start", start, + "method", r.Method, + "url", r.URL.String(), + "status", statusCode, + "latency_ms", latency.Milliseconds(), +) + +// Using slog.Group for nested attributes +s.log.Info("request", + slog.Group("http", + "method", r.Method, + "url", r.URL.String(), + ), + 
+ slog.Group("timing", + "start", start, + "latency_ms", latency.Milliseconds(), + ), +) +``` + +--- + +## 10. Database Wrapper + +### Database Struct + +```go +// internal/database/database.go +type DatabaseParams struct { + fx.In + Logger *logger.Logger + Config *config.Config +} + +type Database struct { + URL string + log *slog.Logger + params *DatabaseParams +} +``` + +### Database Factory with Lifecycle + +```go +func New(lc fx.Lifecycle, params DatabaseParams) (*Database, error) { + s := new(Database) + s.params = &params + s.log = params.Logger.Get() + + s.log.Info("Database instantiated") + + lc.Append(fx.Hook{ + OnStart: func(ctx context.Context) error { + s.log.Info("Database OnStart Hook") + // Connect to database here + // Example: s.db, err = sql.Open("postgres", s.params.Config.DBURL) + return nil + }, + OnStop: func(ctx context.Context) error { + // Disconnect from database here + // Example: s.db.Close() + return nil + }, + }) + return s, nil +} +``` + +### Usage Pattern + +The Database struct is injected into handlers and other services: + +```go +type HandlersParams struct { + fx.In + Database *database.Database + // ... +} + +// Access in handler +func (s *Handlers) HandleSomething() http.HandlerFunc { + return func(w http.ResponseWriter, r *http.Request) { + // Use s.params.Database + } +} +``` + +--- + +## 11. 
Globals Package + +### Package Variables and Struct + +```go +// internal/globals/globals.go +package globals + +import "go.uber.org/fx" + +// Package-level variables (set from main) +var ( + Appname string + Version string + Buildarch string +) + +// Struct for DI +type Globals struct { + Appname string + Version string + Buildarch string +} + +func New(lc fx.Lifecycle) (*Globals, error) { + n := &Globals{ + Appname: Appname, + Buildarch: Buildarch, + Version: Version, + } + return n, nil +} +``` + +### Setting Globals in Main + +```go +// cmd/httpd/main.go +var ( + Appname string = "CHANGEME" // Default, overridden by build + Version string // Set at build time + Buildarch string // Set at build time +) + +func main() { + globals.Appname = Appname + globals.Version = Version + globals.Buildarch = Buildarch + // ... +} +``` + +### Build-Time Variable Injection + +Use ldflags to inject version information at build time: + +```makefile +VERSION := $(shell git describe --tags --always) +BUILDARCH := $(shell go env GOARCH) + +build: + go build -ldflags "-X main.Version=$(VERSION) -X main.Buildarch=$(BUILDARCH)" ./cmd/httpd +``` + +--- + +## 12. 
Static Assets & Templates + +### Static File Embedding + +```go +// static/static.go +package static + +import "embed" + +//go:embed css js +var Static embed.FS +``` + +Directory structure: +``` +static/ +├── static.go +├── css/ +│ ├── bootstrap-4.5.3.min.css +│ └── style.css +└── js/ + ├── bootstrap-4.5.3.bundle.min.js + └── jquery-3.5.1.slim.min.js +``` + +### Template Embedding and Lazy Parsing + +```go +// templates/templates.go +package templates + +import ( + "embed" + "text/template" +) + +//go:embed *.html +var TemplatesRaw embed.FS +var TemplatesParsed *template.Template + +func GetParsed() *template.Template { + if TemplatesParsed == nil { + TemplatesParsed = template.Must(template.ParseFS(TemplatesRaw, "*")) + } + return TemplatesParsed +} +``` + +### Template Composition + +Templates use Go's template composition: + +```html + +{{ template "htmlheader.html" . }} +{{ template "navbar.html" . }} + +
+ +
+ +{{ template "pagefooter.html" . }} +{{ template "htmlfooter.html" . }} +``` + +### Static Asset References + +Reference static files with `/s/` prefix: + +```html + + + +``` + +--- + +## 13. Health Check + +### Health Check Service + +```go +// internal/healthcheck/healthcheck.go +type HealthcheckParams struct { + fx.In + Globals *globals.Globals + Config *config.Config + Logger *logger.Logger + Database *database.Database +} + +type Healthcheck struct { + StartupTime time.Time + log *slog.Logger + params *HealthcheckParams +} + +func New(lc fx.Lifecycle, params HealthcheckParams) (*Healthcheck, error) { + s := new(Healthcheck) + s.params = &params + s.log = params.Logger.Get() + + lc.Append(fx.Hook{ + OnStart: func(ctx context.Context) error { + s.StartupTime = time.Now() + return nil + }, + OnStop: func(ctx context.Context) error { + return nil + }, + }) + return s, nil +} +``` + +### Health Check Response + +```go +type HealthcheckResponse struct { + Status string `json:"status"` + Now string `json:"now"` + UptimeSeconds int64 `json:"uptime_seconds"` + UptimeHuman string `json:"uptime_human"` + Version string `json:"version"` + Appname string `json:"appname"` + Maintenance bool `json:"maintenance_mode"` +} + +func (s *Healthcheck) uptime() time.Duration { + return time.Since(s.StartupTime) +} + +func (s *Healthcheck) Healthcheck() *HealthcheckResponse { + resp := &HealthcheckResponse{ + Status: "ok", + Now: time.Now().UTC().Format(time.RFC3339Nano), + UptimeSeconds: int64(s.uptime().Seconds()), + UptimeHuman: s.uptime().String(), + Appname: s.params.Globals.Appname, + Version: s.params.Globals.Version, + } + return resp +} +``` + +### Standard Endpoint + +Health check is served at the standard `.well-known` path: + +```go +s.router.Get("/.well-known/healthcheck.json", s.h.HandleHealthCheck()) +``` + +--- + +## 14. 
External Integrations + +### Sentry Error Reporting + +Sentry is conditionally enabled based on `SENTRY_DSN` environment variable: + +```go +func (s *Server) enableSentry() { + s.sentryEnabled = false + + if s.params.Config.SentryDSN == "" { + return + } + + err := sentry.Init(sentry.ClientOptions{ + Dsn: s.params.Config.SentryDSN, + Release: fmt.Sprintf("%s-%s", s.params.Globals.Appname, s.params.Globals.Version), + }) + if err != nil { + s.log.Error("sentry init failure", "error", err) + os.Exit(1) + return + } + s.log.Info("sentry error reporting activated") + s.sentryEnabled = true +} +``` + +Sentry middleware with repanic (bubbles panics to chi's Recoverer): + +```go +if s.sentryEnabled { + sentryHandler := sentryhttp.New(sentryhttp.Options{ + Repanic: true, + }) + s.router.Use(sentryHandler.Handle) +} +``` + +Flush Sentry on shutdown: + +```go +if s.sentryEnabled { + sentry.Flush(2 * time.Second) +} +``` + +### Prometheus Metrics + +Metrics are conditionally enabled and protected by basic auth: + +```go +// Only enable if credentials are configured +if viper.GetString("METRICS_USERNAME") != "" { + s.router.Use(s.mw.Metrics()) +} + +// Protected /metrics endpoint +if viper.GetString("METRICS_USERNAME") != "" { + s.router.Group(func(r chi.Router) { + r.Use(s.mw.MetricsAuth()) + r.Get("/metrics", http.HandlerFunc(promhttp.Handler().ServeHTTP)) + }) +} +``` + +### Environment Variables Summary + +| Variable | Description | Default | +|----------|-------------|---------| +| `PORT` | HTTP listen port | 8080 | +| `DEBUG` | Enable debug logging | false | +| `DBURL` | Database connection URL | "" | +| `SENTRY_DSN` | Sentry DSN for error reporting | "" | +| `MAINTENANCE_MODE` | Enable maintenance mode | false | +| `METRICS_USERNAME` | Basic auth username for /metrics | "" | +| `METRICS_PASSWORD` | Basic auth password for /metrics | "" | diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000..dd40645 --- /dev/null +++ b/Dockerfile @@ -0,0 +1,38 @@ +# 
Build stage +FROM golang:1.25-alpine AS builder + +RUN apk add --no-cache git make gcc musl-dev + +# Install golangci-lint v2 +RUN go install github.com/golangci/golangci-lint/v2/cmd/golangci-lint@latest +RUN go install golang.org/x/tools/cmd/goimports@latest + +WORKDIR /src +COPY go.mod go.sum ./ +RUN go mod download + +COPY . . + +# Run all checks - build fails if any check fails +RUN make check + +# Build the binary +RUN make build + +# Runtime stage +FROM alpine:3.21 + +RUN apk add --no-cache ca-certificates tzdata + +WORKDIR /app + +COPY --from=builder /src/bin/dnswatcher /app/dnswatcher + +# Create data directory +RUN mkdir -p /var/lib/dnswatcher + +ENV DNSWATCHER_DATA_DIR=/var/lib/dnswatcher + +EXPOSE 8080 + +ENTRYPOINT ["/app/dnswatcher"] diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..1da1f2d --- /dev/null +++ b/Makefile @@ -0,0 +1,37 @@ +.PHONY: all build lint fmt test check clean + +BINARY := dnswatcher +VERSION := $(shell git describe --tags --always --dirty 2>/dev/null || echo "dev") +BUILDARCH := $(shell go env GOARCH) +LDFLAGS := -X main.Version=$(VERSION) -X main.Buildarch=$(BUILDARCH) + +all: check build + +build: + go build -ldflags "$(LDFLAGS)" -o bin/$(BINARY) ./cmd/dnswatcher + +lint: + golangci-lint run --config .golangci.yml ./... + +fmt: + gofmt -s -w . + goimports -w . + +test: + go test -v -race -cover ./... + +# Check runs all validation without making changes +# Used by CI and Docker build - fails if anything is wrong +check: + @echo "==> Checking formatting..." + @test -z "$$(gofmt -l .)" || (echo "Files not formatted:" && gofmt -l . && exit 1) + @echo "==> Running linter..." + golangci-lint run --config .golangci.yml ./... + @echo "==> Running tests..." + go test -v -race ./... + @echo "==> Building..." + go build -ldflags "$(LDFLAGS)" -o /dev/null ./cmd/dnswatcher + @echo "==> All checks passed!" 
+ +clean: + rm -rf bin/ diff --git a/README.md b/README.md new file mode 100644 index 0000000..1460218 --- /dev/null +++ b/README.md @@ -0,0 +1,385 @@ +# dnswatcher + +dnswatcher is a production DNS and infrastructure monitoring daemon written in +Go. It watches configured DNS domains and hostnames for changes, monitors TCP +port availability, tracks TLS certificate expiry, and delivers real-time +notifications via Slack, Mattermost, and/or ntfy webhooks. + +It performs all DNS resolution itself via iterative (non-recursive) queries, +tracing from root nameservers to authoritative servers directly—never relying +on upstream recursive resolvers. + +State is persisted to a local JSON file so that monitoring survives restarts +without requiring an external database. + +--- + +## Features + +### DNS Domain Monitoring (Apex Domains) + +- Accepts a list of DNS domain names (apex domains, identified via the + [Public Suffix List](https://publicsuffix.org/)). +- Every **1 hour**, performs a full iterative trace from root servers to + discover all authoritative nameservers (NS records) for each domain. +- Queries **every** discovered authoritative nameserver independently. +- Stores the NS record set as observed by the delegation chain. +- Any change triggers a notification: + - NS added to or removed from the delegation. + - NS IP address changed (glue record change). + +### DNS Hostname Monitoring (Subdomains) + +- Accepts a list of DNS hostnames (subdomains, distinguished from apex + domains via the Public Suffix List). +- Every **1 hour**, performs a full iterative trace to discover the + authoritative nameservers for the hostname's parent domain. +- Queries **each** authoritative nameserver independently for **all** + record types: A, AAAA, CNAME, MX, TXT, SRV, CAA, NS. +- Stores results **per nameserver**. The state for a hostname is not a + merged view — it is a map from nameserver to record set. 
+- Any observable change in any nameserver's response triggers a + notification. This includes: + - **Record change**: A nameserver returns different records than it + did on the previous check (additions, removals, value changes). + - **NS query failure**: A nameserver that previously responded + becomes unreachable (timeout, SERVFAIL, REFUSED, network error). + This is distinct from "responded with no records." + - **NS recovery**: A previously-unreachable nameserver starts + responding again. + - **Inconsistency detected**: Two nameservers that previously agreed + now return different record sets for the same hostname. + - **Inconsistency resolved**: Nameservers that previously disagreed + are now back in agreement. + - **Empty response**: A nameserver that previously returned records + now returns an authoritative empty response (NODATA/NXDOMAIN). + +### TCP Port Monitoring + +- For every configured domain and hostname, constructs a deduplicated list + of all IPv4 and IPv6 addresses resolved via A, AAAA, and CNAME chain + resolution across all authoritative nameservers. +- Checks TCP connectivity on ports **80** and **443** for each IP address. +- Every **1 hour**, re-checks all ports. +- Any change in port availability triggers a notification: + - Port transitioned from open to closed (or vice versa). + - New IP appeared (from DNS change) and its port state was recorded. + - IP disappeared (from DNS change) — noted in the DNS change + notification; port state for that IP is removed. + +### TLS Certificate Monitoring + +- Every **12 hours**, for each IP address listening on port 443, connects + via TLS using the correct SNI hostname. +- Records the certificate's Subject CN, SANs, issuer, and expiry date. +- Any change triggers a notification: + - Certificate is expiring within **7 days** (warning, repeated each + check until renewed or expired). + - Certificate CN, issuer, or SANs changed (replacement detected, + reports old and new values). 
+ - TLS connection failure to a previously-reachable IP:443 (handshake + error, timeout, connection refused after previously succeeding). + - TLS recovery: a previously-failing IP:443 now completes a + handshake again. + +### Notifications + +**Every observable state change produces a notification.** dnswatcher is +designed as a real-time change feed — degradations, failures, recoveries, +and routine changes are all reported equally. + +Supported notification backends: + +| Backend | Configuration | Payload Format | +|----------------|--------------------------|------------------------------| +| **Slack** | Incoming Webhook URL | Attachments with color | +| **Mattermost** | Incoming Webhook URL | Slack-compatible attachments | +| **ntfy** | Topic URL (e.g. `https://ntfy.sh/mytopic`) | Title + body + priority | + +All configured endpoints receive every notification. Notification content +includes: + +- **DNS record changes**: Which hostname, which nameserver, what record + type, old values, new values. +- **DNS NS changes**: Which domain, which nameservers were added/removed. +- **NS query failures**: Which nameserver failed, error type (timeout, + SERVFAIL, REFUSED, network error), which hostname/domain affected. +- **NS recoveries**: Which nameserver recovered, which hostname/domain. +- **NS inconsistencies**: Which nameservers disagree, what each one + returned, which hostname affected. +- **Port changes**: Which IP:port, old state, new state, associated + hostname. +- **TLS expiry warnings**: Which certificate, days remaining, CN, + issuer, associated hostname and IP. +- **TLS certificate changes**: Old and new CN/issuer/SANs, associated + hostname and IP. +- **TLS connection failures/recoveries**: Which IP:port, error details, + associated hostname. + +### State Management + +- All monitoring state is kept in memory and persisted to a JSON file on + disk (`DATA_DIR/state.json`). 
+- State is loaded on startup to resume monitoring without triggering + false-positive change notifications. +- State is written atomically (write to temp file, then rename) to prevent + corruption. + +### HTTP API + +dnswatcher exposes a lightweight HTTP API for operational visibility: + +| Endpoint | Description | +|---------------------------------------|--------------------------------| +| `GET /health` | Health check (JSON) | +| `GET /api/v1/status` | Current monitoring state | +| `GET /api/v1/domains` | Configured domains and status | +| `GET /api/v1/hostnames` | Configured hostnames and status| +| `GET /metrics` | Prometheus metrics (optional) | + +--- + +## Architecture + +``` +cmd/dnswatcher/main.go Entry point (uber/fx bootstrap) + +internal/ + config/config.go Viper-based configuration + globals/globals.go Build-time variables (version, arch) + logger/logger.go slog structured logging (TTY detection) + healthcheck/healthcheck.go Health check service + middleware/middleware.go HTTP middleware (logging, CORS, metrics auth) + handlers/handlers.go HTTP request handlers + server/ + server.go HTTP server lifecycle + routes.go Route definitions + state/state.go JSON file state persistence + resolver/resolver.go Iterative DNS resolution engine + portcheck/portcheck.go TCP port connectivity checker + tlscheck/tlscheck.go TLS certificate inspector + notify/notify.go Notification service (Slack, Mattermost, ntfy) + watcher/watcher.go Main monitoring orchestrator and scheduler +``` + +### Design Principles + +- **No recursive resolvers**: All DNS resolution is performed iteratively, + tracing from root nameservers through the delegation chain to + authoritative servers. +- **No external database**: State is persisted as a single JSON file. +- **Dependency injection**: All components are wired via + [uber/fx](https://github.com/uber-go/fx). +- **Structured logging**: All logs use `log/slog` with JSON output in + production (TTY detection for development). 
+- **Graceful shutdown**: All background goroutines respect context + cancellation and the fx lifecycle. + +--- + +## Configuration + +Configuration is loaded via [Viper](https://github.com/spf13/viper) with +the following precedence (highest to lowest): + +1. Environment variables (prefixed with `DNSWATCHER_`, except `PORT`, + which is read unprefixed) +2. `.env` file (loaded via godotenv) +3. Config file: `/etc/dnswatcher/dnswatcher.yaml`, + `~/.config/dnswatcher/dnswatcher.yaml`, or `./dnswatcher.yaml` +4. Defaults + +### Environment Variables + +| Variable | Description | Default | +|---------------------------------|--------------------------------------------|-------------| +| `PORT` | HTTP listen port | `8080` | +| `DNSWATCHER_DEBUG` | Enable debug logging | `false` | +| `DNSWATCHER_DATA_DIR` | Directory for state file | `./data` | +| `DNSWATCHER_DOMAINS` | Comma-separated list of apex domains | `""` | +| `DNSWATCHER_HOSTNAMES` | Comma-separated list of hostnames | `""` | +| `DNSWATCHER_SLACK_WEBHOOK` | Slack incoming webhook URL | `""` | +| `DNSWATCHER_MATTERMOST_WEBHOOK` | Mattermost incoming webhook URL | `""` | +| `DNSWATCHER_NTFY_TOPIC` | ntfy topic URL | `""` | +| `DNSWATCHER_DNS_INTERVAL` | DNS check interval | `1h` | +| `DNSWATCHER_TLS_INTERVAL` | TLS check interval | `12h` | +| `DNSWATCHER_TLS_EXPIRY_WARNING` | Days before expiry to warn | `7` | +| `DNSWATCHER_SENTRY_DSN` | Sentry DSN for error reporting | `""` | +| `DNSWATCHER_MAINTENANCE_MODE` | Enable maintenance mode | `false` | +| `DNSWATCHER_METRICS_USERNAME` | Basic auth username for /metrics | `""` | +| `DNSWATCHER_METRICS_PASSWORD` | Basic auth password for /metrics | `""` | + +### Example `.env` + +```sh +PORT=8080 +DNSWATCHER_DEBUG=false +DNSWATCHER_DATA_DIR=./data +DNSWATCHER_DOMAINS=example.com,example.org +DNSWATCHER_HOSTNAMES=www.example.com,api.example.com,mail.example.org +DNSWATCHER_SLACK_WEBHOOK=https://hooks.slack.com/services/T.../B.../xxx +DNSWATCHER_MATTERMOST_WEBHOOK=https://mattermost.example.com/hooks/xxx
+DNSWATCHER_NTFY_TOPIC=https://ntfy.sh/my-dns-alerts +``` + +--- + +## DNS Resolution Strategy + +dnswatcher never uses the system's configured recursive resolver. Instead, +it performs full iterative resolution: + +1. **Root servers**: Starts from the IANA root nameserver list (hardcoded, + with periodic refresh). +2. **TLD delegation**: Queries root servers for the TLD NS records. +3. **Domain delegation**: Queries TLD nameservers for the domain's NS + records. +4. **Authoritative query**: Queries all discovered authoritative + nameservers directly for the requested records. + +This approach ensures: +- Independence from any upstream resolver's cache or filtering. +- Ability to detect split-horizon or inconsistent responses across + authoritative servers. +- Visibility into the full delegation chain. + +For hostname monitoring, the resolver follows CNAME chains (with a +depth limit to prevent loops) before collecting terminal A/AAAA records. + +--- + +## State File Format + +The state file (`DATA_DIR/state.json`) contains the complete monitoring +snapshot. Hostname records are stored **per authoritative nameserver**, +not as a merged view, to enable inconsistency detection. 
+ +```json +{ + "version": 1, + "lastUpdated": "2026-02-19T12:00:00Z", + "domains": { + "example.com": { + "nameservers": ["ns1.example.com.", "ns2.example.com."], + "lastChecked": "2026-02-19T12:00:00Z" + } + }, + "hostnames": { + "www.example.com": { + "recordsByNameserver": { + "ns1.example.com.": { + "records": { + "A": ["93.184.216.34"], + "AAAA": ["2606:2800:220:1:248:1893:25c8:1946"] + }, + "status": "ok", + "lastChecked": "2026-02-19T12:00:00Z" + }, + "ns2.example.com.": { + "records": { + "A": ["93.184.216.34"], + "AAAA": ["2606:2800:220:1:248:1893:25c8:1946"] + }, + "status": "ok", + "lastChecked": "2026-02-19T12:00:00Z" + } + }, + "lastChecked": "2026-02-19T12:00:00Z" + } + }, + "ports": { + "93.184.216.34:80": { + "open": true, + "hostname": "www.example.com", + "lastChecked": "2026-02-19T12:00:00Z" + }, + "93.184.216.34:443": { + "open": true, + "hostname": "www.example.com", + "lastChecked": "2026-02-19T12:00:00Z" + } + }, + "certificates": { + "93.184.216.34:443:www.example.com": { + "commonName": "www.example.com", + "issuer": "DigiCert TLS RSA SHA256 2020 CA1", + "notAfter": "2027-01-15T23:59:59Z", + "subjectAlternativeNames": ["www.example.com"], + "status": "ok", + "lastChecked": "2026-02-19T06:00:00Z" + } + } +} +``` + +The `status` field for each per-nameserver entry and certificate entry +tracks reachability: + +| Status | Meaning | +|-------------|-------------------------------------------------| +| `ok` | Query succeeded, records are current | +| `error` | Query failed (timeout, SERVFAIL, network error) | +| `nxdomain` | Authoritative NXDOMAIN response | +| `nodata` | Authoritative empty response (NODATA) | + +--- + +## Building + +```sh +make build # Build binary to bin/dnswatcher +make test # Run tests with race detector +make lint # Run golangci-lint +make fmt # Format code +make check # Run all checks (format, lint, test, build) +make clean # Remove build artifacts +``` + +### Build-Time Variables + +Version and architecture are 
injected via `-ldflags`: + +```sh +go build -ldflags "-X main.Version=$(git describe --tags --always) \ + -X main.Buildarch=$(go env GOARCH)" ./cmd/dnswatcher +``` + +--- + +## Docker + +```sh +docker build -t dnswatcher . +docker run -d \ + -p 8080:8080 \ + -v dnswatcher-data:/var/lib/dnswatcher \ + -e DNSWATCHER_DOMAINS=example.com \ + -e DNSWATCHER_HOSTNAMES=www.example.com \ + -e DNSWATCHER_NTFY_TOPIC=https://ntfy.sh/my-alerts \ + dnswatcher +``` + +--- + +## Monitoring Lifecycle + +1. **Startup**: Load state from disk. If no state file exists, start + with empty state (first check will establish baseline without + triggering change notifications). +2. **Initial check**: Immediately perform all DNS, port, and TLS checks + on startup. +3. **Periodic checks**: + - DNS and port checks: every `DNSWATCHER_DNS_INTERVAL` (default 1h). + - TLS checks: every `DNSWATCHER_TLS_INTERVAL` (default 12h). +4. **On change detection**: Send notifications to all configured + endpoints, update in-memory state, persist to disk. +5. **Shutdown**: Persist final state to disk, complete in-flight + notifications, stop gracefully. + +--- + +## Project Structure + +Follows the conventions defined in `CONVENTIONS.md`, adapted from the +[upaas](https://git.eeqj.de/sneak/upaas) project template. Uses uber/fx +for dependency injection, go-chi for HTTP routing, slog for logging, and +Viper for configuration. diff --git a/cmd/dnswatcher/main.go b/cmd/dnswatcher/main.go new file mode 100644 index 0000000..0dd77b9 --- /dev/null +++ b/cmd/dnswatcher/main.go @@ -0,0 +1,56 @@ +// Package main is the entry point for dnswatcher. 
+package main + +import ( + "go.uber.org/fx" + + "sneak.berlin/go/dnswatcher/internal/config" + "sneak.berlin/go/dnswatcher/internal/globals" + "sneak.berlin/go/dnswatcher/internal/handlers" + "sneak.berlin/go/dnswatcher/internal/healthcheck" + "sneak.berlin/go/dnswatcher/internal/logger" + "sneak.berlin/go/dnswatcher/internal/middleware" + "sneak.berlin/go/dnswatcher/internal/notify" + "sneak.berlin/go/dnswatcher/internal/portcheck" + "sneak.berlin/go/dnswatcher/internal/resolver" + "sneak.berlin/go/dnswatcher/internal/server" + "sneak.berlin/go/dnswatcher/internal/state" + "sneak.berlin/go/dnswatcher/internal/tlscheck" + "sneak.berlin/go/dnswatcher/internal/watcher" + + _ "github.com/joho/godotenv/autoload" +) + +// Build-time variables injected by linker flags (-ldflags). +// +//nolint:gochecknoglobals // build-time variables +var ( + Appname = "dnswatcher" + Version string + Buildarch string +) + +func main() { + globals.SetAppname(Appname) + globals.SetVersion(Version) + globals.SetBuildarch(Buildarch) + + fx.New( + fx.Provide( + globals.New, + logger.New, + config.New, + state.New, + healthcheck.New, + resolver.New, + portcheck.New, + tlscheck.New, + notify.New, + watcher.New, + middleware.New, + handlers.New, + server.New, + ), + fx.Invoke(func(*server.Server, *watcher.Watcher) {}), + ).Run() +} diff --git a/go.mod b/go.mod new file mode 100644 index 0000000..1b21a1c --- /dev/null +++ b/go.mod @@ -0,0 +1,39 @@ +module sneak.berlin/go/dnswatcher + +go 1.25.5 + +require ( + github.com/99designs/basicauth-go v0.0.0-20230316000542-bf6f9cbbf0f8 + github.com/go-chi/chi/v5 v5.2.5 + github.com/go-chi/cors v1.2.2 + github.com/joho/godotenv v1.5.1 + github.com/prometheus/client_golang v1.23.2 + github.com/spf13/viper v1.21.0 + go.uber.org/fx v1.24.0 +) + +require ( + github.com/beorn7/perks v1.0.1 // indirect + github.com/cespare/xxhash/v2 v2.3.0 // indirect + github.com/fsnotify/fsnotify v1.9.0 // indirect + github.com/go-viper/mapstructure/v2 v2.4.0 // indirect + 
github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect + github.com/pelletier/go-toml/v2 v2.2.4 // indirect + github.com/prometheus/client_model v0.6.2 // indirect + github.com/prometheus/common v0.66.1 // indirect + github.com/prometheus/procfs v0.16.1 // indirect + github.com/sagikazarmark/locafero v0.11.0 // indirect + github.com/sourcegraph/conc v0.3.1-0.20240121214520-5f936abd7ae8 // indirect + github.com/spf13/afero v1.15.0 // indirect + github.com/spf13/cast v1.10.0 // indirect + github.com/spf13/pflag v1.0.10 // indirect + github.com/subosito/gotenv v1.6.0 // indirect + go.uber.org/dig v1.19.0 // indirect + go.uber.org/multierr v1.10.0 // indirect + go.uber.org/zap v1.26.0 // indirect + go.yaml.in/yaml/v2 v2.4.2 // indirect + go.yaml.in/yaml/v3 v3.0.4 // indirect + golang.org/x/sys v0.35.0 // indirect + golang.org/x/text v0.28.0 // indirect + google.golang.org/protobuf v1.36.8 // indirect +) diff --git a/go.sum b/go.sum new file mode 100644 index 0000000..1b5631a --- /dev/null +++ b/go.sum @@ -0,0 +1,87 @@ +github.com/99designs/basicauth-go v0.0.0-20230316000542-bf6f9cbbf0f8 h1:nMpu1t4amK3vJWBibQ5X/Nv0aXL+b69TQf2uK5PH7Go= +github.com/99designs/basicauth-go v0.0.0-20230316000542-bf6f9cbbf0f8/go.mod h1:3cARGAK9CfW3HoxCy1a0G4TKrdiKke8ftOMEOHyySYs= +github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM= +github.com/beorn7/perks v1.0.1/go.mod h1:G2ZrVWU2WbWT9wwq4/hrbKbnv/1ERSJQ0ibhJ6rlkpw= +github.com/cespare/xxhash/v2 v2.3.0 h1:UL815xU9SqsFlibzuggzjXhog7bL6oX9BbNZnL2UFvs= +github.com/cespare/xxhash/v2 v2.3.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs= +github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c= +github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/frankban/quicktest v1.14.6 h1:7Xjx+VpznH+oBnejlPUj8oUpdxnVs4f8XU8WnHkI4W8= +github.com/frankban/quicktest v1.14.6/go.mod h1:4ptaffx2x8+WTWXmUCuVU6aPUX1/Mz7zb5vbUoiM6w0= 
+github.com/fsnotify/fsnotify v1.9.0 h1:2Ml+OJNzbYCTzsxtv8vKSFD9PbJjmhYF14k/jKC7S9k= +github.com/fsnotify/fsnotify v1.9.0/go.mod h1:8jBTzvmWwFyi3Pb8djgCCO5IBqzKJ/Jwo8TRcHyHii0= +github.com/go-chi/chi/v5 v5.2.5 h1:Eg4myHZBjyvJmAFjFvWgrqDTXFyOzjj7YIm3L3mu6Ug= +github.com/go-chi/chi/v5 v5.2.5/go.mod h1:X7Gx4mteadT3eDOMTsXzmI4/rwUpOwBHLpAfupzFJP0= +github.com/go-chi/cors v1.2.2 h1:Jmey33TE+b+rB7fT8MUy1u0I4L+NARQlK6LhzKPSyQE= +github.com/go-chi/cors v1.2.2/go.mod h1:sSbTewc+6wYHBBCW7ytsFSn836hqM7JxpglAy2Vzc58= +github.com/go-viper/mapstructure/v2 v2.4.0 h1:EBsztssimR/CONLSZZ04E8qAkxNYq4Qp9LvH92wZUgs= +github.com/go-viper/mapstructure/v2 v2.4.0/go.mod h1:oJDH3BJKyqBA2TXFhDsKDGDTlndYOZ6rGS0BRZIxGhM= +github.com/google/go-cmp v0.7.0 h1:wk8382ETsv4JYUZwIsn6YpYiWiBsYLSJiTsyBybVuN8= +github.com/google/go-cmp v0.7.0/go.mod h1:pXiqmnSA92OHEEa9HXL2W4E7lf9JzCmGVUdgjX3N/iU= +github.com/joho/godotenv v1.5.1 h1:7eLL/+HRGLY0ldzfGMeQkb7vMd0as4CfYvUVzLqw0N0= +github.com/joho/godotenv v1.5.1/go.mod h1:f4LDr5Voq0i2e/R5DDNOoa2zzDfwtkZa6DnEwAbqwq4= +github.com/klauspost/compress v1.18.0 h1:c/Cqfb0r+Yi+JtIEq73FWXVkRonBlf0CRNYc8Zttxdo= +github.com/klauspost/compress v1.18.0/go.mod h1:2Pp+KzxcywXVXMr50+X0Q/Lsb43OQHYWRCY2AiWywWQ= +github.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE= +github.com/kr/pretty v0.3.1/go.mod h1:hoEshYVHaxMs3cyo3Yncou5ZscifuDolrwPKZanG3xk= +github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY= +github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE= +github.com/kylelemons/godebug v1.1.0 h1:RPNrshWIDI6G2gRW9EHilWtl7Z6Sb1BR0xunSBf0SNc= +github.com/kylelemons/godebug v1.1.0/go.mod h1:9/0rRGxNHcop5bhtWyNeEfOS8JIWk580+fNqagV/RAw= +github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 h1:C3w9PqII01/Oq1c1nUAm88MOHcQC9l5mIlSMApZMrHA= +github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822/go.mod h1:+n7T8mK8HuQTcFwEeznm/DIxMOiR9yIdICNftLE1DvQ= +github.com/pelletier/go-toml/v2 v2.2.4 
h1:mye9XuhQ6gvn5h28+VilKrrPoQVanw5PMw/TB0t5Ec4= +github.com/pelletier/go-toml/v2 v2.2.4/go.mod h1:2gIqNv+qfxSVS7cM2xJQKtLSTLUE9V8t9Stt+h56mCY= +github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= +github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= +github.com/prometheus/client_golang v1.23.2 h1:Je96obch5RDVy3FDMndoUsjAhG5Edi49h0RJWRi/o0o= +github.com/prometheus/client_golang v1.23.2/go.mod h1:Tb1a6LWHB3/SPIzCoaDXI4I8UHKeFTEQ1YCr+0Gyqmg= +github.com/prometheus/client_model v0.6.2 h1:oBsgwpGs7iVziMvrGhE53c/GrLUsZdHnqNwqPLxwZyk= +github.com/prometheus/client_model v0.6.2/go.mod h1:y3m2F6Gdpfy6Ut/GBsUqTWZqCUvMVzSfMLjcu6wAwpE= +github.com/prometheus/common v0.66.1 h1:h5E0h5/Y8niHc5DlaLlWLArTQI7tMrsfQjHV+d9ZoGs= +github.com/prometheus/common v0.66.1/go.mod h1:gcaUsgf3KfRSwHY4dIMXLPV0K/Wg1oZ8+SbZk/HH/dA= +github.com/prometheus/procfs v0.16.1 h1:hZ15bTNuirocR6u0JZ6BAHHmwS1p8B4P6MRqxtzMyRg= +github.com/prometheus/procfs v0.16.1/go.mod h1:teAbpZRB1iIAJYREa1LsoWUXykVXA1KlTmWl8x/U+Is= +github.com/rogpeppe/go-internal v1.10.0 h1:TMyTOH3F/DB16zRVcYyreMH6GnZZrwQVAoYjRBZyWFQ= +github.com/rogpeppe/go-internal v1.10.0/go.mod h1:UQnix2H7Ngw/k4C5ijL5+65zddjncjaFoBhdsK/akog= +github.com/sagikazarmark/locafero v0.11.0 h1:1iurJgmM9G3PA/I+wWYIOw/5SyBtxapeHDcg+AAIFXc= +github.com/sagikazarmark/locafero v0.11.0/go.mod h1:nVIGvgyzw595SUSUE6tvCp3YYTeHs15MvlmU87WwIik= +github.com/sourcegraph/conc v0.3.1-0.20240121214520-5f936abd7ae8 h1:+jumHNA0Wrelhe64i8F6HNlS8pkoyMv5sreGx2Ry5Rw= +github.com/sourcegraph/conc v0.3.1-0.20240121214520-5f936abd7ae8/go.mod h1:3n1Cwaq1E1/1lhQhtRK2ts/ZwZEhjcQeJQ1RuC6Q/8U= +github.com/spf13/afero v1.15.0 h1:b/YBCLWAJdFWJTN9cLhiXXcD7mzKn9Dm86dNnfyQw1I= +github.com/spf13/afero v1.15.0/go.mod h1:NC2ByUVxtQs4b3sIUphxK0NioZnmxgyCrfzeuq8lxMg= +github.com/spf13/cast v1.10.0 h1:h2x0u2shc1QuLHfxi+cTJvs30+ZAHOGRic8uyGTDWxY= +github.com/spf13/cast v1.10.0/go.mod 
h1:jNfB8QC9IA6ZuY2ZjDp0KtFO2LZZlg4S/7bzP6qqeHo= +github.com/spf13/pflag v1.0.10 h1:4EBh2KAYBwaONj6b2Ye1GiHfwjqyROoF4RwYO+vPwFk= +github.com/spf13/pflag v1.0.10/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg= +github.com/spf13/viper v1.21.0 h1:x5S+0EU27Lbphp4UKm1C+1oQO+rKx36vfCoaVebLFSU= +github.com/spf13/viper v1.21.0/go.mod h1:P0lhsswPGWD/1lZJ9ny3fYnVqxiegrlNrEmgLjbTCAY= +github.com/stretchr/testify v1.11.1 h1:7s2iGBzp5EwR7/aIZr8ao5+dra3wiQyKjjFuvgVKu7U= +github.com/stretchr/testify v1.11.1/go.mod h1:wZwfW3scLgRK+23gO65QZefKpKQRnfz6sD981Nm4B6U= +github.com/subosito/gotenv v1.6.0 h1:9NlTDc1FTs4qu0DDq7AEtTPNw6SVm7uBMsUCUjABIf8= +github.com/subosito/gotenv v1.6.0/go.mod h1:Dk4QP5c2W3ibzajGcXpNraDfq2IrhjMIvMSWPKKo0FU= +go.uber.org/dig v1.19.0 h1:BACLhebsYdpQ7IROQ1AGPjrXcP5dF80U3gKoFzbaq/4= +go.uber.org/dig v1.19.0/go.mod h1:Us0rSJiThwCv2GteUN0Q7OKvU7n5J4dxZ9JKUXozFdE= +go.uber.org/fx v1.24.0 h1:wE8mruvpg2kiiL1Vqd0CC+tr0/24XIB10Iwp2lLWzkg= +go.uber.org/fx v1.24.0/go.mod h1:AmDeGyS+ZARGKM4tlH4FY2Jr63VjbEDJHtqXTGP5hbo= +go.uber.org/goleak v1.3.0 h1:2K3zAYmnTNqV73imy9J1T3WC+gmCePx2hEGkimedGto= +go.uber.org/goleak v1.3.0/go.mod h1:CoHD4mav9JJNrW/WLlf7HGZPjdw8EucARQHekz1X6bE= +go.uber.org/multierr v1.10.0 h1:S0h4aNzvfcFsC3dRF1jLoaov7oRaKqRGC/pUEJ2yvPQ= +go.uber.org/multierr v1.10.0/go.mod h1:20+QtiLqy0Nd6FdQB9TLXag12DsQkrbs3htMFfDN80Y= +go.uber.org/zap v1.26.0 h1:sI7k6L95XOKS281NhVKOFCUNIvv9e0w4BF8N3u+tCRo= +go.uber.org/zap v1.26.0/go.mod h1:dtElttAiwGvoJ/vj4IwHBS/gXsEu/pZ50mUIRWuG0so= +go.yaml.in/yaml/v2 v2.4.2 h1:DzmwEr2rDGHl7lsFgAHxmNz/1NlQ7xLIrlN2h5d1eGI= +go.yaml.in/yaml/v2 v2.4.2/go.mod h1:081UH+NErpNdqlCXm3TtEran0rJZGxAYx9hb/ELlsPU= +go.yaml.in/yaml/v3 v3.0.4 h1:tfq32ie2Jv2UxXFdLJdh3jXuOzWiL1fo0bu/FbuKpbc= +go.yaml.in/yaml/v3 v3.0.4/go.mod h1:DhzuOOF2ATzADvBadXxruRBLzYTpT36CKvDb3+aBEFg= +golang.org/x/sys v0.35.0 h1:vz1N37gP5bs89s7He8XuIYXpyY0+QlsKmzipCbUtyxI= +golang.org/x/sys v0.35.0/go.mod h1:BJP2sWEmIv4KK5OTEluFJCKSidICx8ciO85XgH3Ak8k= +golang.org/x/text 
import (
	"errors"
	"fmt"
	"log/slog"
	"path/filepath"
	"strings"
	"time"

	"github.com/spf13/viper"
	"go.uber.org/fx"

	"sneak.berlin/go/dnswatcher/internal/globals"
	"sneak.berlin/go/dnswatcher/internal/logger"
)
+type Config struct { + Port int + Debug bool + DataDir string + Domains []string + Hostnames []string + SlackWebhook string + MattermostWebhook string + NtfyTopic string + DNSInterval time.Duration + TLSInterval time.Duration + TLSExpiryWarning int + SentryDSN string + MaintenanceMode bool + MetricsUsername string + MetricsPassword string + params *Params + log *slog.Logger +} + +// New creates a new Config instance from environment and config files. +func New(_ fx.Lifecycle, params Params) (*Config, error) { + log := params.Logger.Get() + + name := params.Globals.Appname + if name == "" { + name = "dnswatcher" + } + + setupViper(name) + + cfg, err := buildConfig(log, ¶ms) + if err != nil { + return nil, err + } + + configureDebugLogging(cfg, params) + + return cfg, nil +} + +func setupViper(name string) { + viper.SetConfigName(name) + viper.SetConfigType("yaml") + viper.AddConfigPath("/etc/" + name) + viper.AddConfigPath("$HOME/.config/" + name) + viper.AddConfigPath(".") + + viper.SetEnvPrefix("DNSWATCHER") + viper.AutomaticEnv() + + // PORT is not prefixed for compatibility + _ = viper.BindEnv("PORT", "PORT") + + viper.SetDefault("PORT", defaultPort) + viper.SetDefault("DEBUG", false) + viper.SetDefault("DATA_DIR", "./data") + viper.SetDefault("DOMAINS", "") + viper.SetDefault("HOSTNAMES", "") + viper.SetDefault("SLACK_WEBHOOK", "") + viper.SetDefault("MATTERMOST_WEBHOOK", "") + viper.SetDefault("NTFY_TOPIC", "") + viper.SetDefault("DNS_INTERVAL", defaultDNSInterval.String()) + viper.SetDefault("TLS_INTERVAL", defaultTLSInterval.String()) + viper.SetDefault("TLS_EXPIRY_WARNING", defaultTLSExpiryWarning) + viper.SetDefault("SENTRY_DSN", "") + viper.SetDefault("MAINTENANCE_MODE", false) + viper.SetDefault("METRICS_USERNAME", "") + viper.SetDefault("METRICS_PASSWORD", "") +} + +func buildConfig( + log *slog.Logger, + params *Params, +) (*Config, error) { + err := viper.ReadInConfig() + if err != nil { + var notFound viper.ConfigFileNotFoundError + if 
// parseCSV splits a comma-separated string into its non-empty entries.
//
// Every entry has its surrounding whitespace trimmed, and entries that are
// empty after trimming are dropped. An empty input yields nil; a non-empty
// input that holds only separators and whitespace yields an empty, non-nil
// slice.
func parseCSV(input string) []string {
	if len(input) == 0 {
		return nil
	}

	fields := strings.Split(input, ",")
	kept := make([]string, 0, len(fields))

	for _, field := range fields {
		field = strings.TrimSpace(field)
		if field == "" {
			continue
		}

		kept = append(kept, field)
	}

	return kept
}
+func (c *Config) StatePath() string { + return c.DataDir + "/state.json" +} diff --git a/internal/globals/globals.go b/internal/globals/globals.go new file mode 100644 index 0000000..02ce645 --- /dev/null +++ b/internal/globals/globals.go @@ -0,0 +1,62 @@ +// Package globals provides build-time variables and application-wide constants. +package globals + +import ( + "sync" + + "go.uber.org/fx" +) + +// Package-level variables set from main via ldflags. +// These are intentionally global to allow build-time injection using -ldflags. +// +//nolint:gochecknoglobals // Required for ldflags injection at build time +var ( + mu sync.RWMutex + appname string + version string + buildarch string +) + +// Globals holds build-time variables for dependency injection. +type Globals struct { + Appname string + Version string + Buildarch string +} + +// New creates a new Globals instance from package-level variables. +func New(_ fx.Lifecycle) (*Globals, error) { + mu.RLock() + defer mu.RUnlock() + + return &Globals{ + Appname: appname, + Version: version, + Buildarch: buildarch, + }, nil +} + +// SetAppname sets the application name. +func SetAppname(name string) { + mu.Lock() + defer mu.Unlock() + + appname = name +} + +// SetVersion sets the version. +func SetVersion(ver string) { + mu.Lock() + defer mu.Unlock() + + version = ver +} + +// SetBuildarch sets the build architecture. +func SetBuildarch(arch string) { + mu.Lock() + defer mu.Unlock() + + buildarch = arch +} diff --git a/internal/handlers/handlers.go b/internal/handlers/handlers.go new file mode 100644 index 0000000..474c1bc --- /dev/null +++ b/internal/handlers/handlers.go @@ -0,0 +1,58 @@ +// Package handlers provides HTTP request handlers. 
+package handlers + +import ( + "encoding/json" + "log/slog" + "net/http" + + "go.uber.org/fx" + + "sneak.berlin/go/dnswatcher/internal/globals" + "sneak.berlin/go/dnswatcher/internal/healthcheck" + "sneak.berlin/go/dnswatcher/internal/logger" +) + +// Params contains dependencies for Handlers. +type Params struct { + fx.In + + Logger *logger.Logger + Globals *globals.Globals + Healthcheck *healthcheck.Healthcheck +} + +// Handlers provides HTTP request handlers. +type Handlers struct { + log *slog.Logger + params *Params + globals *globals.Globals + hc *healthcheck.Healthcheck +} + +// New creates a new Handlers instance. +func New(_ fx.Lifecycle, params Params) (*Handlers, error) { + return &Handlers{ + log: params.Logger.Get(), + params: &params, + globals: params.Globals, + hc: params.Healthcheck, + }, nil +} + +func (h *Handlers) respondJSON( + writer http.ResponseWriter, + _ *http.Request, + data any, + status int, +) { + writer.Header().Set("Content-Type", "application/json") + writer.WriteHeader(status) + + if data != nil { + err := json.NewEncoder(writer).Encode(data) + if err != nil { + h.log.Error("json encode error", "error", err) + } + } +} diff --git a/internal/handlers/healthcheck.go b/internal/handlers/healthcheck.go new file mode 100644 index 0000000..7185944 --- /dev/null +++ b/internal/handlers/healthcheck.go @@ -0,0 +1,17 @@ +package handlers + +import ( + "net/http" +) + +// HandleHealthCheck returns the health check handler. +func (h *Handlers) HandleHealthCheck() http.HandlerFunc { + return func( + writer http.ResponseWriter, + request *http.Request, + ) { + h.respondJSON( + writer, request, h.hc.Check(), http.StatusOK, + ) + } +} diff --git a/internal/handlers/status.go b/internal/handlers/status.go new file mode 100644 index 0000000..9996fc8 --- /dev/null +++ b/internal/handlers/status.go @@ -0,0 +1,23 @@ +package handlers + +import ( + "net/http" +) + +// HandleStatus returns the monitoring status handler.
+func (h *Handlers) HandleStatus() http.HandlerFunc { + type response struct { + Status string `json:"status"` + } + + return func( + writer http.ResponseWriter, + request *http.Request, + ) { + h.respondJSON( + writer, request, + &response{Status: "ok"}, + http.StatusOK, + ) + } +} diff --git a/internal/healthcheck/healthcheck.go b/internal/healthcheck/healthcheck.go new file mode 100644 index 0000000..34c035a --- /dev/null +++ b/internal/healthcheck/healthcheck.go @@ -0,0 +1,79 @@ +// Package healthcheck provides application health status. +package healthcheck + +import ( + "context" + "log/slog" + "time" + + "go.uber.org/fx" + + "sneak.berlin/go/dnswatcher/internal/config" + "sneak.berlin/go/dnswatcher/internal/globals" + "sneak.berlin/go/dnswatcher/internal/logger" +) + +// Params contains dependencies for Healthcheck. +type Params struct { + fx.In + + Globals *globals.Globals + Config *config.Config + Logger *logger.Logger +} + +// Healthcheck provides health status information. +type Healthcheck struct { + StartupTime time.Time + log *slog.Logger + params *Params +} + +// Response is the health check response structure. +type Response struct { + Status string `json:"status"` + Now string `json:"now"` + UptimeSeconds int64 `json:"uptimeSeconds"` + UptimeHuman string `json:"uptimeHuman"` + Version string `json:"version"` + Appname string `json:"appname"` + Maintenance bool `json:"maintenanceMode"` +} + +// New creates a new Healthcheck instance. +func New( + lifecycle fx.Lifecycle, + params Params, +) (*Healthcheck, error) { + healthcheck := &Healthcheck{ + log: params.Logger.Get(), + params: &params, + } + + lifecycle.Append(fx.Hook{ + OnStart: func(_ context.Context) error { + healthcheck.StartupTime = time.Now() + + return nil + }, + }) + + return healthcheck, nil +} + +// Check returns the current health status.
+func (h *Healthcheck) Check() *Response { + return &Response{ + Status: "ok", + Now: time.Now().UTC().Format(time.RFC3339Nano), + UptimeSeconds: int64(h.uptime().Seconds()), + UptimeHuman: h.uptime().String(), + Appname: h.params.Globals.Appname, + Version: h.params.Globals.Version, + Maintenance: h.params.Config.MaintenanceMode, + } +} + +func (h *Healthcheck) uptime() time.Duration { + return time.Since(h.StartupTime) +} diff --git a/internal/logger/logger.go b/internal/logger/logger.go new file mode 100644 index 0000000..0bcdd28 --- /dev/null +++ b/internal/logger/logger.go @@ -0,0 +1,83 @@ +// Package logger provides structured logging with slog. +package logger + +import ( + "log/slog" + "os" + + "go.uber.org/fx" + + "sneak.berlin/go/dnswatcher/internal/globals" +) + +// Params contains dependencies for Logger. +type Params struct { + fx.In + + Globals *globals.Globals +} + +// Logger wraps slog.Logger with level control. +type Logger struct { + log *slog.Logger + level *slog.LevelVar + params Params +} + +// New creates a new Logger with TTY detection for output format. +func New(_ fx.Lifecycle, params Params) (*Logger, error) { + loggerInstance := &Logger{ + level: new(slog.LevelVar), + params: params, + } + loggerInstance.level.Set(slog.LevelInfo) + + isTTY := detectTTY() + + var handler slog.Handler + + if isTTY { + handler = slog.NewTextHandler(os.Stdout, &slog.HandlerOptions{ + Level: loggerInstance.level, + AddSource: true, + }) + } else { + handler = slog.NewJSONHandler(os.Stdout, &slog.HandlerOptions{ + Level: loggerInstance.level, + AddSource: true, + }) + } + + loggerInstance.log = slog.New(handler) + + return loggerInstance, nil +} + +func detectTTY() bool { + fileInfo, err := os.Stdout.Stat() + if err != nil { + return false + } + + return (fileInfo.Mode() & os.ModeCharDevice) != 0 +} + +// Get returns the underlying slog.Logger. +func (l *Logger) Get() *slog.Logger { + return l.log +} + +// EnableDebugLogging sets the log level to debug. 
+func (l *Logger) EnableDebugLogging() { + l.level.Set(slog.LevelDebug) + l.log.Debug("debug logging enabled", "debug", true) +} + +// Identify logs application startup information. +func (l *Logger) Identify() { + l.log.Info("starting", + "appname", l.params.Globals.Appname, + "version", l.params.Globals.Version, + "buildarch", l.params.Globals.Buildarch, + ) +} diff --git a/internal/middleware/middleware.go b/internal/middleware/middleware.go new file mode 100644 index 0000000..0a05dd5 --- /dev/null +++ b/internal/middleware/middleware.go @@ -0,0 +1,205 @@ +// Package middleware provides HTTP middleware. +package middleware + +import ( + "log/slog" + "net" + "net/http" + "strings" + "time" + + "github.com/99designs/basicauth-go" + "github.com/go-chi/chi/v5/middleware" + "github.com/go-chi/cors" + "go.uber.org/fx" + + "sneak.berlin/go/dnswatcher/internal/config" + "sneak.berlin/go/dnswatcher/internal/globals" + "sneak.berlin/go/dnswatcher/internal/logger" +) + +// corsMaxAge is the maximum age for CORS preflight responses. +const corsMaxAge = 300 + +// Params contains dependencies for Middleware. +type Params struct { + fx.In + + Logger *logger.Logger + Globals *globals.Globals + Config *config.Config +} + +// Middleware provides HTTP middleware. +type Middleware struct { + log *slog.Logger + params *Params +} + +// New creates a new Middleware instance. +func New( + _ fx.Lifecycle, + params Params, +) (*Middleware, error) { + return &Middleware{ + log: params.Logger.Get(), + params: &params, + }, nil +} + +// loggingResponseWriter wraps http.ResponseWriter to capture status.
+type loggingResponseWriter struct {
+	http.ResponseWriter
+
+	statusCode int
+}
+
+// newLoggingResponseWriter wraps writer with the recorded status preset to
+// http.StatusOK, so a handler that never calls WriteHeader is logged as 200.
+func newLoggingResponseWriter(
+	writer http.ResponseWriter,
+) *loggingResponseWriter {
+	wrapped := new(loggingResponseWriter)
+	wrapped.ResponseWriter = writer
+	wrapped.statusCode = http.StatusOK
+
+	return wrapped
+}
+
+// WriteHeader records code, then forwards it to the wrapped writer.
+func (lrw *loggingResponseWriter) WriteHeader(code int) {
+	lrw.statusCode = code
+	lrw.ResponseWriter.WriteHeader(code)
+}
+
+// Logging returns middleware that emits one "request" log entry per request
+// once the downstream handler has finished.
+func (m *Middleware) Logging() func(http.Handler) http.Handler {
+	return func(next http.Handler) http.Handler {
+		serve := func(
+			writer http.ResponseWriter,
+			request *http.Request,
+		) {
+			startedAt := time.Now()
+			recorder := newLoggingResponseWriter(writer)
+			reqCtx := request.Context()
+
+			// Deferred so the entry is still written when next panics.
+			defer func() {
+				elapsed := time.Since(startedAt)
+				requestID := middleware.GetReqID(reqCtx)
+
+				m.log.InfoContext(reqCtx, "request",
+					"request_start", startedAt,
+					"method", request.Method,
+					"url", request.URL.String(),
+					"useragent", request.UserAgent(),
+					"request_id", requestID,
+					"referer", request.Referer(),
+					"proto", request.Proto,
+					"remoteIP", realIP(request),
+					"status", recorder.statusCode,
+					"latency_ms", elapsed.Milliseconds(),
+				)
+			}()
+
+			next.ServeHTTP(recorder, request)
+		}
+
+		return http.HandlerFunc(serve)
+	}
+}
+
+// ipFromHostPort returns the host part of a "host:port" string, or the input
+// unchanged when it cannot be split.
+func ipFromHostPort(hostPort string) string {
+	if host, _, err := net.SplitHostPort(hostPort); err == nil {
+		return host
+	}
+
+	return hostPort
+}
+
+// trustedProxyNets are RFC1918 and loopback CIDRs.
+//
+// The list also covers the IPv6 loopback (::1) and unique-local (fc00::/7)
+// ranges.
+//
+//nolint:gochecknoglobals // package-level constant nets parsed once
+var trustedProxyNets = func() []*net.IPNet {
+	cidrs := []string{
+		"10.0.0.0/8",
+		"172.16.0.0/12",
+		"192.168.0.0/16",
+		"127.0.0.0/8",
+		"::1/128",
+		"fc00::/7",
+	}
+
+	nets := make([]*net.IPNet, 0, len(cidrs))
+
+	for _, cidr := range cidrs {
+		_, n, err := net.ParseCIDR(cidr)
+		if err != nil {
+			// The literals above are fixed at build time. Failing here, at
+			// package init, beats appending a nil net that would panic on
+			// the first request instead.
+			panic("invalid trusted proxy CIDR " + cidr + ": " + err.Error())
+		}
+
+		nets = append(nets, n)
+	}
+
+	return nets
+}()
+
+// isTrustedProxy reports whether ip falls inside any trustedProxyNets range.
+func isTrustedProxy(ip net.IP) bool {
+	for _, n := range trustedProxyNets {
+		if n.Contains(ip) {
+			return true
+		}
+	}
+
+	return false
+}
+
+// realIP extracts the client's real IP address from the request.
+// Proxy headers are only trusted from RFC1918/loopback addresses.
+//
+// When the direct peer is trusted, X-Real-IP takes precedence, then the
+// first entry of X-Forwarded-For. Header values are returned as sent
+// (trimmed, not validated as IP addresses). In every other case the host
+// part of RemoteAddr is returned.
+func realIP(r *http.Request) string {
+	addr := ipFromHostPort(r.RemoteAddr)
+
+	remoteIP := net.ParseIP(addr)
+	if remoteIP == nil || !isTrustedProxy(remoteIP) {
+		return addr
+	}
+
+	if ip := strings.TrimSpace(
+		r.Header.Get("X-Real-IP"),
+	); ip != "" {
+		return ip
+	}
+
+	// Only the first (left-most) X-Forwarded-For entry is considered. Cut
+	// yields the whole value when there is no comma and "" when the header
+	// is absent.
+	first, _, _ := strings.Cut(r.Header.Get("X-Forwarded-For"), ",")
+	if ip := strings.TrimSpace(first); ip != "" {
+		return ip
+	}
+
+	return addr
+}
+
+// CORS returns CORS middleware that allows any origin, without credentials.
+func (m *Middleware) CORS() func(http.Handler) http.Handler {
+	return cors.Handler(cors.Options{
+		AllowedOrigins: []string{"*"},
+		AllowedMethods: []string{
+			"GET", "POST", "PUT", "DELETE", "OPTIONS",
+		},
+		AllowedHeaders: []string{
+			"Accept", "Authorization",
+			"Content-Type", "X-CSRF-Token",
+		},
+		ExposedHeaders:   []string{"Link"},
+		AllowCredentials: false,
+		MaxAge:           corsMaxAge,
+	})
+}
+
+// MetricsAuth returns basic auth middleware for /metrics.
+func (m *Middleware) MetricsAuth() func(http.Handler) http.Handler { + if m.params.Config.MetricsUsername == "" { + return func(next http.Handler) http.Handler { + return next + } + } + + return basicauth.New( + "metrics", + map[string][]string{ + m.params.Config.MetricsUsername: { + m.params.Config.MetricsPassword, + }, + }, + ) +} diff --git a/internal/notify/notify.go b/internal/notify/notify.go new file mode 100644 index 0000000..08dbe5a --- /dev/null +++ b/internal/notify/notify.go @@ -0,0 +1,261 @@ +// Package notify provides notification delivery to Slack, Mattermost, and ntfy. +package notify + +import ( + "bytes" + "context" + "encoding/json" + "errors" + "fmt" + "log/slog" + "net/http" + "time" + + "go.uber.org/fx" + + "sneak.berlin/go/dnswatcher/internal/config" + "sneak.berlin/go/dnswatcher/internal/logger" +) + +// HTTP client timeout. +const httpClientTimeout = 10 * time.Second + +// HTTP status code thresholds. +const httpStatusClientError = 400 + +// Sentinel errors for notification failures. +var ( + // ErrNtfyFailed indicates the ntfy request failed. + ErrNtfyFailed = errors.New("ntfy notification failed") + // ErrSlackFailed indicates the Slack request failed. + ErrSlackFailed = errors.New("slack notification failed") + // ErrMattermostFailed indicates the Mattermost request failed. + ErrMattermostFailed = errors.New( + "mattermost notification failed", + ) +) + +// Params contains dependencies for Service. +type Params struct { + fx.In + + Logger *logger.Logger + Config *config.Config +} + +// Service provides notification functionality. +type Service struct { + log *slog.Logger + client *http.Client + config *config.Config +} + +// New creates a new notify Service. 
+func New(
+	_ fx.Lifecycle,
+	params Params,
+) (*Service, error) {
+	return &Service{
+		log: params.Logger.Get(),
+		client: &http.Client{
+			Timeout: httpClientTimeout,
+		},
+		config: params.Config,
+	}, nil
+}
+
+// SendNotification sends a notification to all configured endpoints.
+//
+// Each configured endpoint is contacted from its own goroutine, so the call
+// returns without waiting for delivery. Failures are logged, not returned.
+// Deliveries run on a context detached from ctx's cancellation; each request
+// is still bounded by the HTTP client timeout.
+func (svc *Service) SendNotification(
+	ctx context.Context,
+	title, message, priority string,
+) {
+	notifyCtx := context.WithoutCancel(ctx)
+
+	if topic := svc.config.NtfyTopic; topic != "" {
+		go svc.deliver("failed to send ntfy notification",
+			func() error {
+				return svc.sendNtfy(
+					notifyCtx, topic, title, message, priority,
+				)
+			},
+		)
+	}
+
+	if hook := svc.config.SlackWebhook; hook != "" {
+		go svc.deliver("failed to send slack notification",
+			func() error {
+				return svc.sendSlack(
+					notifyCtx, hook, title, message, priority,
+				)
+			},
+		)
+	}
+
+	if hook := svc.config.MattermostWebhook; hook != "" {
+		go svc.deliver("failed to send mattermost notification",
+			func() error {
+				// Same payload as Slack, but failures are reported with
+				// the Mattermost sentinel so callers can tell them apart.
+				return svc.sendWebhook(
+					notifyCtx, hook, title, message, priority,
+					ErrMattermostFailed,
+				)
+			},
+		)
+	}
+}
+
+// deliver runs send and logs failureMsg together with the error if it fails.
+func (svc *Service) deliver(failureMsg string, send func() error) {
+	err := send()
+	if err != nil {
+		svc.log.Error(failureMsg, "error", err)
+	}
+}
+
+// sendNtfy POSTs message to topic, which is used verbatim as the request
+// URL, with the title and mapped priority sent as headers. A response status
+// of 400 or above is reported as ErrNtfyFailed.
+func (svc *Service) sendNtfy(
+	ctx context.Context,
+	topic, title, message, priority string,
+) error {
+	svc.log.Debug(
+		"sending ntfy notification",
+		"topic", topic,
+		"title", title,
+	)
+
+	request, err := http.NewRequestWithContext(
+		ctx,
+		http.MethodPost,
+		topic,
+		bytes.NewBufferString(message),
+	)
+	if err != nil {
+		return fmt.Errorf("creating ntfy request: %w", err)
+	}
+
+	request.Header.Set("Title", title)
+	request.Header.Set("Priority", ntfyPriority(priority))
+
+	resp, err := svc.client.Do(request)
+	if err != nil {
+		return fmt.Errorf("sending ntfy request: %w", err)
+	}
+
+	defer func() { _ = resp.Body.Close() }()
+
+	if resp.StatusCode >= httpStatusClientError {
+		return fmt.Errorf(
+			"%w: status %d", ErrNtfyFailed, resp.StatusCode,
+		)
+	}
+
+	return nil
+}
+
+// ntfyPriority maps an internal priority name to the value sent in the ntfy
+// Priority header; unknown names map to "default".
+func ntfyPriority(priority string) string {
+	switch priority {
+	case "error":
+		return "urgent"
+	case "warning":
+		return "high"
+	case "success":
+		return "default"
+	case "info":
+		return "low"
+	default:
+		return "default"
+	}
+}
+
+// SlackPayload represents a Slack/Mattermost webhook payload.
+type SlackPayload struct {
+	Text        string            `json:"text"`
+	Attachments []SlackAttachment `json:"attachments,omitempty"`
+}
+
+// SlackAttachment represents a Slack/Mattermost attachment.
+type SlackAttachment struct {
+	Color string `json:"color"`
+	Title string `json:"title"`
+	Text  string `json:"text"`
+}
+
+// sendSlack posts the notification to a Slack webhook. A response status of
+// 400 or above is reported as ErrSlackFailed.
+func (svc *Service) sendSlack(
+	ctx context.Context,
+	webhookURL, title, message, priority string,
+) error {
+	return svc.sendWebhook(
+		ctx, webhookURL, title, message, priority, ErrSlackFailed,
+	)
+}
+
+// sendWebhook posts a single-attachment SlackPayload as JSON to webhookURL.
+// A response status of 400 or above is returned wrapped in failure, which
+// lets each caller supply its own sentinel error.
+func (svc *Service) sendWebhook(
+	ctx context.Context,
+	webhookURL, title, message, priority string,
+	failure error,
+) error {
+	// NOTE(review): webhook URLs usually embed a secret token; consider
+	// redacting "url" here.
+	svc.log.Debug(
+		"sending webhook notification",
+		"url", webhookURL,
+		"title", title,
+	)
+
+	payload := SlackPayload{
+		Attachments: []SlackAttachment{
+			{
+				Color: slackColor(priority),
+				Title: title,
+				Text:  message,
+			},
+		},
+	}
+
+	body, err := json.Marshal(payload)
+	if err != nil {
+		return fmt.Errorf("marshaling webhook payload: %w", err)
+	}
+
+	request, err := http.NewRequestWithContext(
+		ctx,
+		http.MethodPost,
+		webhookURL,
+		bytes.NewBuffer(body),
+	)
+	if err != nil {
+		return fmt.Errorf("creating webhook request: %w", err)
+	}
+
+	request.Header.Set("Content-Type", "application/json")
+
+	resp, err := svc.client.Do(request)
+	if err != nil {
+		return fmt.Errorf("sending webhook request: %w", err)
+	}
+
+	defer func() { _ = resp.Body.Close() }()
+
+	if resp.StatusCode >= httpStatusClientError {
+		return fmt.Errorf(
+			"%w: status %d",
+			failure, resp.StatusCode,
+		)
+	}
+
+	return nil
+}
+
+// slackColor maps an internal priority name to the attachment color;
+// unknown names get a neutral grey.
+func slackColor(priority string) string {
+	switch priority {
+	case "error":
+		return "#dc3545"
+	case "warning":
+		return "#ffc107"
+	case "success":
+		return "#28a745"
+	case "info":
+		return "#17a2b8"
+	default:
+		return "#6c757d"
+	}
+}
diff --git a/internal/portcheck/portcheck.go b/internal/portcheck/portcheck.go new file mode 100644 index 0000000..2c061b7 --- /dev/null +++ b/internal/portcheck/portcheck.go @@ -0,0 +1,48 @@ +// Package portcheck provides TCP port connectivity checking. +package portcheck + +import ( + "context" + "errors" + "log/slog" + + "go.uber.org/fx" + + "sneak.berlin/go/dnswatcher/internal/logger" +) + +// ErrNotImplemented indicates the port checker is not yet implemented. +var ErrNotImplemented = errors.New( + "port checker not yet implemented", +) + +// Params contains dependencies for Checker. +type Params struct { + fx.In + + Logger *logger.Logger +} + +// Checker performs TCP port connectivity checks. +type Checker struct { + log *slog.Logger +} + +// New creates a new port Checker instance. +func New( + _ fx.Lifecycle, + params Params, +) (*Checker, error) { + return &Checker{ + log: params.Logger.Get(), + }, nil +} + +// CheckPort tests TCP connectivity to the given address and port. +func (c *Checker) CheckPort( + _ context.Context, + _ string, + _ int, +) (bool, error) { + return false, ErrNotImplemented +} diff --git a/internal/resolver/resolver.go b/internal/resolver/resolver.go new file mode 100644 index 0000000..be47717 --- /dev/null +++ b/internal/resolver/resolver.go @@ -0,0 +1,64 @@ +// Package resolver provides iterative DNS resolution from root nameservers. +package resolver + +import ( + "context" + "errors" + "log/slog" + + "go.uber.org/fx" + + "sneak.berlin/go/dnswatcher/internal/logger" +) + +// ErrNotImplemented indicates the resolver is not yet implemented. +var ErrNotImplemented = errors.New("resolver not yet implemented") + +// Params contains dependencies for Resolver. +type Params struct { + fx.In + + Logger *logger.Logger +} + +// Resolver performs iterative DNS resolution from root servers. +type Resolver struct { + log *slog.Logger +} + +// New creates a new Resolver instance. 
+func New( + _ fx.Lifecycle, + params Params, +) (*Resolver, error) { + return &Resolver{ + log: params.Logger.Get(), + }, nil +} + +// LookupNS performs iterative resolution to find authoritative +// nameservers for the given domain. +func (r *Resolver) LookupNS( + _ context.Context, + _ string, +) ([]string, error) { + return nil, ErrNotImplemented +} + +// LookupAllRecords performs iterative resolution to find all DNS +// records for the given hostname. +func (r *Resolver) LookupAllRecords( + _ context.Context, + _ string, +) (map[string][]string, error) { + return nil, ErrNotImplemented +} + +// ResolveIPAddresses resolves a hostname to all IPv4 and IPv6 +// addresses, following CNAME chains. +func (r *Resolver) ResolveIPAddresses( + _ context.Context, + _ string, +) ([]string, error) { + return nil, ErrNotImplemented +} diff --git a/internal/server/routes.go b/internal/server/routes.go new file mode 100644 index 0000000..cd07ba1 --- /dev/null +++ b/internal/server/routes.go @@ -0,0 +1,43 @@ +package server + +import ( + "time" + + "github.com/go-chi/chi/v5" + chimw "github.com/go-chi/chi/v5/middleware" + "github.com/prometheus/client_golang/prometheus/promhttp" +) + +// requestTimeout is the maximum duration for handling a request. +const requestTimeout = 60 * time.Second + +// SetupRoutes configures all HTTP routes. 
+func (s *Server) SetupRoutes() { + s.router = chi.NewRouter() + + // Global middleware + s.router.Use(chimw.Recoverer) + s.router.Use(chimw.RequestID) + s.router.Use(s.mw.Logging()) + s.router.Use(s.mw.CORS()) + s.router.Use(chimw.Timeout(requestTimeout)) + + // Health check + s.router.Get("/health", s.handlers.HandleHealthCheck()) + + // API v1 routes + s.router.Route("/api/v1", func(r chi.Router) { + r.Get("/status", s.handlers.HandleStatus()) + }) + + // Metrics endpoint (optional, with basic auth) + if s.params.Config.MetricsUsername != "" { + s.router.Group(func(r chi.Router) { + r.Use(s.mw.MetricsAuth()) + r.Get( + "/metrics", + promhttp.Handler().ServeHTTP, + ) + }) + } +} diff --git a/internal/server/server.go b/internal/server/server.go new file mode 100644 index 0000000..4cb30a2 --- /dev/null +++ b/internal/server/server.go @@ -0,0 +1,129 @@ +// Package server provides the HTTP server. +package server + +import ( + "context" + "errors" + "fmt" + "log/slog" + "net/http" + "time" + + "github.com/go-chi/chi/v5" + "go.uber.org/fx" + + "sneak.berlin/go/dnswatcher/internal/config" + "sneak.berlin/go/dnswatcher/internal/globals" + "sneak.berlin/go/dnswatcher/internal/handlers" + "sneak.berlin/go/dnswatcher/internal/logger" + "sneak.berlin/go/dnswatcher/internal/middleware" +) + +// Params contains dependencies for Server. +type Params struct { + fx.In + + Logger *logger.Logger + Globals *globals.Globals + Config *config.Config + Middleware *middleware.Middleware + Handlers *handlers.Handlers +} + +// shutdownTimeout is how long to wait for graceful shutdown. +const shutdownTimeout = 30 * time.Second + +// readHeaderTimeout is the max duration for reading request headers. +const readHeaderTimeout = 10 * time.Second + +// Server is the HTTP server. 
+type Server struct {
+	startupTime time.Time
+	port        int
+	log         *slog.Logger
+	router      *chi.Mux
+	httpServer  *http.Server
+	params      Params
+	mw          *middleware.Middleware
+	handlers    *handlers.Handlers
+}
+
+// New creates a new Server instance.
+//
+// It registers lifecycle hooks: on start the router and http.Server are
+// built and the listener is launched in a goroutine; on stop the server is
+// shut down gracefully.
+func New(
+	lifecycle fx.Lifecycle,
+	params Params,
+) (*Server, error) {
+	srv := &Server{
+		port:     params.Config.Port,
+		log:      params.Logger.Get(),
+		params:   params,
+		mw:       params.Middleware,
+		handlers: params.Handlers,
+	}
+
+	lifecycle.Append(fx.Hook{
+		OnStart: func(_ context.Context) error {
+			srv.startupTime = time.Now()
+
+			// Build router and http.Server before the goroutine starts.
+			// Assigning them inside the goroutine raced with Shutdown,
+			// which could see a nil httpServer and skip the shutdown.
+			srv.prepare()
+
+			go srv.serve()
+
+			return nil
+		},
+		OnStop: func(ctx context.Context) error {
+			return srv.Shutdown(ctx)
+		},
+	})
+
+	return srv, nil
+}
+
+// prepare sets up the routes and constructs the http.Server for the
+// configured port. It does not start listening.
+func (s *Server) prepare() {
+	s.SetupRoutes()
+
+	s.httpServer = &http.Server{
+		Addr:              fmt.Sprintf(":%d", s.port),
+		Handler:           s,
+		ReadHeaderTimeout: readHeaderTimeout,
+	}
+}
+
+// serve blocks in ListenAndServe on the prepared server. Any error other
+// than http.ErrServerClosed is logged, not returned.
+func (s *Server) serve() {
+	s.log.Info("http server starting", "addr", s.httpServer.Addr)
+
+	err := s.httpServer.ListenAndServe()
+	if err != nil && !errors.Is(err, http.ErrServerClosed) {
+		s.log.Error("http server error", "error", err)
+	}
+}
+
+// Run starts the HTTP server.
+//
+// It (re)builds the router and http.Server, then blocks until the server
+// stops.
+func (s *Server) Run() {
+	s.prepare()
+	s.serve()
+}
+
+// Shutdown gracefully shuts down the server.
+//
+// It is a no-op when the server was never prepared. The shutdown is bounded
+// by shutdownTimeout in addition to any deadline already on ctx.
+func (s *Server) Shutdown(ctx context.Context) error {
+	if s.httpServer == nil {
+		return nil
+	}
+
+	s.log.Info("shutting down http server")
+
+	shutdownCtx, cancel := context.WithTimeout(
+		ctx, shutdownTimeout,
+	)
+	defer cancel()
+
+	err := s.httpServer.Shutdown(shutdownCtx)
+	if err != nil {
+		s.log.Error("http server shutdown error", "error", err)
+
+		return fmt.Errorf("shutting down http server: %w", err)
+	}
+
+	s.log.Info("http server stopped")
+
+	return nil
+}
+
+// ServeHTTP implements http.Handler.
+func (s *Server) ServeHTTP( + writer http.ResponseWriter, + request *http.Request, +) { + s.router.ServeHTTP(writer, request) +} diff --git a/internal/state/state.go b/internal/state/state.go new file mode 100644 index 0000000..fca7276 --- /dev/null +++ b/internal/state/state.go @@ -0,0 +1,287 @@ +// Package state provides JSON file-based state persistence. +package state + +import ( + "context" + "encoding/json" + "fmt" + "log/slog" + "os" + "path/filepath" + "sync" + "time" + + "go.uber.org/fx" + + "sneak.berlin/go/dnswatcher/internal/config" + "sneak.berlin/go/dnswatcher/internal/logger" +) + +// filePermissions for the state file. +const filePermissions = 0o600 + +// dirPermissions for the data directory. +const dirPermissions = 0o700 + +// stateVersion is the current state file format version. +const stateVersion = 1 + +// Params contains dependencies for State. +type Params struct { + fx.In + + Logger *logger.Logger + Config *config.Config +} + +// DomainState holds the monitoring state for an apex domain. +type DomainState struct { + Nameservers []string `json:"nameservers"` + LastChecked time.Time `json:"lastChecked"` +} + +// NameserverRecordState holds one NS's response for a hostname. +type NameserverRecordState struct { + Records map[string][]string `json:"records"` + Status string `json:"status"` + Error string `json:"error,omitempty"` + LastChecked time.Time `json:"lastChecked"` +} + +// HostnameState holds per-nameserver monitoring state for a hostname. +type HostnameState struct { + RecordsByNameserver map[string]*NameserverRecordState `json:"recordsByNameserver"` + LastChecked time.Time `json:"lastChecked"` +} + +// PortState holds the monitoring state for a port. +type PortState struct { + Open bool `json:"open"` + Hostname string `json:"hostname"` + LastChecked time.Time `json:"lastChecked"` +} + +// CertificateState holds TLS certificate monitoring state. 
+type CertificateState struct {
+	CommonName              string    `json:"commonName"`
+	Issuer                  string    `json:"issuer"`
+	NotAfter                time.Time `json:"notAfter"`
+	SubjectAlternativeNames []string  `json:"subjectAlternativeNames"`
+	Status                  string    `json:"status"`
+	Error                   string    `json:"error,omitempty"`
+	LastChecked             time.Time `json:"lastChecked"`
+}
+
+// Snapshot is the complete monitoring state persisted to disk.
+//
+// It is serialized as JSON. Each section is a map from a string key to a
+// pointer to that section's state; the key formats are chosen by callers
+// and are not visible here.
+type Snapshot struct {
+	Version      int                          `json:"version"`
+	LastUpdated  time.Time                    `json:"lastUpdated"`
+	Domains      map[string]*DomainState      `json:"domains"`
+	Hostnames    map[string]*HostnameState    `json:"hostnames"`
+	Ports        map[string]*PortState        `json:"ports"`
+	Certificates map[string]*CertificateState `json:"certificates"`
+}
+
+// State manages the monitoring state with file persistence.
+type State struct {
+	mu       sync.RWMutex // guards snapshot
+	snapshot *Snapshot
+	log      *slog.Logger
+	config   *config.Config
+}
+
+// New creates a new State instance holding an empty snapshot with every
+// section map allocated.
+//
+// Nothing is read from disk here: New registers lifecycle hooks that call
+// Load on start and Save on stop.
+func New(
+	lifecycle fx.Lifecycle,
+	params Params,
+) (*State, error) {
+	state := &State{
+		log:    params.Logger.Get(),
+		config: params.Config,
+		snapshot: &Snapshot{
+			Version:      stateVersion,
+			Domains:      make(map[string]*DomainState),
+			Hostnames:    make(map[string]*HostnameState),
+			Ports:        make(map[string]*PortState),
+			Certificates: make(map[string]*CertificateState),
+		},
+	}
+
+	lifecycle.Append(fx.Hook{
+		OnStart: func(_ context.Context) error {
+			return state.Load()
+		},
+		OnStop: func(_ context.Context) error {
+			return state.Save()
+		},
+	})
+
+	return state, nil
+}
+
+// Load reads the state from disk.
+func (s *State) Load() error {
+	s.mu.Lock()
+	defer s.mu.Unlock()
+
+	path := s.config.StatePath()
+
+	//nolint:gosec // path is from trusted config
+	data, err := os.ReadFile(path)
+	if err != nil {
+		// A missing file is not an error: keep the current snapshot.
+		if os.IsNotExist(err) {
+			s.log.Info(
+				"no existing state file, starting fresh",
+				"path", path,
+			)
+
+			return nil
+		}
+
+		return fmt.Errorf("reading state file: %w", err)
+	}
+
+	var snapshot Snapshot
+
+	err = json.Unmarshal(data, &snapshot)
+	if err != nil {
+		return fmt.Errorf("parsing state file: %w", err)
+	}
+
+	// A file that omits a section, or stores it as null, decodes to a nil
+	// map; writing into a nil map panics, so allocate any that are missing.
+	ensureSnapshotMaps(&snapshot)
+
+	s.snapshot = &snapshot
+	s.log.Info("loaded state from disk", "path", path)
+
+	return nil
+}
+
+// ensureSnapshotMaps allocates every section map of snap that is nil.
+func ensureSnapshotMaps(snap *Snapshot) {
+	if snap.Domains == nil {
+		snap.Domains = make(map[string]*DomainState)
+	}
+
+	if snap.Hostnames == nil {
+		snap.Hostnames = make(map[string]*HostnameState)
+	}
+
+	if snap.Ports == nil {
+		snap.Ports = make(map[string]*PortState)
+	}
+
+	if snap.Certificates == nil {
+		snap.Certificates = make(map[string]*CertificateState)
+	}
+}
+
+// Save writes the current state to disk atomically.
+//
+// The snapshot's LastUpdated is set to the current UTC time first. The data
+// is written to "<path>.tmp" and then renamed over the state file; the data
+// directory is created if needed.
+func (s *State) Save() error {
+	// Save mutates LastUpdated, so it needs the write lock: under RLock two
+	// concurrent Saves, or a Save and a reader, would race on that field.
+	s.mu.Lock()
+	defer s.mu.Unlock()
+
+	s.snapshot.LastUpdated = time.Now().UTC()
+
+	data, err := json.MarshalIndent(s.snapshot, "", "  ")
+	if err != nil {
+		return fmt.Errorf("marshaling state: %w", err)
+	}
+
+	path := s.config.StatePath()
+
+	err = os.MkdirAll(filepath.Dir(path), dirPermissions)
+	if err != nil {
+		return fmt.Errorf("creating data directory: %w", err)
+	}
+
+	// Atomic write: write to temp file, then rename
+	tmpPath := path + ".tmp"
+
+	err = os.WriteFile(tmpPath, data, filePermissions)
+	if err != nil {
+		return fmt.Errorf("writing temp state file: %w", err)
+	}
+
+	err = os.Rename(tmpPath, path)
+	if err != nil {
+		return fmt.Errorf("renaming state file: %w", err)
+	}
+
+	s.log.Debug("state saved to disk", "path", path)
+
+	return nil
+}
+
+// GetSnapshot returns a shallow copy of the current snapshot.
+//
+// Only the top-level struct is copied: the section maps and the entries
+// they point to are still shared with State, so callers must treat them as
+// read-only.
+func (s *State) GetSnapshot() Snapshot {
+	s.mu.RLock()
+	defer s.mu.RUnlock()
+
+	return *s.snapshot
+}
+
+// SetDomainState updates the state for a domain.
+func (s *State) SetDomainState(
+	domain string,
+	ds *DomainState,
+) {
+	s.mu.Lock()
+	defer s.mu.Unlock()
+
+	s.snapshot.Domains[domain] = ds
+}
+
+// GetDomainState returns the state for a domain.
+func (s *State) GetDomainState( + domain string, +) (*DomainState, bool) { + s.mu.RLock() + defer s.mu.RUnlock() + + ds, ok := s.snapshot.Domains[domain] + + return ds, ok +} + +// SetHostnameState updates the state for a hostname. +func (s *State) SetHostnameState( + hostname string, + hs *HostnameState, +) { + s.mu.Lock() + defer s.mu.Unlock() + + s.snapshot.Hostnames[hostname] = hs +} + +// GetHostnameState returns the state for a hostname. +func (s *State) GetHostnameState( + hostname string, +) (*HostnameState, bool) { + s.mu.RLock() + defer s.mu.RUnlock() + + hs, ok := s.snapshot.Hostnames[hostname] + + return hs, ok +} + +// SetPortState updates the state for a port. +func (s *State) SetPortState(key string, ps *PortState) { + s.mu.Lock() + defer s.mu.Unlock() + + s.snapshot.Ports[key] = ps +} + +// GetPortState returns the state for a port. +func (s *State) GetPortState(key string) (*PortState, bool) { + s.mu.RLock() + defer s.mu.RUnlock() + + ps, ok := s.snapshot.Ports[key] + + return ps, ok +} + +// SetCertificateState updates the state for a certificate. +func (s *State) SetCertificateState( + key string, + cs *CertificateState, +) { + s.mu.Lock() + defer s.mu.Unlock() + + s.snapshot.Certificates[key] = cs +} + +// GetCertificateState returns the state for a certificate. +func (s *State) GetCertificateState( + key string, +) (*CertificateState, bool) { + s.mu.RLock() + defer s.mu.RUnlock() + + cs, ok := s.snapshot.Certificates[key] + + return cs, ok +} diff --git a/internal/tlscheck/tlscheck.go b/internal/tlscheck/tlscheck.go new file mode 100644 index 0000000..42f086d --- /dev/null +++ b/internal/tlscheck/tlscheck.go @@ -0,0 +1,58 @@ +// Package tlscheck provides TLS certificate inspection. +package tlscheck + +import ( + "context" + "errors" + "log/slog" + "time" + + "go.uber.org/fx" + + "sneak.berlin/go/dnswatcher/internal/logger" +) + +// ErrNotImplemented indicates the TLS checker is not yet implemented. 
+var ErrNotImplemented = errors.New( + "tls checker not yet implemented", +) + +// Params contains dependencies for Checker. +type Params struct { + fx.In + + Logger *logger.Logger +} + +// Checker performs TLS certificate inspection. +type Checker struct { + log *slog.Logger +} + +// CertificateInfo holds information about a TLS certificate. +type CertificateInfo struct { + CommonName string + Issuer string + NotAfter time.Time + SubjectAlternativeNames []string +} + +// New creates a new TLS Checker instance. +func New( + _ fx.Lifecycle, + params Params, +) (*Checker, error) { + return &Checker{ + log: params.Logger.Get(), + }, nil +} + +// CheckCertificate connects to the given IP:port using SNI and +// returns certificate information. +func (c *Checker) CheckCertificate( + _ context.Context, + _ string, + _ string, +) (*CertificateInfo, error) { + return nil, ErrNotImplemented +} diff --git a/internal/watcher/watcher.go b/internal/watcher/watcher.go new file mode 100644 index 0000000..a9b1caa --- /dev/null +++ b/internal/watcher/watcher.go @@ -0,0 +1,94 @@ +// Package watcher provides the main monitoring orchestrator and scheduler. +package watcher + +import ( + "context" + "log/slog" + + "go.uber.org/fx" + + "sneak.berlin/go/dnswatcher/internal/config" + "sneak.berlin/go/dnswatcher/internal/logger" + "sneak.berlin/go/dnswatcher/internal/notify" + "sneak.berlin/go/dnswatcher/internal/portcheck" + "sneak.berlin/go/dnswatcher/internal/resolver" + "sneak.berlin/go/dnswatcher/internal/state" + "sneak.berlin/go/dnswatcher/internal/tlscheck" +) + +// Params contains dependencies for Watcher. +type Params struct { + fx.In + + Logger *logger.Logger + Config *config.Config + State *state.State + Resolver *resolver.Resolver + PortCheck *portcheck.Checker + TLSCheck *tlscheck.Checker + Notify *notify.Service +} + +// Watcher orchestrates all monitoring checks on a schedule. 
+type Watcher struct {
+	log       *slog.Logger
+	config    *config.Config
+	state     *state.State
+	resolver  *resolver.Resolver
+	portCheck *portcheck.Checker
+	tlsCheck  *tlscheck.Checker
+	notify    *notify.Service
+	cancel    context.CancelFunc
+}
+
+// New creates a new Watcher instance.
+//
+// It registers lifecycle hooks: on start Run is launched in a goroutine
+// with a cancellable context; on stop that context is cancelled.
+func New(
+	lifecycle fx.Lifecycle,
+	params Params,
+) (*Watcher, error) {
+	watcher := &Watcher{
+		log:       params.Logger.Get(),
+		config:    params.Config,
+		state:     params.State,
+		resolver:  params.Resolver,
+		portCheck: params.PortCheck,
+		tlsCheck:  params.TLSCheck,
+		notify:    params.Notify,
+	}
+
+	lifecycle.Append(fx.Hook{
+		OnStart: func(startCtx context.Context) error {
+			// startCtx only covers the startup phase: fx bounds it by the
+			// start timeout, so a context derived directly from it would
+			// be cancelled shortly after startup and stop the watcher.
+			// Detach from that cancellation (values are kept); OnStop
+			// cancels explicitly.
+			ctx, cancel := context.WithCancel(
+				context.WithoutCancel(startCtx),
+			)
+			watcher.cancel = cancel
+
+			go watcher.Run(ctx)
+
+			return nil
+		},
+		OnStop: func(_ context.Context) error {
+			if watcher.cancel != nil {
+				watcher.cancel()
+			}
+
+			return nil
+		},
+	})
+
+	return watcher, nil
+}
+
+// Run starts the monitoring loop.
+//
+// It logs the configured domain/hostname counts and check intervals, then
+// blocks until ctx is cancelled.
+func (w *Watcher) Run(ctx context.Context) {
+	w.log.Info(
+		"watcher starting",
+		"domains", len(w.config.Domains),
+		"hostnames", len(w.config.Hostnames),
+		"dnsInterval", w.config.DNSInterval,
+		"tlsInterval", w.config.TLSInterval,
+	)
+
+	// Stub: wait for context cancellation.
+	// Implementation will add initial check + periodic scheduling.
+	<-ctx.Done()
+	w.log.Info("watcher stopped")
+}