diff --git a/README.md b/README.md index 91877d7..ed27973 100644 --- a/README.md +++ b/README.md @@ -36,11 +36,13 @@ upaas/ │ ├── handlers/ # HTTP request handlers │ ├── healthcheck/ # Health status service │ ├── logger/ # Structured logging (slog) -│ ├── middleware/ # HTTP middleware (auth, logging, CORS) +│ ├── metrics/ # Prometheus metrics registration +│ ├── middleware/ # HTTP middleware (auth, logging, CORS, metrics) │ ├── models/ # Active Record style database models │ ├── server/ # HTTP server and routes │ ├── service/ │ │ ├── app/ # App management service +│ │ ├── audit/ # Audit logging service │ │ ├── auth/ # Authentication service │ │ ├── deploy/ # Deployment orchestration │ │ ├── notify/ # Notifications (ntfy, Slack) @@ -58,11 +60,13 @@ Uses Uber fx for dependency injection. Components are wired in this order: 2. `logger` - Structured logging 3. `config` - Configuration loading 4. `database` - SQLite connection + migrations -5. `healthcheck` - Health status -6. `auth` - Authentication service -7. `app` - App management -8. `docker` - Docker client -9. `notify` - Notification service +5. `metrics` - Prometheus metrics registration +6. `healthcheck` - Health status +7. `auth` - Authentication service +8. `app` - App management +9. `docker` - Docker client +10. `notify` - Notification service +11. `audit` - Audit logging service 10. `deploy` - Deployment service 11. `webhook` - Webhook processing 12. `middleware` - HTTP middleware @@ -211,6 +215,48 @@ Example: `HOST_DATA_DIR=/srv/upaas/data docker compose up -d` Session secrets are automatically generated on first startup and persisted to `$UPAAS_DATA_DIR/session.key`. +## Observability + +### Prometheus Metrics + +All custom metrics are exposed under the `upaas_` namespace at `/metrics`. The +endpoint is always available and can be optionally protected with basic auth via +`METRICS_USERNAME` and `METRICS_PASSWORD`. + +| Metric | Type | Labels | Description | +|--------|------|--------|-------------| +| `upaas_deployments_total` | Counter | `app`, `status` | Total deployments (success/failed/cancelled) | +| `upaas_deployments_duration_seconds` | Histogram | `app`, `status` | Deployment duration | +| `upaas_deployments_in_flight` | Gauge | `app` | Currently running deployments | +| `upaas_container_healthy` | Gauge | `app` | Container health (1=healthy, 0=unhealthy) | +| `upaas_webhook_events_total` | Counter | `app`, `event_type`, `matched` | Webhook events received | +| `upaas_http_requests_total` | Counter | `method`, `status_code` | HTTP requests | +| `upaas_http_request_duration_seconds` | Histogram | `method` | HTTP request latency | +| `upaas_http_response_size_bytes` | Histogram | `method` | HTTP response sizes | +| `upaas_audit_events_total` | Counter | `action` | Audit log events | + +### Audit Log + +All user-facing actions are recorded in an `audit_log` SQLite table with: + +- **Who**: user ID and username +- **What**: action type and affected resource (app, deployment, session, etc.) +- **Where**: client IP (via X-Real-IP/X-Forwarded-For/RemoteAddr) +- **When**: timestamp + +Audited actions include login/logout, app CRUD, deployments, container +start/stop/restart, rollbacks, deployment cancellation, and webhook receipt. + +The audit log is available via the API at `GET /api/audit?limit=N` (max 500, +default 50). + +### Structured Logging + +All operations use Go's `slog` structured logger. HTTP requests are logged with +method, URL, status code, response size, latency, user agent, and client IP. +Deployment events are logged with app name, status, and duration. Audit events +are also logged to stdout for correlation with external log aggregators. + ## License WTFPL diff --git a/cmd/upaasd/main.go b/cmd/upaasd/main.go index 1d37fd3..e44457d 100644 --- a/cmd/upaasd/main.go +++ b/cmd/upaasd/main.go @@ -11,9 +11,11 @@ import ( "sneak.berlin/go/upaas/internal/handlers" "sneak.berlin/go/upaas/internal/healthcheck" "sneak.berlin/go/upaas/internal/logger" + "sneak.berlin/go/upaas/internal/metrics" "sneak.berlin/go/upaas/internal/middleware" "sneak.berlin/go/upaas/internal/server" "sneak.berlin/go/upaas/internal/service/app" + "sneak.berlin/go/upaas/internal/service/audit" "sneak.berlin/go/upaas/internal/service/auth" "sneak.berlin/go/upaas/internal/service/deploy" "sneak.berlin/go/upaas/internal/service/notify" @@ -41,6 +43,7 @@ func main() { logger.New, config.New, database.New, + metrics.New, healthcheck.New, auth.New, app.New, @@ -48,6 +51,7 @@ func main() { notify.New, deploy.New, webhook.New, + audit.New, middleware.New, handlers.New, server.New, diff --git a/internal/database/migrations/007_add_audit_log.sql b/internal/database/migrations/007_add_audit_log.sql new file mode 100644 index 0000000..a63bd22 --- /dev/null +++ b/internal/database/migrations/007_add_audit_log.sql @@ -0,0 +1,16 @@ +-- Audit log table for tracking user actions. +CREATE TABLE audit_log ( + id INTEGER PRIMARY KEY, + user_id INTEGER, + username TEXT NOT NULL, + action TEXT NOT NULL, + resource_type TEXT NOT NULL, + resource_id TEXT, + detail TEXT, + remote_ip TEXT, + created_at DATETIME DEFAULT CURRENT_TIMESTAMP +); + +CREATE INDEX idx_audit_log_created_at ON audit_log(created_at); +CREATE INDEX idx_audit_log_action ON audit_log(action); +CREATE INDEX idx_audit_log_resource ON audit_log(resource_type, resource_id); diff --git a/internal/handlers/api.go b/internal/handlers/api.go index 3e3a537..a6a54c6 100644 --- a/internal/handlers/api.go +++ b/internal/handlers/api.go @@ -1,6 +1,7 @@ package handlers import ( + "database/sql" "encoding/json" "net/http" "strconv" @@ -120,6 +121,9 @@ func (h *Handlers) HandleAPILoginPOST() http.HandlerFunc { return } + h.auditLog(request, models.AuditActionLogin, + models.AuditResourceSession, "", "api login") + h.respondJSON(writer, request, loginResponse{ UserID: user.ID, Username: user.Username, @@ -243,3 +247,79 @@ func (h *Handlers) HandleAPIWhoAmI() http.HandlerFunc { }, http.StatusOK) } } + +// auditLogDefaultLimit is the default number of audit entries returned. +const auditLogDefaultLimit = 50 + +// auditLogMaxLimit is the maximum number of audit entries returned. +const auditLogMaxLimit = 500 + +// HandleAPIAuditLog returns a handler that lists recent audit log entries. +func (h *Handlers) HandleAPIAuditLog() http.HandlerFunc { + type auditEntryResponse struct { + ID int64 `json:"id"` + UserID *int64 `json:"userId,omitempty"` + Username string `json:"username"` + Action string `json:"action"` + ResourceType string `json:"resourceType"` + ResourceID string `json:"resourceId,omitempty"` + Detail string `json:"detail,omitempty"` + RemoteIP string `json:"remoteIp,omitempty"` + CreatedAt string `json:"createdAt"` + } + + return func(writer http.ResponseWriter, request *http.Request) { + limit := auditLogDefaultLimit + + if limitStr := request.URL.Query().Get("limit"); limitStr != "" { + parsed, parseErr := strconv.Atoi(limitStr) + if parseErr == nil && parsed > 0 && parsed <= auditLogMaxLimit { + limit = parsed + } + } + + entries, err := h.audit.Recent(request.Context(), limit) + if err != nil { + h.log.Error("failed to fetch audit log", "error", err) + h.respondJSON(writer, request, + map[string]string{"error": "failed to fetch audit log"}, + http.StatusInternalServerError) + + return + } + + result := make([]auditEntryResponse, 0, len(entries)) + + for _, e := range entries { + entry := auditEntryResponse{ + ID: e.ID, + Username: e.Username, + Action: string(e.Action), + ResourceType: string(e.ResourceType), + CreatedAt: e.CreatedAt.UTC().Format("2006-01-02T15:04:05Z"), + } + + if e.UserID.Valid { + id := e.UserID.Int64 + entry.UserID = &id + } + + entry.ResourceID = nullStringValue(e.ResourceID) + entry.Detail = nullStringValue(e.Detail) + entry.RemoteIP = nullStringValue(e.RemoteIP) + + result = append(result, entry) + } + + h.respondJSON(writer, request, result, http.StatusOK) + } +} + +// nullStringValue returns the string value if valid, empty string otherwise. +func nullStringValue(ns sql.NullString) string { + if ns.Valid { + return ns.String + } + + return "" +} diff --git a/internal/handlers/app.go b/internal/handlers/app.go index d55c27c..57b4814 100644 --- a/internal/handlers/app.go +++ b/internal/handlers/app.go @@ -119,6 +119,9 @@ func (h *Handlers) HandleAppCreate() http.HandlerFunc { //nolint:funlen // valid return } + h.auditLog(request, models.AuditActionAppCreate, + models.AuditResourceApp, createdApp.ID, "created app: "+createdApp.Name) + http.Redirect(writer, request, "/apps/"+createdApp.ID, http.StatusSeeOther) } } @@ -289,6 +292,9 @@ func (h *Handlers) HandleAppUpdate() http.HandlerFunc { //nolint:funlen // valid return } + h.auditLog(request, models.AuditActionAppUpdate, + models.AuditResourceApp, application.ID, "updated app: "+application.Name) + redirectURL := "/apps/" + application.ID + "?success=updated" http.Redirect(writer, request, redirectURL, http.StatusSeeOther) } @@ -344,6 +350,9 @@ func (h *Handlers) HandleAppDelete() http.HandlerFunc { return } + h.auditLog(request, models.AuditActionAppDelete, + models.AuditResourceApp, appID, "deleted app: "+application.Name) + http.Redirect(writer, request, "/", http.StatusSeeOther) } } @@ -360,6 +369,9 @@ func (h *Handlers) HandleAppDeploy() http.HandlerFunc { return } + h.auditLog(request, models.AuditActionAppDeploy, + models.AuditResourceApp, application.ID, "manual deploy: "+application.Name) + // Trigger deployment in background with a detached context // so the deployment continues even if the HTTP request is cancelled deployCtx := context.WithoutCancel(request.Context()) @@ -399,6 +411,8 @@ func (h *Handlers) HandleCancelDeploy() http.HandlerFunc { cancelled := h.deploy.CancelDeploy(application.ID) if cancelled { h.log.Info("deployment cancelled by user", "app", application.Name) + h.auditLog(request, models.AuditActionDeployCancel, + models.AuditResourceDeployment, application.ID, "cancelled deploy: "+application.Name) } http.Redirect( @@ -430,6 +444,9 @@ func (h *Handlers) HandleAppRollback() http.HandlerFunc { return } + h.auditLog(request, models.AuditActionAppRollback, + models.AuditResourceApp, application.ID, "rolled back: "+application.Name) + http.Redirect(writer, request, "/apps/"+application.ID+"?success=rolledback", http.StatusSeeOther) } } @@ -834,11 +851,29 @@ func (h *Handlers) handleContainerAction( } else { h.log.Info("container action completed", "action", action, "app", application.Name, "container", containerID) + + auditAction := containerActionToAuditAction(action) + h.auditLog(request, auditAction, + models.AuditResourceApp, appID, string(action)+" container: "+application.Name) } http.Redirect(writer, request, "/apps/"+appID, http.StatusSeeOther) } +// containerActionToAuditAction maps container actions to audit actions. +func containerActionToAuditAction(action containerAction) models.AuditAction { + switch action { + case actionRestart: + return models.AuditActionAppRestart + case actionStop: + return models.AuditActionAppStop + case actionStart: + return models.AuditActionAppStart + default: + return models.AuditAction("app." + string(action)) + } +} + // HandleAppRestart handles restarting an app's container. func (h *Handlers) HandleAppRestart() http.HandlerFunc { return func(writer http.ResponseWriter, request *http.Request) { diff --git a/internal/handlers/auth.go b/internal/handlers/auth.go index 96e6b0b..665e15a 100644 --- a/internal/handlers/auth.go +++ b/internal/handlers/auth.go @@ -3,6 +3,7 @@ package handlers import ( "net/http" + "sneak.berlin/go/upaas/internal/models" "sneak.berlin/go/upaas/templates" ) @@ -61,6 +62,9 @@ func (h *Handlers) HandleLoginPOST() http.HandlerFunc { return } + h.auditLog(request, models.AuditActionLogin, + models.AuditResourceSession, "", "user logged in") + http.Redirect(writer, request, "/", http.StatusSeeOther) } } @@ -68,6 +72,9 @@ func (h *Handlers) HandleLoginPOST() http.HandlerFunc { // HandleLogout handles logout requests. func (h *Handlers) HandleLogout() http.HandlerFunc { return func(writer http.ResponseWriter, request *http.Request) { + h.auditLog(request, models.AuditActionLogout, + models.AuditResourceSession, "", "user logged out") + destroyErr := h.auth.DestroySession(writer, request) if destroyErr != nil { h.log.Error("failed to destroy session", "error", destroyErr) diff --git a/internal/handlers/handlers.go b/internal/handlers/handlers.go index 728eeb9..7e0f642 100644 --- a/internal/handlers/handlers.go +++ b/internal/handlers/handlers.go @@ -15,7 +15,9 @@ import ( "sneak.berlin/go/upaas/internal/globals" "sneak.berlin/go/upaas/internal/healthcheck" "sneak.berlin/go/upaas/internal/logger" + "sneak.berlin/go/upaas/internal/models" "sneak.berlin/go/upaas/internal/service/app" + "sneak.berlin/go/upaas/internal/service/audit" "sneak.berlin/go/upaas/internal/service/auth" "sneak.berlin/go/upaas/internal/service/deploy" "sneak.berlin/go/upaas/internal/service/webhook" @@ -35,6 +37,7 @@ type Params struct { Deploy *deploy.Service Webhook *webhook.Service Docker *docker.Client + Audit *audit.Service } // Handlers provides HTTP request handlers. @@ -48,6 +51,7 @@ type Handlers struct { deploy *deploy.Service webhook *webhook.Service docker *docker.Client + audit *audit.Service globals *globals.Globals } @@ -63,10 +67,48 @@ func New(_ fx.Lifecycle, params Params) (*Handlers, error) { deploy: params.Deploy, webhook: params.Webhook, docker: params.Docker, + audit: params.Audit, globals: params.Globals, }, nil } +// currentUser returns the currently authenticated user, or nil if not authenticated. +func (h *Handlers) currentUser(request *http.Request) *models.User { + user, err := h.auth.GetCurrentUser(request.Context(), request) + if err != nil || user == nil { + return nil + } + + return user +} + +// auditLog records an audit entry for the current request. +func (h *Handlers) auditLog( + request *http.Request, + action models.AuditAction, + resourceType models.AuditResourceType, + resourceID string, + detail string, +) { + user := h.currentUser(request) + + entry := audit.LogEntry{ + Action: action, + ResourceType: resourceType, + ResourceID: resourceID, + Detail: detail, + } + + if user != nil { + entry.UserID = user.ID + entry.Username = user.Username + } else { + entry.Username = "anonymous" + } + + h.audit.LogFromRequest(request.Context(), request, entry) +} + // addGlobals adds version info and CSRF token to template data map. func (h *Handlers) addGlobals( data map[string]any, diff --git a/internal/handlers/handlers_test.go b/internal/handlers/handlers_test.go index 6a7d0b3..c2462c8 100644 --- a/internal/handlers/handlers_test.go +++ b/internal/handlers/handlers_test.go @@ -11,6 +11,7 @@ import ( "time" "github.com/go-chi/chi/v5" + "github.com/prometheus/client_golang/prometheus" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" "go.uber.org/fx" @@ -24,8 +25,10 @@ import ( "sneak.berlin/go/upaas/internal/handlers" "sneak.berlin/go/upaas/internal/healthcheck" "sneak.berlin/go/upaas/internal/logger" + "sneak.berlin/go/upaas/internal/metrics" "sneak.berlin/go/upaas/internal/middleware" "sneak.berlin/go/upaas/internal/service/app" + "sneak.berlin/go/upaas/internal/service/audit" "sneak.berlin/go/upaas/internal/service/auth" "sneak.berlin/go/upaas/internal/service/deploy" "sneak.berlin/go/upaas/internal/service/notify" @@ -92,7 +95,8 @@ func createAppServices( logInstance *logger.Logger, dbInstance *database.Database, cfg *config.Config, -) (*auth.Service, *app.Service, *deploy.Service, *webhook.Service, *docker.Client) { + metricsInstance *metrics.Metrics, +) (*auth.Service, *app.Service, *deploy.Service, *webhook.Service, *docker.Client, *audit.Service) { t.Helper() authSvc, authErr := auth.New(fx.Lifecycle(nil), auth.ServiceParams{ @@ -125,6 +129,7 @@ func createAppServices( Database: dbInstance, Docker: dockerClient, Notify: notifySvc, + Metrics: metricsInstance, }) require.NoError(t, deployErr) @@ -132,10 +137,18 @@ func createAppServices( Logger: logInstance, Database: dbInstance, Deploy: deploySvc, + Metrics: metricsInstance, }) require.NoError(t, webhookErr) - return authSvc, appSvc, deploySvc, webhookSvc, dockerClient + auditSvc, auditErr := audit.New(fx.Lifecycle(nil), audit.ServiceParams{ + Logger: logInstance, + Database: dbInstance, + Metrics: metricsInstance, + }) + require.NoError(t, auditErr) + + return authSvc, appSvc, deploySvc, webhookSvc, dockerClient, auditSvc } func setupTestHandlers(t *testing.T) *testContext { @@ -145,11 +158,14 @@ func setupTestHandlers(t *testing.T) *testContext { globalInstance, logInstance, dbInstance, hcInstance := createCoreServices(t, cfg) - authSvc, appSvc, deploySvc, webhookSvc, dockerClient := createAppServices( + metricsInstance := metrics.NewForTest(prometheus.NewRegistry()) + + authSvc, appSvc, deploySvc, webhookSvc, dockerClient, auditSvc := createAppServices( t, logInstance, dbInstance, cfg, + metricsInstance, ) handlersInstance, handlerErr := handlers.New( @@ -164,6 +180,7 @@ func setupTestHandlers(t *testing.T) *testContext { Deploy: deploySvc, Webhook: webhookSvc, Docker: dockerClient, + Audit: auditSvc, }, ) require.NoError(t, handlerErr) @@ -173,6 +190,7 @@ func setupTestHandlers(t *testing.T) *testContext { Globals: globalInstance, Config: cfg, Auth: authSvc, + Metrics: metricsInstance, }) require.NoError(t, mwErr) diff --git a/internal/handlers/setup.go b/internal/handlers/setup.go index e84fc47..a2c52a9 100644 --- a/internal/handlers/setup.go +++ b/internal/handlers/setup.go @@ -3,6 +3,7 @@ package handlers import ( "net/http" + "sneak.berlin/go/upaas/internal/models" "sneak.berlin/go/upaas/templates" ) @@ -111,6 +112,9 @@ func (h *Handlers) HandleSetupPOST() http.HandlerFunc { return } + h.auditLog(request, models.AuditActionSetup, + models.AuditResourceUser, "", "initial setup completed") + http.Redirect(writer, request, "/", http.StatusSeeOther) } } diff --git a/internal/handlers/webhook.go b/internal/handlers/webhook.go index 806f8da..7113e4c 100644 --- a/internal/handlers/webhook.go +++ b/internal/handlers/webhook.go @@ -7,13 +7,14 @@ import ( "github.com/go-chi/chi/v5" "sneak.berlin/go/upaas/internal/models" + "sneak.berlin/go/upaas/internal/service/audit" ) // maxWebhookBodySize is the maximum allowed size of a webhook request body (1MB). const maxWebhookBodySize = 1 << 20 // HandleWebhook handles incoming Gitea webhooks. -func (h *Handlers) HandleWebhook() http.HandlerFunc { +func (h *Handlers) HandleWebhook() http.HandlerFunc { //nolint:funlen // audit logging adds necessary length return func(writer http.ResponseWriter, request *http.Request) { secret := chi.URLParam(request, "secret") if secret == "" { @@ -56,6 +57,15 @@ func (h *Handlers) HandleWebhook() http.HandlerFunc { eventType = "push" } + // Log webhook receipt + h.audit.LogFromRequest(request.Context(), request, audit.LogEntry{ + Username: "webhook", + Action: models.AuditActionWebhookReceive, + ResourceType: models.AuditResourceWebhook, + ResourceID: application.ID, + Detail: "webhook from app: " + application.Name + ", event: " + eventType, + }) + // Process webhook webhookErr := h.webhook.HandleWebhook( request.Context(), diff --git a/internal/metrics/metrics.go b/internal/metrics/metrics.go new file mode 100644 index 0000000..82b74ed --- /dev/null +++ b/internal/metrics/metrics.go @@ -0,0 +1,148 @@ +// Package metrics provides Prometheus metrics for upaas. +// +//nolint:revive // "metrics" matches the domain; runtime/metrics is rarely imported directly +package metrics + +import ( + "github.com/prometheus/client_golang/prometheus" + "github.com/prometheus/client_golang/prometheus/promauto" + "go.uber.org/fx" +) + +// Params contains dependencies for Metrics. +type Params struct { + fx.In +} + +// Metrics holds all Prometheus metrics for the application. +type Metrics struct { + // Deployment metrics. + DeploymentsTotal *prometheus.CounterVec + DeploymentDuration *prometheus.HistogramVec + DeploymentsInFlight *prometheus.GaugeVec + + // Container health metrics. + ContainerHealthy *prometheus.GaugeVec + + // Webhook metrics. + WebhookEventsTotal *prometheus.CounterVec + + // HTTP request metrics. + HTTPRequestsTotal *prometheus.CounterVec + HTTPRequestDuration *prometheus.HistogramVec + HTTPResponseSizeBytes *prometheus.HistogramVec + + // Audit log metrics. + AuditEventsTotal *prometheus.CounterVec +} + +// New creates a new Metrics instance with all Prometheus metrics registered +// in the default Prometheus registry. +func New(_ fx.Lifecycle, _ Params) (*Metrics, error) { + return newMetrics(promauto.With(prometheus.DefaultRegisterer)), nil +} + +// NewForTest creates a Metrics instance with a custom registry for test isolation. +func NewForTest(reg prometheus.Registerer) *Metrics { + return newMetrics(promauto.With(reg)) +} + +// newMetrics creates a Metrics instance using the given factory. +func newMetrics(factory promauto.Factory) *Metrics { + return &Metrics{ + DeploymentsTotal: newDeploymentsTotal(factory), + DeploymentDuration: newDeploymentDuration(factory), + DeploymentsInFlight: newDeploymentsInFlight(factory), + ContainerHealthy: newContainerHealthy(factory), + WebhookEventsTotal: newWebhookEventsTotal(factory), + HTTPRequestsTotal: newHTTPRequestsTotal(factory), + HTTPRequestDuration: newHTTPRequestDuration(factory), + HTTPResponseSizeBytes: newHTTPResponseSizeBytes(factory), + AuditEventsTotal: newAuditEventsTotal(factory), + } +} + +func newDeploymentsTotal(f promauto.Factory) *prometheus.CounterVec { + return f.NewCounterVec(prometheus.CounterOpts{ + Namespace: "upaas", + Subsystem: "deployments", + Name: "total", + Help: "Total number of deployments by app and status.", + }, []string{"app", "status"}) +} + +func newDeploymentDuration(f promauto.Factory) *prometheus.HistogramVec { + return f.NewHistogramVec(prometheus.HistogramOpts{ + Namespace: "upaas", + Subsystem: "deployments", + Name: "duration_seconds", + Help: "Duration of deployments in seconds by app and status.", + Buckets: []float64{10, 30, 60, 120, 300, 600, 1800}, + }, []string{"app", "status"}) +} + +func newDeploymentsInFlight(f promauto.Factory) *prometheus.GaugeVec { + return f.NewGaugeVec(prometheus.GaugeOpts{ + Namespace: "upaas", + Subsystem: "deployments", + Name: "in_flight", + Help: "Number of deployments currently in progress by app.", + }, []string{"app"}) +} + +func newContainerHealthy(f promauto.Factory) *prometheus.GaugeVec { + return f.NewGaugeVec(prometheus.GaugeOpts{ + Namespace: "upaas", + Subsystem: "container", + Name: "healthy", + Help: "Whether the app container is healthy (1) or unhealthy (0).", + }, []string{"app"}) +} + +func newWebhookEventsTotal(f promauto.Factory) *prometheus.CounterVec { + return f.NewCounterVec(prometheus.CounterOpts{ + Namespace: "upaas", + Subsystem: "webhook", + Name: "events_total", + Help: "Total number of webhook events by app, event type, and matched status.", + }, []string{"app", "event_type", "matched"}) +} + +func newHTTPRequestsTotal(f promauto.Factory) *prometheus.CounterVec { + return f.NewCounterVec(prometheus.CounterOpts{ + Namespace: "upaas", + Subsystem: "http", + Name: "requests_total", + Help: "Total number of HTTP requests by method and status code.", + }, []string{"method", "status_code"}) +} + +func newHTTPRequestDuration(f promauto.Factory) *prometheus.HistogramVec { + return f.NewHistogramVec(prometheus.HistogramOpts{ + Namespace: "upaas", + Subsystem: "http", + Name: "request_duration_seconds", + Help: "Duration of HTTP requests in seconds by method.", + Buckets: prometheus.DefBuckets, + }, []string{"method"}) +} + +//nolint:mnd // bucket boundaries are domain-specific constants +func newHTTPResponseSizeBytes(f promauto.Factory) *prometheus.HistogramVec { + return f.NewHistogramVec(prometheus.HistogramOpts{ + Namespace: "upaas", + Subsystem: "http", + Name: "response_size_bytes", + Help: "Size of HTTP responses in bytes by method.", + Buckets: prometheus.ExponentialBuckets(100, 10, 7), + }, []string{"method"}) +} + +func newAuditEventsTotal(f promauto.Factory) *prometheus.CounterVec { + return f.NewCounterVec(prometheus.CounterOpts{ + Namespace: "upaas", + Subsystem: "audit", + Name: "events_total", + Help: "Total number of audit log events by action.", + }, []string{"action"}) +} diff --git a/internal/metrics/metrics_test.go b/internal/metrics/metrics_test.go new file mode 100644 index 0000000..ea2509c --- /dev/null +++ b/internal/metrics/metrics_test.go @@ -0,0 +1,158 @@ +package metrics_test + +import ( + "testing" + + "github.com/prometheus/client_golang/prometheus" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + "go.uber.org/fx" + + "sneak.berlin/go/upaas/internal/metrics" +) + +func TestNewForTest(t *testing.T) { + t.Parallel() + + reg := prometheus.NewRegistry() + m := metrics.NewForTest(reg) + + require.NotNil(t, m) + assert.NotNil(t, m.DeploymentsTotal) + assert.NotNil(t, m.DeploymentDuration) + assert.NotNil(t, m.DeploymentsInFlight) + assert.NotNil(t, m.ContainerHealthy) + assert.NotNil(t, m.WebhookEventsTotal) + assert.NotNil(t, m.HTTPRequestsTotal) + assert.NotNil(t, m.HTTPRequestDuration) + assert.NotNil(t, m.HTTPResponseSizeBytes) + assert.NotNil(t, m.AuditEventsTotal) +} + +func TestNew(t *testing.T) { + t.Parallel() + + m, err := metrics.New(fx.Lifecycle(nil), metrics.Params{}) + require.NoError(t, err) + require.NotNil(t, m) +} + +func TestDeploymentMetrics(t *testing.T) { + t.Parallel() + + reg := prometheus.NewRegistry() + m := metrics.NewForTest(reg) + + m.DeploymentsTotal.WithLabelValues("test-app", "success").Inc() + m.DeploymentDuration.WithLabelValues("test-app", "success").Observe(42.5) + m.DeploymentsInFlight.WithLabelValues("test-app").Set(1) + + families, err := reg.Gather() + require.NoError(t, err) + + names := make(map[string]bool) + + for _, f := range families { + names[f.GetName()] = true + } + + assert.True(t, names["upaas_deployments_total"]) + assert.True(t, names["upaas_deployments_duration_seconds"]) + assert.True(t, names["upaas_deployments_in_flight"]) +} + +func TestContainerHealthMetrics(t *testing.T) { + t.Parallel() + + reg := prometheus.NewRegistry() + m := metrics.NewForTest(reg) + + m.ContainerHealthy.WithLabelValues("my-app").Set(1) + + families, err := reg.Gather() + require.NoError(t, err) + + found := false + + for _, f := range families { + if f.GetName() == "upaas_container_healthy" { + found = true + + break + } + } + + assert.True(t, found) +} + +func TestWebhookMetrics(t *testing.T) { + t.Parallel() + + reg := prometheus.NewRegistry() + m := metrics.NewForTest(reg) + + m.WebhookEventsTotal.WithLabelValues("test-app", "push", "true").Inc() + + families, err := reg.Gather() + require.NoError(t, err) + + found := false + + for _, f := range families { + if f.GetName() == "upaas_webhook_events_total" { + found = true + + break + } + } + + assert.True(t, found) +} + +func TestHTTPMetrics(t *testing.T) { + t.Parallel() + + reg := prometheus.NewRegistry() + m := metrics.NewForTest(reg) + + m.HTTPRequestsTotal.WithLabelValues("GET", "200").Inc() + m.HTTPRequestDuration.WithLabelValues("GET").Observe(0.05) + m.HTTPResponseSizeBytes.WithLabelValues("GET").Observe(1024) + + families, err := reg.Gather() + require.NoError(t, err) + + names := make(map[string]bool) + + for _, f := range families { + names[f.GetName()] = true + } + + assert.True(t, names["upaas_http_requests_total"]) + assert.True(t, names["upaas_http_request_duration_seconds"]) + assert.True(t, names["upaas_http_response_size_bytes"]) +} + +func TestAuditMetrics(t *testing.T) { + t.Parallel() + + reg := prometheus.NewRegistry() + m := metrics.NewForTest(reg) + + m.AuditEventsTotal.WithLabelValues("login").Inc() + + families, err := reg.Gather() + require.NoError(t, err) + + found := false + + for _, f := range families { + if f.GetName() == "upaas_audit_events_total" { + found = true + + break + } + } + + assert.True(t, found) +} diff --git a/internal/middleware/middleware.go b/internal/middleware/middleware.go index b0ad2d7..d5ff91e 100644 --- a/internal/middleware/middleware.go +++ b/internal/middleware/middleware.go @@ -21,6 +21,7 @@ import ( "sneak.berlin/go/upaas/internal/config" "sneak.berlin/go/upaas/internal/globals" "sneak.berlin/go/upaas/internal/logger" + "sneak.berlin/go/upaas/internal/metrics" "sneak.berlin/go/upaas/internal/service/auth" ) @@ -35,33 +36,37 @@ type Params struct { Globals *globals.Globals Config *config.Config Auth *auth.Service + Metrics *metrics.Metrics } // Middleware provides HTTP middleware. type Middleware struct { - log *slog.Logger - params *Params + log *slog.Logger + metrics *metrics.Metrics + params *Params } // New creates a new Middleware instance. func New(_ fx.Lifecycle, params Params) (*Middleware, error) { return &Middleware{ - log: params.Logger.Get(), - params: ¶ms, + log: params.Logger.Get(), + metrics: params.Metrics, + params: ¶ms, }, nil } -// loggingResponseWriter wraps http.ResponseWriter to capture status code. +// loggingResponseWriter wraps http.ResponseWriter to capture status code and bytes written. type loggingResponseWriter struct { http.ResponseWriter - statusCode int + statusCode int + bytesWritten int } func newLoggingResponseWriter( writer http.ResponseWriter, ) *loggingResponseWriter { - return &loggingResponseWriter{writer, http.StatusOK} + return &loggingResponseWriter{ResponseWriter: writer, statusCode: http.StatusOK} } func (lrw *loggingResponseWriter) WriteHeader(code int) { @@ -69,7 +74,14 @@ func (lrw *loggingResponseWriter) WriteHeader(code int) { lrw.ResponseWriter.WriteHeader(code) } -// Logging returns a request logging middleware. +func (lrw *loggingResponseWriter) Write(b []byte) (int, error) { + n, err := lrw.ResponseWriter.Write(b) + lrw.bytesWritten += n + + return n, err +} + +// Logging returns a request logging middleware that also records HTTP metrics. func (m *Middleware) Logging() func(http.Handler) http.Handler { return func(next http.Handler) http.Handler { return http.HandlerFunc(func( @@ -83,6 +95,8 @@ func (m *Middleware) Logging() func(http.Handler) http.Handler { defer func() { latency := time.Since(start) reqID := middleware.GetReqID(ctx) + statusStr := strconv.Itoa(lrw.statusCode) + m.log.InfoContext(ctx, "request", "request_start", start, "method", request.Method, @@ -93,8 +107,19 @@ func (m *Middleware) Logging() func(http.Handler) http.Handler { "proto", request.Proto, "remoteIP", realIP(request), "status", lrw.statusCode, + "bytes", lrw.bytesWritten, "latency_ms", latency.Milliseconds(), ) + + m.metrics.HTTPRequestsTotal.WithLabelValues( + request.Method, statusStr, + ).Inc() + m.metrics.HTTPRequestDuration.WithLabelValues( + request.Method, + ).Observe(latency.Seconds()) + m.metrics.HTTPResponseSizeBytes.WithLabelValues( + request.Method, + ).Observe(float64(lrw.bytesWritten)) }() next.ServeHTTP(lrw, request) diff --git a/internal/models/audit_log.go b/internal/models/audit_log.go new file mode 100644 index 0000000..c66d121 --- /dev/null +++ b/internal/models/audit_log.go @@ -0,0 +1,193 @@ +package models + +import ( + "context" + "database/sql" + "fmt" + "time" + + "sneak.berlin/go/upaas/internal/database" +) + +// AuditAction represents the type of audited user action. +type AuditAction string + +// Audit action constants. +const ( + AuditActionLogin AuditAction = "login" + AuditActionLogout AuditAction = "logout" + AuditActionAppCreate AuditAction = "app.create" + AuditActionAppUpdate AuditAction = "app.update" + AuditActionAppDelete AuditAction = "app.delete" + AuditActionAppDeploy AuditAction = "app.deploy" + AuditActionAppRollback AuditAction = "app.rollback" + AuditActionAppRestart AuditAction = "app.restart" + AuditActionAppStop AuditAction = "app.stop" + AuditActionAppStart AuditAction = "app.start" + AuditActionDeployCancel AuditAction = "deploy.cancel" + AuditActionEnvVarSave AuditAction = "env_var.save" + AuditActionLabelAdd AuditAction = "label.add" + AuditActionLabelEdit AuditAction = "label.edit" + AuditActionLabelDelete AuditAction = "label.delete" + AuditActionVolumeAdd AuditAction = "volume.add" + AuditActionVolumeEdit AuditAction = "volume.edit" + AuditActionVolumeDelete AuditAction = "volume.delete" + AuditActionPortAdd AuditAction = "port.add" + AuditActionPortDelete AuditAction = "port.delete" + AuditActionSetup AuditAction = "setup" + AuditActionWebhookReceive AuditAction = "webhook.receive" +) + +// AuditResourceType represents the type of resource being acted on. +type AuditResourceType string + +// Audit resource type constants. +const ( + AuditResourceApp AuditResourceType = "app" + AuditResourceUser AuditResourceType = "user" + AuditResourceSession AuditResourceType = "session" + AuditResourceEnvVar AuditResourceType = "env_var" + AuditResourceLabel AuditResourceType = "label" + AuditResourceVolume AuditResourceType = "volume" + AuditResourcePort AuditResourceType = "port" + AuditResourceDeployment AuditResourceType = "deployment" + AuditResourceWebhook AuditResourceType = "webhook" +) + +// AuditEntry represents a single audit log entry. +type AuditEntry struct { + db *database.Database + + ID int64 + UserID sql.NullInt64 + Username string + Action AuditAction + ResourceType AuditResourceType + ResourceID sql.NullString + Detail sql.NullString + RemoteIP sql.NullString + CreatedAt time.Time +} + +// NewAuditEntry creates a new AuditEntry with a database reference. +func NewAuditEntry(db *database.Database) *AuditEntry { + return &AuditEntry{db: db} +} + +// Save inserts the audit entry into the database. +func (a *AuditEntry) Save(ctx context.Context) error { + query := ` + INSERT INTO audit_log ( + user_id, username, action, resource_type, resource_id, + detail, remote_ip + ) VALUES (?, ?, ?, ?, ?, ?, ?)` + + result, err := a.db.Exec(ctx, query, + a.UserID, a.Username, a.Action, a.ResourceType, + a.ResourceID, a.Detail, a.RemoteIP, + ) + if err != nil { + return fmt.Errorf("inserting audit entry: %w", err) + } + + id, err := result.LastInsertId() + if err != nil { + return fmt.Errorf("getting audit entry id: %w", err) + } + + a.ID = id + + return nil +} + +// FindAuditEntries returns recent audit log entries, newest first. +func FindAuditEntries( + ctx context.Context, + db *database.Database, + limit int, +) ([]*AuditEntry, error) { + query := ` + SELECT id, user_id, username, action, resource_type, resource_id, + detail, remote_ip, created_at + FROM audit_log + ORDER BY created_at DESC + LIMIT ?` + + rows, err := db.Query(ctx, query, limit) + if err != nil { + return nil, fmt.Errorf("querying audit entries: %w", err) + } + + defer func() { _ = rows.Close() }() + + return scanAuditRows(rows) +} + +// FindAuditEntriesByResource returns audit log entries for a specific resource. +func FindAuditEntriesByResource( + ctx context.Context, + db *database.Database, + resourceType AuditResourceType, + resourceID string, + limit int, +) ([]*AuditEntry, error) { + query := ` + SELECT id, user_id, username, action, resource_type, resource_id, + detail, remote_ip, created_at + FROM audit_log + WHERE resource_type = ? AND resource_id = ? + ORDER BY created_at DESC + LIMIT ?` + + rows, err := db.Query(ctx, query, resourceType, resourceID, limit) + if err != nil { + return nil, fmt.Errorf("querying audit entries by resource: %w", err) + } + + defer func() { _ = rows.Close() }() + + return scanAuditRows(rows) +} + +// CountAuditEntries returns the total number of audit log entries. +func CountAuditEntries( + ctx context.Context, + db *database.Database, +) (int, error) { + var count int + + row := db.QueryRow(ctx, "SELECT COUNT(*) FROM audit_log") + + err := row.Scan(&count) + if err != nil { + return 0, fmt.Errorf("counting audit entries: %w", err) + } + + return count, nil +} + +func scanAuditRows(rows *sql.Rows) ([]*AuditEntry, error) { + var entries []*AuditEntry + + for rows.Next() { + entry := &AuditEntry{} + + scanErr := rows.Scan( + &entry.ID, &entry.UserID, &entry.Username, &entry.Action, + &entry.ResourceType, &entry.ResourceID, &entry.Detail, + &entry.RemoteIP, &entry.CreatedAt, + ) + if scanErr != nil { + return nil, fmt.Errorf("scanning audit entry: %w", scanErr) + } + + entries = append(entries, entry) + } + + rowsErr := rows.Err() + if rowsErr != nil { + return nil, fmt.Errorf("iterating audit entries: %w", rowsErr) + } + + return entries, nil +} diff --git a/internal/models/models_test.go b/internal/models/models_test.go index 2d894b5..537b98d 100644 --- a/internal/models/models_test.go +++ b/internal/models/models_test.go @@ -23,6 +23,7 @@ const ( testBranch = "main" testValue = "value" testEventType = "push" + testAdmin = "admin" ) func setupTestDB(t *testing.T) (*database.Database, func()) { @@ -183,7 +184,7 @@ func TestUserExists(t *testing.T) { defer cleanup() user := models.NewUser(testDB) - user.Username = "admin" + user.Username = testAdmin user.PasswordHash = testHash err := user.Save(context.Background()) @@ -781,6 +782,179 @@ func TestCascadeDelete(t *testing.T) { }) } +// AuditEntry Tests. + +func TestAuditEntryCreateAndFind(t *testing.T) { + t.Parallel() + + testDB, cleanup := setupTestDB(t) + defer cleanup() + + entry := models.NewAuditEntry(testDB) + entry.Username = testAdmin + entry.Action = models.AuditActionLogin + entry.ResourceType = models.AuditResourceSession + + err := entry.Save(context.Background()) + require.NoError(t, err) + assert.NotZero(t, entry.ID) + + entries, err := models.FindAuditEntries(context.Background(), testDB, 10) + require.NoError(t, err) + require.Len(t, entries, 1) + assert.Equal(t, testAdmin, entries[0].Username) + assert.Equal(t, models.AuditActionLogin, entries[0].Action) + assert.Equal(t, models.AuditResourceSession, entries[0].ResourceType) +} + +func TestAuditEntryWithAllFields(t *testing.T) { + t.Parallel() + + testDB, cleanup := setupTestDB(t) + defer cleanup() + + entry := models.NewAuditEntry(testDB) + entry.UserID = sql.NullInt64{Int64: 1, Valid: true} + entry.Username = testAdmin + entry.Action = models.AuditActionAppCreate + entry.ResourceType = models.AuditResourceApp + entry.ResourceID = sql.NullString{String: "app-123", Valid: true} + entry.Detail = sql.NullString{String: "created new app", Valid: true} + entry.RemoteIP = sql.NullString{String: "192.168.1.1", Valid: true} + + err := entry.Save(context.Background()) + require.NoError(t, err) + + entries, err := models.FindAuditEntries(context.Background(), testDB, 10) + require.NoError(t, err) + require.Len(t, entries, 1) + assert.Equal(t, int64(1), entries[0].UserID.Int64) + assert.Equal(t, "app-123", entries[0].ResourceID.String) + assert.Equal(t, "created new app", entries[0].Detail.String) + assert.Equal(t, "192.168.1.1", entries[0].RemoteIP.String) +} + +func TestAuditEntryFindByResource(t *testing.T) { + t.Parallel() + + testDB, cleanup := setupTestDB(t) + defer cleanup() + + // Create entries for different resources. + for _, action := range []models.AuditAction{ + models.AuditActionAppCreate, + models.AuditActionAppUpdate, + models.AuditActionAppDeploy, + } { + entry := models.NewAuditEntry(testDB) + entry.Username = testAdmin + entry.Action = action + entry.ResourceType = models.AuditResourceApp + entry.ResourceID = sql.NullString{String: "app-1", Valid: true} + + err := entry.Save(context.Background()) + require.NoError(t, err) + } + + // Create entry for a different resource. + entry := models.NewAuditEntry(testDB) + entry.Username = testAdmin + entry.Action = models.AuditActionLogin + entry.ResourceType = models.AuditResourceSession + + err := entry.Save(context.Background()) + require.NoError(t, err) + + // Find by resource. + appEntries, err := models.FindAuditEntriesByResource( + context.Background(), testDB, + models.AuditResourceApp, "app-1", 10, + ) + require.NoError(t, err) + assert.Len(t, appEntries, 3) + + // All entries. + allEntries, err := models.FindAuditEntries(context.Background(), testDB, 10) + require.NoError(t, err) + assert.Len(t, allEntries, 4) +} + +func TestAuditEntryCount(t *testing.T) { + t.Parallel() + + testDB, cleanup := setupTestDB(t) + defer cleanup() + + count, err := models.CountAuditEntries(context.Background(), testDB) + require.NoError(t, err) + assert.Equal(t, 0, count) + + entry := models.NewAuditEntry(testDB) + entry.Username = testAdmin + entry.Action = models.AuditActionLogin + entry.ResourceType = models.AuditResourceSession + + err = entry.Save(context.Background()) + require.NoError(t, err) + + count, err = models.CountAuditEntries(context.Background(), testDB) + require.NoError(t, err) + assert.Equal(t, 1, count) +} + +func TestAuditEntryFindLimit(t *testing.T) { + t.Parallel() + + testDB, cleanup := setupTestDB(t) + defer cleanup() + + for range 5 { + entry := models.NewAuditEntry(testDB) + entry.Username = testAdmin + entry.Action = models.AuditActionLogin + entry.ResourceType = models.AuditResourceSession + + err := entry.Save(context.Background()) + require.NoError(t, err) + } + + entries, err := models.FindAuditEntries(context.Background(), testDB, 3) + require.NoError(t, err) + assert.Len(t, entries, 3) +} + +func TestAuditEntryOrderByCreatedAtDesc(t *testing.T) { + t.Parallel() + + testDB, cleanup := setupTestDB(t) + defer cleanup() + + actions := []models.AuditAction{ + models.AuditActionLogin, + models.AuditActionAppCreate, + models.AuditActionLogout, + } + + for _, action := range actions { + entry := models.NewAuditEntry(testDB) + entry.Username = testAdmin + entry.Action = action + entry.ResourceType = models.AuditResourceSession + + err := entry.Save(context.Background()) + require.NoError(t, err) + } + + entries, err := models.FindAuditEntries(context.Background(), testDB, 10) + require.NoError(t, err) + require.Len(t, entries, 3) + + // Newest first (logout was last inserted). + assert.Equal(t, models.AuditActionLogout, entries[0].Action) + assert.Equal(t, models.AuditActionAppCreate, entries[1].Action) + assert.Equal(t, models.AuditActionLogin, entries[2].Action) +} + // Helper function to create a test app. func createTestApp(t *testing.T, testDB *database.Database) *models.App { t.Helper() diff --git a/internal/server/routes.go b/internal/server/routes.go index ebddba9..ad601c7 100644 --- a/internal/server/routes.go +++ b/internal/server/routes.go @@ -115,14 +115,13 @@ func (s *Server) SetupRoutes() { r.Get("/apps", s.handlers.HandleAPIListApps()) r.Get("/apps/{id}", s.handlers.HandleAPIGetApp()) r.Get("/apps/{id}/deployments", s.handlers.HandleAPIListDeployments()) + r.Get("/audit", s.handlers.HandleAPIAuditLog()) }) }) - // Metrics endpoint (optional, with basic auth) - if s.params.Config.MetricsUsername != "" { - s.router.Group(func(r chi.Router) { - r.Use(s.mw.MetricsAuth()) - r.Get("/metrics", promhttp.Handler().ServeHTTP) - }) - } + // Metrics endpoint (always available, optionally protected with basic auth) + s.router.Group(func(r chi.Router) { + r.Use(s.mw.MetricsAuth()) + r.Get("/metrics", promhttp.Handler().ServeHTTP) + }) } diff --git a/internal/service/audit/audit.go b/internal/service/audit/audit.go new file mode 100644 index 0000000..2ebd1f5 --- /dev/null +++ b/internal/service/audit/audit.go @@ -0,0 +1,153 @@ +// Package audit provides audit logging for user actions. +package audit + +import ( + "context" + "database/sql" + "log/slog" + "net" + "net/http" + "strings" + + "go.uber.org/fx" + + "sneak.berlin/go/upaas/internal/database" + "sneak.berlin/go/upaas/internal/logger" + "sneak.berlin/go/upaas/internal/metrics" + "sneak.berlin/go/upaas/internal/models" +) + +// ServiceParams contains dependencies for Service. +type ServiceParams struct { + fx.In + + Logger *logger.Logger + Database *database.Database + Metrics *metrics.Metrics +} + +// Service provides audit logging functionality. +type Service struct { + log *slog.Logger + db *database.Database + metrics *metrics.Metrics +} + +// New creates a new audit Service. +func New(_ fx.Lifecycle, params ServiceParams) (*Service, error) { + return &Service{ + log: params.Logger.Get(), + db: params.Database, + metrics: params.Metrics, + }, nil +} + +// LogEntry records an audit event. +type LogEntry struct { + UserID int64 + Username string + Action models.AuditAction + ResourceType models.AuditResourceType + ResourceID string + Detail string + RemoteIP string +} + +// Log records an audit log entry and increments the audit metrics counter. +func (svc *Service) Log(ctx context.Context, entry LogEntry) { + auditEntry := models.NewAuditEntry(svc.db) + auditEntry.Username = entry.Username + auditEntry.Action = entry.Action + auditEntry.ResourceType = entry.ResourceType + + if entry.UserID != 0 { + auditEntry.UserID = sql.NullInt64{Int64: entry.UserID, Valid: true} + } + + if entry.ResourceID != "" { + auditEntry.ResourceID = sql.NullString{String: entry.ResourceID, Valid: true} + } + + if entry.Detail != "" { + auditEntry.Detail = sql.NullString{String: entry.Detail, Valid: true} + } + + if entry.RemoteIP != "" { + auditEntry.RemoteIP = sql.NullString{String: entry.RemoteIP, Valid: true} + } + + err := auditEntry.Save(ctx) + if err != nil { + svc.log.Error("failed to save audit entry", + "error", err, + "action", entry.Action, + "username", entry.Username, + ) + + return + } + + svc.metrics.AuditEventsTotal.WithLabelValues(string(entry.Action)).Inc() + + svc.log.Info("audit", + "action", entry.Action, + "username", entry.Username, + "resource_type", entry.ResourceType, + "resource_id", entry.ResourceID, + ) +} + +// LogFromRequest records an audit log entry, extracting the remote IP from +// the HTTP request. +func (svc *Service) LogFromRequest( + ctx context.Context, + request *http.Request, + entry LogEntry, +) { + entry.RemoteIP = extractRemoteIP(request) + svc.Log(ctx, entry) +} + +// extractRemoteIP extracts the client IP from the request, preferring +// X-Real-IP and X-Forwarded-For headers from trusted proxies. +func extractRemoteIP(r *http.Request) string { + // Check X-Real-IP first + if ip := strings.TrimSpace(r.Header.Get("X-Real-IP")); ip != "" { + return ip + } + + // Check X-Forwarded-For (leftmost = client) + if xff := r.Header.Get("X-Forwarded-For"); xff != "" { + if parts := strings.SplitN(xff, ",", 2); len(parts) > 0 { //nolint:mnd // split limit + if ip := strings.TrimSpace(parts[0]); ip != "" { + return ip + } + } + } + + // Fall back to RemoteAddr + host, _, err := net.SplitHostPort(r.RemoteAddr) + if err != nil { + return r.RemoteAddr + } + + return host +} + +// Recent returns the most recent audit log entries. +func (svc *Service) Recent( + ctx context.Context, + limit int, +) ([]*models.AuditEntry, error) { + return models.FindAuditEntries(ctx, svc.db, limit) +} + +// ForResource returns audit log entries for a specific resource. +func (svc *Service) ForResource( + ctx context.Context, + resourceType models.AuditResourceType, + resourceID string, + limit int, +) ([]*models.AuditEntry, error) { + return models.FindAuditEntriesByResource(ctx, svc.db, resourceType, resourceID, limit) +} diff --git a/internal/service/audit/audit_test.go b/internal/service/audit/audit_test.go new file mode 100644 index 0000000..7ee242f --- /dev/null +++ b/internal/service/audit/audit_test.go @@ -0,0 +1,196 @@ +package audit_test + +import ( + "context" + "log/slog" + "net/http" + "net/http/httptest" + "os" + "testing" + + "github.com/prometheus/client_golang/prometheus" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + "go.uber.org/fx" + + "sneak.berlin/go/upaas/internal/config" + "sneak.berlin/go/upaas/internal/database" + "sneak.berlin/go/upaas/internal/globals" + "sneak.berlin/go/upaas/internal/logger" + "sneak.berlin/go/upaas/internal/metrics" + "sneak.berlin/go/upaas/internal/models" + "sneak.berlin/go/upaas/internal/service/audit" +) + +func setupTestAuditService(t *testing.T) (*audit.Service, *database.Database) { + t.Helper() + + globals.SetAppname("upaas-test") + globals.SetVersion("test") + + tmpDir := t.TempDir() + + cfg := &config.Config{ + DataDir: tmpDir, + } + + log := slog.New(slog.NewTextHandler(os.Stderr, nil)) + logWrapper := logger.NewForTest(log) + + db, err := database.New(fx.Lifecycle(nil), database.Params{ + Logger: logWrapper, + Config: cfg, + }) + require.NoError(t, err) + + reg := prometheus.NewRegistry() + metricsInstance := metrics.NewForTest(reg) + + svc, err := audit.New(fx.Lifecycle(nil), audit.ServiceParams{ + Logger: logWrapper, + Database: db, + Metrics: metricsInstance, + }) + require.NoError(t, err) + + return svc, db +} + +func TestAuditServiceLog(t *testing.T) { + t.Parallel() + + svc, db := setupTestAuditService(t) + ctx := context.Background() + + svc.Log(ctx, audit.LogEntry{ + UserID: 1, + Username: "admin", + Action: models.AuditActionLogin, + ResourceType: models.AuditResourceSession, + Detail: "user logged in", + RemoteIP: "127.0.0.1", + }) + + entries, err := models.FindAuditEntries(ctx, db, 10) + require.NoError(t, err) + require.Len(t, entries, 1) + assert.Equal(t, "admin", entries[0].Username) + assert.Equal(t, models.AuditActionLogin, entries[0].Action) + assert.Equal(t, "127.0.0.1", entries[0].RemoteIP.String) +} + +func TestAuditServiceLogFromRequest(t *testing.T) { + t.Parallel() + + svc, db := setupTestAuditService(t) + ctx := context.Background() + + request := httptest.NewRequest(http.MethodPost, "/apps", nil) + request.RemoteAddr = "10.0.0.1:12345" + + svc.LogFromRequest(ctx, request, audit.LogEntry{ + Username: "admin", + Action: models.AuditActionAppCreate, + ResourceType: models.AuditResourceApp, + ResourceID: "app-1", + Detail: "created app", + }) + + entries, err := models.FindAuditEntries(ctx, db, 10) + require.NoError(t, err) + require.Len(t, entries, 1) + assert.Equal(t, "10.0.0.1", entries[0].RemoteIP.String) + assert.Equal(t, "app-1", entries[0].ResourceID.String) +} + +func TestAuditServiceLogFromRequestWithXRealIP(t *testing.T) { + t.Parallel() + + svc, db := setupTestAuditService(t) + ctx := context.Background() + + request := httptest.NewRequest(http.MethodPost, "/apps", nil) + request.Header.Set("X-Real-IP", "203.0.113.50") + + svc.LogFromRequest(ctx, request, audit.LogEntry{ + Username: "admin", + Action: models.AuditActionAppCreate, + ResourceType: models.AuditResourceApp, + }) + + entries, err := models.FindAuditEntries(ctx, db, 10) + require.NoError(t, err) + require.Len(t, entries, 1) + assert.Equal(t, "203.0.113.50", entries[0].RemoteIP.String) +} + +func TestAuditServiceRecent(t *testing.T) { + t.Parallel() + + svc, _ := setupTestAuditService(t) + ctx := context.Background() + + for range 5 { + svc.Log(ctx, audit.LogEntry{ + Username: "admin", + Action: models.AuditActionLogin, + ResourceType: models.AuditResourceSession, + }) + } + + entries, err := svc.Recent(ctx, 3) + require.NoError(t, err) + assert.Len(t, entries, 3) +} + +func TestAuditServiceForResource(t *testing.T) { + t.Parallel() + + svc, _ := setupTestAuditService(t) + ctx := context.Background() + + // Log entries for different resources. + svc.Log(ctx, audit.LogEntry{ + Username: "admin", + Action: models.AuditActionAppCreate, + ResourceType: models.AuditResourceApp, + ResourceID: "app-1", + }) + svc.Log(ctx, audit.LogEntry{ + Username: "admin", + Action: models.AuditActionAppDeploy, + ResourceType: models.AuditResourceApp, + ResourceID: "app-1", + }) + svc.Log(ctx, audit.LogEntry{ + Username: "admin", + Action: models.AuditActionAppCreate, + ResourceType: models.AuditResourceApp, + ResourceID: "app-2", + }) + + entries, err := svc.ForResource(ctx, models.AuditResourceApp, "app-1", 10) + require.NoError(t, err) + assert.Len(t, entries, 2) +} + +func TestAuditServiceLogWithNoOptionalFields(t *testing.T) { + t.Parallel() + + svc, db := setupTestAuditService(t) + ctx := context.Background() + + svc.Log(ctx, audit.LogEntry{ + Username: "system", + Action: models.AuditActionWebhookReceive, + ResourceType: models.AuditResourceWebhook, + }) + + entries, err := models.FindAuditEntries(ctx, db, 10) + require.NoError(t, err) + require.Len(t, entries, 1) + assert.False(t, entries[0].UserID.Valid) + assert.False(t, entries[0].ResourceID.Valid) + assert.False(t, entries[0].Detail.Valid) + assert.False(t, entries[0].RemoteIP.Valid) +} diff --git a/internal/service/deploy/deploy.go b/internal/service/deploy/deploy.go index 4b78e29..cb3840a 100644 --- a/internal/service/deploy/deploy.go +++ b/internal/service/deploy/deploy.go @@ -21,6 +21,7 @@ import ( "sneak.berlin/go/upaas/internal/database" "sneak.berlin/go/upaas/internal/docker" "sneak.berlin/go/upaas/internal/logger" + "sneak.berlin/go/upaas/internal/metrics" "sneak.berlin/go/upaas/internal/models" "sneak.berlin/go/upaas/internal/service/notify" ) @@ -208,6 +209,7 @@ type ServiceParams struct { Database *database.Database Docker *docker.Client Notify *notify.Service + Metrics *metrics.Metrics } // activeDeploy tracks a running deployment so it can be cancelled. @@ -222,6 +224,7 @@ type Service struct { db *database.Database docker *docker.Client notify *notify.Service + metrics *metrics.Metrics config *config.Config params *ServiceParams activeDeploys sync.Map // map[string]*activeDeploy - per-app active deployment tracking @@ -231,12 +234,13 @@ type Service struct { // New creates a new deploy Service. func New(lc fx.Lifecycle, params ServiceParams) (*Service, error) { svc := &Service{ - log: params.Logger.Get(), - db: params.Database, - docker: params.Docker, - notify: params.Notify, - config: params.Config, - params: ¶ms, + log: params.Logger.Get(), + db: params.Database, + docker: params.Docker, + notify: params.Notify, + metrics: params.Metrics, + config: params.Config, + params: ¶ms, } if lc != nil { @@ -327,6 +331,11 @@ func (svc *Service) Deploy( } defer svc.unlockApp(app.ID) + // Track in-flight deployments + svc.metrics.DeploymentsInFlight.WithLabelValues(app.Name).Inc() + + deployStart := time.Now() + // Set up cancellable context and register as active deploy deployCtx, cancel := context.WithCancel(ctx) done := make(chan struct{}) @@ -334,6 +343,7 @@ func (svc *Service) Deploy( svc.activeDeploys.Store(app.ID, ad) defer func() { + svc.metrics.DeploymentsInFlight.WithLabelValues(app.Name).Dec() cancel() close(done) svc.activeDeploys.Delete(app.ID) @@ -359,7 +369,7 @@ func (svc *Service) Deploy( svc.notify.NotifyBuildStart(bgCtx, app, deployment) - return svc.runBuildAndDeploy(deployCtx, bgCtx, app, deployment) + return svc.runBuildAndDeploy(deployCtx, bgCtx, app, deployment, deployStart) } // Rollback rolls back an app to its previous image. @@ -467,15 +477,20 @@ func (svc *Service) runBuildAndDeploy( bgCtx context.Context, app *models.App, deployment *models.Deployment, + deployStart time.Time, ) error { // Build phase with timeout imageID, err := svc.buildImageWithTimeout(deployCtx, app, deployment) if err != nil { cancelErr := svc.checkCancelled(deployCtx, bgCtx, app, deployment, "") if cancelErr != nil { + svc.recordDeployMetrics(app.Name, "cancelled", deployStart) + return cancelErr } + svc.recordDeployMetrics(app.Name, "failed", deployStart) + return err } @@ -486,9 +501,13 @@ func (svc *Service) runBuildAndDeploy( if err != nil { cancelErr := svc.checkCancelled(deployCtx, bgCtx, app, deployment, imageID) if cancelErr != nil { + svc.recordDeployMetrics(app.Name, "cancelled", deployStart) + return cancelErr } + svc.recordDeployMetrics(app.Name, "failed", deployStart) + return err } @@ -504,11 +523,19 @@ func (svc *Service) runBuildAndDeploy( // Use context.WithoutCancel to ensure health check completes even if // the parent context is cancelled (e.g., HTTP request ends). - go svc.checkHealthAfterDelay(bgCtx, app, deployment) + go svc.checkHealthAfterDelay(bgCtx, app, deployment, deployStart) return nil } +// recordDeployMetrics records deployment completion metrics. +func (svc *Service) recordDeployMetrics(appName, status string, start time.Time) { + duration := time.Since(start).Seconds() + + svc.metrics.DeploymentsTotal.WithLabelValues(appName, status).Inc() + svc.metrics.DeploymentDuration.WithLabelValues(appName, status).Observe(duration) +} + // buildImageWithTimeout runs the build phase with a timeout. func (svc *Service) buildImageWithTimeout( ctx context.Context, @@ -1163,6 +1190,7 @@ func (svc *Service) checkHealthAfterDelay( ctx context.Context, app *models.App, deployment *models.Deployment, + deployStart time.Time, ) { svc.log.Info( "waiting 60 seconds to check container health", @@ -1189,6 +1217,8 @@ func (svc *Service) checkHealthAfterDelay( svc.notify.NotifyDeployFailed(ctx, reloadedApp, deployment, err) _ = deployment.MarkFinished(ctx, models.DeploymentStatusFailed) svc.writeLogsToFile(reloadedApp, deployment) + svc.recordDeployMetrics(reloadedApp.Name, "failed", deployStart) + svc.metrics.ContainerHealthy.WithLabelValues(reloadedApp.Name).Set(0) reloadedApp.Status = models.AppStatusError _ = reloadedApp.Save(ctx) @@ -1200,6 +1230,8 @@ func (svc *Service) checkHealthAfterDelay( svc.notify.NotifyDeploySuccess(ctx, reloadedApp, deployment) _ = deployment.MarkFinished(ctx, models.DeploymentStatusSuccess) svc.writeLogsToFile(reloadedApp, deployment) + svc.recordDeployMetrics(reloadedApp.Name, "success", deployStart) + svc.metrics.ContainerHealthy.WithLabelValues(reloadedApp.Name).Set(1) } else { svc.log.Warn( "container unhealthy after 60 seconds", @@ -1208,6 +1240,8 @@ func (svc *Service) checkHealthAfterDelay( svc.notify.NotifyDeployFailed(ctx, reloadedApp, deployment, ErrContainerUnhealthy) _ = deployment.MarkFinished(ctx, models.DeploymentStatusFailed) svc.writeLogsToFile(reloadedApp, deployment) + svc.recordDeployMetrics(reloadedApp.Name, "failed", deployStart) + svc.metrics.ContainerHealthy.WithLabelValues(reloadedApp.Name).Set(0) reloadedApp.Status = models.AppStatusError _ = reloadedApp.Save(ctx) } diff --git a/internal/service/webhook/webhook.go b/internal/service/webhook/webhook.go index db772d5..63ecac8 100644 --- a/internal/service/webhook/webhook.go +++ b/internal/service/webhook/webhook.go @@ -7,12 +7,14 @@ import ( "encoding/json" "fmt" "log/slog" + "strconv" "go.uber.org/fx" "sneak.berlin/go/upaas/internal/database" "sneak.berlin/go/upaas/internal/logger" + "sneak.berlin/go/upaas/internal/metrics" "sneak.berlin/go/upaas/internal/models" "sneak.berlin/go/upaas/internal/service/deploy" ) @@ -24,23 +26,26 @@ type ServiceParams struct { Logger *logger.Logger Database *database.Database Deploy *deploy.Service + Metrics *metrics.Metrics } // Service provides webhook handling functionality. type Service struct { - log *slog.Logger - db *database.Database - deploy *deploy.Service - params *ServiceParams + log *slog.Logger + db *database.Database + deploy *deploy.Service + metrics *metrics.Metrics + params *ServiceParams } // New creates a new webhook Service. func New(_ fx.Lifecycle, params ServiceParams) (*Service, error) { return &Service{ - log: params.Logger.Get(), - db: params.Database, - deploy: params.Deploy, - params: ¶ms, + log: params.Logger.Get(), + db: params.Database, + deploy: params.Deploy, + metrics: params.Metrics, + params: ¶ms, }, nil } @@ -122,6 +127,10 @@ func (svc *Service) HandleWebhook( "commit", commitSHA, ) + svc.metrics.WebhookEventsTotal.WithLabelValues( + app.Name, eventType, strconv.FormatBool(matched), + ).Inc() + // If branch matches, trigger deployment if matched { svc.triggerDeployment(ctx, app, event) diff --git a/internal/service/webhook/webhook_test.go b/internal/service/webhook/webhook_test.go index 9c425a2..3b525d0 100644 --- a/internal/service/webhook/webhook_test.go +++ b/internal/service/webhook/webhook_test.go @@ -8,6 +8,7 @@ import ( "testing" "time" + "github.com/prometheus/client_golang/prometheus" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" "go.uber.org/fx" @@ -17,6 +18,7 @@ import ( "sneak.berlin/go/upaas/internal/docker" "sneak.berlin/go/upaas/internal/globals" "sneak.berlin/go/upaas/internal/logger" + "sneak.berlin/go/upaas/internal/metrics" "sneak.berlin/go/upaas/internal/models" "sneak.berlin/go/upaas/internal/service/deploy" "sneak.berlin/go/upaas/internal/service/notify" @@ -63,13 +65,17 @@ func setupTestService(t *testing.T) (*webhook.Service, *database.Database, func( notifySvc, err := notify.New(fx.Lifecycle(nil), notify.ServiceParams{Logger: deps.logger}) require.NoError(t, err) + metricsInstance := metrics.NewForTest(prometheus.NewRegistry()) + deploySvc, err := deploy.New(fx.Lifecycle(nil), deploy.ServiceParams{ Logger: deps.logger, Config: deps.config, Database: deps.db, Docker: dockerClient, Notify: notifySvc, + Metrics: metricsInstance, }) require.NoError(t, err) svc, err := webhook.New(fx.Lifecycle(nil), webhook.ServiceParams{ Logger: deps.logger, Database: deps.db, Deploy: deploySvc, + Metrics: metricsInstance, }) require.NoError(t, err)