feat: add observability improvements (metrics, audit log, structured logging)
All checks were successful
Check / check (pull_request) Successful in 1m45s

- Add Prometheus metrics package (internal/metrics) with deployment,
  container health, webhook, HTTP request, and audit counters/histograms
- Add audit_log SQLite table via migration 007
- Add AuditEntry model with CRUD operations and query methods
- Add audit service (internal/service/audit) for recording user actions
- Instrument deploy service with deployment duration, count, and
  in-flight metrics; container health gauge updates on deploy completion
- Instrument webhook service with event counters by app/type/matched
- Instrument HTTP middleware with request count, duration, and response
  size metrics; also log response bytes in structured request logs
- Add audit logging to all key handler operations: login/logout, app
  CRUD, deploy, cancel, rollback, restart/stop/start, webhook receipt,
  and initial setup
- Add GET /api/audit endpoint for querying recent audit entries
- Make /metrics endpoint always available (optionally auth-protected)
- Add comprehensive tests for metrics, audit model, and audit service
- Update existing test infrastructure with metrics and audit dependencies
- Update README with Observability section documenting all metrics,
  audit log, and structured logging
This commit is contained in:
clawbot
2026-03-17 02:23:44 -07:00
parent fd110e69db
commit f558e2cdd8
21 changed files with 1399 additions and 42 deletions

View File

@@ -0,0 +1,16 @@
-- Audit log table for tracking user actions.
CREATE TABLE audit_log (
id INTEGER PRIMARY KEY,
user_id INTEGER,
username TEXT NOT NULL,
action TEXT NOT NULL,
resource_type TEXT NOT NULL,
resource_id TEXT,
detail TEXT,
remote_ip TEXT,
created_at DATETIME DEFAULT CURRENT_TIMESTAMP
);
CREATE INDEX idx_audit_log_created_at ON audit_log(created_at);
CREATE INDEX idx_audit_log_action ON audit_log(action);
CREATE INDEX idx_audit_log_resource ON audit_log(resource_type, resource_id);

View File

@@ -1,6 +1,7 @@
package handlers
import (
"database/sql"
"encoding/json"
"net/http"
"strconv"
@@ -120,6 +121,9 @@ func (h *Handlers) HandleAPILoginPOST() http.HandlerFunc {
return
}
h.auditLog(request, models.AuditActionLogin,
models.AuditResourceSession, "", "api login")
h.respondJSON(writer, request, loginResponse{
UserID: user.ID,
Username: user.Username,
@@ -243,3 +247,79 @@ func (h *Handlers) HandleAPIWhoAmI() http.HandlerFunc {
}, http.StatusOK)
}
}
// auditLogDefaultLimit is the default number of audit entries returned.
const auditLogDefaultLimit = 50
// auditLogMaxLimit is the maximum number of audit entries returned.
const auditLogMaxLimit = 500
// HandleAPIAuditLog returns a handler that lists recent audit log entries.
func (h *Handlers) HandleAPIAuditLog() http.HandlerFunc {
type auditEntryResponse struct {
ID int64 `json:"id"`
UserID *int64 `json:"userId,omitempty"`
Username string `json:"username"`
Action string `json:"action"`
ResourceType string `json:"resourceType"`
ResourceID string `json:"resourceId,omitempty"`
Detail string `json:"detail,omitempty"`
RemoteIP string `json:"remoteIp,omitempty"`
CreatedAt string `json:"createdAt"`
}
return func(writer http.ResponseWriter, request *http.Request) {
limit := auditLogDefaultLimit
if limitStr := request.URL.Query().Get("limit"); limitStr != "" {
parsed, parseErr := strconv.Atoi(limitStr)
if parseErr == nil && parsed > 0 && parsed <= auditLogMaxLimit {
limit = parsed
}
}
entries, err := h.audit.Recent(request.Context(), limit)
if err != nil {
h.log.Error("failed to fetch audit log", "error", err)
h.respondJSON(writer, request,
map[string]string{"error": "failed to fetch audit log"},
http.StatusInternalServerError)
return
}
result := make([]auditEntryResponse, 0, len(entries))
for _, e := range entries {
entry := auditEntryResponse{
ID: e.ID,
Username: e.Username,
Action: string(e.Action),
ResourceType: string(e.ResourceType),
CreatedAt: e.CreatedAt.UTC().Format("2006-01-02T15:04:05Z"),
}
if e.UserID.Valid {
id := e.UserID.Int64
entry.UserID = &id
}
entry.ResourceID = nullStringValue(e.ResourceID)
entry.Detail = nullStringValue(e.Detail)
entry.RemoteIP = nullStringValue(e.RemoteIP)
result = append(result, entry)
}
h.respondJSON(writer, request, result, http.StatusOK)
}
}
// nullStringValue returns the string value if valid, empty string otherwise.
func nullStringValue(ns sql.NullString) string {
if ns.Valid {
return ns.String
}
return ""
}

View File

@@ -119,6 +119,9 @@ func (h *Handlers) HandleAppCreate() http.HandlerFunc { //nolint:funlen // valid
return
}
h.auditLog(request, models.AuditActionAppCreate,
models.AuditResourceApp, createdApp.ID, "created app: "+createdApp.Name)
http.Redirect(writer, request, "/apps/"+createdApp.ID, http.StatusSeeOther)
}
}
@@ -289,6 +292,9 @@ func (h *Handlers) HandleAppUpdate() http.HandlerFunc { //nolint:funlen // valid
return
}
h.auditLog(request, models.AuditActionAppUpdate,
models.AuditResourceApp, application.ID, "updated app: "+application.Name)
redirectURL := "/apps/" + application.ID + "?success=updated"
http.Redirect(writer, request, redirectURL, http.StatusSeeOther)
}
@@ -344,6 +350,9 @@ func (h *Handlers) HandleAppDelete() http.HandlerFunc {
return
}
h.auditLog(request, models.AuditActionAppDelete,
models.AuditResourceApp, appID, "deleted app: "+application.Name)
http.Redirect(writer, request, "/", http.StatusSeeOther)
}
}
@@ -360,6 +369,9 @@ func (h *Handlers) HandleAppDeploy() http.HandlerFunc {
return
}
h.auditLog(request, models.AuditActionAppDeploy,
models.AuditResourceApp, application.ID, "manual deploy: "+application.Name)
// Trigger deployment in background with a detached context
// so the deployment continues even if the HTTP request is cancelled
deployCtx := context.WithoutCancel(request.Context())
@@ -399,6 +411,8 @@ func (h *Handlers) HandleCancelDeploy() http.HandlerFunc {
cancelled := h.deploy.CancelDeploy(application.ID)
if cancelled {
h.log.Info("deployment cancelled by user", "app", application.Name)
h.auditLog(request, models.AuditActionDeployCancel,
models.AuditResourceDeployment, application.ID, "cancelled deploy: "+application.Name)
}
http.Redirect(
@@ -430,6 +444,9 @@ func (h *Handlers) HandleAppRollback() http.HandlerFunc {
return
}
h.auditLog(request, models.AuditActionAppRollback,
models.AuditResourceApp, application.ID, "rolled back: "+application.Name)
http.Redirect(writer, request, "/apps/"+application.ID+"?success=rolledback", http.StatusSeeOther)
}
}
@@ -834,11 +851,29 @@ func (h *Handlers) handleContainerAction(
} else {
h.log.Info("container action completed",
"action", action, "app", application.Name, "container", containerID)
auditAction := containerActionToAuditAction(action)
h.auditLog(request, auditAction,
models.AuditResourceApp, appID, string(action)+" container: "+application.Name)
}
http.Redirect(writer, request, "/apps/"+appID, http.StatusSeeOther)
}
// containerActionToAuditAction maps container actions to audit actions.
func containerActionToAuditAction(action containerAction) models.AuditAction {
switch action {
case actionRestart:
return models.AuditActionAppRestart
case actionStop:
return models.AuditActionAppStop
case actionStart:
return models.AuditActionAppStart
default:
return models.AuditAction("app." + string(action))
}
}
// HandleAppRestart handles restarting an app's container.
func (h *Handlers) HandleAppRestart() http.HandlerFunc {
return func(writer http.ResponseWriter, request *http.Request) {

View File

@@ -3,6 +3,7 @@ package handlers
import (
"net/http"
"sneak.berlin/go/upaas/internal/models"
"sneak.berlin/go/upaas/templates"
)
@@ -61,6 +62,9 @@ func (h *Handlers) HandleLoginPOST() http.HandlerFunc {
return
}
h.auditLog(request, models.AuditActionLogin,
models.AuditResourceSession, "", "user logged in")
http.Redirect(writer, request, "/", http.StatusSeeOther)
}
}
@@ -68,6 +72,9 @@ func (h *Handlers) HandleLoginPOST() http.HandlerFunc {
// HandleLogout handles logout requests.
func (h *Handlers) HandleLogout() http.HandlerFunc {
return func(writer http.ResponseWriter, request *http.Request) {
h.auditLog(request, models.AuditActionLogout,
models.AuditResourceSession, "", "user logged out")
destroyErr := h.auth.DestroySession(writer, request)
if destroyErr != nil {
h.log.Error("failed to destroy session", "error", destroyErr)

View File

@@ -15,7 +15,9 @@ import (
"sneak.berlin/go/upaas/internal/globals"
"sneak.berlin/go/upaas/internal/healthcheck"
"sneak.berlin/go/upaas/internal/logger"
"sneak.berlin/go/upaas/internal/models"
"sneak.berlin/go/upaas/internal/service/app"
"sneak.berlin/go/upaas/internal/service/audit"
"sneak.berlin/go/upaas/internal/service/auth"
"sneak.berlin/go/upaas/internal/service/deploy"
"sneak.berlin/go/upaas/internal/service/webhook"
@@ -35,6 +37,7 @@ type Params struct {
Deploy *deploy.Service
Webhook *webhook.Service
Docker *docker.Client
Audit *audit.Service
}
// Handlers provides HTTP request handlers.
@@ -48,6 +51,7 @@ type Handlers struct {
deploy *deploy.Service
webhook *webhook.Service
docker *docker.Client
audit *audit.Service
globals *globals.Globals
}
@@ -63,10 +67,48 @@ func New(_ fx.Lifecycle, params Params) (*Handlers, error) {
deploy: params.Deploy,
webhook: params.Webhook,
docker: params.Docker,
audit: params.Audit,
globals: params.Globals,
}, nil
}
// currentUser returns the currently authenticated user, or nil if not authenticated.
func (h *Handlers) currentUser(request *http.Request) *models.User {
user, err := h.auth.GetCurrentUser(request.Context(), request)
if err != nil || user == nil {
return nil
}
return user
}
// auditLog records an audit entry for the current request.
func (h *Handlers) auditLog(
request *http.Request,
action models.AuditAction,
resourceType models.AuditResourceType,
resourceID string,
detail string,
) {
user := h.currentUser(request)
entry := audit.LogEntry{
Action: action,
ResourceType: resourceType,
ResourceID: resourceID,
Detail: detail,
}
if user != nil {
entry.UserID = user.ID
entry.Username = user.Username
} else {
entry.Username = "anonymous"
}
h.audit.LogFromRequest(request.Context(), request, entry)
}
// addGlobals adds version info and CSRF token to template data map.
func (h *Handlers) addGlobals(
data map[string]any,

View File

@@ -11,6 +11,7 @@ import (
"time"
"github.com/go-chi/chi/v5"
"github.com/prometheus/client_golang/prometheus"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
"go.uber.org/fx"
@@ -24,8 +25,10 @@ import (
"sneak.berlin/go/upaas/internal/handlers"
"sneak.berlin/go/upaas/internal/healthcheck"
"sneak.berlin/go/upaas/internal/logger"
"sneak.berlin/go/upaas/internal/metrics"
"sneak.berlin/go/upaas/internal/middleware"
"sneak.berlin/go/upaas/internal/service/app"
"sneak.berlin/go/upaas/internal/service/audit"
"sneak.berlin/go/upaas/internal/service/auth"
"sneak.berlin/go/upaas/internal/service/deploy"
"sneak.berlin/go/upaas/internal/service/notify"
@@ -92,7 +95,8 @@ func createAppServices(
logInstance *logger.Logger,
dbInstance *database.Database,
cfg *config.Config,
) (*auth.Service, *app.Service, *deploy.Service, *webhook.Service, *docker.Client) {
metricsInstance *metrics.Metrics,
) (*auth.Service, *app.Service, *deploy.Service, *webhook.Service, *docker.Client, *audit.Service) {
t.Helper()
authSvc, authErr := auth.New(fx.Lifecycle(nil), auth.ServiceParams{
@@ -125,6 +129,7 @@ func createAppServices(
Database: dbInstance,
Docker: dockerClient,
Notify: notifySvc,
Metrics: metricsInstance,
})
require.NoError(t, deployErr)
@@ -132,10 +137,18 @@ func createAppServices(
Logger: logInstance,
Database: dbInstance,
Deploy: deploySvc,
Metrics: metricsInstance,
})
require.NoError(t, webhookErr)
return authSvc, appSvc, deploySvc, webhookSvc, dockerClient
auditSvc, auditErr := audit.New(fx.Lifecycle(nil), audit.ServiceParams{
Logger: logInstance,
Database: dbInstance,
Metrics: metricsInstance,
})
require.NoError(t, auditErr)
return authSvc, appSvc, deploySvc, webhookSvc, dockerClient, auditSvc
}
func setupTestHandlers(t *testing.T) *testContext {
@@ -145,11 +158,14 @@ func setupTestHandlers(t *testing.T) *testContext {
globalInstance, logInstance, dbInstance, hcInstance := createCoreServices(t, cfg)
authSvc, appSvc, deploySvc, webhookSvc, dockerClient := createAppServices(
metricsInstance := metrics.NewForTest(prometheus.NewRegistry())
authSvc, appSvc, deploySvc, webhookSvc, dockerClient, auditSvc := createAppServices(
t,
logInstance,
dbInstance,
cfg,
metricsInstance,
)
handlersInstance, handlerErr := handlers.New(
@@ -164,6 +180,7 @@ func setupTestHandlers(t *testing.T) *testContext {
Deploy: deploySvc,
Webhook: webhookSvc,
Docker: dockerClient,
Audit: auditSvc,
},
)
require.NoError(t, handlerErr)
@@ -173,6 +190,7 @@ func setupTestHandlers(t *testing.T) *testContext {
Globals: globalInstance,
Config: cfg,
Auth: authSvc,
Metrics: metricsInstance,
})
require.NoError(t, mwErr)

View File

@@ -3,6 +3,7 @@ package handlers
import (
"net/http"
"sneak.berlin/go/upaas/internal/models"
"sneak.berlin/go/upaas/templates"
)
@@ -111,6 +112,9 @@ func (h *Handlers) HandleSetupPOST() http.HandlerFunc {
return
}
h.auditLog(request, models.AuditActionSetup,
models.AuditResourceUser, "", "initial setup completed")
http.Redirect(writer, request, "/", http.StatusSeeOther)
}
}

View File

@@ -7,13 +7,14 @@ import (
"github.com/go-chi/chi/v5"
"sneak.berlin/go/upaas/internal/models"
"sneak.berlin/go/upaas/internal/service/audit"
)
// maxWebhookBodySize is the maximum allowed size of a webhook request body (1MB).
const maxWebhookBodySize = 1 << 20
// HandleWebhook handles incoming Gitea webhooks.
func (h *Handlers) HandleWebhook() http.HandlerFunc {
func (h *Handlers) HandleWebhook() http.HandlerFunc { //nolint:funlen // audit logging adds necessary length
return func(writer http.ResponseWriter, request *http.Request) {
secret := chi.URLParam(request, "secret")
if secret == "" {
@@ -56,6 +57,15 @@ func (h *Handlers) HandleWebhook() http.HandlerFunc {
eventType = "push"
}
// Log webhook receipt
h.audit.LogFromRequest(request.Context(), request, audit.LogEntry{
Username: "webhook",
Action: models.AuditActionWebhookReceive,
ResourceType: models.AuditResourceWebhook,
ResourceID: application.ID,
Detail: "webhook from app: " + application.Name + ", event: " + eventType,
})
// Process webhook
webhookErr := h.webhook.HandleWebhook(
request.Context(),

148
internal/metrics/metrics.go Normal file
View File

@@ -0,0 +1,148 @@
// Package metrics provides Prometheus metrics for upaas.
//
//nolint:revive // "metrics" matches the domain; runtime/metrics is rarely imported directly
package metrics
import (
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/client_golang/prometheus/promauto"
"go.uber.org/fx"
)
// Params contains dependencies for Metrics.
type Params struct {
fx.In
}
// Metrics holds all Prometheus metrics for the application.
type Metrics struct {
// Deployment metrics.
DeploymentsTotal *prometheus.CounterVec
DeploymentDuration *prometheus.HistogramVec
DeploymentsInFlight *prometheus.GaugeVec
// Container health metrics.
ContainerHealthy *prometheus.GaugeVec
// Webhook metrics.
WebhookEventsTotal *prometheus.CounterVec
// HTTP request metrics.
HTTPRequestsTotal *prometheus.CounterVec
HTTPRequestDuration *prometheus.HistogramVec
HTTPResponseSizeBytes *prometheus.HistogramVec
// Audit log metrics.
AuditEventsTotal *prometheus.CounterVec
}
// New creates a new Metrics instance with all Prometheus metrics registered
// in the default Prometheus registry.
func New(_ fx.Lifecycle, _ Params) (*Metrics, error) {
return newMetrics(promauto.With(prometheus.DefaultRegisterer)), nil
}
// NewForTest creates a Metrics instance with a custom registry for test isolation.
func NewForTest(reg prometheus.Registerer) *Metrics {
return newMetrics(promauto.With(reg))
}
// newMetrics creates a Metrics instance using the given factory.
func newMetrics(factory promauto.Factory) *Metrics {
return &Metrics{
DeploymentsTotal: newDeploymentsTotal(factory),
DeploymentDuration: newDeploymentDuration(factory),
DeploymentsInFlight: newDeploymentsInFlight(factory),
ContainerHealthy: newContainerHealthy(factory),
WebhookEventsTotal: newWebhookEventsTotal(factory),
HTTPRequestsTotal: newHTTPRequestsTotal(factory),
HTTPRequestDuration: newHTTPRequestDuration(factory),
HTTPResponseSizeBytes: newHTTPResponseSizeBytes(factory),
AuditEventsTotal: newAuditEventsTotal(factory),
}
}
func newDeploymentsTotal(f promauto.Factory) *prometheus.CounterVec {
return f.NewCounterVec(prometheus.CounterOpts{
Namespace: "upaas",
Subsystem: "deployments",
Name: "total",
Help: "Total number of deployments by app and status.",
}, []string{"app", "status"})
}
func newDeploymentDuration(f promauto.Factory) *prometheus.HistogramVec {
return f.NewHistogramVec(prometheus.HistogramOpts{
Namespace: "upaas",
Subsystem: "deployments",
Name: "duration_seconds",
Help: "Duration of deployments in seconds by app and status.",
Buckets: []float64{10, 30, 60, 120, 300, 600, 1800},
}, []string{"app", "status"})
}
func newDeploymentsInFlight(f promauto.Factory) *prometheus.GaugeVec {
return f.NewGaugeVec(prometheus.GaugeOpts{
Namespace: "upaas",
Subsystem: "deployments",
Name: "in_flight",
Help: "Number of deployments currently in progress by app.",
}, []string{"app"})
}
func newContainerHealthy(f promauto.Factory) *prometheus.GaugeVec {
return f.NewGaugeVec(prometheus.GaugeOpts{
Namespace: "upaas",
Subsystem: "container",
Name: "healthy",
Help: "Whether the app container is healthy (1) or unhealthy (0).",
}, []string{"app"})
}
func newWebhookEventsTotal(f promauto.Factory) *prometheus.CounterVec {
return f.NewCounterVec(prometheus.CounterOpts{
Namespace: "upaas",
Subsystem: "webhook",
Name: "events_total",
Help: "Total number of webhook events by app, event type, and matched status.",
}, []string{"app", "event_type", "matched"})
}
func newHTTPRequestsTotal(f promauto.Factory) *prometheus.CounterVec {
return f.NewCounterVec(prometheus.CounterOpts{
Namespace: "upaas",
Subsystem: "http",
Name: "requests_total",
Help: "Total number of HTTP requests by method and status code.",
}, []string{"method", "status_code"})
}
func newHTTPRequestDuration(f promauto.Factory) *prometheus.HistogramVec {
return f.NewHistogramVec(prometheus.HistogramOpts{
Namespace: "upaas",
Subsystem: "http",
Name: "request_duration_seconds",
Help: "Duration of HTTP requests in seconds by method.",
Buckets: prometheus.DefBuckets,
}, []string{"method"})
}
//nolint:mnd // bucket boundaries are domain-specific constants
func newHTTPResponseSizeBytes(f promauto.Factory) *prometheus.HistogramVec {
return f.NewHistogramVec(prometheus.HistogramOpts{
Namespace: "upaas",
Subsystem: "http",
Name: "response_size_bytes",
Help: "Size of HTTP responses in bytes by method.",
Buckets: prometheus.ExponentialBuckets(100, 10, 7),
}, []string{"method"})
}
func newAuditEventsTotal(f promauto.Factory) *prometheus.CounterVec {
return f.NewCounterVec(prometheus.CounterOpts{
Namespace: "upaas",
Subsystem: "audit",
Name: "events_total",
Help: "Total number of audit log events by action.",
}, []string{"action"})
}

View File

@@ -0,0 +1,158 @@
package metrics_test
import (
"testing"
"github.com/prometheus/client_golang/prometheus"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
"go.uber.org/fx"
"sneak.berlin/go/upaas/internal/metrics"
)
func TestNewForTest(t *testing.T) {
t.Parallel()
reg := prometheus.NewRegistry()
m := metrics.NewForTest(reg)
require.NotNil(t, m)
assert.NotNil(t, m.DeploymentsTotal)
assert.NotNil(t, m.DeploymentDuration)
assert.NotNil(t, m.DeploymentsInFlight)
assert.NotNil(t, m.ContainerHealthy)
assert.NotNil(t, m.WebhookEventsTotal)
assert.NotNil(t, m.HTTPRequestsTotal)
assert.NotNil(t, m.HTTPRequestDuration)
assert.NotNil(t, m.HTTPResponseSizeBytes)
assert.NotNil(t, m.AuditEventsTotal)
}
func TestNew(t *testing.T) {
t.Parallel()
m, err := metrics.New(fx.Lifecycle(nil), metrics.Params{})
require.NoError(t, err)
require.NotNil(t, m)
}
func TestDeploymentMetrics(t *testing.T) {
t.Parallel()
reg := prometheus.NewRegistry()
m := metrics.NewForTest(reg)
m.DeploymentsTotal.WithLabelValues("test-app", "success").Inc()
m.DeploymentDuration.WithLabelValues("test-app", "success").Observe(42.5)
m.DeploymentsInFlight.WithLabelValues("test-app").Set(1)
families, err := reg.Gather()
require.NoError(t, err)
names := make(map[string]bool)
for _, f := range families {
names[f.GetName()] = true
}
assert.True(t, names["upaas_deployments_total"])
assert.True(t, names["upaas_deployments_duration_seconds"])
assert.True(t, names["upaas_deployments_in_flight"])
}
func TestContainerHealthMetrics(t *testing.T) {
t.Parallel()
reg := prometheus.NewRegistry()
m := metrics.NewForTest(reg)
m.ContainerHealthy.WithLabelValues("my-app").Set(1)
families, err := reg.Gather()
require.NoError(t, err)
found := false
for _, f := range families {
if f.GetName() == "upaas_container_healthy" {
found = true
break
}
}
assert.True(t, found)
}
func TestWebhookMetrics(t *testing.T) {
t.Parallel()
reg := prometheus.NewRegistry()
m := metrics.NewForTest(reg)
m.WebhookEventsTotal.WithLabelValues("test-app", "push", "true").Inc()
families, err := reg.Gather()
require.NoError(t, err)
found := false
for _, f := range families {
if f.GetName() == "upaas_webhook_events_total" {
found = true
break
}
}
assert.True(t, found)
}
func TestHTTPMetrics(t *testing.T) {
t.Parallel()
reg := prometheus.NewRegistry()
m := metrics.NewForTest(reg)
m.HTTPRequestsTotal.WithLabelValues("GET", "200").Inc()
m.HTTPRequestDuration.WithLabelValues("GET").Observe(0.05)
m.HTTPResponseSizeBytes.WithLabelValues("GET").Observe(1024)
families, err := reg.Gather()
require.NoError(t, err)
names := make(map[string]bool)
for _, f := range families {
names[f.GetName()] = true
}
assert.True(t, names["upaas_http_requests_total"])
assert.True(t, names["upaas_http_request_duration_seconds"])
assert.True(t, names["upaas_http_response_size_bytes"])
}
func TestAuditMetrics(t *testing.T) {
t.Parallel()
reg := prometheus.NewRegistry()
m := metrics.NewForTest(reg)
m.AuditEventsTotal.WithLabelValues("login").Inc()
families, err := reg.Gather()
require.NoError(t, err)
found := false
for _, f := range families {
if f.GetName() == "upaas_audit_events_total" {
found = true
break
}
}
assert.True(t, found)
}

View File

@@ -21,6 +21,7 @@ import (
"sneak.berlin/go/upaas/internal/config"
"sneak.berlin/go/upaas/internal/globals"
"sneak.berlin/go/upaas/internal/logger"
"sneak.berlin/go/upaas/internal/metrics"
"sneak.berlin/go/upaas/internal/service/auth"
)
@@ -35,33 +36,37 @@ type Params struct {
Globals *globals.Globals
Config *config.Config
Auth *auth.Service
Metrics *metrics.Metrics
}
// Middleware provides HTTP middleware.
type Middleware struct {
log *slog.Logger
params *Params
log *slog.Logger
metrics *metrics.Metrics
params *Params
}
// New creates a new Middleware instance.
func New(_ fx.Lifecycle, params Params) (*Middleware, error) {
return &Middleware{
log: params.Logger.Get(),
params: &params,
log: params.Logger.Get(),
metrics: params.Metrics,
params: &params,
}, nil
}
// loggingResponseWriter wraps http.ResponseWriter to capture status code.
// loggingResponseWriter wraps http.ResponseWriter to capture status code and bytes written.
type loggingResponseWriter struct {
http.ResponseWriter
statusCode int
statusCode int
bytesWritten int
}
func newLoggingResponseWriter(
writer http.ResponseWriter,
) *loggingResponseWriter {
return &loggingResponseWriter{writer, http.StatusOK}
return &loggingResponseWriter{ResponseWriter: writer, statusCode: http.StatusOK}
}
func (lrw *loggingResponseWriter) WriteHeader(code int) {
@@ -69,7 +74,14 @@ func (lrw *loggingResponseWriter) WriteHeader(code int) {
lrw.ResponseWriter.WriteHeader(code)
}
// Logging returns a request logging middleware.
func (lrw *loggingResponseWriter) Write(b []byte) (int, error) {
n, err := lrw.ResponseWriter.Write(b)
lrw.bytesWritten += n
return n, err
}
// Logging returns a request logging middleware that also records HTTP metrics.
func (m *Middleware) Logging() func(http.Handler) http.Handler {
return func(next http.Handler) http.Handler {
return http.HandlerFunc(func(
@@ -83,6 +95,8 @@ func (m *Middleware) Logging() func(http.Handler) http.Handler {
defer func() {
latency := time.Since(start)
reqID := middleware.GetReqID(ctx)
statusStr := strconv.Itoa(lrw.statusCode)
m.log.InfoContext(ctx, "request",
"request_start", start,
"method", request.Method,
@@ -93,8 +107,19 @@ func (m *Middleware) Logging() func(http.Handler) http.Handler {
"proto", request.Proto,
"remoteIP", realIP(request),
"status", lrw.statusCode,
"bytes", lrw.bytesWritten,
"latency_ms", latency.Milliseconds(),
)
m.metrics.HTTPRequestsTotal.WithLabelValues(
request.Method, statusStr,
).Inc()
m.metrics.HTTPRequestDuration.WithLabelValues(
request.Method,
).Observe(latency.Seconds())
m.metrics.HTTPResponseSizeBytes.WithLabelValues(
request.Method,
).Observe(float64(lrw.bytesWritten))
}()
next.ServeHTTP(lrw, request)

View File

@@ -0,0 +1,193 @@
package models
import (
"context"
"database/sql"
"fmt"
"time"
"sneak.berlin/go/upaas/internal/database"
)
// AuditAction represents the type of audited user action.
type AuditAction string
// Audit action constants.
const (
AuditActionLogin AuditAction = "login"
AuditActionLogout AuditAction = "logout"
AuditActionAppCreate AuditAction = "app.create"
AuditActionAppUpdate AuditAction = "app.update"
AuditActionAppDelete AuditAction = "app.delete"
AuditActionAppDeploy AuditAction = "app.deploy"
AuditActionAppRollback AuditAction = "app.rollback"
AuditActionAppRestart AuditAction = "app.restart"
AuditActionAppStop AuditAction = "app.stop"
AuditActionAppStart AuditAction = "app.start"
AuditActionDeployCancel AuditAction = "deploy.cancel"
AuditActionEnvVarSave AuditAction = "env_var.save"
AuditActionLabelAdd AuditAction = "label.add"
AuditActionLabelEdit AuditAction = "label.edit"
AuditActionLabelDelete AuditAction = "label.delete"
AuditActionVolumeAdd AuditAction = "volume.add"
AuditActionVolumeEdit AuditAction = "volume.edit"
AuditActionVolumeDelete AuditAction = "volume.delete"
AuditActionPortAdd AuditAction = "port.add"
AuditActionPortDelete AuditAction = "port.delete"
AuditActionSetup AuditAction = "setup"
AuditActionWebhookReceive AuditAction = "webhook.receive"
)
// AuditResourceType represents the type of resource being acted on.
type AuditResourceType string
// Audit resource type constants.
const (
AuditResourceApp AuditResourceType = "app"
AuditResourceUser AuditResourceType = "user"
AuditResourceSession AuditResourceType = "session"
AuditResourceEnvVar AuditResourceType = "env_var"
AuditResourceLabel AuditResourceType = "label"
AuditResourceVolume AuditResourceType = "volume"
AuditResourcePort AuditResourceType = "port"
AuditResourceDeployment AuditResourceType = "deployment"
AuditResourceWebhook AuditResourceType = "webhook"
)
// AuditEntry represents a single audit log entry.
type AuditEntry struct {
db *database.Database
ID int64
UserID sql.NullInt64
Username string
Action AuditAction
ResourceType AuditResourceType
ResourceID sql.NullString
Detail sql.NullString
RemoteIP sql.NullString
CreatedAt time.Time
}
// NewAuditEntry creates a new AuditEntry with a database reference.
func NewAuditEntry(db *database.Database) *AuditEntry {
return &AuditEntry{db: db}
}
// Save inserts the audit entry into the database.
func (a *AuditEntry) Save(ctx context.Context) error {
query := `
INSERT INTO audit_log (
user_id, username, action, resource_type, resource_id,
detail, remote_ip
) VALUES (?, ?, ?, ?, ?, ?, ?)`
result, err := a.db.Exec(ctx, query,
a.UserID, a.Username, a.Action, a.ResourceType,
a.ResourceID, a.Detail, a.RemoteIP,
)
if err != nil {
return fmt.Errorf("inserting audit entry: %w", err)
}
id, err := result.LastInsertId()
if err != nil {
return fmt.Errorf("getting audit entry id: %w", err)
}
a.ID = id
return nil
}
// FindAuditEntries returns recent audit log entries, newest first.
func FindAuditEntries(
ctx context.Context,
db *database.Database,
limit int,
) ([]*AuditEntry, error) {
query := `
SELECT id, user_id, username, action, resource_type, resource_id,
detail, remote_ip, created_at
FROM audit_log
ORDER BY created_at DESC
LIMIT ?`
rows, err := db.Query(ctx, query, limit)
if err != nil {
return nil, fmt.Errorf("querying audit entries: %w", err)
}
defer func() { _ = rows.Close() }()
return scanAuditRows(rows)
}
// FindAuditEntriesByResource returns audit log entries for a specific resource.
func FindAuditEntriesByResource(
ctx context.Context,
db *database.Database,
resourceType AuditResourceType,
resourceID string,
limit int,
) ([]*AuditEntry, error) {
query := `
SELECT id, user_id, username, action, resource_type, resource_id,
detail, remote_ip, created_at
FROM audit_log
WHERE resource_type = ? AND resource_id = ?
ORDER BY created_at DESC
LIMIT ?`
rows, err := db.Query(ctx, query, resourceType, resourceID, limit)
if err != nil {
return nil, fmt.Errorf("querying audit entries by resource: %w", err)
}
defer func() { _ = rows.Close() }()
return scanAuditRows(rows)
}
// CountAuditEntries returns the total number of audit log entries.
func CountAuditEntries(
ctx context.Context,
db *database.Database,
) (int, error) {
var count int
row := db.QueryRow(ctx, "SELECT COUNT(*) FROM audit_log")
err := row.Scan(&count)
if err != nil {
return 0, fmt.Errorf("counting audit entries: %w", err)
}
return count, nil
}
func scanAuditRows(rows *sql.Rows) ([]*AuditEntry, error) {
var entries []*AuditEntry
for rows.Next() {
entry := &AuditEntry{}
scanErr := rows.Scan(
&entry.ID, &entry.UserID, &entry.Username, &entry.Action,
&entry.ResourceType, &entry.ResourceID, &entry.Detail,
&entry.RemoteIP, &entry.CreatedAt,
)
if scanErr != nil {
return nil, fmt.Errorf("scanning audit entry: %w", scanErr)
}
entries = append(entries, entry)
}
rowsErr := rows.Err()
if rowsErr != nil {
return nil, fmt.Errorf("iterating audit entries: %w", rowsErr)
}
return entries, nil
}

View File

@@ -23,6 +23,7 @@ const (
testBranch = "main"
testValue = "value"
testEventType = "push"
testAdmin = "admin"
)
func setupTestDB(t *testing.T) (*database.Database, func()) {
@@ -183,7 +184,7 @@ func TestUserExists(t *testing.T) {
defer cleanup()
user := models.NewUser(testDB)
user.Username = "admin"
user.Username = testAdmin
user.PasswordHash = testHash
err := user.Save(context.Background())
@@ -781,6 +782,179 @@ func TestCascadeDelete(t *testing.T) {
})
}
// AuditEntry Tests.
func TestAuditEntryCreateAndFind(t *testing.T) {
t.Parallel()
testDB, cleanup := setupTestDB(t)
defer cleanup()
entry := models.NewAuditEntry(testDB)
entry.Username = testAdmin
entry.Action = models.AuditActionLogin
entry.ResourceType = models.AuditResourceSession
err := entry.Save(context.Background())
require.NoError(t, err)
assert.NotZero(t, entry.ID)
entries, err := models.FindAuditEntries(context.Background(), testDB, 10)
require.NoError(t, err)
require.Len(t, entries, 1)
assert.Equal(t, testAdmin, entries[0].Username)
assert.Equal(t, models.AuditActionLogin, entries[0].Action)
assert.Equal(t, models.AuditResourceSession, entries[0].ResourceType)
}
func TestAuditEntryWithAllFields(t *testing.T) {
t.Parallel()
testDB, cleanup := setupTestDB(t)
defer cleanup()
entry := models.NewAuditEntry(testDB)
entry.UserID = sql.NullInt64{Int64: 1, Valid: true}
entry.Username = testAdmin
entry.Action = models.AuditActionAppCreate
entry.ResourceType = models.AuditResourceApp
entry.ResourceID = sql.NullString{String: "app-123", Valid: true}
entry.Detail = sql.NullString{String: "created new app", Valid: true}
entry.RemoteIP = sql.NullString{String: "192.168.1.1", Valid: true}
err := entry.Save(context.Background())
require.NoError(t, err)
entries, err := models.FindAuditEntries(context.Background(), testDB, 10)
require.NoError(t, err)
require.Len(t, entries, 1)
assert.Equal(t, int64(1), entries[0].UserID.Int64)
assert.Equal(t, "app-123", entries[0].ResourceID.String)
assert.Equal(t, "created new app", entries[0].Detail.String)
assert.Equal(t, "192.168.1.1", entries[0].RemoteIP.String)
}
func TestAuditEntryFindByResource(t *testing.T) {
t.Parallel()
testDB, cleanup := setupTestDB(t)
defer cleanup()
// Create entries for different resources.
for _, action := range []models.AuditAction{
models.AuditActionAppCreate,
models.AuditActionAppUpdate,
models.AuditActionAppDeploy,
} {
entry := models.NewAuditEntry(testDB)
entry.Username = testAdmin
entry.Action = action
entry.ResourceType = models.AuditResourceApp
entry.ResourceID = sql.NullString{String: "app-1", Valid: true}
err := entry.Save(context.Background())
require.NoError(t, err)
}
// Create entry for a different resource.
entry := models.NewAuditEntry(testDB)
entry.Username = testAdmin
entry.Action = models.AuditActionLogin
entry.ResourceType = models.AuditResourceSession
err := entry.Save(context.Background())
require.NoError(t, err)
// Find by resource.
appEntries, err := models.FindAuditEntriesByResource(
context.Background(), testDB,
models.AuditResourceApp, "app-1", 10,
)
require.NoError(t, err)
assert.Len(t, appEntries, 3)
// All entries.
allEntries, err := models.FindAuditEntries(context.Background(), testDB, 10)
require.NoError(t, err)
assert.Len(t, allEntries, 4)
}
func TestAuditEntryCount(t *testing.T) {
t.Parallel()
testDB, cleanup := setupTestDB(t)
defer cleanup()
count, err := models.CountAuditEntries(context.Background(), testDB)
require.NoError(t, err)
assert.Equal(t, 0, count)
entry := models.NewAuditEntry(testDB)
entry.Username = testAdmin
entry.Action = models.AuditActionLogin
entry.ResourceType = models.AuditResourceSession
err = entry.Save(context.Background())
require.NoError(t, err)
count, err = models.CountAuditEntries(context.Background(), testDB)
require.NoError(t, err)
assert.Equal(t, 1, count)
}
func TestAuditEntryFindLimit(t *testing.T) {
t.Parallel()
testDB, cleanup := setupTestDB(t)
defer cleanup()
for range 5 {
entry := models.NewAuditEntry(testDB)
entry.Username = testAdmin
entry.Action = models.AuditActionLogin
entry.ResourceType = models.AuditResourceSession
err := entry.Save(context.Background())
require.NoError(t, err)
}
entries, err := models.FindAuditEntries(context.Background(), testDB, 3)
require.NoError(t, err)
assert.Len(t, entries, 3)
}
func TestAuditEntryOrderByCreatedAtDesc(t *testing.T) {
t.Parallel()
testDB, cleanup := setupTestDB(t)
defer cleanup()
actions := []models.AuditAction{
models.AuditActionLogin,
models.AuditActionAppCreate,
models.AuditActionLogout,
}
for _, action := range actions {
entry := models.NewAuditEntry(testDB)
entry.Username = testAdmin
entry.Action = action
entry.ResourceType = models.AuditResourceSession
err := entry.Save(context.Background())
require.NoError(t, err)
}
entries, err := models.FindAuditEntries(context.Background(), testDB, 10)
require.NoError(t, err)
require.Len(t, entries, 3)
// Newest first (logout was last inserted).
assert.Equal(t, models.AuditActionLogout, entries[0].Action)
assert.Equal(t, models.AuditActionAppCreate, entries[1].Action)
assert.Equal(t, models.AuditActionLogin, entries[2].Action)
}
// Helper function to create a test app.
func createTestApp(t *testing.T, testDB *database.Database) *models.App {
t.Helper()

View File

@@ -115,14 +115,13 @@ func (s *Server) SetupRoutes() {
r.Get("/apps", s.handlers.HandleAPIListApps())
r.Get("/apps/{id}", s.handlers.HandleAPIGetApp())
r.Get("/apps/{id}/deployments", s.handlers.HandleAPIListDeployments())
r.Get("/audit", s.handlers.HandleAPIAuditLog())
})
})
// Metrics endpoint (optional, with basic auth)
if s.params.Config.MetricsUsername != "" {
s.router.Group(func(r chi.Router) {
r.Use(s.mw.MetricsAuth())
r.Get("/metrics", promhttp.Handler().ServeHTTP)
})
}
// Metrics endpoint (always available, optionally protected with basic auth)
s.router.Group(func(r chi.Router) {
r.Use(s.mw.MetricsAuth())
r.Get("/metrics", promhttp.Handler().ServeHTTP)
})
}

View File

@@ -0,0 +1,153 @@
// Package audit provides audit logging for user actions.
package audit
import (
"context"
"database/sql"
"log/slog"
"net"
"net/http"
"strings"
"go.uber.org/fx"
"sneak.berlin/go/upaas/internal/database"
"sneak.berlin/go/upaas/internal/logger"
"sneak.berlin/go/upaas/internal/metrics"
"sneak.berlin/go/upaas/internal/models"
)
// ServiceParams contains dependencies for Service.
type ServiceParams struct {
fx.In
Logger *logger.Logger
Database *database.Database
Metrics *metrics.Metrics
}
// Service provides audit logging functionality.
type Service struct {
log *slog.Logger
db *database.Database
metrics *metrics.Metrics
}
// New creates a new audit Service.
func New(_ fx.Lifecycle, params ServiceParams) (*Service, error) {
return &Service{
log: params.Logger.Get(),
db: params.Database,
metrics: params.Metrics,
}, nil
}
// LogEntry records an audit event.
type LogEntry struct {
UserID int64
Username string
Action models.AuditAction
ResourceType models.AuditResourceType
ResourceID string
Detail string
RemoteIP string
}
// Log records an audit log entry and increments the audit metrics counter.
func (svc *Service) Log(ctx context.Context, entry LogEntry) {
auditEntry := models.NewAuditEntry(svc.db)
auditEntry.Username = entry.Username
auditEntry.Action = entry.Action
auditEntry.ResourceType = entry.ResourceType
if entry.UserID != 0 {
auditEntry.UserID = sql.NullInt64{Int64: entry.UserID, Valid: true}
}
if entry.ResourceID != "" {
auditEntry.ResourceID = sql.NullString{String: entry.ResourceID, Valid: true}
}
if entry.Detail != "" {
auditEntry.Detail = sql.NullString{String: entry.Detail, Valid: true}
}
if entry.RemoteIP != "" {
auditEntry.RemoteIP = sql.NullString{String: entry.RemoteIP, Valid: true}
}
err := auditEntry.Save(ctx)
if err != nil {
svc.log.Error("failed to save audit entry",
"error", err,
"action", entry.Action,
"username", entry.Username,
)
return
}
svc.metrics.AuditEventsTotal.WithLabelValues(string(entry.Action)).Inc()
svc.log.Info("audit",
"action", entry.Action,
"username", entry.Username,
"resource_type", entry.ResourceType,
"resource_id", entry.ResourceID,
)
}
// LogFromRequest records an audit log entry, extracting the remote IP from
// the HTTP request.
func (svc *Service) LogFromRequest(
ctx context.Context,
request *http.Request,
entry LogEntry,
) {
entry.RemoteIP = extractRemoteIP(request)
svc.Log(ctx, entry)
}
// extractRemoteIP extracts the client IP from the request, preferring
// X-Real-IP and X-Forwarded-For headers from trusted proxies.
func extractRemoteIP(r *http.Request) string {
// Check X-Real-IP first
if ip := strings.TrimSpace(r.Header.Get("X-Real-IP")); ip != "" {
return ip
}
// Check X-Forwarded-For (leftmost = client)
if xff := r.Header.Get("X-Forwarded-For"); xff != "" {
if parts := strings.SplitN(xff, ",", 2); len(parts) > 0 { //nolint:mnd // split limit
if ip := strings.TrimSpace(parts[0]); ip != "" {
return ip
}
}
}
// Fall back to RemoteAddr
host, _, err := net.SplitHostPort(r.RemoteAddr)
if err != nil {
return r.RemoteAddr
}
return host
}
// Recent returns the most recent audit log entries.
func (svc *Service) Recent(
ctx context.Context,
limit int,
) ([]*models.AuditEntry, error) {
return models.FindAuditEntries(ctx, svc.db, limit)
}
// ForResource returns audit log entries for a specific resource.
func (svc *Service) ForResource(
ctx context.Context,
resourceType models.AuditResourceType,
resourceID string,
limit int,
) ([]*models.AuditEntry, error) {
return models.FindAuditEntriesByResource(ctx, svc.db, resourceType, resourceID, limit)
}

View File

@@ -0,0 +1,196 @@
package audit_test
import (
"context"
"log/slog"
"net/http"
"net/http/httptest"
"os"
"testing"
"github.com/prometheus/client_golang/prometheus"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
"go.uber.org/fx"
"sneak.berlin/go/upaas/internal/config"
"sneak.berlin/go/upaas/internal/database"
"sneak.berlin/go/upaas/internal/globals"
"sneak.berlin/go/upaas/internal/logger"
"sneak.berlin/go/upaas/internal/metrics"
"sneak.berlin/go/upaas/internal/models"
"sneak.berlin/go/upaas/internal/service/audit"
)
func setupTestAuditService(t *testing.T) (*audit.Service, *database.Database) {
t.Helper()
globals.SetAppname("upaas-test")
globals.SetVersion("test")
tmpDir := t.TempDir()
cfg := &config.Config{
DataDir: tmpDir,
}
log := slog.New(slog.NewTextHandler(os.Stderr, nil))
logWrapper := logger.NewForTest(log)
db, err := database.New(fx.Lifecycle(nil), database.Params{
Logger: logWrapper,
Config: cfg,
})
require.NoError(t, err)
reg := prometheus.NewRegistry()
metricsInstance := metrics.NewForTest(reg)
svc, err := audit.New(fx.Lifecycle(nil), audit.ServiceParams{
Logger: logWrapper,
Database: db,
Metrics: metricsInstance,
})
require.NoError(t, err)
return svc, db
}
func TestAuditServiceLog(t *testing.T) {
t.Parallel()
svc, db := setupTestAuditService(t)
ctx := context.Background()
svc.Log(ctx, audit.LogEntry{
UserID: 1,
Username: "admin",
Action: models.AuditActionLogin,
ResourceType: models.AuditResourceSession,
Detail: "user logged in",
RemoteIP: "127.0.0.1",
})
entries, err := models.FindAuditEntries(ctx, db, 10)
require.NoError(t, err)
require.Len(t, entries, 1)
assert.Equal(t, "admin", entries[0].Username)
assert.Equal(t, models.AuditActionLogin, entries[0].Action)
assert.Equal(t, "127.0.0.1", entries[0].RemoteIP.String)
}
func TestAuditServiceLogFromRequest(t *testing.T) {
t.Parallel()
svc, db := setupTestAuditService(t)
ctx := context.Background()
request := httptest.NewRequest(http.MethodPost, "/apps", nil)
request.RemoteAddr = "10.0.0.1:12345"
svc.LogFromRequest(ctx, request, audit.LogEntry{
Username: "admin",
Action: models.AuditActionAppCreate,
ResourceType: models.AuditResourceApp,
ResourceID: "app-1",
Detail: "created app",
})
entries, err := models.FindAuditEntries(ctx, db, 10)
require.NoError(t, err)
require.Len(t, entries, 1)
assert.Equal(t, "10.0.0.1", entries[0].RemoteIP.String)
assert.Equal(t, "app-1", entries[0].ResourceID.String)
}
func TestAuditServiceLogFromRequestWithXRealIP(t *testing.T) {
t.Parallel()
svc, db := setupTestAuditService(t)
ctx := context.Background()
request := httptest.NewRequest(http.MethodPost, "/apps", nil)
request.Header.Set("X-Real-IP", "203.0.113.50")
svc.LogFromRequest(ctx, request, audit.LogEntry{
Username: "admin",
Action: models.AuditActionAppCreate,
ResourceType: models.AuditResourceApp,
})
entries, err := models.FindAuditEntries(ctx, db, 10)
require.NoError(t, err)
require.Len(t, entries, 1)
assert.Equal(t, "203.0.113.50", entries[0].RemoteIP.String)
}
func TestAuditServiceRecent(t *testing.T) {
t.Parallel()
svc, _ := setupTestAuditService(t)
ctx := context.Background()
for range 5 {
svc.Log(ctx, audit.LogEntry{
Username: "admin",
Action: models.AuditActionLogin,
ResourceType: models.AuditResourceSession,
})
}
entries, err := svc.Recent(ctx, 3)
require.NoError(t, err)
assert.Len(t, entries, 3)
}
func TestAuditServiceForResource(t *testing.T) {
t.Parallel()
svc, _ := setupTestAuditService(t)
ctx := context.Background()
// Log entries for different resources.
svc.Log(ctx, audit.LogEntry{
Username: "admin",
Action: models.AuditActionAppCreate,
ResourceType: models.AuditResourceApp,
ResourceID: "app-1",
})
svc.Log(ctx, audit.LogEntry{
Username: "admin",
Action: models.AuditActionAppDeploy,
ResourceType: models.AuditResourceApp,
ResourceID: "app-1",
})
svc.Log(ctx, audit.LogEntry{
Username: "admin",
Action: models.AuditActionAppCreate,
ResourceType: models.AuditResourceApp,
ResourceID: "app-2",
})
entries, err := svc.ForResource(ctx, models.AuditResourceApp, "app-1", 10)
require.NoError(t, err)
assert.Len(t, entries, 2)
}
func TestAuditServiceLogWithNoOptionalFields(t *testing.T) {
t.Parallel()
svc, db := setupTestAuditService(t)
ctx := context.Background()
svc.Log(ctx, audit.LogEntry{
Username: "system",
Action: models.AuditActionWebhookReceive,
ResourceType: models.AuditResourceWebhook,
})
entries, err := models.FindAuditEntries(ctx, db, 10)
require.NoError(t, err)
require.Len(t, entries, 1)
assert.False(t, entries[0].UserID.Valid)
assert.False(t, entries[0].ResourceID.Valid)
assert.False(t, entries[0].Detail.Valid)
assert.False(t, entries[0].RemoteIP.Valid)
}

View File

@@ -21,6 +21,7 @@ import (
"sneak.berlin/go/upaas/internal/database"
"sneak.berlin/go/upaas/internal/docker"
"sneak.berlin/go/upaas/internal/logger"
"sneak.berlin/go/upaas/internal/metrics"
"sneak.berlin/go/upaas/internal/models"
"sneak.berlin/go/upaas/internal/service/notify"
)
@@ -208,6 +209,7 @@ type ServiceParams struct {
Database *database.Database
Docker *docker.Client
Notify *notify.Service
Metrics *metrics.Metrics
}
// activeDeploy tracks a running deployment so it can be cancelled.
@@ -222,6 +224,7 @@ type Service struct {
db *database.Database
docker *docker.Client
notify *notify.Service
metrics *metrics.Metrics
config *config.Config
params *ServiceParams
activeDeploys sync.Map // map[string]*activeDeploy - per-app active deployment tracking
@@ -231,12 +234,13 @@ type Service struct {
// New creates a new deploy Service.
func New(lc fx.Lifecycle, params ServiceParams) (*Service, error) {
svc := &Service{
log: params.Logger.Get(),
db: params.Database,
docker: params.Docker,
notify: params.Notify,
config: params.Config,
params: &params,
log: params.Logger.Get(),
db: params.Database,
docker: params.Docker,
notify: params.Notify,
metrics: params.Metrics,
config: params.Config,
params: &params,
}
if lc != nil {
@@ -327,6 +331,11 @@ func (svc *Service) Deploy(
}
defer svc.unlockApp(app.ID)
// Track in-flight deployments
svc.metrics.DeploymentsInFlight.WithLabelValues(app.Name).Inc()
deployStart := time.Now()
// Set up cancellable context and register as active deploy
deployCtx, cancel := context.WithCancel(ctx)
done := make(chan struct{})
@@ -334,6 +343,7 @@ func (svc *Service) Deploy(
svc.activeDeploys.Store(app.ID, ad)
defer func() {
svc.metrics.DeploymentsInFlight.WithLabelValues(app.Name).Dec()
cancel()
close(done)
svc.activeDeploys.Delete(app.ID)
@@ -359,7 +369,7 @@ func (svc *Service) Deploy(
svc.notify.NotifyBuildStart(bgCtx, app, deployment)
return svc.runBuildAndDeploy(deployCtx, bgCtx, app, deployment)
return svc.runBuildAndDeploy(deployCtx, bgCtx, app, deployment, deployStart)
}
// Rollback rolls back an app to its previous image.
@@ -467,15 +477,20 @@ func (svc *Service) runBuildAndDeploy(
bgCtx context.Context,
app *models.App,
deployment *models.Deployment,
deployStart time.Time,
) error {
// Build phase with timeout
imageID, err := svc.buildImageWithTimeout(deployCtx, app, deployment)
if err != nil {
cancelErr := svc.checkCancelled(deployCtx, bgCtx, app, deployment, "")
if cancelErr != nil {
svc.recordDeployMetrics(app.Name, "cancelled", deployStart)
return cancelErr
}
svc.recordDeployMetrics(app.Name, "failed", deployStart)
return err
}
@@ -486,9 +501,13 @@ func (svc *Service) runBuildAndDeploy(
if err != nil {
cancelErr := svc.checkCancelled(deployCtx, bgCtx, app, deployment, imageID)
if cancelErr != nil {
svc.recordDeployMetrics(app.Name, "cancelled", deployStart)
return cancelErr
}
svc.recordDeployMetrics(app.Name, "failed", deployStart)
return err
}
@@ -504,11 +523,19 @@ func (svc *Service) runBuildAndDeploy(
// Use context.WithoutCancel to ensure health check completes even if
// the parent context is cancelled (e.g., HTTP request ends).
go svc.checkHealthAfterDelay(bgCtx, app, deployment)
go svc.checkHealthAfterDelay(bgCtx, app, deployment, deployStart)
return nil
}
// recordDeployMetrics records deployment completion metrics.
func (svc *Service) recordDeployMetrics(appName, status string, start time.Time) {
duration := time.Since(start).Seconds()
svc.metrics.DeploymentsTotal.WithLabelValues(appName, status).Inc()
svc.metrics.DeploymentDuration.WithLabelValues(appName, status).Observe(duration)
}
// buildImageWithTimeout runs the build phase with a timeout.
func (svc *Service) buildImageWithTimeout(
ctx context.Context,
@@ -1163,6 +1190,7 @@ func (svc *Service) checkHealthAfterDelay(
ctx context.Context,
app *models.App,
deployment *models.Deployment,
deployStart time.Time,
) {
svc.log.Info(
"waiting 60 seconds to check container health",
@@ -1189,6 +1217,8 @@ func (svc *Service) checkHealthAfterDelay(
svc.notify.NotifyDeployFailed(ctx, reloadedApp, deployment, err)
_ = deployment.MarkFinished(ctx, models.DeploymentStatusFailed)
svc.writeLogsToFile(reloadedApp, deployment)
svc.recordDeployMetrics(reloadedApp.Name, "failed", deployStart)
svc.metrics.ContainerHealthy.WithLabelValues(reloadedApp.Name).Set(0)
reloadedApp.Status = models.AppStatusError
_ = reloadedApp.Save(ctx)
@@ -1200,6 +1230,8 @@ func (svc *Service) checkHealthAfterDelay(
svc.notify.NotifyDeploySuccess(ctx, reloadedApp, deployment)
_ = deployment.MarkFinished(ctx, models.DeploymentStatusSuccess)
svc.writeLogsToFile(reloadedApp, deployment)
svc.recordDeployMetrics(reloadedApp.Name, "success", deployStart)
svc.metrics.ContainerHealthy.WithLabelValues(reloadedApp.Name).Set(1)
} else {
svc.log.Warn(
"container unhealthy after 60 seconds",
@@ -1208,6 +1240,8 @@ func (svc *Service) checkHealthAfterDelay(
svc.notify.NotifyDeployFailed(ctx, reloadedApp, deployment, ErrContainerUnhealthy)
_ = deployment.MarkFinished(ctx, models.DeploymentStatusFailed)
svc.writeLogsToFile(reloadedApp, deployment)
svc.recordDeployMetrics(reloadedApp.Name, "failed", deployStart)
svc.metrics.ContainerHealthy.WithLabelValues(reloadedApp.Name).Set(0)
reloadedApp.Status = models.AppStatusError
_ = reloadedApp.Save(ctx)
}

View File

@@ -7,12 +7,14 @@ import (
"encoding/json"
"fmt"
"log/slog"
"strconv"
"go.uber.org/fx"
"sneak.berlin/go/upaas/internal/database"
"sneak.berlin/go/upaas/internal/logger"
"sneak.berlin/go/upaas/internal/metrics"
"sneak.berlin/go/upaas/internal/models"
"sneak.berlin/go/upaas/internal/service/deploy"
)
@@ -24,23 +26,26 @@ type ServiceParams struct {
Logger *logger.Logger
Database *database.Database
Deploy *deploy.Service
Metrics *metrics.Metrics
}
// Service provides webhook handling functionality.
type Service struct {
log *slog.Logger
db *database.Database
deploy *deploy.Service
params *ServiceParams
log *slog.Logger
db *database.Database
deploy *deploy.Service
metrics *metrics.Metrics
params *ServiceParams
}
// New creates a new webhook Service.
func New(_ fx.Lifecycle, params ServiceParams) (*Service, error) {
return &Service{
log: params.Logger.Get(),
db: params.Database,
deploy: params.Deploy,
params: &params,
log: params.Logger.Get(),
db: params.Database,
deploy: params.Deploy,
metrics: params.Metrics,
params: &params,
}, nil
}
@@ -122,6 +127,10 @@ func (svc *Service) HandleWebhook(
"commit", commitSHA,
)
svc.metrics.WebhookEventsTotal.WithLabelValues(
app.Name, eventType, strconv.FormatBool(matched),
).Inc()
// If branch matches, trigger deployment
if matched {
svc.triggerDeployment(ctx, app, event)

View File

@@ -8,6 +8,7 @@ import (
"testing"
"time"
"github.com/prometheus/client_golang/prometheus"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
"go.uber.org/fx"
@@ -17,6 +18,7 @@ import (
"sneak.berlin/go/upaas/internal/docker"
"sneak.berlin/go/upaas/internal/globals"
"sneak.berlin/go/upaas/internal/logger"
"sneak.berlin/go/upaas/internal/metrics"
"sneak.berlin/go/upaas/internal/models"
"sneak.berlin/go/upaas/internal/service/deploy"
"sneak.berlin/go/upaas/internal/service/notify"
@@ -63,13 +65,17 @@ func setupTestService(t *testing.T) (*webhook.Service, *database.Database, func(
notifySvc, err := notify.New(fx.Lifecycle(nil), notify.ServiceParams{Logger: deps.logger})
require.NoError(t, err)
metricsInstance := metrics.NewForTest(prometheus.NewRegistry())
deploySvc, err := deploy.New(fx.Lifecycle(nil), deploy.ServiceParams{
Logger: deps.logger, Config: deps.config, Database: deps.db, Docker: dockerClient, Notify: notifySvc,
Metrics: metricsInstance,
})
require.NoError(t, err)
svc, err := webhook.New(fx.Lifecycle(nil), webhook.ServiceParams{
Logger: deps.logger, Database: deps.db, Deploy: deploySvc,
Metrics: metricsInstance,
})
require.NoError(t, err)