feat: add observability improvements (metrics, audit log, structured logging)
All checks were successful
Check / check (pull_request) Successful in 1m45s

- Add Prometheus metrics package (internal/metrics) with deployment,
  container health, webhook, HTTP request, and audit counters/histograms
- Add audit_log SQLite table via migration 007
- Add AuditEntry model with CRUD operations and query methods
- Add audit service (internal/service/audit) for recording user actions
- Instrument deploy service with deployment duration, count, and
  in-flight metrics; container health gauge updates on deploy completion
- Instrument webhook service with event counters by app/type/matched
- Instrument HTTP middleware with request count, duration, and response
  size metrics; also log response bytes in structured request logs
- Add audit logging to all key handler operations: login/logout, app
  CRUD, deploy, cancel, rollback, restart/stop/start, webhook receipt,
  and initial setup
- Add GET /api/audit endpoint for querying recent audit entries
- Make /metrics endpoint always available (optionally auth-protected)
- Add comprehensive tests for metrics, audit model, and audit service
- Update existing test infrastructure with metrics and audit dependencies
- Update README with Observability section documenting all metrics,
  audit log, and structured logging
This commit is contained in:
clawbot
2026-03-17 02:23:44 -07:00
parent fd110e69db
commit f558e2cdd8
21 changed files with 1399 additions and 42 deletions

View File

@@ -15,7 +15,9 @@ import (
"sneak.berlin/go/upaas/internal/globals"
"sneak.berlin/go/upaas/internal/healthcheck"
"sneak.berlin/go/upaas/internal/logger"
"sneak.berlin/go/upaas/internal/models"
"sneak.berlin/go/upaas/internal/service/app"
"sneak.berlin/go/upaas/internal/service/audit"
"sneak.berlin/go/upaas/internal/service/auth"
"sneak.berlin/go/upaas/internal/service/deploy"
"sneak.berlin/go/upaas/internal/service/webhook"
@@ -35,6 +37,7 @@ type Params struct {
Deploy *deploy.Service
Webhook *webhook.Service
Docker *docker.Client
Audit *audit.Service
}
// Handlers provides HTTP request handlers.
@@ -48,6 +51,7 @@ type Handlers struct {
deploy *deploy.Service
webhook *webhook.Service
docker *docker.Client
audit *audit.Service
globals *globals.Globals
}
@@ -63,10 +67,48 @@ func New(_ fx.Lifecycle, params Params) (*Handlers, error) {
deploy: params.Deploy,
webhook: params.Webhook,
docker: params.Docker,
audit: params.Audit,
globals: params.Globals,
}, nil
}
// currentUser returns the currently authenticated user, or nil if not authenticated.
func (h *Handlers) currentUser(request *http.Request) *models.User {
user, err := h.auth.GetCurrentUser(request.Context(), request)
if err != nil || user == nil {
return nil
}
return user
}
// auditLog records an audit entry for the current request.
func (h *Handlers) auditLog(
request *http.Request,
action models.AuditAction,
resourceType models.AuditResourceType,
resourceID string,
detail string,
) {
user := h.currentUser(request)
entry := audit.LogEntry{
Action: action,
ResourceType: resourceType,
ResourceID: resourceID,
Detail: detail,
}
if user != nil {
entry.UserID = user.ID
entry.Username = user.Username
} else {
entry.Username = "anonymous"
}
h.audit.LogFromRequest(request.Context(), request, entry)
}
// addGlobals adds version info and CSRF token to template data map.
func (h *Handlers) addGlobals(
data map[string]any,