feat: add observability improvements (metrics, audit log, structured logging)
All checks were successful
Check / check (pull_request) Successful in 1m45s
All checks were successful
Check / check (pull_request) Successful in 1m45s
- Add Prometheus metrics package (internal/metrics) with deployment, container health, webhook, HTTP request, and audit counters/histograms
- Add audit_log SQLite table via migration 007
- Add AuditEntry model with CRUD operations and query methods
- Add audit service (internal/service/audit) for recording user actions
- Instrument deploy service with deployment duration, count, and in-flight metrics; container health gauge updates on deploy completion
- Instrument webhook service with event counters by app/type/matched
- Instrument HTTP middleware with request count, duration, and response size metrics; also log response bytes in structured request logs
- Add audit logging to all key handler operations: login/logout, app CRUD, deploy, cancel, rollback, restart/stop/start, webhook receipt, and initial setup
- Add GET /api/audit endpoint for querying recent audit entries
- Make /metrics endpoint always available (optionally auth-protected)
- Add comprehensive tests for metrics, audit model, and audit service
- Update existing test infrastructure with metrics and audit dependencies
- Update README with Observability section documenting all metrics, audit log, and structured logging
This commit is contained in:
@@ -1,6 +1,7 @@
|
||||
package handlers
|
||||
|
||||
import (
|
||||
"database/sql"
|
||||
"encoding/json"
|
||||
"net/http"
|
||||
"strconv"
|
||||
@@ -120,6 +121,9 @@ func (h *Handlers) HandleAPILoginPOST() http.HandlerFunc {
|
||||
return
|
||||
}
|
||||
|
||||
h.auditLog(request, models.AuditActionLogin,
|
||||
models.AuditResourceSession, "", "api login")
|
||||
|
||||
h.respondJSON(writer, request, loginResponse{
|
||||
UserID: user.ID,
|
||||
Username: user.Username,
|
||||
@@ -243,3 +247,79 @@ func (h *Handlers) HandleAPIWhoAmI() http.HandlerFunc {
|
||||
}, http.StatusOK)
|
||||
}
|
||||
}
|
||||
|
||||
// Bounds applied to the number of entries served by the audit log API.
const (
	// auditLogDefaultLimit is how many audit entries are returned when the
	// request does not supply a usable limit.
	auditLogDefaultLimit = 50

	// auditLogMaxLimit is the largest number of audit entries a single
	// request may ask for.
	auditLogMaxLimit = 500
)
|
||||
|
||||
// HandleAPIAuditLog returns a handler that lists recent audit log entries.
|
||||
func (h *Handlers) HandleAPIAuditLog() http.HandlerFunc {
|
||||
type auditEntryResponse struct {
|
||||
ID int64 `json:"id"`
|
||||
UserID *int64 `json:"userId,omitempty"`
|
||||
Username string `json:"username"`
|
||||
Action string `json:"action"`
|
||||
ResourceType string `json:"resourceType"`
|
||||
ResourceID string `json:"resourceId,omitempty"`
|
||||
Detail string `json:"detail,omitempty"`
|
||||
RemoteIP string `json:"remoteIp,omitempty"`
|
||||
CreatedAt string `json:"createdAt"`
|
||||
}
|
||||
|
||||
return func(writer http.ResponseWriter, request *http.Request) {
|
||||
limit := auditLogDefaultLimit
|
||||
|
||||
if limitStr := request.URL.Query().Get("limit"); limitStr != "" {
|
||||
parsed, parseErr := strconv.Atoi(limitStr)
|
||||
if parseErr == nil && parsed > 0 && parsed <= auditLogMaxLimit {
|
||||
limit = parsed
|
||||
}
|
||||
}
|
||||
|
||||
entries, err := h.audit.Recent(request.Context(), limit)
|
||||
if err != nil {
|
||||
h.log.Error("failed to fetch audit log", "error", err)
|
||||
h.respondJSON(writer, request,
|
||||
map[string]string{"error": "failed to fetch audit log"},
|
||||
http.StatusInternalServerError)
|
||||
|
||||
return
|
||||
}
|
||||
|
||||
result := make([]auditEntryResponse, 0, len(entries))
|
||||
|
||||
for _, e := range entries {
|
||||
entry := auditEntryResponse{
|
||||
ID: e.ID,
|
||||
Username: e.Username,
|
||||
Action: string(e.Action),
|
||||
ResourceType: string(e.ResourceType),
|
||||
CreatedAt: e.CreatedAt.UTC().Format("2006-01-02T15:04:05Z"),
|
||||
}
|
||||
|
||||
if e.UserID.Valid {
|
||||
id := e.UserID.Int64
|
||||
entry.UserID = &id
|
||||
}
|
||||
|
||||
entry.ResourceID = nullStringValue(e.ResourceID)
|
||||
entry.Detail = nullStringValue(e.Detail)
|
||||
entry.RemoteIP = nullStringValue(e.RemoteIP)
|
||||
|
||||
result = append(result, entry)
|
||||
}
|
||||
|
||||
h.respondJSON(writer, request, result, http.StatusOK)
|
||||
}
|
||||
}
|
||||
|
||||
// nullStringValue flattens a nullable SQL string: it yields ns.String when
// ns is marked valid and the empty string when it is not.
func nullStringValue(ns sql.NullString) string {
	if !ns.Valid {
		return ""
	}

	return ns.String
}
|
||||
|
||||
@@ -119,6 +119,9 @@ func (h *Handlers) HandleAppCreate() http.HandlerFunc { //nolint:funlen // valid
|
||||
return
|
||||
}
|
||||
|
||||
h.auditLog(request, models.AuditActionAppCreate,
|
||||
models.AuditResourceApp, createdApp.ID, "created app: "+createdApp.Name)
|
||||
|
||||
http.Redirect(writer, request, "/apps/"+createdApp.ID, http.StatusSeeOther)
|
||||
}
|
||||
}
|
||||
@@ -289,6 +292,9 @@ func (h *Handlers) HandleAppUpdate() http.HandlerFunc { //nolint:funlen // valid
|
||||
return
|
||||
}
|
||||
|
||||
h.auditLog(request, models.AuditActionAppUpdate,
|
||||
models.AuditResourceApp, application.ID, "updated app: "+application.Name)
|
||||
|
||||
redirectURL := "/apps/" + application.ID + "?success=updated"
|
||||
http.Redirect(writer, request, redirectURL, http.StatusSeeOther)
|
||||
}
|
||||
@@ -344,6 +350,9 @@ func (h *Handlers) HandleAppDelete() http.HandlerFunc {
|
||||
return
|
||||
}
|
||||
|
||||
h.auditLog(request, models.AuditActionAppDelete,
|
||||
models.AuditResourceApp, appID, "deleted app: "+application.Name)
|
||||
|
||||
http.Redirect(writer, request, "/", http.StatusSeeOther)
|
||||
}
|
||||
}
|
||||
@@ -360,6 +369,9 @@ func (h *Handlers) HandleAppDeploy() http.HandlerFunc {
|
||||
return
|
||||
}
|
||||
|
||||
h.auditLog(request, models.AuditActionAppDeploy,
|
||||
models.AuditResourceApp, application.ID, "manual deploy: "+application.Name)
|
||||
|
||||
// Trigger deployment in background with a detached context
|
||||
// so the deployment continues even if the HTTP request is cancelled
|
||||
deployCtx := context.WithoutCancel(request.Context())
|
||||
@@ -399,6 +411,8 @@ func (h *Handlers) HandleCancelDeploy() http.HandlerFunc {
|
||||
cancelled := h.deploy.CancelDeploy(application.ID)
|
||||
if cancelled {
|
||||
h.log.Info("deployment cancelled by user", "app", application.Name)
|
||||
h.auditLog(request, models.AuditActionDeployCancel,
|
||||
models.AuditResourceDeployment, application.ID, "cancelled deploy: "+application.Name)
|
||||
}
|
||||
|
||||
http.Redirect(
|
||||
@@ -430,6 +444,9 @@ func (h *Handlers) HandleAppRollback() http.HandlerFunc {
|
||||
return
|
||||
}
|
||||
|
||||
h.auditLog(request, models.AuditActionAppRollback,
|
||||
models.AuditResourceApp, application.ID, "rolled back: "+application.Name)
|
||||
|
||||
http.Redirect(writer, request, "/apps/"+application.ID+"?success=rolledback", http.StatusSeeOther)
|
||||
}
|
||||
}
|
||||
@@ -834,11 +851,29 @@ func (h *Handlers) handleContainerAction(
|
||||
} else {
|
||||
h.log.Info("container action completed",
|
||||
"action", action, "app", application.Name, "container", containerID)
|
||||
|
||||
auditAction := containerActionToAuditAction(action)
|
||||
h.auditLog(request, auditAction,
|
||||
models.AuditResourceApp, appID, string(action)+" container: "+application.Name)
|
||||
}
|
||||
|
||||
http.Redirect(writer, request, "/apps/"+appID, http.StatusSeeOther)
|
||||
}
|
||||
|
||||
// containerActionToAuditAction maps container actions to audit actions.
|
||||
func containerActionToAuditAction(action containerAction) models.AuditAction {
|
||||
switch action {
|
||||
case actionRestart:
|
||||
return models.AuditActionAppRestart
|
||||
case actionStop:
|
||||
return models.AuditActionAppStop
|
||||
case actionStart:
|
||||
return models.AuditActionAppStart
|
||||
default:
|
||||
return models.AuditAction("app." + string(action))
|
||||
}
|
||||
}
|
||||
|
||||
// HandleAppRestart handles restarting an app's container.
|
||||
func (h *Handlers) HandleAppRestart() http.HandlerFunc {
|
||||
return func(writer http.ResponseWriter, request *http.Request) {
|
||||
|
||||
@@ -3,6 +3,7 @@ package handlers
|
||||
import (
|
||||
"net/http"
|
||||
|
||||
"sneak.berlin/go/upaas/internal/models"
|
||||
"sneak.berlin/go/upaas/templates"
|
||||
)
|
||||
|
||||
@@ -61,6 +62,9 @@ func (h *Handlers) HandleLoginPOST() http.HandlerFunc {
|
||||
return
|
||||
}
|
||||
|
||||
h.auditLog(request, models.AuditActionLogin,
|
||||
models.AuditResourceSession, "", "user logged in")
|
||||
|
||||
http.Redirect(writer, request, "/", http.StatusSeeOther)
|
||||
}
|
||||
}
|
||||
@@ -68,6 +72,9 @@ func (h *Handlers) HandleLoginPOST() http.HandlerFunc {
|
||||
// HandleLogout handles logout requests.
|
||||
func (h *Handlers) HandleLogout() http.HandlerFunc {
|
||||
return func(writer http.ResponseWriter, request *http.Request) {
|
||||
h.auditLog(request, models.AuditActionLogout,
|
||||
models.AuditResourceSession, "", "user logged out")
|
||||
|
||||
destroyErr := h.auth.DestroySession(writer, request)
|
||||
if destroyErr != nil {
|
||||
h.log.Error("failed to destroy session", "error", destroyErr)
|
||||
|
||||
@@ -15,7 +15,9 @@ import (
|
||||
"sneak.berlin/go/upaas/internal/globals"
|
||||
"sneak.berlin/go/upaas/internal/healthcheck"
|
||||
"sneak.berlin/go/upaas/internal/logger"
|
||||
"sneak.berlin/go/upaas/internal/models"
|
||||
"sneak.berlin/go/upaas/internal/service/app"
|
||||
"sneak.berlin/go/upaas/internal/service/audit"
|
||||
"sneak.berlin/go/upaas/internal/service/auth"
|
||||
"sneak.berlin/go/upaas/internal/service/deploy"
|
||||
"sneak.berlin/go/upaas/internal/service/webhook"
|
||||
@@ -35,6 +37,7 @@ type Params struct {
|
||||
Deploy *deploy.Service
|
||||
Webhook *webhook.Service
|
||||
Docker *docker.Client
|
||||
Audit *audit.Service
|
||||
}
|
||||
|
||||
// Handlers provides HTTP request handlers.
|
||||
@@ -48,6 +51,7 @@ type Handlers struct {
|
||||
deploy *deploy.Service
|
||||
webhook *webhook.Service
|
||||
docker *docker.Client
|
||||
audit *audit.Service
|
||||
globals *globals.Globals
|
||||
}
|
||||
|
||||
@@ -63,10 +67,48 @@ func New(_ fx.Lifecycle, params Params) (*Handlers, error) {
|
||||
deploy: params.Deploy,
|
||||
webhook: params.Webhook,
|
||||
docker: params.Docker,
|
||||
audit: params.Audit,
|
||||
globals: params.Globals,
|
||||
}, nil
|
||||
}
|
||||
|
||||
// currentUser returns the currently authenticated user, or nil if not authenticated.
|
||||
func (h *Handlers) currentUser(request *http.Request) *models.User {
|
||||
user, err := h.auth.GetCurrentUser(request.Context(), request)
|
||||
if err != nil || user == nil {
|
||||
return nil
|
||||
}
|
||||
|
||||
return user
|
||||
}
|
||||
|
||||
// auditLog records an audit entry for the current request.
|
||||
func (h *Handlers) auditLog(
|
||||
request *http.Request,
|
||||
action models.AuditAction,
|
||||
resourceType models.AuditResourceType,
|
||||
resourceID string,
|
||||
detail string,
|
||||
) {
|
||||
user := h.currentUser(request)
|
||||
|
||||
entry := audit.LogEntry{
|
||||
Action: action,
|
||||
ResourceType: resourceType,
|
||||
ResourceID: resourceID,
|
||||
Detail: detail,
|
||||
}
|
||||
|
||||
if user != nil {
|
||||
entry.UserID = user.ID
|
||||
entry.Username = user.Username
|
||||
} else {
|
||||
entry.Username = "anonymous"
|
||||
}
|
||||
|
||||
h.audit.LogFromRequest(request.Context(), request, entry)
|
||||
}
|
||||
|
||||
// addGlobals adds version info and CSRF token to template data map.
|
||||
func (h *Handlers) addGlobals(
|
||||
data map[string]any,
|
||||
|
||||
@@ -11,6 +11,7 @@ import (
|
||||
"time"
|
||||
|
||||
"github.com/go-chi/chi/v5"
|
||||
"github.com/prometheus/client_golang/prometheus"
|
||||
"github.com/stretchr/testify/assert"
|
||||
"github.com/stretchr/testify/require"
|
||||
"go.uber.org/fx"
|
||||
@@ -24,8 +25,10 @@ import (
|
||||
"sneak.berlin/go/upaas/internal/handlers"
|
||||
"sneak.berlin/go/upaas/internal/healthcheck"
|
||||
"sneak.berlin/go/upaas/internal/logger"
|
||||
"sneak.berlin/go/upaas/internal/metrics"
|
||||
"sneak.berlin/go/upaas/internal/middleware"
|
||||
"sneak.berlin/go/upaas/internal/service/app"
|
||||
"sneak.berlin/go/upaas/internal/service/audit"
|
||||
"sneak.berlin/go/upaas/internal/service/auth"
|
||||
"sneak.berlin/go/upaas/internal/service/deploy"
|
||||
"sneak.berlin/go/upaas/internal/service/notify"
|
||||
@@ -92,7 +95,8 @@ func createAppServices(
|
||||
logInstance *logger.Logger,
|
||||
dbInstance *database.Database,
|
||||
cfg *config.Config,
|
||||
) (*auth.Service, *app.Service, *deploy.Service, *webhook.Service, *docker.Client) {
|
||||
metricsInstance *metrics.Metrics,
|
||||
) (*auth.Service, *app.Service, *deploy.Service, *webhook.Service, *docker.Client, *audit.Service) {
|
||||
t.Helper()
|
||||
|
||||
authSvc, authErr := auth.New(fx.Lifecycle(nil), auth.ServiceParams{
|
||||
@@ -125,6 +129,7 @@ func createAppServices(
|
||||
Database: dbInstance,
|
||||
Docker: dockerClient,
|
||||
Notify: notifySvc,
|
||||
Metrics: metricsInstance,
|
||||
})
|
||||
require.NoError(t, deployErr)
|
||||
|
||||
@@ -132,10 +137,18 @@ func createAppServices(
|
||||
Logger: logInstance,
|
||||
Database: dbInstance,
|
||||
Deploy: deploySvc,
|
||||
Metrics: metricsInstance,
|
||||
})
|
||||
require.NoError(t, webhookErr)
|
||||
|
||||
return authSvc, appSvc, deploySvc, webhookSvc, dockerClient
|
||||
auditSvc, auditErr := audit.New(fx.Lifecycle(nil), audit.ServiceParams{
|
||||
Logger: logInstance,
|
||||
Database: dbInstance,
|
||||
Metrics: metricsInstance,
|
||||
})
|
||||
require.NoError(t, auditErr)
|
||||
|
||||
return authSvc, appSvc, deploySvc, webhookSvc, dockerClient, auditSvc
|
||||
}
|
||||
|
||||
func setupTestHandlers(t *testing.T) *testContext {
|
||||
@@ -145,11 +158,14 @@ func setupTestHandlers(t *testing.T) *testContext {
|
||||
|
||||
globalInstance, logInstance, dbInstance, hcInstance := createCoreServices(t, cfg)
|
||||
|
||||
authSvc, appSvc, deploySvc, webhookSvc, dockerClient := createAppServices(
|
||||
metricsInstance := metrics.NewForTest(prometheus.NewRegistry())
|
||||
|
||||
authSvc, appSvc, deploySvc, webhookSvc, dockerClient, auditSvc := createAppServices(
|
||||
t,
|
||||
logInstance,
|
||||
dbInstance,
|
||||
cfg,
|
||||
metricsInstance,
|
||||
)
|
||||
|
||||
handlersInstance, handlerErr := handlers.New(
|
||||
@@ -164,6 +180,7 @@ func setupTestHandlers(t *testing.T) *testContext {
|
||||
Deploy: deploySvc,
|
||||
Webhook: webhookSvc,
|
||||
Docker: dockerClient,
|
||||
Audit: auditSvc,
|
||||
},
|
||||
)
|
||||
require.NoError(t, handlerErr)
|
||||
@@ -173,6 +190,7 @@ func setupTestHandlers(t *testing.T) *testContext {
|
||||
Globals: globalInstance,
|
||||
Config: cfg,
|
||||
Auth: authSvc,
|
||||
Metrics: metricsInstance,
|
||||
})
|
||||
require.NoError(t, mwErr)
|
||||
|
||||
|
||||
@@ -3,6 +3,7 @@ package handlers
|
||||
import (
|
||||
"net/http"
|
||||
|
||||
"sneak.berlin/go/upaas/internal/models"
|
||||
"sneak.berlin/go/upaas/templates"
|
||||
)
|
||||
|
||||
@@ -111,6 +112,9 @@ func (h *Handlers) HandleSetupPOST() http.HandlerFunc {
|
||||
return
|
||||
}
|
||||
|
||||
h.auditLog(request, models.AuditActionSetup,
|
||||
models.AuditResourceUser, "", "initial setup completed")
|
||||
|
||||
http.Redirect(writer, request, "/", http.StatusSeeOther)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -7,13 +7,14 @@ import (
|
||||
"github.com/go-chi/chi/v5"
|
||||
|
||||
"sneak.berlin/go/upaas/internal/models"
|
||||
"sneak.berlin/go/upaas/internal/service/audit"
|
||||
)
|
||||
|
||||
// maxWebhookBodySize is the maximum allowed size, in bytes, of a webhook
// request body (1 MiB = 1024 * 1024 bytes).
const maxWebhookBodySize = 1024 * 1024
|
||||
|
||||
// HandleWebhook handles incoming Gitea webhooks.
|
||||
func (h *Handlers) HandleWebhook() http.HandlerFunc {
|
||||
func (h *Handlers) HandleWebhook() http.HandlerFunc { //nolint:funlen // audit logging adds necessary length
|
||||
return func(writer http.ResponseWriter, request *http.Request) {
|
||||
secret := chi.URLParam(request, "secret")
|
||||
if secret == "" {
|
||||
@@ -56,6 +57,15 @@ func (h *Handlers) HandleWebhook() http.HandlerFunc {
|
||||
eventType = "push"
|
||||
}
|
||||
|
||||
// Log webhook receipt
|
||||
h.audit.LogFromRequest(request.Context(), request, audit.LogEntry{
|
||||
Username: "webhook",
|
||||
Action: models.AuditActionWebhookReceive,
|
||||
ResourceType: models.AuditResourceWebhook,
|
||||
ResourceID: application.ID,
|
||||
Detail: "webhook from app: " + application.Name + ", event: " + eventType,
|
||||
})
|
||||
|
||||
// Process webhook
|
||||
webhookErr := h.webhook.HandleWebhook(
|
||||
request.Context(),
|
||||
|
||||
Reference in New Issue
Block a user