feat: add observability improvements (metrics, audit log, structured logging)
All checks were successful
Check / check (pull_request) Successful in 1m45s

- Add Prometheus metrics package (internal/metrics) with deployment,
  container health, webhook, HTTP request, and audit counters/histograms
- Add audit_log SQLite table via migration 007
- Add AuditEntry model with CRUD operations and query methods
- Add audit service (internal/service/audit) for recording user actions
- Instrument deploy service with deployment duration, count, and
  in-flight metrics; update the container health gauge when a deploy
  completes
- Instrument webhook service with event counters by app/type/matched
- Instrument HTTP middleware with request count, duration, and response
  size metrics; also log response bytes in structured request logs
- Add audit logging to all key handler operations: login/logout, app
  CRUD, deploy, cancel, rollback, restart/stop/start, webhook receipt,
  and initial setup
- Add GET /api/audit endpoint for querying recent audit entries
- Make /metrics endpoint always available (optionally auth-protected)
- Add comprehensive tests for metrics, audit model, and audit service
- Update existing test infrastructure with metrics and audit dependencies
- Update README with Observability section documenting all metrics,
  audit log, and structured logging
This commit is contained in:
clawbot
2026-03-17 02:23:44 -07:00
parent fd110e69db
commit f558e2cdd8
21 changed files with 1399 additions and 42 deletions

View File

@@ -119,6 +119,9 @@ func (h *Handlers) HandleAppCreate() http.HandlerFunc { //nolint:funlen // valid
return
}
h.auditLog(request, models.AuditActionAppCreate,
models.AuditResourceApp, createdApp.ID, "created app: "+createdApp.Name)
http.Redirect(writer, request, "/apps/"+createdApp.ID, http.StatusSeeOther)
}
}
@@ -289,6 +292,9 @@ func (h *Handlers) HandleAppUpdate() http.HandlerFunc { //nolint:funlen // valid
return
}
h.auditLog(request, models.AuditActionAppUpdate,
models.AuditResourceApp, application.ID, "updated app: "+application.Name)
redirectURL := "/apps/" + application.ID + "?success=updated"
http.Redirect(writer, request, redirectURL, http.StatusSeeOther)
}
@@ -344,6 +350,9 @@ func (h *Handlers) HandleAppDelete() http.HandlerFunc {
return
}
h.auditLog(request, models.AuditActionAppDelete,
models.AuditResourceApp, appID, "deleted app: "+application.Name)
http.Redirect(writer, request, "/", http.StatusSeeOther)
}
}
@@ -360,6 +369,9 @@ func (h *Handlers) HandleAppDeploy() http.HandlerFunc {
return
}
h.auditLog(request, models.AuditActionAppDeploy,
models.AuditResourceApp, application.ID, "manual deploy: "+application.Name)
// Trigger deployment in background with a detached context
// so the deployment continues even if the HTTP request is cancelled
deployCtx := context.WithoutCancel(request.Context())
@@ -399,6 +411,8 @@ func (h *Handlers) HandleCancelDeploy() http.HandlerFunc {
cancelled := h.deploy.CancelDeploy(application.ID)
if cancelled {
h.log.Info("deployment cancelled by user", "app", application.Name)
h.auditLog(request, models.AuditActionDeployCancel,
models.AuditResourceDeployment, application.ID, "cancelled deploy: "+application.Name)
}
http.Redirect(
@@ -430,6 +444,9 @@ func (h *Handlers) HandleAppRollback() http.HandlerFunc {
return
}
h.auditLog(request, models.AuditActionAppRollback,
models.AuditResourceApp, application.ID, "rolled back: "+application.Name)
http.Redirect(writer, request, "/apps/"+application.ID+"?success=rolledback", http.StatusSeeOther)
}
}
@@ -834,11 +851,29 @@ func (h *Handlers) handleContainerAction(
} else {
h.log.Info("container action completed",
"action", action, "app", application.Name, "container", containerID)
auditAction := containerActionToAuditAction(action)
h.auditLog(request, auditAction,
models.AuditResourceApp, appID, string(action)+" container: "+application.Name)
}
http.Redirect(writer, request, "/apps/"+appID, http.StatusSeeOther)
}
// containerActionToAuditAction translates a container action into the audit
// action that should be recorded for it. Restart, stop, and start each have a
// dedicated audit constant; any other action falls back to an audit action
// built by prefixing the action's string value with "app.".
func containerActionToAuditAction(action containerAction) models.AuditAction {
	var mapped models.AuditAction

	switch action {
	case actionRestart:
		mapped = models.AuditActionAppRestart
	case actionStop:
		mapped = models.AuditActionAppStop
	case actionStart:
		mapped = models.AuditActionAppStart
	default:
		// No dedicated constant: derive the audit action from the raw name.
		mapped = models.AuditAction("app." + string(action))
	}

	return mapped
}
// HandleAppRestart handles restarting an app's container.
func (h *Handlers) HandleAppRestart() http.HandlerFunc {
return func(writer http.ResponseWriter, request *http.Request) {