feat: add observability improvements (metrics, audit log, structured logging)
All checks were successful
Check / check (pull_request) Successful in 1m45s

- Add Prometheus metrics package (internal/metrics) with deployment,
  container health, webhook, HTTP request, and audit counters/histograms
- Add audit_log SQLite table via migration 007
- Add AuditEntry model with CRUD operations and query methods
- Add audit service (internal/service/audit) for recording user actions
- Instrument deploy service with deployment duration, count, and
  in-flight metrics; update the container health gauge on deploy completion
- Instrument webhook service with event counters by app/type/matched
- Instrument HTTP middleware with request count, duration, and response
  size metrics; also log response bytes in structured request logs
- Add audit logging to all key handler operations: login/logout, app
  CRUD, deploy, cancel, rollback, restart/stop/start, webhook receipt,
  and initial setup
- Add GET /api/audit endpoint for querying recent audit entries
- Make /metrics endpoint always available (optionally auth-protected)
- Add comprehensive tests for metrics, audit model, and audit service
- Update existing test infrastructure with metrics and audit dependencies
- Update README with Observability section documenting all metrics,
  audit log, and structured logging
This commit is contained in:
clawbot
2026-03-17 02:23:44 -07:00
parent fd110e69db
commit f558e2cdd8
21 changed files with 1399 additions and 42 deletions

View File

@@ -1,6 +1,7 @@
package handlers
import (
"database/sql"
"encoding/json"
"net/http"
"strconv"
@@ -120,6 +121,9 @@ func (h *Handlers) HandleAPILoginPOST() http.HandlerFunc {
return
}
h.auditLog(request, models.AuditActionLogin,
models.AuditResourceSession, "", "api login")
h.respondJSON(writer, request, loginResponse{
UserID: user.ID,
Username: user.Username,
@@ -243,3 +247,79 @@ func (h *Handlers) HandleAPIWhoAmI() http.HandlerFunc {
}, http.StatusOK)
}
}
// Bounds applied to the number of audit entries a single request may return.
const (
	// auditLogDefaultLimit is how many audit entries are returned when the
	// caller supplies no usable limit.
	auditLogDefaultLimit = 50

	// auditLogMaxLimit is the largest number of audit entries a caller may
	// request.
	auditLogMaxLimit = 500
)
// HandleAPIAuditLog builds the handler that serves recent audit log entries.
//
// The optional "limit" query parameter selects how many entries to fetch. A
// value that is not an integer, is below 1, or is above auditLogMaxLimit is
// ignored and auditLogDefaultLimit is used instead. If fetching the entries
// fails, the error is logged and an error body is sent with
// http.StatusInternalServerError.
func (h *Handlers) HandleAPIAuditLog() http.HandlerFunc {
	// auditEntryResponse is the per-entry shape handed to respondJSON.
	type auditEntryResponse struct {
		ID           int64  `json:"id"`
		UserID       *int64 `json:"userId,omitempty"`
		Username     string `json:"username"`
		Action       string `json:"action"`
		ResourceType string `json:"resourceType"`
		ResourceID   string `json:"resourceId,omitempty"`
		Detail       string `json:"detail,omitempty"`
		RemoteIP     string `json:"remoteIp,omitempty"`
		CreatedAt    string `json:"createdAt"`
	}

	return func(writer http.ResponseWriter, request *http.Request) {
		limit := auditLogDefaultLimit

		raw := request.URL.Query().Get("limit")
		if raw != "" {
			n, convErr := strconv.Atoi(raw)
			// Out-of-range or malformed values silently keep the default.
			if convErr == nil && n >= 1 && n <= auditLogMaxLimit {
				limit = n
			}
		}

		entries, err := h.audit.Recent(request.Context(), limit)
		if err != nil {
			h.log.Error("failed to fetch audit log", "error", err)

			errBody := map[string]string{"error": "failed to fetch audit log"}
			h.respondJSON(writer, request, errBody, http.StatusInternalServerError)

			return
		}

		// Allocated up front so the slice is non-nil even with no entries.
		out := make([]auditEntryResponse, 0, len(entries))

		for _, rec := range entries {
			item := auditEntryResponse{
				ID:           rec.ID,
				Username:     rec.Username,
				Action:       string(rec.Action),
				ResourceType: string(rec.ResourceType),
				ResourceID:   nullStringValue(rec.ResourceID),
				Detail:       nullStringValue(rec.Detail),
				RemoteIP:     nullStringValue(rec.RemoteIP),
				CreatedAt:    rec.CreatedAt.UTC().Format("2006-01-02T15:04:05Z"),
			}

			// UserID stays nil (and is omitted) when the stored value is NULL.
			if rec.UserID.Valid {
				uid := rec.UserID.Int64
				item.UserID = &uid
			}

			out = append(out, item)
		}

		h.respondJSON(writer, request, out, http.StatusOK)
	}
}
// nullStringValue unwraps a sql.NullString: it yields ns.String when ns.Valid
// is true and the empty string when it is false.
func nullStringValue(ns sql.NullString) string {
	if !ns.Valid {
		return ""
	}

	return ns.String
}