feat: add observability improvements (metrics, audit log, structured logging)
All checks were successful
Check / check (pull_request) Successful in 1m45s
All checks were successful
Check / check (pull_request) Successful in 1m45s
- Add Prometheus metrics package (internal/metrics) with deployment, container health, webhook, HTTP request, and audit counters/histograms
- Add audit_log SQLite table via migration 007
- Add AuditEntry model with CRUD operations and query methods
- Add audit service (internal/service/audit) for recording user actions
- Instrument deploy service with deployment duration, count, and in-flight metrics; container health gauge updates on deploy completion
- Instrument webhook service with event counters by app/type/matched
- Instrument HTTP middleware with request count, duration, and response size metrics; also log response bytes in structured request logs
- Add audit logging to all key handler operations: login/logout, app CRUD, deploy, cancel, rollback, restart/stop/start, webhook receipt, and initial setup
- Add GET /api/audit endpoint for querying recent audit entries
- Make /metrics endpoint always available (optionally auth-protected)
- Add comprehensive tests for metrics, audit model, and audit service
- Update existing test infrastructure with metrics and audit dependencies
- Update README with Observability section documenting all metrics, audit log, and structured logging
This commit is contained in:
153
internal/service/audit/audit.go
Normal file
153
internal/service/audit/audit.go
Normal file
@@ -0,0 +1,153 @@
|
||||
// Package audit provides audit logging for user actions.
|
||||
package audit
|
||||
|
||||
import (
|
||||
"context"
|
||||
"database/sql"
|
||||
"log/slog"
|
||||
"net"
|
||||
"net/http"
|
||||
"strings"
|
||||
|
||||
"go.uber.org/fx"
|
||||
|
||||
"sneak.berlin/go/upaas/internal/database"
|
||||
"sneak.berlin/go/upaas/internal/logger"
|
||||
"sneak.berlin/go/upaas/internal/metrics"
|
||||
"sneak.berlin/go/upaas/internal/models"
|
||||
)
|
||||
|
||||
// ServiceParams contains dependencies for Service.
|
||||
type ServiceParams struct {
|
||||
fx.In
|
||||
|
||||
Logger *logger.Logger
|
||||
Database *database.Database
|
||||
Metrics *metrics.Metrics
|
||||
}
|
||||
|
||||
// Service provides audit logging functionality.
|
||||
type Service struct {
|
||||
log *slog.Logger
|
||||
db *database.Database
|
||||
metrics *metrics.Metrics
|
||||
}
|
||||
|
||||
// New creates a new audit Service.
|
||||
func New(_ fx.Lifecycle, params ServiceParams) (*Service, error) {
|
||||
return &Service{
|
||||
log: params.Logger.Get(),
|
||||
db: params.Database,
|
||||
metrics: params.Metrics,
|
||||
}, nil
|
||||
}
|
||||
|
||||
// LogEntry records an audit event.
|
||||
type LogEntry struct {
|
||||
UserID int64
|
||||
Username string
|
||||
Action models.AuditAction
|
||||
ResourceType models.AuditResourceType
|
||||
ResourceID string
|
||||
Detail string
|
||||
RemoteIP string
|
||||
}
|
||||
|
||||
// Log records an audit log entry and increments the audit metrics counter.
|
||||
func (svc *Service) Log(ctx context.Context, entry LogEntry) {
|
||||
auditEntry := models.NewAuditEntry(svc.db)
|
||||
auditEntry.Username = entry.Username
|
||||
auditEntry.Action = entry.Action
|
||||
auditEntry.ResourceType = entry.ResourceType
|
||||
|
||||
if entry.UserID != 0 {
|
||||
auditEntry.UserID = sql.NullInt64{Int64: entry.UserID, Valid: true}
|
||||
}
|
||||
|
||||
if entry.ResourceID != "" {
|
||||
auditEntry.ResourceID = sql.NullString{String: entry.ResourceID, Valid: true}
|
||||
}
|
||||
|
||||
if entry.Detail != "" {
|
||||
auditEntry.Detail = sql.NullString{String: entry.Detail, Valid: true}
|
||||
}
|
||||
|
||||
if entry.RemoteIP != "" {
|
||||
auditEntry.RemoteIP = sql.NullString{String: entry.RemoteIP, Valid: true}
|
||||
}
|
||||
|
||||
err := auditEntry.Save(ctx)
|
||||
if err != nil {
|
||||
svc.log.Error("failed to save audit entry",
|
||||
"error", err,
|
||||
"action", entry.Action,
|
||||
"username", entry.Username,
|
||||
)
|
||||
|
||||
return
|
||||
}
|
||||
|
||||
svc.metrics.AuditEventsTotal.WithLabelValues(string(entry.Action)).Inc()
|
||||
|
||||
svc.log.Info("audit",
|
||||
"action", entry.Action,
|
||||
"username", entry.Username,
|
||||
"resource_type", entry.ResourceType,
|
||||
"resource_id", entry.ResourceID,
|
||||
)
|
||||
}
|
||||
|
||||
// LogFromRequest records an audit log entry, extracting the remote IP from
|
||||
// the HTTP request.
|
||||
func (svc *Service) LogFromRequest(
|
||||
ctx context.Context,
|
||||
request *http.Request,
|
||||
entry LogEntry,
|
||||
) {
|
||||
entry.RemoteIP = extractRemoteIP(request)
|
||||
svc.Log(ctx, entry)
|
||||
}
|
||||
|
||||
// extractRemoteIP returns a best-effort client IP address for r.
//
// Sources are consulted in this order:
//
//  1. the X-Real-IP header,
//  2. the leftmost entry of the X-Forwarded-For header,
//  3. the host part of r.RemoteAddr (or RemoteAddr verbatim when it has no
//     host:port form).
//
// Both headers can be set by the client, and this function has no notion of
// which proxies are trusted. A header value is therefore only accepted when
// it parses as an IP address; anything else is ignored and the next source is
// tried, so arbitrary text cannot end up in the audit record. The address can
// still be spoofed unless a proxy in front of the service overwrites these
// headers.
func extractRemoteIP(r *http.Request) string {
	if ip := strings.TrimSpace(r.Header.Get("X-Real-IP")); net.ParseIP(ip) != nil {
		return ip
	}

	// X-Forwarded-For is a comma-separated chain; the leftmost entry is the
	// original client as reported by the first proxy.
	leftmost, _, _ := strings.Cut(r.Header.Get("X-Forwarded-For"), ",")
	if ip := strings.TrimSpace(leftmost); net.ParseIP(ip) != nil {
		return ip
	}

	host, _, err := net.SplitHostPort(r.RemoteAddr)
	if err != nil {
		// RemoteAddr is not in host:port form; return it unchanged.
		return r.RemoteAddr
	}

	return host
}
|
||||
|
||||
// Recent returns the most recent audit log entries.
|
||||
func (svc *Service) Recent(
|
||||
ctx context.Context,
|
||||
limit int,
|
||||
) ([]*models.AuditEntry, error) {
|
||||
return models.FindAuditEntries(ctx, svc.db, limit)
|
||||
}
|
||||
|
||||
// ForResource returns audit log entries for a specific resource.
|
||||
func (svc *Service) ForResource(
|
||||
ctx context.Context,
|
||||
resourceType models.AuditResourceType,
|
||||
resourceID string,
|
||||
limit int,
|
||||
) ([]*models.AuditEntry, error) {
|
||||
return models.FindAuditEntriesByResource(ctx, svc.db, resourceType, resourceID, limit)
|
||||
}
|
||||
Reference in New Issue
Block a user