feat: add observability improvements (metrics, audit log, structured logging)
All checks were successful
Check / check (pull_request) Successful in 1m45s

- Add Prometheus metrics package (internal/metrics) with deployment,
  container health, webhook, HTTP request, and audit counters/histograms
- Add audit_log SQLite table via migration 007
- Add AuditEntry model with insert, query, and count methods
- Add audit service (internal/service/audit) for recording user actions
- Instrument deploy service with deployment duration, count, and
  in-flight metrics; container health gauge updates on deploy completion
- Instrument webhook service with event counters by app/type/matched
- Instrument HTTP middleware with request count, duration, and response
  size metrics; also log response bytes in structured request logs
- Add audit logging to all key handler operations: login/logout, app
  CRUD, deploy, cancel, rollback, restart/stop/start, webhook receipt,
  and initial setup
- Add GET /api/audit endpoint for querying recent audit entries
- Make /metrics endpoint always available (optionally auth-protected)
- Add comprehensive tests for metrics, audit model, and audit service
- Update existing test infrastructure with metrics and audit dependencies
- Update README with Observability section documenting all metrics,
  audit log, and structured logging
This commit is contained in:
clawbot
2026-03-17 02:23:44 -07:00
parent fd110e69db
commit f558e2cdd8
21 changed files with 1399 additions and 42 deletions

View File

@@ -0,0 +1,193 @@
package models
import (
"context"
"database/sql"
"fmt"
"time"
"sneak.berlin/go/upaas/internal/database"
)
// AuditAction represents the type of audited user action.
//
// It is a string type; the value is what AuditEntry.Save binds to the
// "action" column of the audit_log table, so the constant values below are
// the strings that end up in stored rows.
type AuditAction string
// Audit action constants.
//
// Values are either a bare verb ("login") or "<resource>.<verb>"
// ("app.create").
const (
// Session and first-run actions.
AuditActionLogin AuditAction = "login"
AuditActionLogout AuditAction = "logout"
// App actions ("app.*").
AuditActionAppCreate AuditAction = "app.create"
AuditActionAppUpdate AuditAction = "app.update"
AuditActionAppDelete AuditAction = "app.delete"
AuditActionAppDeploy AuditAction = "app.deploy"
AuditActionAppRollback AuditAction = "app.rollback"
AuditActionAppRestart AuditAction = "app.restart"
AuditActionAppStop AuditAction = "app.stop"
AuditActionAppStart AuditAction = "app.start"
// Deployment actions ("deploy.*").
AuditActionDeployCancel AuditAction = "deploy.cancel"
// App configuration actions: environment variables, labels, volumes, ports.
AuditActionEnvVarSave AuditAction = "env_var.save"
AuditActionLabelAdd AuditAction = "label.add"
AuditActionLabelEdit AuditAction = "label.edit"
AuditActionLabelDelete AuditAction = "label.delete"
AuditActionVolumeAdd AuditAction = "volume.add"
AuditActionVolumeEdit AuditAction = "volume.edit"
AuditActionVolumeDelete AuditAction = "volume.delete"
AuditActionPortAdd AuditAction = "port.add"
AuditActionPortDelete AuditAction = "port.delete"
// Miscellaneous actions.
AuditActionSetup AuditAction = "setup"
AuditActionWebhookReceive AuditAction = "webhook.receive"
)
// AuditResourceType represents the type of resource being acted on.
//
// It is a string type; the value is what AuditEntry.Save binds to the
// "resource_type" column of the audit_log table, and what
// FindAuditEntriesByResource compares that column against.
type AuditResourceType string
// Audit resource type constants.
const (
AuditResourceApp AuditResourceType = "app"
AuditResourceUser AuditResourceType = "user"
AuditResourceSession AuditResourceType = "session"
AuditResourceEnvVar AuditResourceType = "env_var"
AuditResourceLabel AuditResourceType = "label"
AuditResourceVolume AuditResourceType = "volume"
AuditResourcePort AuditResourceType = "port"
AuditResourceDeployment AuditResourceType = "deployment"
AuditResourceWebhook AuditResourceType = "webhook"
)
// AuditEntry represents a single audit log entry, i.e. one row of the
// audit_log table.
//
// Entries created with NewAuditEntry carry the database handle that Save
// writes through. Entries returned by the query functions in this file are
// scanned into values whose db field is left nil.
type AuditEntry struct {
// db is the database handle used by Save.
db *database.Database
// ID is the row id. Save sets it from the insert result; the query
// functions read it from the id column.
ID int64
// UserID is the nullable user_id column.
UserID sql.NullInt64
// Username is the username column.
Username string
// Action is the action column (see the AuditAction constants).
Action AuditAction
// ResourceType is the resource_type column (see the AuditResourceType
// constants).
ResourceType AuditResourceType
// ResourceID is the nullable resource_id column.
ResourceID sql.NullString
// Detail is the nullable detail column.
Detail sql.NullString
// RemoteIP is the nullable remote_ip column.
RemoteIP sql.NullString
// CreatedAt is read from the created_at column by the query functions.
// Save does not write this column and does not update this field.
CreatedAt time.Time
}
// NewAuditEntry returns an empty AuditEntry bound to db, ready to have its
// fields filled in and then be persisted with Save.
func NewAuditEntry(db *database.Database) *AuditEntry {
	entry := new(AuditEntry)
	entry.db = db

	return entry
}
// Save writes the entry to the audit_log table as a new row and records the
// generated row id in a.ID.
//
// The created_at column is not part of the INSERT, so its value is left to
// the database, and a.CreatedAt is not updated.
func (a *AuditEntry) Save(ctx context.Context) error {
	const insertSQL = `
INSERT INTO audit_log (
user_id, username, action, resource_type, resource_id,
detail, remote_ip
) VALUES (?, ?, ?, ?, ?, ?, ?)`

	res, execErr := a.db.Exec(
		ctx, insertSQL,
		a.UserID,
		a.Username,
		a.Action,
		a.ResourceType,
		a.ResourceID,
		a.Detail,
		a.RemoteIP,
	)
	if execErr != nil {
		return fmt.Errorf("inserting audit entry: %w", execErr)
	}

	newID, idErr := res.LastInsertId()
	if idErr != nil {
		return fmt.Errorf("getting audit entry id: %w", idErr)
	}

	a.ID = newID

	return nil
}
// FindAuditEntries returns up to limit audit log entries, newest first.
//
// Rows are ordered by created_at and then by id, both descending. Save does
// not write created_at, so its value comes from the database; if that value
// has coarse resolution (for example a per-second timestamp default — the
// migration is not visible here, TODO confirm), entries written close
// together tie on created_at. The id tie-breaker keeps the result order
// deterministic in that case. This assumes id grows with insertion order.
//
// limit is bound directly to the SQL LIMIT clause. NOTE(review): in SQLite a
// negative LIMIT means "no upper bound"; callers passing user-supplied values
// should validate them first.
//
// An error is returned if the query fails or a row cannot be scanned. The
// returned slice is nil when there are no rows.
func FindAuditEntries(
	ctx context.Context,
	db *database.Database,
	limit int,
) ([]*AuditEntry, error) {
	query := `
		SELECT id, user_id, username, action, resource_type, resource_id,
			detail, remote_ip, created_at
		FROM audit_log
		ORDER BY created_at DESC, id DESC
		LIMIT ?`

	rows, err := db.Query(ctx, query, limit)
	if err != nil {
		return nil, fmt.Errorf("querying audit entries: %w", err)
	}

	// Close errors are deliberately ignored: any iteration failure has
	// already been reported through rows.Err inside scanAuditRows.
	defer func() { _ = rows.Close() }()

	return scanAuditRows(rows)
}
// FindAuditEntriesByResource returns up to limit audit log entries whose
// resource_type and resource_id columns equal the given values, newest first.
//
// Rows are ordered by created_at and then by id, both descending. Save does
// not write created_at, so its value comes from the database; if that value
// has coarse resolution (for example a per-second timestamp default — the
// migration is not visible here, TODO confirm), entries written close
// together tie on created_at. The id tie-breaker keeps the result order
// deterministic in that case. This assumes id grows with insertion order.
//
// Entries saved without a resource id (NULL resource_id) never match, because
// the comparison is a plain SQL equality.
//
// limit is bound directly to the SQL LIMIT clause. NOTE(review): in SQLite a
// negative LIMIT means "no upper bound"; callers passing user-supplied values
// should validate them first.
//
// An error is returned if the query fails or a row cannot be scanned. The
// returned slice is nil when there are no rows.
func FindAuditEntriesByResource(
	ctx context.Context,
	db *database.Database,
	resourceType AuditResourceType,
	resourceID string,
	limit int,
) ([]*AuditEntry, error) {
	query := `
		SELECT id, user_id, username, action, resource_type, resource_id,
			detail, remote_ip, created_at
		FROM audit_log
		WHERE resource_type = ? AND resource_id = ?
		ORDER BY created_at DESC, id DESC
		LIMIT ?`

	rows, err := db.Query(ctx, query, resourceType, resourceID, limit)
	if err != nil {
		return nil, fmt.Errorf("querying audit entries by resource: %w", err)
	}

	// Close errors are deliberately ignored: any iteration failure has
	// already been reported through rows.Err inside scanAuditRows.
	defer func() { _ = rows.Close() }()

	return scanAuditRows(rows)
}
// CountAuditEntries reports how many rows the audit_log table holds.
// On failure it returns 0 and a wrapped error.
func CountAuditEntries(
	ctx context.Context,
	db *database.Database,
) (int, error) {
	const countSQL = "SELECT COUNT(*) FROM audit_log"

	var total int

	scanErr := db.QueryRow(ctx, countSQL).Scan(&total)
	if scanErr != nil {
		return 0, fmt.Errorf("counting audit entries: %w", scanErr)
	}

	return total, nil
}
// scanAuditRows reads every remaining row from rows into a new AuditEntry.
//
// Columns must arrive in the order id, user_id, username, action,
// resource_type, resource_id, detail, remote_ip, created_at. The entries are
// created without a database handle. The caller remains responsible for
// closing rows. The result is nil when rows yields nothing.
func scanAuditRows(rows *sql.Rows) ([]*AuditEntry, error) {
	var found []*AuditEntry

	for rows.Next() {
		var item AuditEntry

		err := rows.Scan(
			&item.ID,
			&item.UserID,
			&item.Username,
			&item.Action,
			&item.ResourceType,
			&item.ResourceID,
			&item.Detail,
			&item.RemoteIP,
			&item.CreatedAt,
		)
		if err != nil {
			return nil, fmt.Errorf("scanning audit entry: %w", err)
		}

		found = append(found, &item)
	}

	// Next returning false can mean either "no more rows" or "iteration
	// failed"; Err distinguishes the two.
	err := rows.Err()
	if err != nil {
		return nil, fmt.Errorf("iterating audit entries: %w", err)
	}

	return found, nil
}

View File

@@ -23,6 +23,7 @@ const (
testBranch = "main"
testValue = "value"
testEventType = "push"
testAdmin = "admin"
)
func setupTestDB(t *testing.T) (*database.Database, func()) {
@@ -183,7 +184,7 @@ func TestUserExists(t *testing.T) {
defer cleanup()
user := models.NewUser(testDB)
user.Username = "admin"
user.Username = testAdmin
user.PasswordHash = testHash
err := user.Save(context.Background())
@@ -781,6 +782,179 @@ func TestCascadeDelete(t *testing.T) {
})
}
// AuditEntry Tests.
// TestAuditEntryCreateAndFind saves a minimal entry and checks that it gets
// an ID and is returned by FindAuditEntries with the same field values.
func TestAuditEntryCreateAndFind(t *testing.T) {
	t.Parallel()

	tdb, teardown := setupTestDB(t)
	defer teardown()

	ctx := context.Background()

	saved := models.NewAuditEntry(tdb)
	saved.Username = testAdmin
	saved.Action = models.AuditActionLogin
	saved.ResourceType = models.AuditResourceSession

	require.NoError(t, saved.Save(ctx))
	assert.NotZero(t, saved.ID)

	found, err := models.FindAuditEntries(ctx, tdb, 10)
	require.NoError(t, err)
	require.Len(t, found, 1)

	got := found[0]
	assert.Equal(t, testAdmin, got.Username)
	assert.Equal(t, models.AuditActionLogin, got.Action)
	assert.Equal(t, models.AuditResourceSession, got.ResourceType)
}
// TestAuditEntryWithAllFields saves an entry with every nullable field set
// and checks that the values round-trip through the database.
func TestAuditEntryWithAllFields(t *testing.T) {
	t.Parallel()

	tdb, teardown := setupTestDB(t)
	defer teardown()

	ctx := context.Background()

	full := models.NewAuditEntry(tdb)
	full.UserID = sql.NullInt64{Int64: 1, Valid: true}
	full.Username = testAdmin
	full.Action = models.AuditActionAppCreate
	full.ResourceType = models.AuditResourceApp
	full.ResourceID = sql.NullString{String: "app-123", Valid: true}
	full.Detail = sql.NullString{String: "created new app", Valid: true}
	full.RemoteIP = sql.NullString{String: "192.168.1.1", Valid: true}

	require.NoError(t, full.Save(ctx))

	found, err := models.FindAuditEntries(ctx, tdb, 10)
	require.NoError(t, err)
	require.Len(t, found, 1)

	got := found[0]
	assert.Equal(t, int64(1), got.UserID.Int64)
	assert.Equal(t, "app-123", got.ResourceID.String)
	assert.Equal(t, "created new app", got.Detail.String)
	assert.Equal(t, "192.168.1.1", got.RemoteIP.String)
}
// TestAuditEntryFindByResource stores three entries for app "app-1" plus one
// session entry without a resource id, then checks that the resource filter
// returns only the three app entries while the unfiltered query returns all
// four.
func TestAuditEntryFindByResource(t *testing.T) {
	t.Parallel()

	tdb, teardown := setupTestDB(t)
	defer teardown()

	ctx := context.Background()

	// Three entries that all belong to the same app.
	appActions := []models.AuditAction{
		models.AuditActionAppCreate,
		models.AuditActionAppUpdate,
		models.AuditActionAppDeploy,
	}

	for _, act := range appActions {
		appEntry := models.NewAuditEntry(tdb)
		appEntry.Username = testAdmin
		appEntry.Action = act
		appEntry.ResourceType = models.AuditResourceApp
		appEntry.ResourceID = sql.NullString{String: "app-1", Valid: true}

		require.NoError(t, appEntry.Save(ctx))
	}

	// One unrelated entry that must not match the resource filter.
	other := models.NewAuditEntry(tdb)
	other.Username = testAdmin
	other.Action = models.AuditActionLogin
	other.ResourceType = models.AuditResourceSession

	require.NoError(t, other.Save(ctx))

	// Filtered by resource.
	byResource, err := models.FindAuditEntriesByResource(
		ctx, tdb, models.AuditResourceApp, "app-1", 10,
	)
	require.NoError(t, err)
	assert.Len(t, byResource, 3)

	// Unfiltered.
	everything, err := models.FindAuditEntries(ctx, tdb, 10)
	require.NoError(t, err)
	assert.Len(t, everything, 4)
}
// TestAuditEntryCount checks that CountAuditEntries reports 0 on a fresh
// database and 1 after a single entry has been saved.
func TestAuditEntryCount(t *testing.T) {
	t.Parallel()

	tdb, teardown := setupTestDB(t)
	defer teardown()

	ctx := context.Background()

	before, err := models.CountAuditEntries(ctx, tdb)
	require.NoError(t, err)
	assert.Equal(t, 0, before)

	login := models.NewAuditEntry(tdb)
	login.Username = testAdmin
	login.Action = models.AuditActionLogin
	login.ResourceType = models.AuditResourceSession

	require.NoError(t, login.Save(ctx))

	after, err := models.CountAuditEntries(ctx, tdb)
	require.NoError(t, err)
	assert.Equal(t, 1, after)
}
// TestAuditEntryFindLimit saves more entries than the requested limit and
// checks that FindAuditEntries returns exactly limit entries.
func TestAuditEntryFindLimit(t *testing.T) {
	t.Parallel()

	const (
		savedCount = 5
		queryLimit = 3
	)

	tdb, teardown := setupTestDB(t)
	defer teardown()

	ctx := context.Background()

	for range savedCount {
		login := models.NewAuditEntry(tdb)
		login.Username = testAdmin
		login.Action = models.AuditActionLogin
		login.ResourceType = models.AuditResourceSession

		require.NoError(t, login.Save(ctx))
	}

	found, err := models.FindAuditEntries(ctx, tdb, queryLimit)
	require.NoError(t, err)
	assert.Len(t, found, queryLimit)
}
// TestAuditEntryOrderByCreatedAtDesc saves three entries back to back and
// checks that FindAuditEntries returns them in reverse insertion order.
func TestAuditEntryOrderByCreatedAtDesc(t *testing.T) {
	t.Parallel()

	tdb, teardown := setupTestDB(t)
	defer teardown()

	ctx := context.Background()

	// Insertion order, oldest first.
	inserted := []models.AuditAction{
		models.AuditActionLogin,
		models.AuditActionAppCreate,
		models.AuditActionLogout,
	}

	for _, act := range inserted {
		e := models.NewAuditEntry(tdb)
		e.Username = testAdmin
		e.Action = act
		e.ResourceType = models.AuditResourceSession

		require.NoError(t, e.Save(ctx))
	}

	found, err := models.FindAuditEntries(ctx, tdb, 10)
	require.NoError(t, err)
	require.Len(t, found, 3)

	// Newest first: the last action inserted must come back at index 0.
	assert.Equal(t, models.AuditActionLogout, found[0].Action)
	assert.Equal(t, models.AuditActionAppCreate, found[1].Action)
	assert.Equal(t, models.AuditActionLogin, found[2].Action)
}
// Helper function to create a test app.
func createTestApp(t *testing.T, testDB *database.Database) *models.App {
t.Helper()