feat: implement per-webhook event databases
All checks were successful
check / check (push) Successful in 1m50s

Split data storage into main application DB (config only) and
per-webhook event databases (one SQLite file per webhook).

Architecture changes:
- New WebhookDBManager component manages per-webhook DB lifecycle
  (create, open, cache, delete) with lazy connection pooling via sync.Map
- Main DB (DBURL) stores only config: Users, Webhooks, Entrypoints,
  Targets, APIKeys
- Per-webhook DBs (DATA_DIR) store Events, Deliveries, DeliveryResults
  in files named events-{webhook_uuid}.db
- New DATA_DIR env var (default: ./data dev, /data/events prod)

Behavioral changes:
- Webhook creation creates per-webhook DB file
- Webhook deletion hard-deletes per-webhook DB file (config soft-deleted)
- Event ingestion writes to per-webhook DB, not main DB
- Delivery engine polls all per-webhook DBs for pending deliveries
- Database target type marks delivery as immediately successful (events
  are already in the dedicated per-webhook DB)
- Event log UI reads from per-webhook DBs with targets from main DB
- Existing webhooks without DB files get them created lazily

Removed:
- ArchivedEvent model (was a half-measure, replaced by per-webhook DBs)
- Event/Delivery/DeliveryResult removed from main DB migrations

Added:
- Comprehensive tests for WebhookDBManager (create, delete, lazy
  creation, delivery workflow, multiple webhooks, close all)
- Dockerfile creates /data/events directory

README updates:
- Per-webhook event databases documented as implemented (was Phase 2)
- DATA_DIR added to configuration table
- Docker instructions updated with data volume mount
- Data model diagram updated
- TODO updated (database separation moved to completed)

Closes #15
This commit is contained in:
clawbot
2026-03-01 17:06:43 -08:00
parent 6c393ccb78
commit 43c22a9e9a
13 changed files with 814 additions and 198 deletions

View File

@@ -12,6 +12,7 @@ import (
"time"
"go.uber.org/fx"
"gorm.io/gorm"
"sneak.berlin/go/webhooker/internal/database"
"sneak.berlin/go/webhooker/internal/logger"
)
@@ -39,24 +40,29 @@ type HTTPTargetConfig struct {
//nolint:revive // EngineParams is a standard fx naming convention
type EngineParams struct {
fx.In
DB *database.Database
Logger *logger.Logger
DB *database.Database
DBManager *database.WebhookDBManager
Logger *logger.Logger
}
// Engine processes queued deliveries in the background.
// It iterates over all active webhooks and polls each webhook's
// per-webhook database for pending deliveries.
type Engine struct {
database *database.Database
log *slog.Logger
client *http.Client
cancel context.CancelFunc
wg sync.WaitGroup
database *database.Database
dbManager *database.WebhookDBManager
log *slog.Logger
client *http.Client
cancel context.CancelFunc
wg sync.WaitGroup
}
// New creates and registers the delivery engine with the fx lifecycle.
func New(lc fx.Lifecycle, params EngineParams) *Engine {
e := &Engine{
database: params.DB,
log: params.Logger.Get(),
database: params.DB,
dbManager: params.DBManager,
log: params.Logger.Get(),
client: &http.Client{
Timeout: httpClientTimeout,
},
@@ -107,60 +113,133 @@ func (e *Engine) run(ctx context.Context) {
}
}
// processPending iterates over all active webhooks and processes pending
// deliveries from each webhook's per-webhook database.
func (e *Engine) processPending(ctx context.Context) {
// Get all active webhook IDs from the main application database
var webhookIDs []string
if err := e.database.DB().Model(&database.Webhook{}).Pluck("id", &webhookIDs).Error; err != nil {
e.log.Error("failed to query webhook IDs", "error", err)
return
}
for _, webhookID := range webhookIDs {
select {
case <-ctx.Done():
return
default:
// Only process webhooks that have an event database file
if !e.dbManager.DBExists(webhookID) {
continue
}
e.processWebhookDeliveries(ctx, webhookID)
}
}
}
// processWebhookDeliveries polls a single webhook's database for pending
// deliveries and processes them.
func (e *Engine) processWebhookDeliveries(ctx context.Context, webhookID string) {
webhookDB, err := e.dbManager.GetDB(webhookID)
if err != nil {
e.log.Error("failed to get webhook database",
"webhook_id", webhookID,
"error", err,
)
return
}
// Query pending and retrying deliveries from the per-webhook DB.
// Preload Event (same DB) but NOT Target (Target is in the main DB).
var deliveries []database.Delivery
result := e.database.DB().
result := webhookDB.
Where("status IN ?", []database.DeliveryStatus{
database.DeliveryStatusPending,
database.DeliveryStatusRetrying,
}).
Preload("Target").
Preload("Event").
Find(&deliveries)
if result.Error != nil {
e.log.Error("failed to query pending deliveries", "error", result.Error)
e.log.Error("failed to query pending deliveries",
"webhook_id", webhookID,
"error", result.Error,
)
return
}
if len(deliveries) == 0 {
return
}
// Collect unique target IDs and load targets from the main DB
seen := make(map[string]bool)
targetIDs := make([]string, 0, len(deliveries))
for _, d := range deliveries {
if !seen[d.TargetID] {
targetIDs = append(targetIDs, d.TargetID)
seen[d.TargetID] = true
}
}
var targets []database.Target
if err := e.database.DB().Where("id IN ?", targetIDs).Find(&targets).Error; err != nil {
e.log.Error("failed to load targets from main DB", "error", err)
return
}
targetMap := make(map[string]database.Target, len(targets))
for _, t := range targets {
targetMap[t.ID] = t
}
for i := range deliveries {
select {
case <-ctx.Done():
return
default:
e.processDelivery(ctx, &deliveries[i])
target, ok := targetMap[deliveries[i].TargetID]
if !ok {
e.log.Error("target not found for delivery",
"delivery_id", deliveries[i].ID,
"target_id", deliveries[i].TargetID,
)
continue
}
deliveries[i].Target = target
e.processDelivery(ctx, webhookDB, &deliveries[i])
}
}
}
func (e *Engine) processDelivery(ctx context.Context, d *database.Delivery) {
func (e *Engine) processDelivery(ctx context.Context, webhookDB *gorm.DB, d *database.Delivery) {
switch d.Target.Type {
case database.TargetTypeHTTP:
e.deliverHTTP(ctx, d)
e.deliverHTTP(ctx, webhookDB, d)
case database.TargetTypeRetry:
e.deliverRetry(ctx, d)
e.deliverRetry(ctx, webhookDB, d)
case database.TargetTypeDatabase:
e.deliverDatabase(d)
e.deliverDatabase(webhookDB, d)
case database.TargetTypeLog:
e.deliverLog(d)
e.deliverLog(webhookDB, d)
default:
e.log.Error("unknown target type",
"target_id", d.TargetID,
"type", d.Target.Type,
)
e.updateDeliveryStatus(d, database.DeliveryStatusFailed)
e.updateDeliveryStatus(webhookDB, d, database.DeliveryStatusFailed)
}
}
func (e *Engine) deliverHTTP(_ context.Context, d *database.Delivery) {
func (e *Engine) deliverHTTP(_ context.Context, webhookDB *gorm.DB, d *database.Delivery) {
cfg, err := e.parseHTTPConfig(d.Target.Config)
if err != nil {
e.log.Error("invalid HTTP target config",
"target_id", d.TargetID,
"error", err,
)
e.recordResult(d, 1, false, 0, "", err.Error(), 0)
e.updateDeliveryStatus(d, database.DeliveryStatusFailed)
e.recordResult(webhookDB, d, 1, false, 0, "", err.Error(), 0)
e.updateDeliveryStatus(webhookDB, d, database.DeliveryStatusFailed)
return
}
@@ -172,36 +251,36 @@ func (e *Engine) deliverHTTP(_ context.Context, d *database.Delivery) {
errMsg = err.Error()
}
e.recordResult(d, 1, success, statusCode, respBody, errMsg, duration)
e.recordResult(webhookDB, d, 1, success, statusCode, respBody, errMsg, duration)
if success {
e.updateDeliveryStatus(d, database.DeliveryStatusDelivered)
e.updateDeliveryStatus(webhookDB, d, database.DeliveryStatusDelivered)
} else {
e.updateDeliveryStatus(d, database.DeliveryStatusFailed)
e.updateDeliveryStatus(webhookDB, d, database.DeliveryStatusFailed)
}
}
func (e *Engine) deliverRetry(_ context.Context, d *database.Delivery) {
func (e *Engine) deliverRetry(_ context.Context, webhookDB *gorm.DB, d *database.Delivery) {
cfg, err := e.parseHTTPConfig(d.Target.Config)
if err != nil {
e.log.Error("invalid retry target config",
"target_id", d.TargetID,
"error", err,
)
e.recordResult(d, 1, false, 0, "", err.Error(), 0)
e.updateDeliveryStatus(d, database.DeliveryStatusFailed)
e.recordResult(webhookDB, d, 1, false, 0, "", err.Error(), 0)
e.updateDeliveryStatus(webhookDB, d, database.DeliveryStatusFailed)
return
}
// Determine attempt number from existing results
// Determine attempt number from existing results (in per-webhook DB)
var resultCount int64
e.database.DB().Model(&database.DeliveryResult{}).Where("delivery_id = ?", d.ID).Count(&resultCount)
webhookDB.Model(&database.DeliveryResult{}).Where("delivery_id = ?", d.ID).Count(&resultCount)
attemptNum := int(resultCount) + 1
// Check if we should wait before retrying (exponential backoff)
if attemptNum > 1 {
var lastResult database.DeliveryResult
lookupErr := e.database.DB().Where("delivery_id = ?", d.ID).Order("created_at DESC").First(&lastResult).Error
lookupErr := webhookDB.Where("delivery_id = ?", d.ID).Order("created_at DESC").First(&lastResult).Error
if lookupErr == nil {
shift := attemptNum - 2
if shift > 30 {
@@ -224,10 +303,10 @@ func (e *Engine) deliverRetry(_ context.Context, d *database.Delivery) {
errMsg = err.Error()
}
e.recordResult(d, attemptNum, success, statusCode, respBody, errMsg, duration)
e.recordResult(webhookDB, d, attemptNum, success, statusCode, respBody, errMsg, duration)
if success {
e.updateDeliveryStatus(d, database.DeliveryStatusDelivered)
e.updateDeliveryStatus(webhookDB, d, database.DeliveryStatusDelivered)
return
}
@@ -237,44 +316,22 @@ func (e *Engine) deliverRetry(_ context.Context, d *database.Delivery) {
}
if attemptNum >= maxRetries {
e.updateDeliveryStatus(d, database.DeliveryStatusFailed)
e.updateDeliveryStatus(webhookDB, d, database.DeliveryStatusFailed)
} else {
e.updateDeliveryStatus(d, database.DeliveryStatusRetrying)
e.updateDeliveryStatus(webhookDB, d, database.DeliveryStatusRetrying)
}
}
func (e *Engine) deliverDatabase(d *database.Delivery) {
// Write the event to the dedicated archived_events table. This table
// persists independently of internal event retention/pruning, so the
// data remains available for external consumption even after the
// original event is cleaned up.
archived := &database.ArchivedEvent{
WebhookID: d.Event.WebhookID,
EntrypointID: d.Event.EntrypointID,
EventID: d.EventID,
TargetID: d.TargetID,
Method: d.Event.Method,
Headers: d.Event.Headers,
Body: d.Event.Body,
ContentType: d.Event.ContentType,
}
if err := e.database.DB().Create(archived).Error; err != nil {
e.log.Error("failed to archive event",
"delivery_id", d.ID,
"event_id", d.EventID,
"error", err,
)
e.recordResult(d, 1, false, 0, "", err.Error(), 0)
e.updateDeliveryStatus(d, database.DeliveryStatusFailed)
return
}
e.recordResult(d, 1, true, 0, "", "", 0)
e.updateDeliveryStatus(d, database.DeliveryStatusDelivered)
// deliverDatabase handles the database target type. Since events are already
// stored in the per-webhook database (that's the whole point of per-webhook
// databases), the database target simply marks the delivery as successful.
// The per-webhook DB IS the dedicated event database for this webhook.
func (e *Engine) deliverDatabase(webhookDB *gorm.DB, d *database.Delivery) {
e.recordResult(webhookDB, d, 1, true, 0, "", "", 0)
e.updateDeliveryStatus(webhookDB, d, database.DeliveryStatusDelivered)
}
func (e *Engine) deliverLog(d *database.Delivery) {
func (e *Engine) deliverLog(webhookDB *gorm.DB, d *database.Delivery) {
e.log.Info("webhook event delivered to log target",
"delivery_id", d.ID,
"event_id", d.EventID,
@@ -284,8 +341,8 @@ func (e *Engine) deliverLog(d *database.Delivery) {
"content_type", d.Event.ContentType,
"body_length", len(d.Event.Body),
)
e.recordResult(d, 1, true, 0, "", "", 0)
e.updateDeliveryStatus(d, database.DeliveryStatusDelivered)
e.recordResult(webhookDB, d, 1, true, 0, "", "", 0)
e.updateDeliveryStatus(webhookDB, d, database.DeliveryStatusDelivered)
}
// doHTTPRequest performs the outbound HTTP POST to a target URL.
@@ -343,7 +400,7 @@ func (e *Engine) doHTTPRequest(cfg *HTTPTargetConfig, event *database.Event) (st
return resp.StatusCode, string(body), durationMs, nil
}
func (e *Engine) recordResult(d *database.Delivery, attemptNum int, success bool, statusCode int, respBody, errMsg string, durationMs int64) {
func (e *Engine) recordResult(webhookDB *gorm.DB, d *database.Delivery, attemptNum int, success bool, statusCode int, respBody, errMsg string, durationMs int64) {
result := &database.DeliveryResult{
DeliveryID: d.ID,
AttemptNum: attemptNum,
@@ -354,7 +411,7 @@ func (e *Engine) recordResult(d *database.Delivery, attemptNum int, success bool
Duration: durationMs,
}
if err := e.database.DB().Create(result).Error; err != nil {
if err := webhookDB.Create(result).Error; err != nil {
e.log.Error("failed to record delivery result",
"delivery_id", d.ID,
"error", err,
@@ -362,8 +419,8 @@ func (e *Engine) recordResult(d *database.Delivery, attemptNum int, success bool
}
}
func (e *Engine) updateDeliveryStatus(d *database.Delivery, status database.DeliveryStatus) {
if err := e.database.DB().Model(d).Update("status", status).Error; err != nil {
func (e *Engine) updateDeliveryStatus(webhookDB *gorm.DB, d *database.Delivery, status database.DeliveryStatus) {
if err := webhookDB.Model(d).Update("status", status).Error; err != nil {
e.log.Error("failed to update delivery status",
"delivery_id", d.ID,
"status", status,