feat: implement per-webhook event databases
All checks were successful
check / check (push) Successful in 1m50s

Split data storage into main application DB (config only) and
per-webhook event databases (one SQLite file per webhook).

Architecture changes:
- New WebhookDBManager component manages per-webhook DB lifecycle
  (create, open, cache, delete) with lazy connection pooling via sync.Map
- Main DB (DBURL) stores only config: Users, Webhooks, Entrypoints,
  Targets, APIKeys
- Per-webhook DBs (DATA_DIR) store Events, Deliveries, DeliveryResults
  in files named events-{webhook_uuid}.db
- New DATA_DIR env var (default: ./data dev, /data/events prod)

Behavioral changes:
- Webhook creation creates per-webhook DB file
- Webhook deletion hard-deletes per-webhook DB file (config soft-deleted)
- Event ingestion writes to per-webhook DB, not main DB
- Delivery engine polls all per-webhook DBs for pending deliveries
- Database target type marks delivery as immediately successful (events
  are already in the dedicated per-webhook DB)
- Event log UI reads from per-webhook DBs with targets from main DB
- Existing webhooks without DB files get them created lazily

Removed:
- ArchivedEvent model (was a half-measure, replaced by per-webhook DBs)
- Event/Delivery/DeliveryResult removed from main DB migrations

Added:
- Comprehensive tests for WebhookDBManager (create, delete, lazy
  creation, delivery workflow, multiple webhooks, close all)
- Dockerfile creates /data/events directory

README updates:
- Per-webhook event databases documented as implemented (was Phase 2)
- DATA_DIR added to configuration table
- Docker instructions updated with data volume mount
- Data model diagram updated
- TODO updated (database separation moved to completed)

Closes #15
This commit is contained in:
clawbot
2026-03-01 17:06:43 -08:00
parent 6c393ccb78
commit 43c22a9e9a
13 changed files with 814 additions and 198 deletions

View File

@@ -1,19 +0,0 @@
package database
// ArchivedEvent stores webhook events delivered via the "database" target type.
// These records persist independently of internal event retention and pruning,
// providing a durable archive for downstream consumption.
type ArchivedEvent struct {
BaseModel
WebhookID string `gorm:"type:uuid;not null;index" json:"webhook_id"`
EntrypointID string `gorm:"type:uuid;not null" json:"entrypoint_id"`
EventID string `gorm:"type:uuid;not null" json:"event_id"`
TargetID string `gorm:"type:uuid;not null" json:"target_id"`
// Original request data (copied from Event at archive time)
Method string `gorm:"not null" json:"method"`
Headers string `gorm:"type:text" json:"headers"` // JSON
Body string `gorm:"type:text" json:"body"`
ContentType string `json:"content_type"`
}

View File

@@ -1,6 +1,9 @@
package database
// Migrate runs database migrations for all models
// Migrate runs database migrations for the main application database.
// Only configuration-tier models are stored in the main database.
// Event-tier models (Event, Delivery, DeliveryResult) live in
// per-webhook dedicated databases managed by WebhookDBManager.
func (d *Database) Migrate() error {
return d.db.AutoMigrate(
&User{},
@@ -8,9 +11,5 @@ func (d *Database) Migrate() error {
&Webhook{},
&Entrypoint{},
&Target{},
&Event{},
&Delivery{},
&DeliveryResult{},
&ArchivedEvent{},
)
}

View File

@@ -0,0 +1,183 @@
package database
import (
"context"
"database/sql"
"fmt"
"log/slog"
"os"
"path/filepath"
"sync"
"go.uber.org/fx"
"gorm.io/driver/sqlite"
"gorm.io/gorm"
"sneak.berlin/go/webhooker/internal/config"
"sneak.berlin/go/webhooker/internal/logger"
)
// nolint:revive // WebhookDBManagerParams is a standard fx naming convention
type WebhookDBManagerParams struct {
fx.In
Config *config.Config
Logger *logger.Logger
}
// WebhookDBManager manages per-webhook SQLite database files for event storage.
// Each webhook gets its own dedicated database containing Events, Deliveries,
// and DeliveryResults. Database connections are opened lazily and cached.
type WebhookDBManager struct {
dataDir string
dbs sync.Map // map[webhookID]*gorm.DB
log *slog.Logger
}
// NewWebhookDBManager creates a new WebhookDBManager and registers lifecycle hooks.
func NewWebhookDBManager(lc fx.Lifecycle, params WebhookDBManagerParams) (*WebhookDBManager, error) {
m := &WebhookDBManager{
dataDir: params.Config.DataDir,
log: params.Logger.Get(),
}
// Create data directory if it doesn't exist
if err := os.MkdirAll(m.dataDir, 0750); err != nil {
return nil, fmt.Errorf("creating data directory %s: %w", m.dataDir, err)
}
lc.Append(fx.Hook{
OnStop: func(_ context.Context) error { //nolint:revive // ctx unused but required by fx
return m.CloseAll()
},
})
m.log.Info("webhook database manager initialized", "data_dir", m.dataDir)
return m, nil
}
// dbPath returns the filesystem path for a webhook's database file.
func (m *WebhookDBManager) dbPath(webhookID string) string {
return filepath.Join(m.dataDir, fmt.Sprintf("events-%s.db", webhookID))
}
// openDB opens (or creates) a per-webhook SQLite database and runs migrations.
func (m *WebhookDBManager) openDB(webhookID string) (*gorm.DB, error) {
path := m.dbPath(webhookID)
dbURL := fmt.Sprintf("file:%s?cache=shared&mode=rwc", path)
sqlDB, err := sql.Open("sqlite", dbURL)
if err != nil {
return nil, fmt.Errorf("opening webhook database %s: %w", webhookID, err)
}
db, err := gorm.Open(sqlite.Dialector{
Conn: sqlDB,
}, &gorm.Config{})
if err != nil {
sqlDB.Close()
return nil, fmt.Errorf("connecting to webhook database %s: %w", webhookID, err)
}
// Run migrations for event-tier models only
if err := db.AutoMigrate(&Event{}, &Delivery{}, &DeliveryResult{}); err != nil {
sqlDB.Close()
return nil, fmt.Errorf("migrating webhook database %s: %w", webhookID, err)
}
m.log.Info("opened per-webhook database", "webhook_id", webhookID, "path", path)
return db, nil
}
// GetDB returns the database connection for a webhook, creating the database
// file lazily if it doesn't exist. This handles both new webhooks and existing
// webhooks that were created before per-webhook databases were introduced.
func (m *WebhookDBManager) GetDB(webhookID string) (*gorm.DB, error) {
// Fast path: already open
if val, ok := m.dbs.Load(webhookID); ok {
cachedDB, castOK := val.(*gorm.DB)
if !castOK {
return nil, fmt.Errorf("invalid cached database type for webhook %s", webhookID)
}
return cachedDB, nil
}
// Slow path: open/create the database
db, err := m.openDB(webhookID)
if err != nil {
return nil, err
}
// Store it; if another goroutine beat us, close ours and use theirs
actual, loaded := m.dbs.LoadOrStore(webhookID, db)
if loaded {
// Another goroutine created it first; close our duplicate
if sqlDB, closeErr := db.DB(); closeErr == nil {
sqlDB.Close()
}
existingDB, castOK := actual.(*gorm.DB)
if !castOK {
return nil, fmt.Errorf("invalid cached database type for webhook %s", webhookID)
}
return existingDB, nil
}
return db, nil
}
// CreateDB explicitly creates a new per-webhook database file and runs migrations.
// This is called when a new webhook is created.
func (m *WebhookDBManager) CreateDB(webhookID string) error {
_, err := m.GetDB(webhookID)
return err
}
// DBExists checks if a per-webhook database file exists on disk.
func (m *WebhookDBManager) DBExists(webhookID string) bool {
_, err := os.Stat(m.dbPath(webhookID))
return err == nil
}
// DeleteDB closes the connection and deletes the database file for a webhook.
// This performs a hard delete — the file is permanently removed.
func (m *WebhookDBManager) DeleteDB(webhookID string) error {
// Close and remove from cache
if val, ok := m.dbs.LoadAndDelete(webhookID); ok {
if gormDB, castOK := val.(*gorm.DB); castOK {
if sqlDB, err := gormDB.DB(); err == nil {
sqlDB.Close()
}
}
}
// Delete the main DB file and WAL/SHM files
path := m.dbPath(webhookID)
for _, suffix := range []string{"", "-wal", "-shm"} {
if err := os.Remove(path + suffix); err != nil && !os.IsNotExist(err) {
return fmt.Errorf("deleting webhook database file %s%s: %w", path, suffix, err)
}
}
m.log.Info("deleted per-webhook database", "webhook_id", webhookID)
return nil
}
// CloseAll closes all open per-webhook database connections.
// Called during application shutdown.
func (m *WebhookDBManager) CloseAll() error {
var lastErr error
m.dbs.Range(func(key, value interface{}) bool {
if gormDB, castOK := value.(*gorm.DB); castOK {
if sqlDB, err := gormDB.DB(); err == nil {
if closeErr := sqlDB.Close(); closeErr != nil {
lastErr = closeErr
m.log.Error("failed to close webhook database",
"webhook_id", key,
"error", closeErr,
)
}
}
}
m.dbs.Delete(key)
return true
})
return lastErr
}

View File

@@ -0,0 +1,294 @@
package database
import (
"context"
"os"
"path/filepath"
"testing"
"github.com/google/uuid"
"github.com/spf13/afero"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
"go.uber.org/fx/fxtest"
"sneak.berlin/go/webhooker/internal/config"
"sneak.berlin/go/webhooker/internal/globals"
"sneak.berlin/go/webhooker/internal/logger"
pkgconfig "sneak.berlin/go/webhooker/pkg/config"
)
func setupTestWebhookDBManager(t *testing.T) (*WebhookDBManager, *fxtest.Lifecycle) {
t.Helper()
fs := afero.NewMemMapFs()
testConfigYAML := `
environments:
dev:
config:
port: 8080
debug: false
dburl: "file::memory:?cache=shared"
secrets:
sessionKey: d2ViaG9va2VyLWRldi1zZXNzaW9uLWtleS1pbnNlY3VyZSE=
configDefaults:
port: 8080
`
require.NoError(t, afero.WriteFile(fs, "config.yaml", []byte(testConfigYAML), 0644))
pkgconfig.SetFs(fs)
lc := fxtest.NewLifecycle(t)
globals.Appname = "webhooker-test"
globals.Version = "test"
globals.Buildarch = "test"
g, err := globals.New(lc)
require.NoError(t, err)
l, err := logger.New(lc, logger.LoggerParams{Globals: g})
require.NoError(t, err)
dataDir := filepath.Join(t.TempDir(), "events")
cfg := &config.Config{
DBURL: "file::memory:?cache=shared",
DataDir: dataDir,
SessionKey: "d2ViaG9va2VyLWRldi1zZXNzaW9uLWtleS1pbnNlY3VyZSE=",
}
_ = cfg
mgr, err := NewWebhookDBManager(lc, WebhookDBManagerParams{
Config: cfg,
Logger: l,
})
require.NoError(t, err)
return mgr, lc
}
func TestWebhookDBManager_CreateAndGetDB(t *testing.T) {
mgr, lc := setupTestWebhookDBManager(t)
ctx := context.Background()
require.NoError(t, lc.Start(ctx))
defer func() { require.NoError(t, lc.Stop(ctx)) }()
webhookID := uuid.New().String()
// DB should not exist yet
assert.False(t, mgr.DBExists(webhookID))
// Create the DB
err := mgr.CreateDB(webhookID)
require.NoError(t, err)
// DB file should now exist
assert.True(t, mgr.DBExists(webhookID))
// Get the DB again (should use cached connection)
db, err := mgr.GetDB(webhookID)
require.NoError(t, err)
require.NotNil(t, db)
// Verify we can write an event
event := &Event{
WebhookID: webhookID,
EntrypointID: uuid.New().String(),
Method: "POST",
Headers: `{"Content-Type":["application/json"]}`,
Body: `{"test": true}`,
ContentType: "application/json",
}
require.NoError(t, db.Create(event).Error)
assert.NotEmpty(t, event.ID)
// Verify we can read it back
var readEvent Event
require.NoError(t, db.First(&readEvent, "id = ?", event.ID).Error)
assert.Equal(t, webhookID, readEvent.WebhookID)
assert.Equal(t, "POST", readEvent.Method)
assert.Equal(t, `{"test": true}`, readEvent.Body)
}
func TestWebhookDBManager_DeleteDB(t *testing.T) {
mgr, lc := setupTestWebhookDBManager(t)
ctx := context.Background()
require.NoError(t, lc.Start(ctx))
defer func() { require.NoError(t, lc.Stop(ctx)) }()
webhookID := uuid.New().String()
// Create the DB and write some data
require.NoError(t, mgr.CreateDB(webhookID))
db, err := mgr.GetDB(webhookID)
require.NoError(t, err)
event := &Event{
WebhookID: webhookID,
EntrypointID: uuid.New().String(),
Method: "POST",
Body: `{"test": true}`,
ContentType: "application/json",
}
require.NoError(t, db.Create(event).Error)
// Delete the DB
require.NoError(t, mgr.DeleteDB(webhookID))
// File should no longer exist
assert.False(t, mgr.DBExists(webhookID))
// Verify the file is actually gone from disk
dbPath := mgr.dbPath(webhookID)
_, err = os.Stat(dbPath)
assert.True(t, os.IsNotExist(err))
}
func TestWebhookDBManager_LazyCreation(t *testing.T) {
mgr, lc := setupTestWebhookDBManager(t)
ctx := context.Background()
require.NoError(t, lc.Start(ctx))
defer func() { require.NoError(t, lc.Stop(ctx)) }()
webhookID := uuid.New().String()
// GetDB should lazily create the database
db, err := mgr.GetDB(webhookID)
require.NoError(t, err)
require.NotNil(t, db)
// File should now exist
assert.True(t, mgr.DBExists(webhookID))
}
func TestWebhookDBManager_DeliveryWorkflow(t *testing.T) {
mgr, lc := setupTestWebhookDBManager(t)
ctx := context.Background()
require.NoError(t, lc.Start(ctx))
defer func() { require.NoError(t, lc.Stop(ctx)) }()
webhookID := uuid.New().String()
targetID := uuid.New().String()
db, err := mgr.GetDB(webhookID)
require.NoError(t, err)
// Create an event
event := &Event{
WebhookID: webhookID,
EntrypointID: uuid.New().String(),
Method: "POST",
Headers: `{"Content-Type":["application/json"]}`,
Body: `{"payload": "test"}`,
ContentType: "application/json",
}
require.NoError(t, db.Create(event).Error)
// Create a delivery
delivery := &Delivery{
EventID: event.ID,
TargetID: targetID,
Status: DeliveryStatusPending,
}
require.NoError(t, db.Create(delivery).Error)
// Query pending deliveries
var pending []Delivery
require.NoError(t, db.Where("status = ?", DeliveryStatusPending).
Preload("Event").
Find(&pending).Error)
require.Len(t, pending, 1)
assert.Equal(t, event.ID, pending[0].EventID)
assert.Equal(t, "POST", pending[0].Event.Method)
// Create a delivery result
result := &DeliveryResult{
DeliveryID: delivery.ID,
AttemptNum: 1,
Success: true,
StatusCode: 200,
Duration: 42,
}
require.NoError(t, db.Create(result).Error)
// Update delivery status
require.NoError(t, db.Model(delivery).Update("status", DeliveryStatusDelivered).Error)
// Verify no more pending deliveries
var stillPending []Delivery
require.NoError(t, db.Where("status = ?", DeliveryStatusPending).Find(&stillPending).Error)
assert.Empty(t, stillPending)
}
func TestWebhookDBManager_MultipleWebhooks(t *testing.T) {
mgr, lc := setupTestWebhookDBManager(t)
ctx := context.Background()
require.NoError(t, lc.Start(ctx))
defer func() { require.NoError(t, lc.Stop(ctx)) }()
webhook1 := uuid.New().String()
webhook2 := uuid.New().String()
// Create DBs for two webhooks
require.NoError(t, mgr.CreateDB(webhook1))
require.NoError(t, mgr.CreateDB(webhook2))
db1, err := mgr.GetDB(webhook1)
require.NoError(t, err)
db2, err := mgr.GetDB(webhook2)
require.NoError(t, err)
// Write events to each webhook's DB
event1 := &Event{
WebhookID: webhook1,
EntrypointID: uuid.New().String(),
Method: "POST",
Body: `{"webhook": 1}`,
ContentType: "application/json",
}
event2 := &Event{
WebhookID: webhook2,
EntrypointID: uuid.New().String(),
Method: "PUT",
Body: `{"webhook": 2}`,
ContentType: "application/json",
}
require.NoError(t, db1.Create(event1).Error)
require.NoError(t, db2.Create(event2).Error)
// Verify isolation: each DB only has its own events
var count1 int64
db1.Model(&Event{}).Count(&count1)
assert.Equal(t, int64(1), count1)
var count2 int64
db2.Model(&Event{}).Count(&count2)
assert.Equal(t, int64(1), count2)
// Delete webhook1's DB, webhook2 should be unaffected
require.NoError(t, mgr.DeleteDB(webhook1))
assert.False(t, mgr.DBExists(webhook1))
assert.True(t, mgr.DBExists(webhook2))
// webhook2's data should still be accessible
var events []Event
require.NoError(t, db2.Find(&events).Error)
assert.Len(t, events, 1)
assert.Equal(t, "PUT", events[0].Method)
}
func TestWebhookDBManager_CloseAll(t *testing.T) {
mgr, lc := setupTestWebhookDBManager(t)
ctx := context.Background()
require.NoError(t, lc.Start(ctx))
// Create a few DBs
for i := 0; i < 3; i++ {
require.NoError(t, mgr.CreateDB(uuid.New().String()))
}
// CloseAll should close all connections without error
require.NoError(t, mgr.CloseAll())
// Stop lifecycle (CloseAll already called, but shouldn't panic)
require.NoError(t, lc.Stop(ctx))
}