Add embedded SQL migrations system

Migrations are stored in schema/*.sql and embedded via go:embed.
Applied migrations are tracked in schema_migrations table.
Initial schema includes source_content, source_metadata, output_content,
request_cache, negative_cache, and cache_stats tables.
This commit is contained in:
2026-01-08 03:35:43 -08:00
parent 27eb9fb513
commit 4595929275
2 changed files with 168 additions and 22 deletions

View File

@@ -4,7 +4,12 @@ package database
import (
"context"
"database/sql"
"embed"
"fmt"
"log/slog"
"path/filepath"
"sort"
"strings"
"go.uber.org/fx"
"sneak.berlin/go/pixa/internal/config"
@@ -13,6 +18,9 @@ import (
_ "modernc.org/sqlite" // SQLite driver registration
)
//go:embed schema/*.sql
var schemaFS embed.FS
// Params defines dependencies for Database.
type Params struct {
fx.In
@@ -76,33 +84,80 @@ func (s *Database) connect(ctx context.Context) error {
s.db = db
s.log.Info("database connected")
return s.createSchema(ctx)
return s.runMigrations(ctx)
}
func (s *Database) createSchema(ctx context.Context) error {
// FIXME: Add actual schema for cache metadata
schema := `
CREATE TABLE IF NOT EXISTS cache_metadata (
id INTEGER PRIMARY KEY AUTOINCREMENT,
source_url TEXT NOT NULL UNIQUE,
source_hash TEXT NOT NULL,
content_type TEXT,
fetched_at DATETIME DEFAULT CURRENT_TIMESTAMP,
expires_at DATETIME,
created_at DATETIME DEFAULT CURRENT_TIMESTAMP
);
CREATE INDEX IF NOT EXISTS idx_cache_source_url ON cache_metadata(source_url);
CREATE INDEX IF NOT EXISTS idx_cache_source_hash ON cache_metadata(source_hash);
`
_, err := s.db.ExecContext(ctx, schema)
func (s *Database) runMigrations(ctx context.Context) error {
// Create migrations tracking table
_, err := s.db.ExecContext(ctx, `
CREATE TABLE IF NOT EXISTS schema_migrations (
version TEXT PRIMARY KEY,
applied_at DATETIME DEFAULT CURRENT_TIMESTAMP
)
`)
if err != nil {
s.log.Error("failed to create schema", "error", err)
return err
return fmt.Errorf("failed to create migrations table: %w", err)
}
s.log.Info("database schema initialized")
// Get list of migration files
entries, err := schemaFS.ReadDir("schema")
if err != nil {
return fmt.Errorf("failed to read schema directory: %w", err)
}
// Sort migration files by name (001.sql, 002.sql, etc.)
var migrations []string
for _, entry := range entries {
if !entry.IsDir() && strings.HasSuffix(entry.Name(), ".sql") {
migrations = append(migrations, entry.Name())
}
}
sort.Strings(migrations)
// Apply each migration that hasn't been applied yet
for _, migration := range migrations {
version := strings.TrimSuffix(migration, filepath.Ext(migration))
// Check if already applied
var count int
err := s.db.QueryRowContext(ctx,
"SELECT COUNT(*) FROM schema_migrations WHERE version = ?",
version,
).Scan(&count)
if err != nil {
return fmt.Errorf("failed to check migration status: %w", err)
}
if count > 0 {
s.log.Debug("migration already applied", "version", version)
continue
}
// Read and apply migration
content, err := schemaFS.ReadFile(filepath.Join("schema", migration))
if err != nil {
return fmt.Errorf("failed to read migration %s: %w", migration, err)
}
s.log.Info("applying migration", "version", version)
_, err = s.db.ExecContext(ctx, string(content))
if err != nil {
return fmt.Errorf("failed to apply migration %s: %w", migration, err)
}
// Record migration as applied
_, err = s.db.ExecContext(ctx,
"INSERT INTO schema_migrations (version) VALUES (?)",
version,
)
if err != nil {
return fmt.Errorf("failed to record migration %s: %w", migration, err)
}
s.log.Info("migration applied successfully", "version", version)
}
return nil
}

View File

@@ -0,0 +1,91 @@
-- Migration 001: Initial schema
-- Creates all tables for the pixa caching image proxy
-- Source content blobs
-- Files stored at: cache/src-content/<ab>/<cd>/<sha256>
CREATE TABLE IF NOT EXISTS source_content (
content_hash TEXT PRIMARY KEY,
content_type TEXT NOT NULL,
size_bytes INTEGER NOT NULL,
fetched_at DATETIME DEFAULT CURRENT_TIMESTAMP
);
-- Source URL metadata - maps URLs to content hashes
-- JSON stored at: cache/src-metadata/<hostname>/<path_hash>.json
CREATE TABLE IF NOT EXISTS source_metadata (
id INTEGER PRIMARY KEY AUTOINCREMENT,
source_host TEXT NOT NULL,
source_path TEXT NOT NULL,
source_query TEXT NOT NULL DEFAULT '',
path_hash TEXT NOT NULL,
content_hash TEXT,
status_code INTEGER NOT NULL,
content_type TEXT,
response_headers TEXT,
fetched_at DATETIME DEFAULT CURRENT_TIMESTAMP,
expires_at DATETIME,
etag TEXT,
last_modified TEXT,
UNIQUE(source_host, source_path, source_query),
FOREIGN KEY (content_hash) REFERENCES source_content(content_hash)
);
CREATE INDEX IF NOT EXISTS idx_source_meta_host ON source_metadata(source_host);
CREATE INDEX IF NOT EXISTS idx_source_meta_path_hash ON source_metadata(path_hash);
CREATE INDEX IF NOT EXISTS idx_source_meta_expires ON source_metadata(expires_at);
CREATE INDEX IF NOT EXISTS idx_source_meta_content_hash ON source_metadata(content_hash);
-- Output/transformed content blobs
-- Files stored at: cache/dst-content/<ab>/<cd>/<sha256>
CREATE TABLE IF NOT EXISTS output_content (
content_hash TEXT PRIMARY KEY,
content_type TEXT NOT NULL,
size_bytes INTEGER NOT NULL,
fetched_at DATETIME DEFAULT CURRENT_TIMESTAMP
);
-- Request cache - maps full request params to output content
CREATE TABLE IF NOT EXISTS request_cache (
id INTEGER PRIMARY KEY AUTOINCREMENT,
cache_key TEXT NOT NULL UNIQUE,
source_metadata_id INTEGER NOT NULL,
output_hash TEXT NOT NULL,
width INTEGER NOT NULL,
height INTEGER NOT NULL,
format TEXT NOT NULL,
quality INTEGER NOT NULL DEFAULT 85,
fit_mode TEXT NOT NULL DEFAULT 'cover',
fetched_at DATETIME DEFAULT CURRENT_TIMESTAMP,
access_count INTEGER NOT NULL DEFAULT 1,
FOREIGN KEY (source_metadata_id) REFERENCES source_metadata(id),
FOREIGN KEY (output_hash) REFERENCES output_content(content_hash)
);
CREATE INDEX IF NOT EXISTS idx_request_cache_key ON request_cache(cache_key);
CREATE INDEX IF NOT EXISTS idx_request_cache_source ON request_cache(source_metadata_id);
CREATE INDEX IF NOT EXISTS idx_request_cache_output ON request_cache(output_hash);
CREATE INDEX IF NOT EXISTS idx_request_cache_fetched ON request_cache(fetched_at);
-- Negative cache for failed fetches (404s, timeouts, etc.)
CREATE TABLE IF NOT EXISTS negative_cache (
id INTEGER PRIMARY KEY AUTOINCREMENT,
source_host TEXT NOT NULL,
source_path TEXT NOT NULL,
source_query TEXT NOT NULL DEFAULT '',
status_code INTEGER NOT NULL,
error_message TEXT,
fetched_at DATETIME DEFAULT CURRENT_TIMESTAMP,
expires_at DATETIME NOT NULL,
UNIQUE(source_host, source_path, source_query)
);
CREATE INDEX IF NOT EXISTS idx_negative_cache_expires ON negative_cache(expires_at);
-- Cache statistics for monitoring
CREATE TABLE IF NOT EXISTS cache_stats (
id INTEGER PRIMARY KEY CHECK (id = 1),
hit_count INTEGER NOT NULL DEFAULT 0,
miss_count INTEGER NOT NULL DEFAULT 0,
upstream_fetch_count INTEGER NOT NULL DEFAULT 0,
upstream_fetch_bytes INTEGER NOT NULL DEFAULT 0,
transform_count INTEGER NOT NULL DEFAULT 0,
last_updated_at DATETIME DEFAULT CURRENT_TIMESTAMP
);
INSERT OR IGNORE INTO cache_stats (id) VALUES (1);