diff --git a/internal/database/database.go b/internal/database/database.go
index 8b2b1e3..2a5ea11 100644
--- a/internal/database/database.go
+++ b/internal/database/database.go
@@ -4,7 +4,11 @@ package database
 import (
 	"context"
 	"database/sql"
+	"embed"
+	"fmt"
 	"log/slog"
+	"sort"
+	"strings"
 
 	"go.uber.org/fx"
 	"sneak.berlin/go/pixa/internal/config"
@@ -13,6 +17,9 @@ import (
 	_ "modernc.org/sqlite" // SQLite driver registration
 )
 
+//go:embed schema/*.sql
+var schemaFS embed.FS
+
 // Params defines dependencies for Database.
 type Params struct {
 	fx.In
@@ -76,33 +83,83 @@ func (s *Database) connect(ctx context.Context) error {
 	s.db = db
 	s.log.Info("database connected")
 
-	return s.createSchema(ctx)
+	return s.runMigrations(ctx)
 }
 
-func (s *Database) createSchema(ctx context.Context) error {
-	// FIXME: Add actual schema for cache metadata
-	schema := `
-	CREATE TABLE IF NOT EXISTS cache_metadata (
-		id INTEGER PRIMARY KEY AUTOINCREMENT,
-		source_url TEXT NOT NULL UNIQUE,
-		source_hash TEXT NOT NULL,
-		content_type TEXT,
-		fetched_at DATETIME DEFAULT CURRENT_TIMESTAMP,
-		expires_at DATETIME,
-		created_at DATETIME DEFAULT CURRENT_TIMESTAMP
-	);
-	CREATE INDEX IF NOT EXISTS idx_cache_source_url ON cache_metadata(source_url);
-	CREATE INDEX IF NOT EXISTS idx_cache_source_hash ON cache_metadata(source_hash);
-	`
-
-	_, err := s.db.ExecContext(ctx, schema)
+// runMigrations applies the embedded schema/*.sql migrations in lexical
+// order, recording each applied version in the schema_migrations table.
+func (s *Database) runMigrations(ctx context.Context) error {
+	// Create migrations tracking table
+	_, err := s.db.ExecContext(ctx, `
+		CREATE TABLE IF NOT EXISTS schema_migrations (
+			version TEXT PRIMARY KEY,
+			applied_at DATETIME DEFAULT CURRENT_TIMESTAMP
+		)
+	`)
 	if err != nil {
-		s.log.Error("failed to create schema", "error", err)
-
-		return err
+		return fmt.Errorf("failed to create migrations table: %w", err)
 	}
 
-	s.log.Info("database schema initialized")
+	// Get list of migration files
+	entries, err := schemaFS.ReadDir("schema")
+	if err != nil {
+		return fmt.Errorf("failed to read schema directory: %w", err)
+	}
+
+	// Sort migration files by name (001.sql, 002.sql, etc.)
+	var migrations []string
+	for _, entry := range entries {
+		if !entry.IsDir() && strings.HasSuffix(entry.Name(), ".sql") {
+			migrations = append(migrations, entry.Name())
+		}
+	}
+	sort.Strings(migrations)
+
+	// Apply each migration that hasn't been applied yet
+	for _, migration := range migrations {
+		version := strings.TrimSuffix(migration, ".sql")
+
+		// Check if already applied
+		var count int
+		err := s.db.QueryRowContext(ctx,
+			"SELECT COUNT(*) FROM schema_migrations WHERE version = ?",
+			version,
+		).Scan(&count)
+		if err != nil {
+			return fmt.Errorf("failed to check migration status: %w", err)
+		}
+
+		if count > 0 {
+			s.log.Debug("migration already applied", "version", version)
+
+			continue
+		}
+
+		// Read and apply migration. embed.FS paths are always
+		// slash-separated, so do not use filepath.Join here.
+		content, err := schemaFS.ReadFile("schema/" + migration)
+		if err != nil {
+			return fmt.Errorf("failed to read migration %s: %w", migration, err)
+		}
+
+		s.log.Info("applying migration", "version", version)
+
+		_, err = s.db.ExecContext(ctx, string(content))
+		if err != nil {
+			return fmt.Errorf("failed to apply migration %s: %w", migration, err)
+		}
+
+		// Record migration as applied
+		_, err = s.db.ExecContext(ctx,
+			"INSERT INTO schema_migrations (version) VALUES (?)",
+			version,
+		)
+		if err != nil {
+			return fmt.Errorf("failed to record migration %s: %w", migration, err)
+		}
+
+		s.log.Info("migration applied successfully", "version", version)
+	}
 
 	return nil
 }
diff --git a/internal/database/schema/001.sql b/internal/database/schema/001.sql
new file mode 100644
index 0000000..5552ac1
--- /dev/null
+++ b/internal/database/schema/001.sql
@@ -0,0 +1,91 @@
+-- Migration 001: Initial schema
+-- Creates all tables for the pixa caching image proxy
+
+-- Source content blobs
+-- Files stored at: cache/src-content///
+CREATE TABLE IF NOT EXISTS source_content (
+    content_hash TEXT PRIMARY KEY,
+    content_type TEXT NOT NULL,
+    size_bytes INTEGER NOT NULL,
+    fetched_at DATETIME DEFAULT CURRENT_TIMESTAMP
+);
+
+-- Source URL metadata - maps URLs to content hashes
+-- JSON stored at: cache/src-metadata//.json
+CREATE TABLE IF NOT EXISTS source_metadata (
+    id INTEGER PRIMARY KEY AUTOINCREMENT,
+    source_host TEXT NOT NULL,
+    source_path TEXT NOT NULL,
+    source_query TEXT NOT NULL DEFAULT '',
+    path_hash TEXT NOT NULL,
+    content_hash TEXT,
+    status_code INTEGER NOT NULL,
+    content_type TEXT,
+    response_headers TEXT,
+    fetched_at DATETIME DEFAULT CURRENT_TIMESTAMP,
+    expires_at DATETIME,
+    etag TEXT,
+    last_modified TEXT,
+    UNIQUE(source_host, source_path, source_query),
+    FOREIGN KEY (content_hash) REFERENCES source_content(content_hash)
+);
+CREATE INDEX IF NOT EXISTS idx_source_meta_host ON source_metadata(source_host);
+CREATE INDEX IF NOT EXISTS idx_source_meta_path_hash ON source_metadata(path_hash);
+CREATE INDEX IF NOT EXISTS idx_source_meta_expires ON source_metadata(expires_at);
+CREATE INDEX IF NOT EXISTS idx_source_meta_content_hash ON source_metadata(content_hash);
+
+-- Output/transformed content blobs
+-- Files stored at: cache/dst-content///
+CREATE TABLE IF NOT EXISTS output_content (
+    content_hash TEXT PRIMARY KEY,
+    content_type TEXT NOT NULL,
+    size_bytes INTEGER NOT NULL,
+    fetched_at DATETIME DEFAULT CURRENT_TIMESTAMP
+);
+
+-- Request cache - maps full request params to output content
+CREATE TABLE IF NOT EXISTS request_cache (
+    id INTEGER PRIMARY KEY AUTOINCREMENT,
+    cache_key TEXT NOT NULL UNIQUE,
+    source_metadata_id INTEGER NOT NULL,
+    output_hash TEXT NOT NULL,
+    width INTEGER NOT NULL,
+    height INTEGER NOT NULL,
+    format TEXT NOT NULL,
+    quality INTEGER NOT NULL DEFAULT 85,
+    fit_mode TEXT NOT NULL DEFAULT 'cover',
+    fetched_at DATETIME DEFAULT CURRENT_TIMESTAMP,
+    access_count INTEGER NOT NULL DEFAULT 1,
+    FOREIGN KEY (source_metadata_id) REFERENCES source_metadata(id),
+    FOREIGN KEY (output_hash) REFERENCES output_content(content_hash)
+);
+CREATE INDEX IF NOT EXISTS idx_request_cache_key ON request_cache(cache_key);
+CREATE INDEX IF NOT EXISTS idx_request_cache_source ON request_cache(source_metadata_id);
+CREATE INDEX IF NOT EXISTS idx_request_cache_output ON request_cache(output_hash);
+CREATE INDEX IF NOT EXISTS idx_request_cache_fetched ON request_cache(fetched_at);
+
+-- Negative cache for failed fetches (404s, timeouts, etc.)
+CREATE TABLE IF NOT EXISTS negative_cache (
+    id INTEGER PRIMARY KEY AUTOINCREMENT,
+    source_host TEXT NOT NULL,
+    source_path TEXT NOT NULL,
+    source_query TEXT NOT NULL DEFAULT '',
+    status_code INTEGER NOT NULL,
+    error_message TEXT,
+    fetched_at DATETIME DEFAULT CURRENT_TIMESTAMP,
+    expires_at DATETIME NOT NULL,
+    UNIQUE(source_host, source_path, source_query)
+);
+CREATE INDEX IF NOT EXISTS idx_negative_cache_expires ON negative_cache(expires_at);
+
+-- Cache statistics for monitoring
+CREATE TABLE IF NOT EXISTS cache_stats (
+    id INTEGER PRIMARY KEY CHECK (id = 1),
+    hit_count INTEGER NOT NULL DEFAULT 0,
+    miss_count INTEGER NOT NULL DEFAULT 0,
+    upstream_fetch_count INTEGER NOT NULL DEFAULT 0,
+    upstream_fetch_bytes INTEGER NOT NULL DEFAULT 0,
+    transform_count INTEGER NOT NULL DEFAULT 0,
+    last_updated_at DATETIME DEFAULT CURRENT_TIMESTAMP
+);
+INSERT OR IGNORE INTO cache_stats (id) VALUES (1);