vaultik/internal/database/database.go
sneak 78af626759 Major refactoring: UUID-based storage, streaming architecture, and CLI improvements
This commit represents a significant architectural overhaul of vaultik:

Database Schema Changes:
- Switch files table to use UUID primary keys instead of path-based keys
- Add UUID primary keys to blobs table for immediate chunk association
- Update all foreign key relationships to use UUIDs
- Add comprehensive schema documentation in DATAMODEL.md
- Add SQLite busy timeout handling for concurrent operations

Streaming and Performance Improvements:
- Implement true streaming blob packing without intermediate storage
- Add streaming chunk processing to reduce memory usage
- Improve progress reporting with real-time metrics
- Add upload metrics tracking in new uploads table

CLI Refactoring:
- Restructure CLI to use subcommands: snapshot create/list/purge/verify
- Add store info command for S3 configuration display
- Add custom duration parser supporting days/weeks/months/years
- Remove old backup.go in favor of enhanced snapshot.go
- Add --cron flag for silent operation

Configuration Changes:
- Remove unused index_prefix configuration option
- Add support for snapshot pruning retention policies
- Improve configuration validation and error messages

Testing Improvements:
- Add comprehensive repository tests with edge cases
- Add cascade delete debugging tests
- Fix concurrent operation tests to use SQLite busy timeout
- Remove tolerance for SQLITE_BUSY errors in tests

Documentation:
- Add MIT LICENSE file
- Update README with new command structure
- Add comprehensive DATAMODEL.md explaining database schema
- Update DESIGN.md with UUID-based architecture

Other Changes:
- Add test-config.yml for testing
- Update Makefile with better test output formatting
- Fix various race conditions in concurrent operations
- Improve error handling throughout
2025-07-22 14:56:44 +02:00

291 lines
9.9 KiB
Go

// Package database provides the local SQLite index for Vaultik backup operations.
// The database tracks files, chunks, and their associations with blobs.
//
// Blobs in Vaultik are the final storage units uploaded to S3. Each blob is a
// large (up to 10GB) file containing many compressed and encrypted chunks from
// multiple source files. Blobs are content-addressed, meaning their filename
// is derived from their SHA256 hash after compression and encryption.
//
// The database does not support migrations. If the schema changes, delete
// the local database and perform a full backup to recreate it.
package database
import (
"context"
"database/sql"
_ "embed"
"fmt"
"os"
"strings"
"git.eeqj.de/sneak/vaultik/internal/log"
_ "modernc.org/sqlite"
)
// schemaSQL holds the DDL from schema.sql, embedded into the binary at
// build time and executed by createSchema on every open.
//
//go:embed schema.sql
var schemaSQL string
// DB represents the Vaultik local index database connection.
// It uses SQLite to track file metadata, content-defined chunks, and blob associations.
// The database enables incremental backups by detecting changed files and
// supports deduplication by tracking which chunks are already stored in blobs.
// Concurrent writes are serialized by limiting the connection pool to a
// single connection (see New); no application-level mutex is used.
type DB struct {
	// conn is the underlying SQLite connection pool, capped at one
	// connection so all writes are serialized.
	conn *sql.DB
	// path is the database file path (or ":memory:"), retained for logging.
	path string
}
// New creates a new database connection at the specified path.
// It automatically handles recovery from stale locks, creates the schema if
// needed, and configures SQLite with WAL mode for better concurrency and a
// busy timeout so concurrent access is handled gracefully.
//
// If the database appears locked, it retries by temporarily opening in
// TRUNCATE journal mode to clear stale locks, then switches back to WAL.
//
// The path parameter can be a file path for persistent storage or ":memory:"
// for an in-memory database (useful for testing).
func New(ctx context.Context, path string) (*DB, error) {
	log.Debug("Opening database connection", "path", path)

	// First, try to recover from any stale locks left behind by a
	// previous process.
	if err := recoverDatabase(ctx, path); err != nil {
		log.Warn("Failed to recover database", "error", err)
	}

	// First attempt with standard WAL mode.
	log.Debug("Attempting to open database with WAL mode", "path", path)
	conn, err := sql.Open(
		"sqlite",
		path+"?_journal_mode=WAL&_synchronous=NORMAL&_busy_timeout=10000&_locking_mode=NORMAL&_foreign_keys=ON",
	)
	if err == nil {
		configureConn(conn)
		if err := conn.PingContext(ctx); err == nil {
			// Success on first try.
			log.Debug("Database opened successfully with WAL mode", "path", path)
			// Enable foreign keys explicitly, in addition to the DSN flag.
			if _, err := conn.ExecContext(ctx, "PRAGMA foreign_keys = ON"); err != nil {
				log.Warn("Failed to enable foreign keys", "error", err)
			}
			db := &DB{conn: conn, path: path}
			if err := db.createSchema(ctx); err != nil {
				_ = conn.Close()
				return nil, fmt.Errorf("creating schema: %w", err)
			}
			return db, nil
		}
		log.Debug("Failed to ping database, closing connection", "path", path, "error", err)
		_ = conn.Close()
	}

	// If the first attempt failed, try TRUNCATE mode to clear any locks.
	log.Info(
		"Database appears locked, attempting recovery with TRUNCATE mode",
		"path", path,
	)
	conn, err = sql.Open(
		"sqlite",
		path+"?_journal_mode=TRUNCATE&_synchronous=NORMAL&_busy_timeout=10000&_foreign_keys=ON",
	)
	if err != nil {
		return nil, fmt.Errorf("opening database in recovery mode: %w", err)
	}
	configureConn(conn)
	if err := conn.PingContext(ctx); err != nil {
		log.Debug("Failed to ping database in recovery mode, closing", "path", path, "error", err)
		_ = conn.Close()
		return nil, fmt.Errorf(
			"database still locked after recovery attempt: %w",
			err,
		)
	}
	log.Debug("Database opened in TRUNCATE mode", "path", path)

	// Switch back to WAL mode for better concurrency going forward.
	log.Debug("Switching database back to WAL mode", "path", path)
	if _, err := conn.ExecContext(ctx, "PRAGMA journal_mode=WAL"); err != nil {
		log.Warn("Failed to switch back to WAL mode", "path", path, "error", err)
	}
	// Ensure foreign keys are enabled on the recovered connection as well.
	if _, err := conn.ExecContext(ctx, "PRAGMA foreign_keys=ON"); err != nil {
		log.Warn("Failed to enable foreign keys", "path", path, "error", err)
	}
	db := &DB{conn: conn, path: path}
	if err := db.createSchema(ctx); err != nil {
		_ = conn.Close()
		return nil, fmt.Errorf("creating schema: %w", err)
	}
	log.Debug("Database connection established successfully", "path", path)
	return db, nil
}

// configureConn applies the connection pool limits shared by both open paths.
// SQLite can handle multiple readers but only one writer at a time; capping
// MaxOpenConns at 1 serializes all statements through a single connection,
// preventing SQLITE_BUSY errors.
func configureConn(conn *sql.DB) {
	conn.SetMaxOpenConns(1)
	conn.SetMaxIdleConns(1)
}
// Close shuts down the database connection, allowing pending operations to
// finish first. It returns an error when the underlying connection cannot
// be closed cleanly.
func (db *DB) Close() error {
	log.Debug("Closing database connection", "path", db.path)
	err := db.conn.Close()
	if err != nil {
		log.Error("Failed to close database", "path", db.path, "error", err)
		return fmt.Errorf("failed to close database: %w", err)
	}
	log.Debug("Database connection closed successfully", "path", db.path)
	return nil
}
// recoverDatabase attempts to recover a locked database by removing SQLite's
// sidecar lock files (-journal, -wal, -shm) before the database is opened.
// Missing files are skipped; removal failures are logged but not fatal.
//
// NOTE(review): deleting a -wal file can discard transactions that were
// committed but not yet checkpointed into the main database file — confirm
// this trade-off is acceptable for a local index that can be rebuilt.
//
// ctx is currently unused (all operations here are quick local file ops);
// it is kept in the signature for future cancellation support.
func recoverDatabase(ctx context.Context, path string) error {
	_ = ctx

	// No database file means there is nothing to recover.
	if _, err := os.Stat(path); os.IsNotExist(err) {
		return nil
	}

	log.Info("Attempting database recovery", "path", path)

	// SQLite creates -wal and -shm files in WAL mode and -journal in
	// rollback-journal mode. Always remove them on startup to ensure a
	// clean state. Order matches the original: journal, WAL, SHM.
	lockFiles := []struct {
		path string
		desc string
	}{
		{path + "-journal", "journal"},
		{path + "-wal", "WAL"},
		{path + "-shm", "shared memory"},
	}

	removed := false
	for _, lf := range lockFiles {
		if _, err := os.Stat(lf.path); err != nil {
			continue // file absent (or unreadable): nothing to do
		}
		log.Info("Found "+lf.desc+" file, removing", "path", lf.path)
		if err := os.Remove(lf.path); err != nil {
			log.Warn("Failed to remove "+lf.desc+" file", "error", err)
			continue
		}
		removed = true
	}

	if removed {
		log.Info("Database lock files removed")
	}
	return nil
}
// Conn exposes the raw *sql.DB handle.
// Reach for this sparingly, mainly for read operations; writes should go
// through ExecWithLog so they are captured by SQL logging.
func (db *DB) Conn() *sql.DB {
	return db.conn
}
// BeginTx opens a new transaction with the supplied options.
// Callers must commit or roll back the returned transaction themselves.
// For write transactions, prefer Repositories.WithTx, which manages
// locking and rollback automatically.
func (db *DB) BeginTx(ctx context.Context, opts *sql.TxOptions) (*sql.Tx, error) {
	return db.conn.BeginTx(ctx, opts)
}
// Note: LockForWrite and UnlockWrite methods have been removed.
// SQLite handles its own locking internally, so explicit locking is not needed.

// ExecWithLog runs a write statement, recording it via LogSQL first.
// Since SQLite serializes writers internally, this simply delegates to
// ExecContext. The query and args parameters follow the same format as
// sql.DB.ExecContext.
func (db *DB) ExecWithLog(ctx context.Context, query string, args ...interface{}) (sql.Result, error) {
	LogSQL("Execute", query, args...)
	return db.conn.ExecContext(ctx, query, args...)
}
// QueryRowWithLog runs a query expected to yield at most one row, recording
// it via LogSQL first. Handy for statements that both modify data and
// return values (e.g. INSERT ... RETURNING). SQLite handles its own locking
// internally. The query and args parameters follow the same format as
// sql.DB.QueryRowContext.
func (db *DB) QueryRowWithLog(ctx context.Context, query string, args ...interface{}) *sql.Row {
	LogSQL("QueryRow", query, args...)
	return db.conn.QueryRowContext(ctx, query, args...)
}
// createSchema executes the embedded schema.sql DDL to initialize tables.
// It runs on every open (see New), so the schema statements are presumably
// idempotent — TODO confirm schema.sql uses IF NOT EXISTS throughout.
func (db *DB) createSchema(ctx context.Context) error {
	_, err := db.conn.ExecContext(ctx, schemaSQL)
	return err
}
// NewTestDB returns a fresh in-memory SQLite database for testing.
// The schema is initialized automatically, and every call produces an
// independent database instance.
func NewTestDB() (*DB, error) {
	ctx := context.Background()
	return New(ctx, ":memory:")
}
// LogSQL emits SQL statements and their arguments when debug mode is active.
// Debug mode is enabled when the GODEBUG environment variable contains
// "vaultik". This helps with troubleshooting database operations and
// understanding query patterns.
//
// operation names the kind of statement (e.g. "Execute", "QueryRow"),
// query is the SQL text, and args are the bound parameters.
func LogSQL(operation, query string, args ...interface{}) {
	if !strings.Contains(os.Getenv("GODEBUG"), "vaultik") {
		return
	}
	log.Debug(
		"SQL "+operation,
		"query",
		strings.TrimSpace(query),
		"args",
		fmt.Sprintf("%v", args),
	)
}