This commit represents a significant architectural overhaul of vaultik:

Database Schema Changes:
- Switch files table to use UUID primary keys instead of path-based keys
- Add UUID primary keys to blobs table for immediate chunk association
- Update all foreign key relationships to use UUIDs
- Add comprehensive schema documentation in DATAMODEL.md
- Add SQLite busy timeout handling for concurrent operations

Streaming and Performance Improvements:
- Implement true streaming blob packing without intermediate storage
- Add streaming chunk processing to reduce memory usage
- Improve progress reporting with real-time metrics
- Add upload metrics tracking in new uploads table

CLI Refactoring:
- Restructure CLI to use subcommands: snapshot create/list/purge/verify
- Add store info command for S3 configuration display
- Add custom duration parser supporting days/weeks/months/years
- Remove old backup.go in favor of enhanced snapshot.go
- Add --cron flag for silent operation

Configuration Changes:
- Remove unused index_prefix configuration option
- Add support for snapshot pruning retention policies
- Improve configuration validation and error messages

Testing Improvements:
- Add comprehensive repository tests with edge cases
- Add cascade delete debugging tests
- Fix concurrent operation tests to use SQLite busy timeout
- Remove tolerance for SQLITE_BUSY errors in tests

Documentation:
- Add MIT LICENSE file
- Update README with new command structure
- Add comprehensive DATAMODEL.md explaining database schema
- Update DESIGN.md with UUID-based architecture

Other Changes:
- Add test-config.yml for testing
- Update Makefile with better test output formatting
- Fix various race conditions in concurrent operations
- Improve error handling throughout
// Package database provides the local SQLite index for Vaultik backup operations.
// The database tracks files, chunks, and their associations with blobs.
//
// Blobs in Vaultik are the final storage units uploaded to S3. Each blob is a
// large (up to 10GB) file containing many compressed and encrypted chunks from
// multiple source files. Blobs are content-addressed, meaning their filename
// is derived from their SHA256 hash after compression and encryption.
//
// The database does not support migrations. If the schema changes, delete
// the local database and perform a full backup to recreate it.
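//
// A minimal usage sketch (error handling shortened; the query is illustrative
// and is not taken from schema.sql):
//
//	ctx := context.Background()
//	db, err := database.New(ctx, "/var/lib/vaultik/index.db")
//	if err != nil {
//		return err
//	}
//	defer func() { _ = db.Close() }()
//
//	var n int
//	if err := db.QueryRowWithLog(ctx, "SELECT count(*) FROM files").Scan(&n); err != nil {
//		return err
//	}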
package database

import (
	"context"
	"database/sql"
	_ "embed"
	"fmt"
	"os"
	"strings"

	"git.eeqj.de/sneak/vaultik/internal/log"
	_ "modernc.org/sqlite"
)

//go:embed schema.sql
var schemaSQL string

// DB represents the Vaultik local index database connection.
// It uses SQLite to track file metadata, content-defined chunks, and blob associations.
// The database enables incremental backups by detecting changed files and
// supports deduplication by tracking which chunks are already stored in blobs.
// Write operations are serialized through a single connection (MaxOpenConns is
// set to 1), so SQLite's own locking keeps concurrent use safe.
type DB struct {
	conn *sql.DB
	path string
}

// New creates a new database connection at the specified path.
// It automatically handles recovery from stale locks, creates the schema if
// needed, and configures SQLite with appropriate settings for performance
// and reliability. The database uses WAL mode for better concurrency and
// sets a busy timeout to handle concurrent access gracefully.
//
// If the database appears locked, New attempts recovery by removing stale
// lock files and switching temporarily to TRUNCATE journal mode.
//
// The path parameter can be a file path for persistent storage or ":memory:"
// for an in-memory database (useful for testing).
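//
// For example (paths shown are illustrative):
//
//	db, err := database.New(ctx, "/var/lib/vaultik/index.db") // persistent index
//	mem, err := database.New(ctx, ":memory:")                 // throwaway in-memory DB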
func New(ctx context.Context, path string) (*DB, error) {
	log.Debug("Opening database connection", "path", path)

	// First, try to recover from any stale locks
	if err := recoverDatabase(ctx, path); err != nil {
		log.Warn("Failed to recover database", "error", err)
	}

	// First attempt with standard WAL mode
	log.Debug("Attempting to open database with WAL mode", "path", path)
	conn, err := sql.Open(
		"sqlite",
		path+"?_journal_mode=WAL&_synchronous=NORMAL&_busy_timeout=10000&_locking_mode=NORMAL&_foreign_keys=ON",
	)
	if err == nil {
		// Set connection pool settings.
		// SQLite can handle multiple readers but only one writer at a time.
		// Setting MaxOpenConns to 1 ensures all writes are serialized through
		// a single connection, preventing SQLITE_BUSY errors.
		conn.SetMaxOpenConns(1)
		conn.SetMaxIdleConns(1)

		if err = conn.PingContext(ctx); err == nil {
			// Success on first try
			log.Debug("Database opened successfully with WAL mode", "path", path)

			// Enable foreign keys explicitly
			if _, err := conn.ExecContext(ctx, "PRAGMA foreign_keys = ON"); err != nil {
				log.Warn("Failed to enable foreign keys", "error", err)
			}

			db := &DB{conn: conn, path: path}
			if err := db.createSchema(ctx); err != nil {
				_ = conn.Close()
				return nil, fmt.Errorf("creating schema: %w", err)
			}
			return db, nil
		}
		log.Debug("Failed to ping database, closing connection", "path", path, "error", err)
		_ = conn.Close()
	}

	// If the first attempt failed, try with TRUNCATE mode to clear any locks
	log.Info(
		"Database appears locked, attempting recovery with TRUNCATE mode",
		"path", path,
	)
	conn, err = sql.Open(
		"sqlite",
		path+"?_journal_mode=TRUNCATE&_synchronous=NORMAL&_busy_timeout=10000&_foreign_keys=ON",
	)
	if err != nil {
		return nil, fmt.Errorf("opening database in recovery mode: %w", err)
	}

	// Set connection pool settings (see above): a single connection serializes writes.
	conn.SetMaxOpenConns(1)
	conn.SetMaxIdleConns(1)

	if err := conn.PingContext(ctx); err != nil {
		log.Debug("Failed to ping database in recovery mode, closing", "path", path, "error", err)
		_ = conn.Close()
		return nil, fmt.Errorf(
			"database still locked after recovery attempt: %w",
			err,
		)
	}

	log.Debug("Database opened in TRUNCATE mode", "path", path)

	// Switch back to WAL mode
	log.Debug("Switching database back to WAL mode", "path", path)
	if _, err := conn.ExecContext(ctx, "PRAGMA journal_mode=WAL"); err != nil {
		log.Warn("Failed to switch back to WAL mode", "path", path, "error", err)
	}

	// Ensure foreign keys are enabled
	if _, err := conn.ExecContext(ctx, "PRAGMA foreign_keys=ON"); err != nil {
		log.Warn("Failed to enable foreign keys", "path", path, "error", err)
	}

	db := &DB{conn: conn, path: path}
	if err := db.createSchema(ctx); err != nil {
		_ = conn.Close()
		return nil, fmt.Errorf("creating schema: %w", err)
	}

	log.Debug("Database connection established successfully", "path", path)
	return db, nil
}

// Close closes the database connection.
// It ensures all pending operations are completed before closing.
// Returns an error if the database connection cannot be closed properly.
func (db *DB) Close() error {
	log.Debug("Closing database connection", "path", db.path)
	if err := db.conn.Close(); err != nil {
		log.Error("Failed to close database", "path", db.path, "error", err)
		return fmt.Errorf("failed to close database: %w", err)
	}
	log.Debug("Database connection closed successfully", "path", db.path)
	return nil
}

// recoverDatabase attempts to recover a locked database by removing stale
// WAL, shared-memory, and journal files left behind by a previous process.
func recoverDatabase(ctx context.Context, path string) error {
	// Check if the database file exists
	if _, err := os.Stat(path); os.IsNotExist(err) {
		// No database file, nothing to recover
		return nil
	}

	// Remove stale lock files.
	// SQLite creates -wal and -shm files for WAL mode.
	walPath := path + "-wal"
	shmPath := path + "-shm"
	journalPath := path + "-journal"

	log.Info("Attempting database recovery", "path", path)

	// Always remove lock files on startup to ensure a clean state
	removed := false

	// Check for and remove journal file (from non-WAL mode)
	if _, err := os.Stat(journalPath); err == nil {
		log.Info("Found journal file, removing", "path", journalPath)
		if err := os.Remove(journalPath); err != nil {
			log.Warn("Failed to remove journal file", "error", err)
		} else {
			removed = true
		}
	}

	// Remove WAL file
	if _, err := os.Stat(walPath); err == nil {
		log.Info("Found WAL file, removing", "path", walPath)
		if err := os.Remove(walPath); err != nil {
			log.Warn("Failed to remove WAL file", "error", err)
		} else {
			removed = true
		}
	}

	// Remove SHM file
	if _, err := os.Stat(shmPath); err == nil {
		log.Info("Found shared memory file, removing", "path", shmPath)
		if err := os.Remove(shmPath); err != nil {
			log.Warn("Failed to remove shared memory file", "error", err)
		} else {
			removed = true
		}
	}

	if removed {
		log.Info("Database lock files removed")
	}

	return nil
}

// Conn returns the underlying *sql.DB connection.
// This should be used sparingly and primarily for read operations.
// For write operations, prefer using the ExecWithLog method.
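//
// A read-only sketch (the table and column names are illustrative, not taken
// from schema.sql):
//
//	rows, err := db.Conn().QueryContext(ctx, "SELECT id, path FROM files")
//	if err != nil {
//		return err
//	}
//	defer func() { _ = rows.Close() }()
//	for rows.Next() {
//		// scan columns here
//	}
//	if err := rows.Err(); err != nil {
//		return err
//	}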
func (db *DB) Conn() *sql.DB {
	return db.conn
}

// BeginTx starts a new database transaction with the given options.
// The caller is responsible for committing or rolling back the transaction.
// For write transactions, consider using the Repositories.WithTx method instead,
// which handles locking and rollback automatically.
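//
// A manual transaction sketch (the statement text is illustrative):
//
//	tx, err := db.BeginTx(ctx, nil)
//	if err != nil {
//		return err
//	}
//	defer func() { _ = tx.Rollback() }() // returns ErrTxDone after Commit; ignored
//	if _, err := tx.ExecContext(ctx, "DELETE FROM files WHERE id = ?", id); err != nil {
//		return err
//	}
//	return tx.Commit()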
func (db *DB) BeginTx(
	ctx context.Context,
	opts *sql.TxOptions,
) (*sql.Tx, error) {
	return db.conn.BeginTx(ctx, opts)
}

// Note: LockForWrite and UnlockWrite methods have been removed.
// SQLite handles its own locking internally, so explicit locking is not needed.

// ExecWithLog executes a write query with SQL logging.
// SQLite handles its own locking internally, so we just pass through to ExecContext.
// The query and args parameters follow the same format as sql.DB.ExecContext.
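//
// For example (table and column names are illustrative only):
//
//	_, err := db.ExecWithLog(ctx,
//		"UPDATE files SET mtime_unix = ? WHERE id = ?",
//		mtimeUnix, fileID,
//	)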
func (db *DB) ExecWithLog(
	ctx context.Context,
	query string,
	args ...interface{},
) (sql.Result, error) {
	LogSQL("Execute", query, args...)
	return db.conn.ExecContext(ctx, query, args...)
}

// QueryRowWithLog executes a query that returns at most one row with SQL logging.
// This is useful for queries that modify data and return values (e.g., INSERT ... RETURNING).
// SQLite handles its own locking internally.
// The query and args parameters follow the same format as sql.DB.QueryRowContext.
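//
// A sketch using RETURNING (table and column names are illustrative, not taken
// from schema.sql):
//
//	var id string
//	err := db.QueryRowWithLog(ctx,
//		"INSERT INTO uploads (blob_hash) VALUES (?) RETURNING id",
//		blobHash,
//	).Scan(&id)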
func (db *DB) QueryRowWithLog(
	ctx context.Context,
	query string,
	args ...interface{},
) *sql.Row {
	LogSQL("QueryRow", query, args...)
	return db.conn.QueryRowContext(ctx, query, args...)
}

// createSchema executes the embedded schema.sql against the connection.
func (db *DB) createSchema(ctx context.Context) error {
	_, err := db.conn.ExecContext(ctx, schemaSQL)
	return err
}

// NewTestDB creates an in-memory SQLite database for testing purposes.
// The database is automatically initialized with the schema and is ready for use.
// Each call creates a new independent database instance.
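//
// Typical test usage (a sketch):
//
//	func TestSomething(t *testing.T) {
//		db, err := database.NewTestDB()
//		if err != nil {
//			t.Fatal(err)
//		}
//		defer func() { _ = db.Close() }()
//		// exercise code that needs a *database.DB here
//	}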
func NewTestDB() (*DB, error) {
	return New(context.Background(), ":memory:")
}

// LogSQL logs SQL queries and their arguments when debug mode is enabled.
// Debug mode is activated by setting the GODEBUG environment variable to include "vaultik".
// This is useful for troubleshooting database operations and understanding query patterns.
//
// The operation parameter describes the type of SQL operation (e.g., "Execute", "Query").
// The query parameter is the SQL statement being executed.
// The args parameter contains the arguments bound to the query's placeholders.
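//
// For example, to see SQL logging from the CLI (the subcommand shown is
// illustrative):
//
//	GODEBUG=vaultik vaultik snapshot create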
func LogSQL(operation, query string, args ...interface{}) {
	if strings.Contains(os.Getenv("GODEBUG"), "vaultik") {
		log.Debug(
			"SQL "+operation,
			"query",
			strings.TrimSpace(query),
			"args",
			fmt.Sprintf("%v", args),
		)
	}
}