Major refactoring: UUID-based storage, streaming architecture, and CLI improvements
This commit represents a significant architectural overhaul of vaultik:

Database Schema Changes:
- Switch files table to use UUID primary keys instead of path-based keys
- Add UUID primary keys to blobs table for immediate chunk association
- Update all foreign key relationships to use UUIDs
- Add comprehensive schema documentation in DATAMODEL.md
- Add SQLite busy timeout handling for concurrent operations

Streaming and Performance Improvements:
- Implement true streaming blob packing without intermediate storage
- Add streaming chunk processing to reduce memory usage
- Improve progress reporting with real-time metrics
- Add upload metrics tracking in new uploads table

CLI Refactoring:
- Restructure CLI to use subcommands: snapshot create/list/purge/verify
- Add store info command for S3 configuration display
- Add custom duration parser supporting days/weeks/months/years
- Remove old backup.go in favor of enhanced snapshot.go
- Add --cron flag for silent operation

Configuration Changes:
- Remove unused index_prefix configuration option
- Add support for snapshot pruning retention policies
- Improve configuration validation and error messages

Testing Improvements:
- Add comprehensive repository tests with edge cases
- Add cascade delete debugging tests
- Fix concurrent operation tests to use SQLite busy timeout
- Remove tolerance for SQLITE_BUSY errors in tests

Documentation:
- Add MIT LICENSE file
- Update README with new command structure
- Add comprehensive DATAMODEL.md explaining database schema
- Update DESIGN.md with UUID-based architecture

Other Changes:
- Add test-config.yml for testing
- Update Makefile with better test output formatting
- Fix various race conditions in concurrent operations
- Improve error handling throughout
This commit is contained in:
@@ -1,84 +1,158 @@
|
||||
// Package database provides the local SQLite index for Vaultik backup operations.
// The database tracks files, chunks, and their associations with blobs.
//
// Blobs in Vaultik are the final storage units uploaded to S3. Each blob is a
// large (up to 10GB) file containing many compressed and encrypted chunks from
// multiple source files. Blobs are content-addressed, meaning their filename
// is derived from their SHA256 hash after compression and encryption.
//
// The database does not support migrations. If the schema changes, delete
// the local database and perform a full backup to recreate it.
|
||||
package database
|
||||
|
||||
import (
|
||||
"context"
|
||||
"database/sql"
|
||||
_ "embed"
|
||||
"fmt"
|
||||
"os"
|
||||
"strings"
|
||||
"sync"
|
||||
|
||||
"git.eeqj.de/sneak/vaultik/internal/log"
|
||||
_ "modernc.org/sqlite"
|
||||
)
|
||||
|
||||
//go:embed schema.sql
|
||||
var schemaSQL string
|
||||
|
||||
// DB represents the Vaultik local index database connection.
// It uses SQLite to track file metadata, content-defined chunks, and blob associations.
// The database enables incremental backups by detecting changed files and
// supports deduplication by tracking which chunks are already stored in blobs.
//
// A DB must not be copied after first use because it contains a sync.Mutex;
// always pass it around as *DB.
type DB struct {
	// conn is the underlying database/sql handle.
	conn *sql.DB

	// writeLock is available to code paths that serialize writes explicitly.
	// NOTE(review): confirm whether this is still required; the SQL logging
	// helpers rely on SQLite's own locking instead of taking this mutex.
	writeLock sync.Mutex

	// path is the location the database was opened from; it is attached to
	// log messages so that output from several databases can be told apart.
	path string
}
|
||||
|
||||
// New creates a new database connection at the specified path.
|
||||
// It automatically handles database recovery, creates the schema if needed,
|
||||
// and configures SQLite with appropriate settings for performance and reliability.
|
||||
// The database uses WAL mode for better concurrency and sets a busy timeout
|
||||
// to handle concurrent access gracefully.
|
||||
//
|
||||
// If the database appears locked, it will attempt recovery by removing stale
|
||||
// lock files and switching temporarily to TRUNCATE journal mode.
|
||||
//
|
||||
// New creates a new database connection at the specified path.
|
||||
// It automatically handles recovery from stale locks, creates the schema if needed,
|
||||
// and configures SQLite with WAL mode for better concurrency.
|
||||
// The path parameter can be a file path for persistent storage or ":memory:"
|
||||
// for an in-memory database (useful for testing).
|
||||
func New(ctx context.Context, path string) (*DB, error) {
|
||||
log.Debug("Opening database connection", "path", path)
|
||||
|
||||
// First, try to recover from any stale locks
|
||||
if err := recoverDatabase(ctx, path); err != nil {
|
||||
log.Warn("Failed to recover database", "error", err)
|
||||
}
|
||||
|
||||
// First attempt with standard WAL mode
|
||||
conn, err := sql.Open("sqlite", path+"?_journal_mode=WAL&_synchronous=NORMAL&_busy_timeout=10000&_locking_mode=NORMAL")
|
||||
log.Debug("Attempting to open database with WAL mode", "path", path)
|
||||
conn, err := sql.Open(
|
||||
"sqlite",
|
||||
path+"?_journal_mode=WAL&_synchronous=NORMAL&_busy_timeout=10000&_locking_mode=NORMAL&_foreign_keys=ON",
|
||||
)
|
||||
if err == nil {
|
||||
// Set connection pool settings to ensure proper cleanup
|
||||
conn.SetMaxOpenConns(1) // SQLite only supports one writer
|
||||
// Set connection pool settings
|
||||
// SQLite can handle multiple readers but only one writer at a time.
|
||||
// Setting MaxOpenConns to 1 ensures all writes are serialized through
|
||||
// a single connection, preventing SQLITE_BUSY errors.
|
||||
conn.SetMaxOpenConns(1)
|
||||
conn.SetMaxIdleConns(1)
|
||||
|
||||
if err := conn.PingContext(ctx); err == nil {
|
||||
// Success on first try
|
||||
db := &DB{conn: conn}
|
||||
log.Debug("Database opened successfully with WAL mode", "path", path)
|
||||
|
||||
// Enable foreign keys explicitly
|
||||
if _, err := conn.ExecContext(ctx, "PRAGMA foreign_keys = ON"); err != nil {
|
||||
log.Warn("Failed to enable foreign keys", "error", err)
|
||||
}
|
||||
|
||||
db := &DB{conn: conn, path: path}
|
||||
if err := db.createSchema(ctx); err != nil {
|
||||
_ = conn.Close()
|
||||
return nil, fmt.Errorf("creating schema: %w", err)
|
||||
}
|
||||
return db, nil
|
||||
}
|
||||
log.Debug("Failed to ping database, closing connection", "path", path, "error", err)
|
||||
_ = conn.Close()
|
||||
}
|
||||
|
||||
// If first attempt failed, try with TRUNCATE mode to clear any locks
|
||||
log.Info("Database appears locked, attempting recovery with TRUNCATE mode")
|
||||
conn, err = sql.Open("sqlite", path+"?_journal_mode=TRUNCATE&_synchronous=NORMAL&_busy_timeout=10000")
|
||||
log.Info(
|
||||
"Database appears locked, attempting recovery with TRUNCATE mode",
|
||||
"path", path,
|
||||
)
|
||||
conn, err = sql.Open(
|
||||
"sqlite",
|
||||
path+"?_journal_mode=TRUNCATE&_synchronous=NORMAL&_busy_timeout=10000&_foreign_keys=ON",
|
||||
)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("opening database in recovery mode: %w", err)
|
||||
}
|
||||
|
||||
// Set connection pool settings
|
||||
// SQLite can handle multiple readers but only one writer at a time.
|
||||
// Setting MaxOpenConns to 1 ensures all writes are serialized through
|
||||
// a single connection, preventing SQLITE_BUSY errors.
|
||||
conn.SetMaxOpenConns(1)
|
||||
conn.SetMaxIdleConns(1)
|
||||
|
||||
if err := conn.PingContext(ctx); err != nil {
|
||||
log.Debug("Failed to ping database in recovery mode, closing", "path", path, "error", err)
|
||||
_ = conn.Close()
|
||||
return nil, fmt.Errorf("database still locked after recovery attempt: %w", err)
|
||||
return nil, fmt.Errorf(
|
||||
"database still locked after recovery attempt: %w",
|
||||
err,
|
||||
)
|
||||
}
|
||||
|
||||
log.Debug("Database opened in TRUNCATE mode", "path", path)
|
||||
|
||||
// Switch back to WAL mode
|
||||
log.Debug("Switching database back to WAL mode", "path", path)
|
||||
if _, err := conn.ExecContext(ctx, "PRAGMA journal_mode=WAL"); err != nil {
|
||||
log.Warn("Failed to switch back to WAL mode", "error", err)
|
||||
log.Warn("Failed to switch back to WAL mode", "path", path, "error", err)
|
||||
}
|
||||
|
||||
db := &DB{conn: conn}
|
||||
// Ensure foreign keys are enabled
|
||||
if _, err := conn.ExecContext(ctx, "PRAGMA foreign_keys=ON"); err != nil {
|
||||
log.Warn("Failed to enable foreign keys", "path", path, "error", err)
|
||||
}
|
||||
|
||||
db := &DB{conn: conn, path: path}
|
||||
if err := db.createSchema(ctx); err != nil {
|
||||
_ = conn.Close()
|
||||
return nil, fmt.Errorf("creating schema: %w", err)
|
||||
}
|
||||
|
||||
log.Debug("Database connection established successfully", "path", path)
|
||||
return db, nil
|
||||
}
|
||||
|
||||
// Close closes the database connection.
|
||||
// It ensures all pending operations are completed before closing.
|
||||
// Returns an error if the database connection cannot be closed properly.
|
||||
func (db *DB) Close() error {
|
||||
log.Debug("Closing database connection")
|
||||
log.Debug("Closing database connection", "path", db.path)
|
||||
if err := db.conn.Close(); err != nil {
|
||||
log.Error("Failed to close database", "error", err)
|
||||
log.Error("Failed to close database", "path", db.path, "error", err)
|
||||
return fmt.Errorf("failed to close database: %w", err)
|
||||
}
|
||||
log.Debug("Database connection closed successfully")
|
||||
log.Debug("Database connection closed successfully", "path", db.path)
|
||||
return nil
|
||||
}
|
||||
|
||||
@@ -138,148 +212,79 @@ func recoverDatabase(ctx context.Context, path string) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
// Conn returns the underlying *sql.DB connection.
|
||||
// This should be used sparingly and primarily for read operations.
|
||||
// For write operations, prefer using the ExecWithLog method.
|
||||
func (db *DB) Conn() *sql.DB {
|
||||
return db.conn
|
||||
}
|
||||
|
||||
func (db *DB) BeginTx(ctx context.Context, opts *sql.TxOptions) (*sql.Tx, error) {
|
||||
// BeginTx starts a new database transaction with the given options.
|
||||
// The caller is responsible for committing or rolling back the transaction.
|
||||
// For write transactions, consider using the Repositories.WithTx method instead,
|
||||
// which handles locking and rollback automatically.
|
||||
func (db *DB) BeginTx(
|
||||
ctx context.Context,
|
||||
opts *sql.TxOptions,
|
||||
) (*sql.Tx, error) {
|
||||
return db.conn.BeginTx(ctx, opts)
|
||||
}
|
||||
|
||||
// LockForWrite acquires the write lock
|
||||
func (db *DB) LockForWrite() {
|
||||
log.Debug("Attempting to acquire write lock")
|
||||
db.writeLock.Lock()
|
||||
log.Debug("Write lock acquired")
|
||||
}
|
||||
|
||||
// UnlockWrite releases the write lock
|
||||
func (db *DB) UnlockWrite() {
|
||||
log.Debug("Releasing write lock")
|
||||
db.writeLock.Unlock()
|
||||
log.Debug("Write lock released")
|
||||
}
|
||||
|
||||
// ExecWithLock executes a write query with the write lock held
|
||||
func (db *DB) ExecWithLock(ctx context.Context, query string, args ...interface{}) (sql.Result, error) {
|
||||
db.writeLock.Lock()
|
||||
defer db.writeLock.Unlock()
|
||||
// Note: LockForWrite and UnlockWrite methods have been removed.
|
||||
// SQLite handles its own locking internally, so explicit locking is not needed.
|
||||
|
||||
// ExecWithLog executes a write query with SQL logging.
|
||||
// SQLite handles its own locking internally, so we just pass through to ExecContext.
|
||||
// The query and args parameters follow the same format as sql.DB.ExecContext.
|
||||
func (db *DB) ExecWithLog(
|
||||
ctx context.Context,
|
||||
query string,
|
||||
args ...interface{},
|
||||
) (sql.Result, error) {
|
||||
LogSQL("Execute", query, args...)
|
||||
return db.conn.ExecContext(ctx, query, args...)
|
||||
}
|
||||
|
||||
// QueryRowWithLock executes a write query that returns a row with the write lock held
|
||||
func (db *DB) QueryRowWithLock(ctx context.Context, query string, args ...interface{}) *sql.Row {
|
||||
db.writeLock.Lock()
|
||||
defer db.writeLock.Unlock()
|
||||
// QueryRowWithLog executes a query that returns at most one row with SQL logging.
|
||||
// This is useful for queries that modify data and return values (e.g., INSERT ... RETURNING).
|
||||
// SQLite handles its own locking internally.
|
||||
// The query and args parameters follow the same format as sql.DB.QueryRowContext.
|
||||
func (db *DB) QueryRowWithLog(
|
||||
ctx context.Context,
|
||||
query string,
|
||||
args ...interface{},
|
||||
) *sql.Row {
|
||||
LogSQL("QueryRow", query, args...)
|
||||
return db.conn.QueryRowContext(ctx, query, args...)
|
||||
}
|
||||
|
||||
func (db *DB) createSchema(ctx context.Context) error {
|
||||
schema := `
|
||||
CREATE TABLE IF NOT EXISTS files (
|
||||
path TEXT PRIMARY KEY,
|
||||
mtime INTEGER NOT NULL,
|
||||
ctime INTEGER NOT NULL,
|
||||
size INTEGER NOT NULL,
|
||||
mode INTEGER NOT NULL,
|
||||
uid INTEGER NOT NULL,
|
||||
gid INTEGER NOT NULL,
|
||||
link_target TEXT
|
||||
);
|
||||
|
||||
CREATE TABLE IF NOT EXISTS file_chunks (
|
||||
path TEXT NOT NULL,
|
||||
idx INTEGER NOT NULL,
|
||||
chunk_hash TEXT NOT NULL,
|
||||
PRIMARY KEY (path, idx)
|
||||
);
|
||||
|
||||
CREATE TABLE IF NOT EXISTS chunks (
|
||||
chunk_hash TEXT PRIMARY KEY,
|
||||
sha256 TEXT NOT NULL,
|
||||
size INTEGER NOT NULL
|
||||
);
|
||||
|
||||
CREATE TABLE IF NOT EXISTS blobs (
|
||||
id TEXT PRIMARY KEY,
|
||||
blob_hash TEXT UNIQUE,
|
||||
created_ts INTEGER NOT NULL,
|
||||
finished_ts INTEGER,
|
||||
uncompressed_size INTEGER NOT NULL DEFAULT 0,
|
||||
compressed_size INTEGER NOT NULL DEFAULT 0,
|
||||
uploaded_ts INTEGER
|
||||
);
|
||||
|
||||
CREATE TABLE IF NOT EXISTS blob_chunks (
|
||||
blob_id TEXT NOT NULL,
|
||||
chunk_hash TEXT NOT NULL,
|
||||
offset INTEGER NOT NULL,
|
||||
length INTEGER NOT NULL,
|
||||
PRIMARY KEY (blob_id, chunk_hash),
|
||||
FOREIGN KEY (blob_id) REFERENCES blobs(id)
|
||||
);
|
||||
|
||||
CREATE TABLE IF NOT EXISTS chunk_files (
|
||||
chunk_hash TEXT NOT NULL,
|
||||
file_path TEXT NOT NULL,
|
||||
file_offset INTEGER NOT NULL,
|
||||
length INTEGER NOT NULL,
|
||||
PRIMARY KEY (chunk_hash, file_path)
|
||||
);
|
||||
|
||||
CREATE TABLE IF NOT EXISTS snapshots (
|
||||
id TEXT PRIMARY KEY,
|
||||
hostname TEXT NOT NULL,
|
||||
vaultik_version TEXT NOT NULL,
|
||||
started_at INTEGER NOT NULL,
|
||||
completed_at INTEGER,
|
||||
file_count INTEGER NOT NULL DEFAULT 0,
|
||||
chunk_count INTEGER NOT NULL DEFAULT 0,
|
||||
blob_count INTEGER NOT NULL DEFAULT 0,
|
||||
total_size INTEGER NOT NULL DEFAULT 0,
|
||||
blob_size INTEGER NOT NULL DEFAULT 0,
|
||||
compression_ratio REAL NOT NULL DEFAULT 1.0
|
||||
);
|
||||
|
||||
CREATE TABLE IF NOT EXISTS snapshot_files (
|
||||
snapshot_id TEXT NOT NULL,
|
||||
file_path TEXT NOT NULL,
|
||||
PRIMARY KEY (snapshot_id, file_path),
|
||||
FOREIGN KEY (snapshot_id) REFERENCES snapshots(id) ON DELETE CASCADE,
|
||||
FOREIGN KEY (file_path) REFERENCES files(path) ON DELETE CASCADE
|
||||
);
|
||||
|
||||
CREATE TABLE IF NOT EXISTS snapshot_blobs (
|
||||
snapshot_id TEXT NOT NULL,
|
||||
blob_id TEXT NOT NULL,
|
||||
blob_hash TEXT NOT NULL,
|
||||
PRIMARY KEY (snapshot_id, blob_id),
|
||||
FOREIGN KEY (snapshot_id) REFERENCES snapshots(id) ON DELETE CASCADE,
|
||||
FOREIGN KEY (blob_id) REFERENCES blobs(id) ON DELETE CASCADE
|
||||
);
|
||||
|
||||
CREATE TABLE IF NOT EXISTS uploads (
|
||||
blob_hash TEXT PRIMARY KEY,
|
||||
uploaded_at INTEGER NOT NULL,
|
||||
size INTEGER NOT NULL,
|
||||
duration_ms INTEGER NOT NULL
|
||||
);
|
||||
`
|
||||
|
||||
_, err := db.conn.ExecContext(ctx, schema)
|
||||
_, err := db.conn.ExecContext(ctx, schemaSQL)
|
||||
return err
|
||||
}
|
||||
|
||||
// NewTestDB creates an in-memory SQLite database for testing
|
||||
// NewTestDB creates an in-memory SQLite database for testing purposes.
|
||||
// The database is automatically initialized with the schema and is ready for use.
|
||||
// Each call creates a new independent database instance.
|
||||
func NewTestDB() (*DB, error) {
|
||||
return New(context.Background(), ":memory:")
|
||||
}
|
||||
|
||||
// LogSQL logs SQL queries if debug mode is enabled
|
||||
// LogSQL logs SQL queries and their arguments when debug mode is enabled.
|
||||
// Debug mode is activated by setting the GODEBUG environment variable to include "vaultik".
|
||||
// This is useful for troubleshooting database operations and understanding query patterns.
|
||||
//
|
||||
// The operation parameter describes the type of SQL operation (e.g., "Execute", "Query").
|
||||
// The query parameter is the SQL statement being executed.
|
||||
// The args parameter contains the query arguments that will be interpolated.
|
||||
func LogSQL(operation, query string, args ...interface{}) {
|
||||
if strings.Contains(os.Getenv("GODEBUG"), "vaultik") {
|
||||
log.Debug("SQL "+operation, "query", strings.TrimSpace(query), "args", fmt.Sprintf("%v", args))
|
||||
log.Debug(
|
||||
"SQL "+operation,
|
||||
"query",
|
||||
strings.TrimSpace(query),
|
||||
"args",
|
||||
fmt.Sprintf("%v", args),
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user