Refactor blob storage to use UUID primary keys and implement streaming chunking

- Changed blob table to use ID (UUID) as primary key instead of hash
- Blob records are now created at packing start, enabling immediate chunk associations
- Implemented streaming chunking to process large files without memory exhaustion
- Fixed blob manifest generation to include all referenced blobs
- Updated all foreign key references from blob_hash to blob_id
- Added progress reporting and improved error handling
- Enforced encryption requirement for all blob packing
- Updated tests to use test encryption keys
- Added Cyrillic transliteration to README
This commit is contained in:
2025-07-22 07:43:39 +02:00
parent 26db096913
commit 86b533d6ee
49 changed files with 5709 additions and 324 deletions

View File

@@ -4,8 +4,11 @@ import (
"context"
"database/sql"
"fmt"
"os"
"strings"
"sync"
"git.eeqj.de/sneak/vaultik/internal/log"
_ "modernc.org/sqlite"
)
@@ -15,23 +18,54 @@ type DB struct {
}
func New(ctx context.Context, path string) (*DB, error) {
conn, err := sql.Open("sqlite", path+"?_journal_mode=WAL&_synchronous=NORMAL&_busy_timeout=5000")
if err != nil {
return nil, fmt.Errorf("opening database: %w", err)
// First, try to recover from any stale locks
if err := recoverDatabase(ctx, path); err != nil {
log.Warn("Failed to recover database", "error", err)
}
if err := conn.PingContext(ctx); err != nil {
if closeErr := conn.Close(); closeErr != nil {
Fatal("failed to close database connection: %v", closeErr)
// First attempt with standard WAL mode
conn, err := sql.Open("sqlite", path+"?_journal_mode=WAL&_synchronous=NORMAL&_busy_timeout=10000&_locking_mode=NORMAL")
if err == nil {
// Set connection pool settings to ensure proper cleanup
conn.SetMaxOpenConns(1) // SQLite only supports one writer
conn.SetMaxIdleConns(1)
if err := conn.PingContext(ctx); err == nil {
// Success on first try
db := &DB{conn: conn}
if err := db.createSchema(ctx); err != nil {
_ = conn.Close()
return nil, fmt.Errorf("creating schema: %w", err)
}
return db, nil
}
return nil, fmt.Errorf("pinging database: %w", err)
_ = conn.Close()
}
// If first attempt failed, try with TRUNCATE mode to clear any locks
log.Info("Database appears locked, attempting recovery with TRUNCATE mode")
conn, err = sql.Open("sqlite", path+"?_journal_mode=TRUNCATE&_synchronous=NORMAL&_busy_timeout=10000")
if err != nil {
return nil, fmt.Errorf("opening database in recovery mode: %w", err)
}
// Set connection pool settings
conn.SetMaxOpenConns(1)
conn.SetMaxIdleConns(1)
if err := conn.PingContext(ctx); err != nil {
_ = conn.Close()
return nil, fmt.Errorf("database still locked after recovery attempt: %w", err)
}
// Switch back to WAL mode
if _, err := conn.ExecContext(ctx, "PRAGMA journal_mode=WAL"); err != nil {
log.Warn("Failed to switch back to WAL mode", "error", err)
}
db := &DB{conn: conn}
if err := db.createSchema(ctx); err != nil {
if closeErr := conn.Close(); closeErr != nil {
Fatal("failed to close database connection: %v", closeErr)
}
_ = conn.Close()
return nil, fmt.Errorf("creating schema: %w", err)
}
@@ -39,9 +73,68 @@ func New(ctx context.Context, path string) (*DB, error) {
}
// Close closes the underlying connection held in db.conn.
//
// It returns nil when the connection closes cleanly. When conn.Close reports
// an error, that error is passed to Fatal, logged through log.Error, and then
// returned wrapped with %w so callers can still inspect it.
//
// NOTE(review): Fatal is defined outside this view. If it terminates the
// process, the log.Error call and the wrapped return below it are never
// reached — confirm whether the Fatal call is still meant to be here.
func (db *DB) Close() error {
log.Debug("Closing database connection")
if err := db.conn.Close(); err != nil {
Fatal("failed to close database: %v", err)
log.Error("Failed to close database", "error", err)
return fmt.Errorf("failed to close database: %w", err)
}
log.Debug("Database connection closed successfully")
return nil
}
// recoverDatabase removes the SQLite side files that sit next to the database
// at path: the rollback journal ("-journal"), the write-ahead log ("-wal") and
// the shared-memory index ("-shm").
//
// If os.Stat reports that the database file itself does not exist, nothing is
// touched. Each side file that can be stat'ed is removed; a failed removal is
// logged as a warning and does not stop the remaining files from being
// processed. The function always returns nil. ctx is accepted but not used.
//
// NOTE(review): SQLite's documentation warns that deleting a hot journal or a
// WAL file can discard committed transactions or corrupt the database —
// confirm that unconditional removal on every call is really intended.
func recoverDatabase(ctx context.Context, path string) error {
	// A missing database file means there is nothing to clean up.
	if _, statErr := os.Stat(path); os.IsNotExist(statErr) {
		return nil
	}

	log.Info("Attempting database recovery", "path", path)

	// Processed in this order: rollback journal, WAL, shared memory.
	sideFiles := []struct {
		file       string
		foundMsg   string
		failureMsg string
	}{
		{path + "-journal", "Found journal file, removing", "Failed to remove journal file"},
		{path + "-wal", "Found WAL file, removing", "Failed to remove WAL file"},
		{path + "-shm", "Found shared memory file, removing", "Failed to remove shared memory file"},
	}

	cleaned := false
	for _, sf := range sideFiles {
		// Any stat failure is treated as "not present" and the file is skipped.
		if _, statErr := os.Stat(sf.file); statErr != nil {
			continue
		}
		log.Info(sf.foundMsg, "path", sf.file)
		if rmErr := os.Remove(sf.file); rmErr != nil {
			log.Warn(sf.failureMsg, "error", rmErr)
			continue
		}
		cleaned = true
	}

	// Only announce the cleanup when at least one file was actually deleted.
	if cleaned {
		log.Info("Database lock files removed")
	}
	return nil
}
@@ -55,18 +148,24 @@ func (db *DB) BeginTx(ctx context.Context, opts *sql.TxOptions) (*sql.Tx, error)
// LockForWrite acquires the write lock by calling db.writeLock.Lock().
//
// A debug message is logged immediately before the Lock call and another once
// it has returned, so a long gap between the two messages shows time spent
// waiting for the lock. The lock is still held when this method returns;
// UnlockWrite releases it.
func (db *DB) LockForWrite() {
log.Debug("Attempting to acquire write lock")
db.writeLock.Lock()
log.Debug("Write lock acquired")
}
// UnlockWrite releases the write lock by calling db.writeLock.Unlock().
//
// A debug message is logged immediately before and immediately after the
// Unlock call. It is the counterpart of LockForWrite.
func (db *DB) UnlockWrite() {
log.Debug("Releasing write lock")
db.writeLock.Unlock()
log.Debug("Write lock released")
}
// ExecWithLock runs a single write statement while holding db.writeLock.
//
// The lock is taken on entry and released by a deferred Unlock when the
// method returns. The statement is handed to LogSQL with the "Execute" label
// and then executed on db.conn via ExecContext using ctx, query and args.
// The result and error from ExecContext are returned unchanged.
func (db *DB) ExecWithLock(ctx context.Context, query string, args ...interface{}) (sql.Result, error) {
	db.writeLock.Lock()
	defer db.writeLock.Unlock()

	// Logging happens while the lock is held, directly before execution.
	LogSQL("Execute", query, args...)

	result, execErr := db.conn.ExecContext(ctx, query, args...)
	return result, execErr
}
@@ -104,16 +203,22 @@ func (db *DB) createSchema(ctx context.Context) error {
);
CREATE TABLE IF NOT EXISTS blobs (
blob_hash TEXT PRIMARY KEY,
created_ts INTEGER NOT NULL
id TEXT PRIMARY KEY,
blob_hash TEXT UNIQUE,
created_ts INTEGER NOT NULL,
finished_ts INTEGER,
uncompressed_size INTEGER NOT NULL DEFAULT 0,
compressed_size INTEGER NOT NULL DEFAULT 0,
uploaded_ts INTEGER
);
CREATE TABLE IF NOT EXISTS blob_chunks (
blob_hash TEXT NOT NULL,
blob_id TEXT NOT NULL,
chunk_hash TEXT NOT NULL,
offset INTEGER NOT NULL,
length INTEGER NOT NULL,
PRIMARY KEY (blob_hash, chunk_hash)
PRIMARY KEY (blob_id, chunk_hash),
FOREIGN KEY (blob_id) REFERENCES blobs(id)
);
CREATE TABLE IF NOT EXISTS chunk_files (
@@ -128,13 +233,38 @@ func (db *DB) createSchema(ctx context.Context) error {
id TEXT PRIMARY KEY,
hostname TEXT NOT NULL,
vaultik_version TEXT NOT NULL,
created_ts INTEGER NOT NULL,
file_count INTEGER NOT NULL,
chunk_count INTEGER NOT NULL,
blob_count INTEGER NOT NULL,
total_size INTEGER NOT NULL,
blob_size INTEGER NOT NULL,
compression_ratio REAL NOT NULL
started_at INTEGER NOT NULL,
completed_at INTEGER,
file_count INTEGER NOT NULL DEFAULT 0,
chunk_count INTEGER NOT NULL DEFAULT 0,
blob_count INTEGER NOT NULL DEFAULT 0,
total_size INTEGER NOT NULL DEFAULT 0,
blob_size INTEGER NOT NULL DEFAULT 0,
compression_ratio REAL NOT NULL DEFAULT 1.0
);
CREATE TABLE IF NOT EXISTS snapshot_files (
snapshot_id TEXT NOT NULL,
file_path TEXT NOT NULL,
PRIMARY KEY (snapshot_id, file_path),
FOREIGN KEY (snapshot_id) REFERENCES snapshots(id) ON DELETE CASCADE,
FOREIGN KEY (file_path) REFERENCES files(path) ON DELETE CASCADE
);
CREATE TABLE IF NOT EXISTS snapshot_blobs (
snapshot_id TEXT NOT NULL,
blob_id TEXT NOT NULL,
blob_hash TEXT NOT NULL,
PRIMARY KEY (snapshot_id, blob_id),
FOREIGN KEY (snapshot_id) REFERENCES snapshots(id) ON DELETE CASCADE,
FOREIGN KEY (blob_id) REFERENCES blobs(id) ON DELETE CASCADE
);
CREATE TABLE IF NOT EXISTS uploads (
blob_hash TEXT PRIMARY KEY,
uploaded_at INTEGER NOT NULL,
size INTEGER NOT NULL,
duration_ms INTEGER NOT NULL
);
`
@@ -146,3 +276,10 @@ func (db *DB) createSchema(ctx context.Context) error {
// NewTestDB opens a database for tests by calling New with a background
// context and the path ":memory:". It returns whatever New returns.
//
// NOTE(review): ":memory:" is presumably meant to yield an in-memory SQLite
// database — verify that New's handling of the path (it appends query
// parameters) still produces one with the driver in use.
func NewTestDB() (*DB, error) {
	const testDBPath = ":memory:"

	ctx := context.Background()
	return New(ctx, testDBPath)
}
// LogSQL emits a debug log entry for a SQL statement, but only when the
// GODEBUG environment variable contains the substring "vaultik".
//
// operation is appended to the "SQL " message prefix, query is logged with
// surrounding whitespace trimmed, and args is rendered with the %v verb.
// The environment variable is read on every call.
func LogSQL(operation, query string, args ...interface{}) {
	// Skip all formatting work unless SQL logging has been switched on.
	if !strings.Contains(os.Getenv("GODEBUG"), "vaultik") {
		return
	}

	trimmedQuery := strings.TrimSpace(query)
	renderedArgs := fmt.Sprintf("%v", args)
	log.Debug("SQL "+operation, "query", trimmedQuery, "args", renderedArgs)
}