vaultik/internal/database/schema.sql
sneak 78af626759 Major refactoring: UUID-based storage, streaming architecture, and CLI improvements
This commit represents a significant architectural overhaul of vaultik:

Database Schema Changes:
- Switch files table to use UUID primary keys instead of path-based keys
- Add UUID primary keys to blobs table for immediate chunk association
- Update all foreign key relationships to use UUIDs
- Add comprehensive schema documentation in DATAMODEL.md
- Add SQLite busy timeout handling for concurrent operations

Streaming and Performance Improvements:
- Implement true streaming blob packing without intermediate storage
- Add streaming chunk processing to reduce memory usage
- Improve progress reporting with real-time metrics
- Add upload metrics tracking in new uploads table

CLI Refactoring:
- Restructure CLI to use subcommands: snapshot create/list/purge/verify
- Add store info command for S3 configuration display
- Add custom duration parser supporting days/weeks/months/years
- Remove old backup.go in favor of enhanced snapshot.go
- Add --cron flag for silent operation

Configuration Changes:
- Remove unused index_prefix configuration option
- Add support for snapshot pruning retention policies
- Improve configuration validation and error messages

Testing Improvements:
- Add comprehensive repository tests with edge cases
- Add cascade delete debugging tests
- Fix concurrent operation tests to use SQLite busy timeout
- Remove tolerance for SQLITE_BUSY errors in tests

Documentation:
- Add MIT LICENSE file
- Update README with new command structure
- Add comprehensive DATAMODEL.md explaining database schema
- Update DESIGN.md with UUID-based architecture

Other Changes:
- Add test-config.yml for testing
- Update Makefile with better test output formatting
- Fix various race conditions in concurrent operations
- Improve error handling throughout
2025-07-22 14:56:44 +02:00

113 lines
3.6 KiB
SQL

-- Vaultik Database Schema
-- Note: This database does not support migrations. If the schema changes,
-- delete the local database and perform a full backup to recreate it.
-- Files table: stores metadata about files in the filesystem
CREATE TABLE IF NOT EXISTS files (
id TEXT PRIMARY KEY, -- UUID
path TEXT NOT NULL UNIQUE,
mtime INTEGER NOT NULL,
ctime INTEGER NOT NULL,
size INTEGER NOT NULL,
mode INTEGER NOT NULL,
uid INTEGER NOT NULL,
gid INTEGER NOT NULL,
link_target TEXT
);
-- Create index on path for efficient lookups
CREATE INDEX IF NOT EXISTS idx_files_path ON files(path);
-- File chunks table: maps files to their constituent chunks
CREATE TABLE IF NOT EXISTS file_chunks (
file_id TEXT NOT NULL,
idx INTEGER NOT NULL,
chunk_hash TEXT NOT NULL,
PRIMARY KEY (file_id, idx),
FOREIGN KEY (file_id) REFERENCES files(id) ON DELETE CASCADE
);
-- Chunks table: stores unique content-defined chunks
CREATE TABLE IF NOT EXISTS chunks (
chunk_hash TEXT PRIMARY KEY,
sha256 TEXT NOT NULL,
size INTEGER NOT NULL
);
-- Blobs table: stores packed, compressed, and encrypted blob information
CREATE TABLE IF NOT EXISTS blobs (
id TEXT PRIMARY KEY,
blob_hash TEXT UNIQUE,
created_ts INTEGER NOT NULL,
finished_ts INTEGER,
uncompressed_size INTEGER NOT NULL DEFAULT 0,
compressed_size INTEGER NOT NULL DEFAULT 0,
uploaded_ts INTEGER
);
-- Blob chunks table: maps chunks to the blobs that contain them
CREATE TABLE IF NOT EXISTS blob_chunks (
blob_id TEXT NOT NULL,
chunk_hash TEXT NOT NULL,
offset INTEGER NOT NULL,
length INTEGER NOT NULL,
PRIMARY KEY (blob_id, chunk_hash),
FOREIGN KEY (blob_id) REFERENCES blobs(id)
);
-- Chunk files table: reverse mapping of chunks to files
CREATE TABLE IF NOT EXISTS chunk_files (
chunk_hash TEXT NOT NULL,
file_id TEXT NOT NULL,
file_offset INTEGER NOT NULL,
length INTEGER NOT NULL,
PRIMARY KEY (chunk_hash, file_id),
FOREIGN KEY (file_id) REFERENCES files(id) ON DELETE CASCADE
);
-- Snapshots table: tracks backup snapshots
CREATE TABLE IF NOT EXISTS snapshots (
id TEXT PRIMARY KEY,
hostname TEXT NOT NULL,
vaultik_version TEXT NOT NULL,
vaultik_git_revision TEXT NOT NULL,
started_at INTEGER NOT NULL,
completed_at INTEGER,
file_count INTEGER NOT NULL DEFAULT 0,
chunk_count INTEGER NOT NULL DEFAULT 0,
blob_count INTEGER NOT NULL DEFAULT 0,
total_size INTEGER NOT NULL DEFAULT 0,
blob_size INTEGER NOT NULL DEFAULT 0,
blob_uncompressed_size INTEGER NOT NULL DEFAULT 0,
compression_ratio REAL NOT NULL DEFAULT 1.0,
compression_level INTEGER NOT NULL DEFAULT 3,
upload_bytes INTEGER NOT NULL DEFAULT 0,
upload_duration_ms INTEGER NOT NULL DEFAULT 0
);
-- Snapshot files table: maps snapshots to files
CREATE TABLE IF NOT EXISTS snapshot_files (
snapshot_id TEXT NOT NULL,
file_id TEXT NOT NULL,
PRIMARY KEY (snapshot_id, file_id),
FOREIGN KEY (snapshot_id) REFERENCES snapshots(id) ON DELETE CASCADE,
FOREIGN KEY (file_id) REFERENCES files(id) ON DELETE CASCADE
);
-- Snapshot blobs table: maps snapshots to blobs
CREATE TABLE IF NOT EXISTS snapshot_blobs (
snapshot_id TEXT NOT NULL,
blob_id TEXT NOT NULL,
blob_hash TEXT NOT NULL,
PRIMARY KEY (snapshot_id, blob_id),
FOREIGN KEY (snapshot_id) REFERENCES snapshots(id) ON DELETE CASCADE,
FOREIGN KEY (blob_id) REFERENCES blobs(id) ON DELETE CASCADE
);
-- Uploads table: tracks blob upload metrics
CREATE TABLE IF NOT EXISTS uploads (
blob_hash TEXT PRIMARY KEY,
uploaded_at INTEGER NOT NULL,
size INTEGER NOT NULL,
duration_ms INTEGER NOT NULL
);