Add exclude patterns, snapshot prune, and other improvements

- Implement exclude patterns with anchored pattern support:
  - Patterns starting with / only match from root of source dir
  - Unanchored patterns match anywhere in path
  - Support for glob patterns (*.log, .*, **/*.pack)
  - Directory patterns skip entire subtrees
  - Add gobwas/glob dependency for pattern matching
  - Add 16 comprehensive tests for exclude functionality

- Add snapshot prune command to clean orphaned data:
  - Removes incomplete snapshots from database
  - Cleans orphaned files, chunks, and blobs
  - Runs automatically at backup start for consistency

- Add snapshot remove command for deleting snapshots

- Add VAULTIK_AGE_SECRET_KEY environment variable support

- Fix duplicate fx module provider in restore command

- Change snapshot ID format to hostname_YYYY-MM-DDTHH:MM:SSZ
This commit is contained in:
2026-01-01 05:42:56 -08:00
parent 05286bed01
commit 2afd54d693
23 changed files with 1769 additions and 98 deletions

View File

@@ -132,3 +132,80 @@ func (r *ChunkFileRepository) DeleteByFileID(ctx context.Context, tx *sql.Tx, fi
return nil
}
// DeleteByFileIDs deletes all chunk_files for multiple files in a single statement.
func (r *ChunkFileRepository) DeleteByFileIDs(ctx context.Context, tx *sql.Tx, fileIDs []string) error {
if len(fileIDs) == 0 {
return nil
}
// Batch at 500 to stay within SQLite's variable limit
const batchSize = 500
for i := 0; i < len(fileIDs); i += batchSize {
end := i + batchSize
if end > len(fileIDs) {
end = len(fileIDs)
}
batch := fileIDs[i:end]
query := "DELETE FROM chunk_files WHERE file_id IN (?" + repeatPlaceholder(len(batch)-1) + ")"
args := make([]interface{}, len(batch))
for j, id := range batch {
args[j] = id
}
var err error
if tx != nil {
_, err = tx.ExecContext(ctx, query, args...)
} else {
_, err = r.db.ExecWithLog(ctx, query, args...)
}
if err != nil {
return fmt.Errorf("batch deleting chunk_files: %w", err)
}
}
return nil
}
// CreateBatch inserts multiple chunk_files in a single statement for efficiency.
func (r *ChunkFileRepository) CreateBatch(ctx context.Context, tx *sql.Tx, cfs []ChunkFile) error {
if len(cfs) == 0 {
return nil
}
// Each ChunkFile has 4 values, so batch at 200 to be safe with SQLite's variable limit
const batchSize = 200
for i := 0; i < len(cfs); i += batchSize {
end := i + batchSize
if end > len(cfs) {
end = len(cfs)
}
batch := cfs[i:end]
query := "INSERT INTO chunk_files (chunk_hash, file_id, file_offset, length) VALUES "
args := make([]interface{}, 0, len(batch)*4)
for j, cf := range batch {
if j > 0 {
query += ", "
}
query += "(?, ?, ?, ?)"
args = append(args, cf.ChunkHash, cf.FileID, cf.FileOffset, cf.Length)
}
query += " ON CONFLICT(chunk_hash, file_id) DO NOTHING"
var err error
if tx != nil {
_, err = tx.ExecContext(ctx, query, args...)
} else {
_, err = r.db.ExecWithLog(ctx, query, args...)
}
if err != nil {
return fmt.Errorf("batch inserting chunk_files: %w", err)
}
}
return nil
}

View File

@@ -205,6 +205,15 @@ func NewTestDB() (*DB, error) {
return New(context.Background(), ":memory:")
}
// repeatPlaceholder generates a string of ", ?" repeated n times for IN clause construction.
// For example, repeatPlaceholder(2) returns ", ?, ?".
func repeatPlaceholder(n int) string {
if n <= 0 {
return ""
}
return strings.Repeat(", ?", n)
}
// LogSQL logs SQL queries and their arguments when debug mode is enabled.
// Debug mode is activated by setting the GODEBUG environment variable to include "vaultik".
// This is useful for troubleshooting database operations and understanding query patterns.

View File

@@ -157,6 +157,86 @@ func (r *FileChunkRepository) DeleteByFileID(ctx context.Context, tx *sql.Tx, fi
return nil
}
// DeleteByFileIDs deletes all chunks for multiple files in a single statement.
func (r *FileChunkRepository) DeleteByFileIDs(ctx context.Context, tx *sql.Tx, fileIDs []string) error {
if len(fileIDs) == 0 {
return nil
}
// Batch at 500 to stay within SQLite's variable limit
const batchSize = 500
for i := 0; i < len(fileIDs); i += batchSize {
end := i + batchSize
if end > len(fileIDs) {
end = len(fileIDs)
}
batch := fileIDs[i:end]
query := "DELETE FROM file_chunks WHERE file_id IN (?" + repeatPlaceholder(len(batch)-1) + ")"
args := make([]interface{}, len(batch))
for j, id := range batch {
args[j] = id
}
var err error
if tx != nil {
_, err = tx.ExecContext(ctx, query, args...)
} else {
_, err = r.db.ExecWithLog(ctx, query, args...)
}
if err != nil {
return fmt.Errorf("batch deleting file_chunks: %w", err)
}
}
return nil
}
// CreateBatch inserts multiple file_chunks in a single statement for efficiency.
// Batches are automatically split to stay within SQLite's variable limit.
func (r *FileChunkRepository) CreateBatch(ctx context.Context, tx *sql.Tx, fcs []FileChunk) error {
if len(fcs) == 0 {
return nil
}
// SQLite has a limit on variables (typically 999 or 32766).
// Each FileChunk has 3 values, so batch at 300 to be safe.
const batchSize = 300
for i := 0; i < len(fcs); i += batchSize {
end := i + batchSize
if end > len(fcs) {
end = len(fcs)
}
batch := fcs[i:end]
// Build the query with multiple value sets
query := "INSERT INTO file_chunks (file_id, idx, chunk_hash) VALUES "
args := make([]interface{}, 0, len(batch)*3)
for j, fc := range batch {
if j > 0 {
query += ", "
}
query += "(?, ?, ?)"
args = append(args, fc.FileID, fc.Idx, fc.ChunkHash)
}
query += " ON CONFLICT(file_id, idx) DO NOTHING"
var err error
if tx != nil {
_, err = tx.ExecContext(ctx, query, args...)
} else {
_, err = r.db.ExecWithLog(ctx, query, args...)
}
if err != nil {
return fmt.Errorf("batch inserting file_chunks: %w", err)
}
}
return nil
}
// GetByFile is an alias for GetByPath for compatibility
func (r *FileChunkRepository) GetByFile(ctx context.Context, path string) ([]*FileChunk, error) {
LogSQL("GetByFile", "Starting", path)

View File

@@ -302,6 +302,55 @@ func (r *FileRepository) ListByPrefix(ctx context.Context, prefix string) ([]*Fi
return files, rows.Err()
}
// CreateBatch inserts or updates multiple files in a single statement for efficiency.
// File IDs must be pre-generated before calling this method.
func (r *FileRepository) CreateBatch(ctx context.Context, tx *sql.Tx, files []*File) error {
if len(files) == 0 {
return nil
}
// Each File has 9 values, so batch at 100 to be safe with SQLite's variable limit
const batchSize = 100
for i := 0; i < len(files); i += batchSize {
end := i + batchSize
if end > len(files) {
end = len(files)
}
batch := files[i:end]
query := `INSERT INTO files (id, path, mtime, ctime, size, mode, uid, gid, link_target) VALUES `
args := make([]interface{}, 0, len(batch)*9)
for j, f := range batch {
if j > 0 {
query += ", "
}
query += "(?, ?, ?, ?, ?, ?, ?, ?, ?)"
args = append(args, f.ID, f.Path, f.MTime.Unix(), f.CTime.Unix(), f.Size, f.Mode, f.UID, f.GID, f.LinkTarget)
}
query += ` ON CONFLICT(path) DO UPDATE SET
mtime = excluded.mtime,
ctime = excluded.ctime,
size = excluded.size,
mode = excluded.mode,
uid = excluded.uid,
gid = excluded.gid,
link_target = excluded.link_target`
var err error
if tx != nil {
_, err = tx.ExecContext(ctx, query, args...)
} else {
_, err = r.db.ExecWithLog(ctx, query, args...)
}
if err != nil {
return fmt.Errorf("batch inserting files: %w", err)
}
}
return nil
}
// DeleteOrphaned deletes files that are not referenced by any snapshot
func (r *FileRepository) DeleteOrphaned(ctx context.Context) error {
query := `

View File

@@ -28,6 +28,9 @@ CREATE TABLE IF NOT EXISTS file_chunks (
FOREIGN KEY (chunk_hash) REFERENCES chunks(chunk_hash)
);
-- Index for efficient chunk lookups (used in orphan detection)
CREATE INDEX IF NOT EXISTS idx_file_chunks_chunk_hash ON file_chunks(chunk_hash);
-- Chunks table: stores unique content-defined chunks
CREATE TABLE IF NOT EXISTS chunks (
chunk_hash TEXT PRIMARY KEY,
@@ -56,6 +59,9 @@ CREATE TABLE IF NOT EXISTS blob_chunks (
FOREIGN KEY (chunk_hash) REFERENCES chunks(chunk_hash)
);
-- Index for efficient chunk lookups (used in orphan detection)
CREATE INDEX IF NOT EXISTS idx_blob_chunks_chunk_hash ON blob_chunks(chunk_hash);
-- Chunk files table: reverse mapping of chunks to files
CREATE TABLE IF NOT EXISTS chunk_files (
chunk_hash TEXT NOT NULL,
@@ -67,6 +73,9 @@ CREATE TABLE IF NOT EXISTS chunk_files (
FOREIGN KEY (file_id) REFERENCES files(id) ON DELETE CASCADE
);
-- Index for efficient file lookups (used in orphan detection)
CREATE INDEX IF NOT EXISTS idx_chunk_files_file_id ON chunk_files(file_id);
-- Snapshots table: tracks backup snapshots
CREATE TABLE IF NOT EXISTS snapshots (
id TEXT PRIMARY KEY,
@@ -96,6 +105,9 @@ CREATE TABLE IF NOT EXISTS snapshot_files (
FOREIGN KEY (file_id) REFERENCES files(id)
);
-- Index for efficient file lookups (used in orphan detection)
CREATE INDEX IF NOT EXISTS idx_snapshot_files_file_id ON snapshot_files(file_id);
-- Snapshot blobs table: maps snapshots to blobs
CREATE TABLE IF NOT EXISTS snapshot_blobs (
snapshot_id TEXT NOT NULL,
@@ -106,6 +118,9 @@ CREATE TABLE IF NOT EXISTS snapshot_blobs (
FOREIGN KEY (blob_id) REFERENCES blobs(id)
);
-- Index for efficient blob lookups (used in orphan detection)
CREATE INDEX IF NOT EXISTS idx_snapshot_blobs_blob_id ON snapshot_blobs(blob_id);
-- Uploads table: tracks blob upload metrics
CREATE TABLE IF NOT EXISTS uploads (
blob_hash TEXT PRIMARY KEY,
@@ -115,4 +130,7 @@ CREATE TABLE IF NOT EXISTS uploads (
duration_ms INTEGER NOT NULL,
FOREIGN KEY (blob_hash) REFERENCES blobs(blob_hash),
FOREIGN KEY (snapshot_id) REFERENCES snapshots(id)
);
);
-- Index for efficient snapshot lookups
CREATE INDEX IF NOT EXISTS idx_uploads_snapshot_id ON uploads(snapshot_id);

View File

@@ -289,6 +289,46 @@ func (r *SnapshotRepository) AddFileByID(ctx context.Context, tx *sql.Tx, snapsh
return nil
}
// AddFilesByIDBatch adds multiple files to a snapshot in batched inserts
func (r *SnapshotRepository) AddFilesByIDBatch(ctx context.Context, tx *sql.Tx, snapshotID string, fileIDs []string) error {
if len(fileIDs) == 0 {
return nil
}
// Each entry has 2 values, so batch at 400 to be safe
const batchSize = 400
for i := 0; i < len(fileIDs); i += batchSize {
end := i + batchSize
if end > len(fileIDs) {
end = len(fileIDs)
}
batch := fileIDs[i:end]
query := "INSERT OR IGNORE INTO snapshot_files (snapshot_id, file_id) VALUES "
args := make([]interface{}, 0, len(batch)*2)
for j, fileID := range batch {
if j > 0 {
query += ", "
}
query += "(?, ?)"
args = append(args, snapshotID, fileID)
}
var err error
if tx != nil {
_, err = tx.ExecContext(ctx, query, args...)
} else {
_, err = r.db.ExecWithLog(ctx, query, args...)
}
if err != nil {
return fmt.Errorf("batch adding files to snapshot: %w", err)
}
}
return nil
}
// AddBlob adds a blob to a snapshot
func (r *SnapshotRepository) AddBlob(ctx context.Context, tx *sql.Tx, snapshotID string, blobID string, blobHash string) error {
query := `