Add exclude patterns, snapshot prune, and other improvements
- Implement exclude patterns with anchored pattern support:
  - Patterns starting with / only match from the root of the source dir
  - Unanchored patterns match anywhere in the path
  - Support for glob patterns (*.log, .*, **/*.pack)
  - Directory patterns skip entire subtrees
  - Add gobwas/glob dependency for pattern matching
  - Add 16 comprehensive tests for exclude functionality
- Add snapshot prune command to clean orphaned data:
  - Removes incomplete snapshots from the database
  - Cleans orphaned files, chunks, and blobs
  - Runs automatically at backup start for consistency
- Add snapshot remove command for deleting snapshots
- Add VAULTIK_AGE_SECRET_KEY environment variable support
- Fix duplicate fx module provider in restore command
- Change snapshot ID format to hostname_YYYY-MM-DDTHH:MM:SSZ
This commit is contained in:
@@ -132,3 +132,80 @@ func (r *ChunkFileRepository) DeleteByFileID(ctx context.Context, tx *sql.Tx, fi
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// DeleteByFileIDs deletes all chunk_files for multiple files in a single statement.
|
||||
func (r *ChunkFileRepository) DeleteByFileIDs(ctx context.Context, tx *sql.Tx, fileIDs []string) error {
|
||||
if len(fileIDs) == 0 {
|
||||
return nil
|
||||
}
|
||||
|
||||
// Batch at 500 to stay within SQLite's variable limit
|
||||
const batchSize = 500
|
||||
|
||||
for i := 0; i < len(fileIDs); i += batchSize {
|
||||
end := i + batchSize
|
||||
if end > len(fileIDs) {
|
||||
end = len(fileIDs)
|
||||
}
|
||||
batch := fileIDs[i:end]
|
||||
|
||||
query := "DELETE FROM chunk_files WHERE file_id IN (?" + repeatPlaceholder(len(batch)-1) + ")"
|
||||
args := make([]interface{}, len(batch))
|
||||
for j, id := range batch {
|
||||
args[j] = id
|
||||
}
|
||||
|
||||
var err error
|
||||
if tx != nil {
|
||||
_, err = tx.ExecContext(ctx, query, args...)
|
||||
} else {
|
||||
_, err = r.db.ExecWithLog(ctx, query, args...)
|
||||
}
|
||||
if err != nil {
|
||||
return fmt.Errorf("batch deleting chunk_files: %w", err)
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// CreateBatch inserts multiple chunk_files in a single statement for efficiency.
|
||||
func (r *ChunkFileRepository) CreateBatch(ctx context.Context, tx *sql.Tx, cfs []ChunkFile) error {
|
||||
if len(cfs) == 0 {
|
||||
return nil
|
||||
}
|
||||
|
||||
// Each ChunkFile has 4 values, so batch at 200 to be safe with SQLite's variable limit
|
||||
const batchSize = 200
|
||||
|
||||
for i := 0; i < len(cfs); i += batchSize {
|
||||
end := i + batchSize
|
||||
if end > len(cfs) {
|
||||
end = len(cfs)
|
||||
}
|
||||
batch := cfs[i:end]
|
||||
|
||||
query := "INSERT INTO chunk_files (chunk_hash, file_id, file_offset, length) VALUES "
|
||||
args := make([]interface{}, 0, len(batch)*4)
|
||||
for j, cf := range batch {
|
||||
if j > 0 {
|
||||
query += ", "
|
||||
}
|
||||
query += "(?, ?, ?, ?)"
|
||||
args = append(args, cf.ChunkHash, cf.FileID, cf.FileOffset, cf.Length)
|
||||
}
|
||||
query += " ON CONFLICT(chunk_hash, file_id) DO NOTHING"
|
||||
|
||||
var err error
|
||||
if tx != nil {
|
||||
_, err = tx.ExecContext(ctx, query, args...)
|
||||
} else {
|
||||
_, err = r.db.ExecWithLog(ctx, query, args...)
|
||||
}
|
||||
if err != nil {
|
||||
return fmt.Errorf("batch inserting chunk_files: %w", err)
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
@@ -205,6 +205,15 @@ func NewTestDB() (*DB, error) {
|
||||
return New(context.Background(), ":memory:")
|
||||
}
|
||||
|
||||
// repeatPlaceholder generates a string of ", ?" repeated n times for IN clause construction.
// For example, repeatPlaceholder(2) returns ", ?, ?".
func repeatPlaceholder(n int) string {
	if n <= 0 {
		return ""
	}
	var b strings.Builder
	b.Grow(3 * n) // each repetition is ", ?" (3 bytes)
	for i := 0; i < n; i++ {
		b.WriteString(", ?")
	}
	return b.String()
}
|
||||
|
||||
// LogSQL logs SQL queries and their arguments when debug mode is enabled.
|
||||
// Debug mode is activated by setting the GODEBUG environment variable to include "vaultik".
|
||||
// This is useful for troubleshooting database operations and understanding query patterns.
|
||||
|
||||
@@ -157,6 +157,86 @@ func (r *FileChunkRepository) DeleteByFileID(ctx context.Context, tx *sql.Tx, fi
|
||||
return nil
|
||||
}
|
||||
|
||||
// DeleteByFileIDs deletes all chunks for multiple files in a single statement.
|
||||
func (r *FileChunkRepository) DeleteByFileIDs(ctx context.Context, tx *sql.Tx, fileIDs []string) error {
|
||||
if len(fileIDs) == 0 {
|
||||
return nil
|
||||
}
|
||||
|
||||
// Batch at 500 to stay within SQLite's variable limit
|
||||
const batchSize = 500
|
||||
|
||||
for i := 0; i < len(fileIDs); i += batchSize {
|
||||
end := i + batchSize
|
||||
if end > len(fileIDs) {
|
||||
end = len(fileIDs)
|
||||
}
|
||||
batch := fileIDs[i:end]
|
||||
|
||||
query := "DELETE FROM file_chunks WHERE file_id IN (?" + repeatPlaceholder(len(batch)-1) + ")"
|
||||
args := make([]interface{}, len(batch))
|
||||
for j, id := range batch {
|
||||
args[j] = id
|
||||
}
|
||||
|
||||
var err error
|
||||
if tx != nil {
|
||||
_, err = tx.ExecContext(ctx, query, args...)
|
||||
} else {
|
||||
_, err = r.db.ExecWithLog(ctx, query, args...)
|
||||
}
|
||||
if err != nil {
|
||||
return fmt.Errorf("batch deleting file_chunks: %w", err)
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// CreateBatch inserts multiple file_chunks in a single statement for efficiency.
|
||||
// Batches are automatically split to stay within SQLite's variable limit.
|
||||
func (r *FileChunkRepository) CreateBatch(ctx context.Context, tx *sql.Tx, fcs []FileChunk) error {
|
||||
if len(fcs) == 0 {
|
||||
return nil
|
||||
}
|
||||
|
||||
// SQLite has a limit on variables (typically 999 or 32766).
|
||||
// Each FileChunk has 3 values, so batch at 300 to be safe.
|
||||
const batchSize = 300
|
||||
|
||||
for i := 0; i < len(fcs); i += batchSize {
|
||||
end := i + batchSize
|
||||
if end > len(fcs) {
|
||||
end = len(fcs)
|
||||
}
|
||||
batch := fcs[i:end]
|
||||
|
||||
// Build the query with multiple value sets
|
||||
query := "INSERT INTO file_chunks (file_id, idx, chunk_hash) VALUES "
|
||||
args := make([]interface{}, 0, len(batch)*3)
|
||||
for j, fc := range batch {
|
||||
if j > 0 {
|
||||
query += ", "
|
||||
}
|
||||
query += "(?, ?, ?)"
|
||||
args = append(args, fc.FileID, fc.Idx, fc.ChunkHash)
|
||||
}
|
||||
query += " ON CONFLICT(file_id, idx) DO NOTHING"
|
||||
|
||||
var err error
|
||||
if tx != nil {
|
||||
_, err = tx.ExecContext(ctx, query, args...)
|
||||
} else {
|
||||
_, err = r.db.ExecWithLog(ctx, query, args...)
|
||||
}
|
||||
if err != nil {
|
||||
return fmt.Errorf("batch inserting file_chunks: %w", err)
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// GetByFile is an alias for GetByPath for compatibility
|
||||
func (r *FileChunkRepository) GetByFile(ctx context.Context, path string) ([]*FileChunk, error) {
|
||||
LogSQL("GetByFile", "Starting", path)
|
||||
|
||||
@@ -302,6 +302,55 @@ func (r *FileRepository) ListByPrefix(ctx context.Context, prefix string) ([]*Fi
|
||||
return files, rows.Err()
|
||||
}
|
||||
|
||||
// CreateBatch inserts or updates multiple files in a single statement for efficiency.
|
||||
// File IDs must be pre-generated before calling this method.
|
||||
func (r *FileRepository) CreateBatch(ctx context.Context, tx *sql.Tx, files []*File) error {
|
||||
if len(files) == 0 {
|
||||
return nil
|
||||
}
|
||||
|
||||
// Each File has 9 values, so batch at 100 to be safe with SQLite's variable limit
|
||||
const batchSize = 100
|
||||
|
||||
for i := 0; i < len(files); i += batchSize {
|
||||
end := i + batchSize
|
||||
if end > len(files) {
|
||||
end = len(files)
|
||||
}
|
||||
batch := files[i:end]
|
||||
|
||||
query := `INSERT INTO files (id, path, mtime, ctime, size, mode, uid, gid, link_target) VALUES `
|
||||
args := make([]interface{}, 0, len(batch)*9)
|
||||
for j, f := range batch {
|
||||
if j > 0 {
|
||||
query += ", "
|
||||
}
|
||||
query += "(?, ?, ?, ?, ?, ?, ?, ?, ?)"
|
||||
args = append(args, f.ID, f.Path, f.MTime.Unix(), f.CTime.Unix(), f.Size, f.Mode, f.UID, f.GID, f.LinkTarget)
|
||||
}
|
||||
query += ` ON CONFLICT(path) DO UPDATE SET
|
||||
mtime = excluded.mtime,
|
||||
ctime = excluded.ctime,
|
||||
size = excluded.size,
|
||||
mode = excluded.mode,
|
||||
uid = excluded.uid,
|
||||
gid = excluded.gid,
|
||||
link_target = excluded.link_target`
|
||||
|
||||
var err error
|
||||
if tx != nil {
|
||||
_, err = tx.ExecContext(ctx, query, args...)
|
||||
} else {
|
||||
_, err = r.db.ExecWithLog(ctx, query, args...)
|
||||
}
|
||||
if err != nil {
|
||||
return fmt.Errorf("batch inserting files: %w", err)
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// DeleteOrphaned deletes files that are not referenced by any snapshot
|
||||
func (r *FileRepository) DeleteOrphaned(ctx context.Context) error {
|
||||
query := `
|
||||
|
||||
@@ -28,6 +28,9 @@ CREATE TABLE IF NOT EXISTS file_chunks (
|
||||
FOREIGN KEY (chunk_hash) REFERENCES chunks(chunk_hash)
|
||||
);
|
||||
|
||||
-- Index for efficient chunk lookups (used in orphan detection)
|
||||
CREATE INDEX IF NOT EXISTS idx_file_chunks_chunk_hash ON file_chunks(chunk_hash);
|
||||
|
||||
-- Chunks table: stores unique content-defined chunks
|
||||
CREATE TABLE IF NOT EXISTS chunks (
|
||||
chunk_hash TEXT PRIMARY KEY,
|
||||
@@ -56,6 +59,9 @@ CREATE TABLE IF NOT EXISTS blob_chunks (
|
||||
FOREIGN KEY (chunk_hash) REFERENCES chunks(chunk_hash)
|
||||
);
|
||||
|
||||
-- Index for efficient chunk lookups (used in orphan detection)
|
||||
CREATE INDEX IF NOT EXISTS idx_blob_chunks_chunk_hash ON blob_chunks(chunk_hash);
|
||||
|
||||
-- Chunk files table: reverse mapping of chunks to files
|
||||
CREATE TABLE IF NOT EXISTS chunk_files (
|
||||
chunk_hash TEXT NOT NULL,
|
||||
@@ -67,6 +73,9 @@ CREATE TABLE IF NOT EXISTS chunk_files (
|
||||
FOREIGN KEY (file_id) REFERENCES files(id) ON DELETE CASCADE
|
||||
);
|
||||
|
||||
-- Index for efficient file lookups (used in orphan detection)
|
||||
CREATE INDEX IF NOT EXISTS idx_chunk_files_file_id ON chunk_files(file_id);
|
||||
|
||||
-- Snapshots table: tracks backup snapshots
|
||||
CREATE TABLE IF NOT EXISTS snapshots (
|
||||
id TEXT PRIMARY KEY,
|
||||
@@ -96,6 +105,9 @@ CREATE TABLE IF NOT EXISTS snapshot_files (
|
||||
FOREIGN KEY (file_id) REFERENCES files(id)
|
||||
);
|
||||
|
||||
-- Index for efficient file lookups (used in orphan detection)
|
||||
CREATE INDEX IF NOT EXISTS idx_snapshot_files_file_id ON snapshot_files(file_id);
|
||||
|
||||
-- Snapshot blobs table: maps snapshots to blobs
|
||||
CREATE TABLE IF NOT EXISTS snapshot_blobs (
|
||||
snapshot_id TEXT NOT NULL,
|
||||
@@ -106,6 +118,9 @@ CREATE TABLE IF NOT EXISTS snapshot_blobs (
|
||||
FOREIGN KEY (blob_id) REFERENCES blobs(id)
|
||||
);
|
||||
|
||||
-- Index for efficient blob lookups (used in orphan detection)
|
||||
CREATE INDEX IF NOT EXISTS idx_snapshot_blobs_blob_id ON snapshot_blobs(blob_id);
|
||||
|
||||
-- Uploads table: tracks blob upload metrics
|
||||
CREATE TABLE IF NOT EXISTS uploads (
|
||||
blob_hash TEXT PRIMARY KEY,
|
||||
@@ -115,4 +130,7 @@ CREATE TABLE IF NOT EXISTS uploads (
|
||||
duration_ms INTEGER NOT NULL,
|
||||
FOREIGN KEY (blob_hash) REFERENCES blobs(blob_hash),
|
||||
FOREIGN KEY (snapshot_id) REFERENCES snapshots(id)
|
||||
);
|
||||
|
||||
-- Index for efficient snapshot lookups
|
||||
CREATE INDEX IF NOT EXISTS idx_uploads_snapshot_id ON uploads(snapshot_id);
|
||||
@@ -289,6 +289,46 @@ func (r *SnapshotRepository) AddFileByID(ctx context.Context, tx *sql.Tx, snapsh
|
||||
return nil
|
||||
}
|
||||
|
||||
// AddFilesByIDBatch adds multiple files to a snapshot in batched inserts
|
||||
func (r *SnapshotRepository) AddFilesByIDBatch(ctx context.Context, tx *sql.Tx, snapshotID string, fileIDs []string) error {
|
||||
if len(fileIDs) == 0 {
|
||||
return nil
|
||||
}
|
||||
|
||||
// Each entry has 2 values, so batch at 400 to be safe
|
||||
const batchSize = 400
|
||||
|
||||
for i := 0; i < len(fileIDs); i += batchSize {
|
||||
end := i + batchSize
|
||||
if end > len(fileIDs) {
|
||||
end = len(fileIDs)
|
||||
}
|
||||
batch := fileIDs[i:end]
|
||||
|
||||
query := "INSERT OR IGNORE INTO snapshot_files (snapshot_id, file_id) VALUES "
|
||||
args := make([]interface{}, 0, len(batch)*2)
|
||||
for j, fileID := range batch {
|
||||
if j > 0 {
|
||||
query += ", "
|
||||
}
|
||||
query += "(?, ?)"
|
||||
args = append(args, snapshotID, fileID)
|
||||
}
|
||||
|
||||
var err error
|
||||
if tx != nil {
|
||||
_, err = tx.ExecContext(ctx, query, args...)
|
||||
} else {
|
||||
_, err = r.db.ExecWithLog(ctx, query, args...)
|
||||
}
|
||||
if err != nil {
|
||||
return fmt.Errorf("batch adding files to snapshot: %w", err)
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// AddBlob adds a blob to a snapshot
|
||||
func (r *SnapshotRepository) AddBlob(ctx context.Context, tx *sql.Tx, snapshotID string, blobID string, blobHash string) error {
|
||||
query := `
|
||||
|
||||
Reference in New Issue
Block a user