Add exclude patterns, snapshot prune, and other improvements
- Implement exclude patterns with anchored pattern support:
  - Patterns starting with / only match from root of source dir
  - Unanchored patterns match anywhere in path
  - Support for glob patterns (*.log, .*, **/*.pack)
  - Directory patterns skip entire subtrees
  - Add gobwas/glob dependency for pattern matching
  - Add 16 comprehensive tests for exclude functionality
- Add snapshot prune command to clean orphaned data:
  - Removes incomplete snapshots from database
  - Cleans orphaned files, chunks, and blobs
  - Runs automatically at backup start for consistency
- Add snapshot remove command for deleting snapshots
- Add VAULTIK_AGE_SECRET_KEY environment variable support
- Fix duplicate fx module provider in restore command
- Change snapshot ID format to hostname_YYYY-MM-DDTHH:MM:SSZ
This commit is contained in:
453
internal/snapshot/exclude_test.go
Normal file
453
internal/snapshot/exclude_test.go
Normal file
@@ -0,0 +1,453 @@
|
||||
package snapshot_test
|
||||
|
||||
import (
|
||||
"context"
|
||||
"database/sql"
|
||||
"path/filepath"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"git.eeqj.de/sneak/vaultik/internal/database"
|
||||
"git.eeqj.de/sneak/vaultik/internal/log"
|
||||
"git.eeqj.de/sneak/vaultik/internal/snapshot"
|
||||
"github.com/spf13/afero"
|
||||
"github.com/stretchr/testify/require"
|
||||
)
|
||||
|
||||
func setupExcludeTestFS(t *testing.T) afero.Fs {
|
||||
t.Helper()
|
||||
|
||||
// Create in-memory filesystem
|
||||
fs := afero.NewMemMapFs()
|
||||
|
||||
// Create test directory structure:
|
||||
// /backup/
|
||||
// file1.txt (should be backed up)
|
||||
// file2.log (should be excluded if *.log is in patterns)
|
||||
// .git/
|
||||
// config (should be excluded if .git is in patterns)
|
||||
// objects/
|
||||
// pack/
|
||||
// data.pack (should be excluded if .git is in patterns)
|
||||
// src/
|
||||
// main.go (should be backed up)
|
||||
// test.go (should be backed up)
|
||||
// node_modules/
|
||||
// package/
|
||||
// index.js (should be excluded if node_modules is in patterns)
|
||||
// cache/
|
||||
// temp.dat (should be excluded if cache/ is in patterns)
|
||||
// build/
|
||||
// output.bin (should be excluded if build is in patterns)
|
||||
// docs/
|
||||
// readme.md (should be backed up)
|
||||
// .DS_Store (should be excluded if .DS_Store is in patterns)
|
||||
// thumbs.db (should be excluded if thumbs.db is in patterns)
|
||||
|
||||
files := map[string]string{
|
||||
"/backup/file1.txt": "content1",
|
||||
"/backup/file2.log": "log content",
|
||||
"/backup/.git/config": "git config",
|
||||
"/backup/.git/objects/pack/data.pack": "pack data",
|
||||
"/backup/src/main.go": "package main",
|
||||
"/backup/src/test.go": "package main_test",
|
||||
"/backup/node_modules/package/index.js": "module.exports = {}",
|
||||
"/backup/cache/temp.dat": "cached data",
|
||||
"/backup/build/output.bin": "binary data",
|
||||
"/backup/docs/readme.md": "# Documentation",
|
||||
"/backup/.DS_Store": "ds store data",
|
||||
"/backup/thumbs.db": "thumbs data",
|
||||
"/backup/src/.hidden": "hidden file",
|
||||
"/backup/important.log.bak": "backup of log",
|
||||
}
|
||||
|
||||
testTime := time.Date(2024, 1, 1, 12, 0, 0, 0, time.UTC)
|
||||
for path, content := range files {
|
||||
dir := filepath.Dir(path)
|
||||
err := fs.MkdirAll(dir, 0755)
|
||||
require.NoError(t, err)
|
||||
err = afero.WriteFile(fs, path, []byte(content), 0644)
|
||||
require.NoError(t, err)
|
||||
err = fs.Chtimes(path, testTime, testTime)
|
||||
require.NoError(t, err)
|
||||
}
|
||||
|
||||
return fs
|
||||
}
|
||||
|
||||
func createTestScanner(t *testing.T, fs afero.Fs, excludePatterns []string) (*snapshot.Scanner, *database.Repositories, func()) {
|
||||
t.Helper()
|
||||
|
||||
// Initialize logger
|
||||
log.Initialize(log.Config{})
|
||||
|
||||
// Create test database
|
||||
db, err := database.NewTestDB()
|
||||
require.NoError(t, err)
|
||||
|
||||
repos := database.NewRepositories(db)
|
||||
|
||||
scanner := snapshot.NewScanner(snapshot.ScannerConfig{
|
||||
FS: fs,
|
||||
ChunkSize: 64 * 1024,
|
||||
Repositories: repos,
|
||||
MaxBlobSize: 1024 * 1024,
|
||||
CompressionLevel: 3,
|
||||
AgeRecipients: []string{"age1ql3z7hjy54pw3hyww5ayyfg7zqgvc7w3j2elw8zmrj2kg5sfn9aqmcac8p"},
|
||||
Exclude: excludePatterns,
|
||||
})
|
||||
|
||||
cleanup := func() {
|
||||
_ = db.Close()
|
||||
}
|
||||
|
||||
return scanner, repos, cleanup
|
||||
}
|
||||
|
||||
func createSnapshotRecord(t *testing.T, ctx context.Context, repos *database.Repositories, snapshotID string) {
|
||||
t.Helper()
|
||||
err := repos.WithTx(ctx, func(ctx context.Context, tx *sql.Tx) error {
|
||||
snap := &database.Snapshot{
|
||||
ID: snapshotID,
|
||||
Hostname: "test-host",
|
||||
VaultikVersion: "test",
|
||||
StartedAt: time.Now(),
|
||||
CompletedAt: nil,
|
||||
FileCount: 0,
|
||||
ChunkCount: 0,
|
||||
BlobCount: 0,
|
||||
TotalSize: 0,
|
||||
BlobSize: 0,
|
||||
CompressionRatio: 1.0,
|
||||
}
|
||||
return repos.Snapshots.Create(ctx, tx, snap)
|
||||
})
|
||||
require.NoError(t, err)
|
||||
}
|
||||
|
||||
func TestExcludePatterns_ExcludeGitDirectory(t *testing.T) {
|
||||
fs := setupExcludeTestFS(t)
|
||||
scanner, repos, cleanup := createTestScanner(t, fs, []string{".git"})
|
||||
defer cleanup()
|
||||
require.NotNil(t, scanner)
|
||||
|
||||
ctx := context.Background()
|
||||
createSnapshotRecord(t, ctx, repos, "test-snapshot")
|
||||
|
||||
result, err := scanner.Scan(ctx, "/backup", "test-snapshot")
|
||||
require.NoError(t, err)
|
||||
|
||||
// Should have scanned files but NOT .git directory contents
|
||||
// Expected: file1.txt, file2.log, src/main.go, src/test.go, node_modules/package/index.js,
|
||||
// cache/temp.dat, build/output.bin, docs/readme.md, .DS_Store, thumbs.db,
|
||||
// src/.hidden, important.log.bak
|
||||
// Excluded: .git/config, .git/objects/pack/data.pack
|
||||
require.Equal(t, 12, result.FilesScanned, "Should exclude .git directory contents")
|
||||
}
|
||||
|
||||
func TestExcludePatterns_ExcludeByExtension(t *testing.T) {
|
||||
fs := setupExcludeTestFS(t)
|
||||
scanner, repos, cleanup := createTestScanner(t, fs, []string{"*.log"})
|
||||
defer cleanup()
|
||||
require.NotNil(t, scanner)
|
||||
|
||||
ctx := context.Background()
|
||||
createSnapshotRecord(t, ctx, repos, "test-snapshot")
|
||||
|
||||
result, err := scanner.Scan(ctx, "/backup", "test-snapshot")
|
||||
require.NoError(t, err)
|
||||
|
||||
// Should exclude file2.log but NOT important.log.bak (different extension)
|
||||
// Total files: 14, excluded: 1 (file2.log)
|
||||
require.Equal(t, 13, result.FilesScanned, "Should exclude *.log files")
|
||||
}
|
||||
|
||||
func TestExcludePatterns_ExcludeNodeModules(t *testing.T) {
|
||||
fs := setupExcludeTestFS(t)
|
||||
scanner, repos, cleanup := createTestScanner(t, fs, []string{"node_modules"})
|
||||
defer cleanup()
|
||||
require.NotNil(t, scanner)
|
||||
|
||||
ctx := context.Background()
|
||||
createSnapshotRecord(t, ctx, repos, "test-snapshot")
|
||||
|
||||
result, err := scanner.Scan(ctx, "/backup", "test-snapshot")
|
||||
require.NoError(t, err)
|
||||
|
||||
// Should exclude node_modules/package/index.js
|
||||
// Total files: 14, excluded: 1
|
||||
require.Equal(t, 13, result.FilesScanned, "Should exclude node_modules directory")
|
||||
}
|
||||
|
||||
func TestExcludePatterns_MultiplePatterns(t *testing.T) {
|
||||
fs := setupExcludeTestFS(t)
|
||||
scanner, repos, cleanup := createTestScanner(t, fs, []string{".git", "node_modules", "*.log", ".DS_Store", "thumbs.db", "cache", "build"})
|
||||
defer cleanup()
|
||||
require.NotNil(t, scanner)
|
||||
|
||||
ctx := context.Background()
|
||||
createSnapshotRecord(t, ctx, repos, "test-snapshot")
|
||||
|
||||
result, err := scanner.Scan(ctx, "/backup", "test-snapshot")
|
||||
require.NoError(t, err)
|
||||
|
||||
// Should only have: file1.txt, src/main.go, src/test.go, docs/readme.md, src/.hidden, important.log.bak
|
||||
// Excluded: .git/*, node_modules/*, *.log (file2.log), .DS_Store, thumbs.db, cache/*, build/*
|
||||
require.Equal(t, 6, result.FilesScanned, "Should exclude multiple patterns")
|
||||
}
|
||||
|
||||
func TestExcludePatterns_NoExclusions(t *testing.T) {
|
||||
fs := setupExcludeTestFS(t)
|
||||
scanner, repos, cleanup := createTestScanner(t, fs, []string{})
|
||||
defer cleanup()
|
||||
require.NotNil(t, scanner)
|
||||
|
||||
ctx := context.Background()
|
||||
createSnapshotRecord(t, ctx, repos, "test-snapshot")
|
||||
|
||||
result, err := scanner.Scan(ctx, "/backup", "test-snapshot")
|
||||
require.NoError(t, err)
|
||||
|
||||
// Should scan all 14 files
|
||||
require.Equal(t, 14, result.FilesScanned, "Should scan all files when no exclusions")
|
||||
}
|
||||
|
||||
func TestExcludePatterns_ExcludeHiddenFiles(t *testing.T) {
|
||||
fs := setupExcludeTestFS(t)
|
||||
scanner, repos, cleanup := createTestScanner(t, fs, []string{".*"})
|
||||
defer cleanup()
|
||||
require.NotNil(t, scanner)
|
||||
|
||||
ctx := context.Background()
|
||||
createSnapshotRecord(t, ctx, repos, "test-snapshot")
|
||||
|
||||
result, err := scanner.Scan(ctx, "/backup", "test-snapshot")
|
||||
require.NoError(t, err)
|
||||
|
||||
// Should exclude: .git/*, .DS_Store, src/.hidden
|
||||
// Total files: 14, excluded: 4 (.git/config, .git/objects/pack/data.pack, .DS_Store, src/.hidden)
|
||||
require.Equal(t, 10, result.FilesScanned, "Should exclude hidden files and directories")
|
||||
}
|
||||
|
||||
func TestExcludePatterns_DoubleStarGlob(t *testing.T) {
|
||||
fs := setupExcludeTestFS(t)
|
||||
scanner, repos, cleanup := createTestScanner(t, fs, []string{"**/*.pack"})
|
||||
defer cleanup()
|
||||
require.NotNil(t, scanner)
|
||||
|
||||
ctx := context.Background()
|
||||
createSnapshotRecord(t, ctx, repos, "test-snapshot")
|
||||
|
||||
result, err := scanner.Scan(ctx, "/backup", "test-snapshot")
|
||||
require.NoError(t, err)
|
||||
|
||||
// Should exclude .git/objects/pack/data.pack
|
||||
// Total files: 14, excluded: 1
|
||||
require.Equal(t, 13, result.FilesScanned, "Should exclude **/*.pack files")
|
||||
}
|
||||
|
||||
func TestExcludePatterns_ExactFileName(t *testing.T) {
|
||||
fs := setupExcludeTestFS(t)
|
||||
scanner, repos, cleanup := createTestScanner(t, fs, []string{"thumbs.db", ".DS_Store"})
|
||||
defer cleanup()
|
||||
require.NotNil(t, scanner)
|
||||
|
||||
ctx := context.Background()
|
||||
createSnapshotRecord(t, ctx, repos, "test-snapshot")
|
||||
|
||||
result, err := scanner.Scan(ctx, "/backup", "test-snapshot")
|
||||
require.NoError(t, err)
|
||||
|
||||
// Should exclude thumbs.db and .DS_Store
|
||||
// Total files: 14, excluded: 2
|
||||
require.Equal(t, 12, result.FilesScanned, "Should exclude exact file names")
|
||||
}
|
||||
|
||||
func TestExcludePatterns_CaseSensitive(t *testing.T) {
|
||||
// Pattern matching should be case-sensitive
|
||||
fs := setupExcludeTestFS(t)
|
||||
scanner, repos, cleanup := createTestScanner(t, fs, []string{"THUMBS.DB"})
|
||||
defer cleanup()
|
||||
require.NotNil(t, scanner)
|
||||
|
||||
ctx := context.Background()
|
||||
createSnapshotRecord(t, ctx, repos, "test-snapshot")
|
||||
|
||||
result, err := scanner.Scan(ctx, "/backup", "test-snapshot")
|
||||
require.NoError(t, err)
|
||||
|
||||
// Case-sensitive matching: THUMBS.DB should NOT match thumbs.db
|
||||
// All 14 files should be scanned
|
||||
require.Equal(t, 14, result.FilesScanned, "Pattern matching should be case-sensitive")
|
||||
}
|
||||
|
||||
func TestExcludePatterns_DirectoryWithTrailingSlash(t *testing.T) {
|
||||
fs := setupExcludeTestFS(t)
|
||||
// Some users might add trailing slashes to directory patterns
|
||||
scanner, repos, cleanup := createTestScanner(t, fs, []string{"cache/", "build/"})
|
||||
defer cleanup()
|
||||
require.NotNil(t, scanner)
|
||||
|
||||
ctx := context.Background()
|
||||
createSnapshotRecord(t, ctx, repos, "test-snapshot")
|
||||
|
||||
result, err := scanner.Scan(ctx, "/backup", "test-snapshot")
|
||||
require.NoError(t, err)
|
||||
|
||||
// Should exclude cache/temp.dat and build/output.bin
|
||||
// Total files: 14, excluded: 2
|
||||
require.Equal(t, 12, result.FilesScanned, "Should handle directory patterns with trailing slashes")
|
||||
}
|
||||
|
||||
func TestExcludePatterns_PatternInSubdirectory(t *testing.T) {
|
||||
fs := setupExcludeTestFS(t)
|
||||
// Exclude .hidden file specifically in src directory
|
||||
scanner, repos, cleanup := createTestScanner(t, fs, []string{"src/.hidden"})
|
||||
defer cleanup()
|
||||
require.NotNil(t, scanner)
|
||||
|
||||
ctx := context.Background()
|
||||
createSnapshotRecord(t, ctx, repos, "test-snapshot")
|
||||
|
||||
result, err := scanner.Scan(ctx, "/backup", "test-snapshot")
|
||||
require.NoError(t, err)
|
||||
|
||||
// Should exclude only src/.hidden
|
||||
// Total files: 14, excluded: 1
|
||||
require.Equal(t, 13, result.FilesScanned, "Should exclude specific subdirectory files")
|
||||
}
|
||||
|
||||
// setupAnchoredTestFS creates a filesystem for testing anchored patterns
|
||||
// Source dir: /backup
|
||||
// Structure:
|
||||
//
|
||||
// /backup/
|
||||
// projectname/
|
||||
// file.txt (should be excluded with /projectname)
|
||||
// otherproject/
|
||||
// projectname/
|
||||
// file.txt (should NOT be excluded with /projectname, only with projectname)
|
||||
// src/
|
||||
// file.go
|
||||
func setupAnchoredTestFS(t *testing.T) afero.Fs {
|
||||
t.Helper()
|
||||
|
||||
fs := afero.NewMemMapFs()
|
||||
|
||||
files := map[string]string{
|
||||
"/backup/projectname/file.txt": "root project file",
|
||||
"/backup/otherproject/projectname/file.txt": "nested project file",
|
||||
"/backup/src/file.go": "source file",
|
||||
"/backup/file.txt": "root file",
|
||||
}
|
||||
|
||||
testTime := time.Date(2024, 1, 1, 12, 0, 0, 0, time.UTC)
|
||||
for path, content := range files {
|
||||
dir := filepath.Dir(path)
|
||||
err := fs.MkdirAll(dir, 0755)
|
||||
require.NoError(t, err)
|
||||
err = afero.WriteFile(fs, path, []byte(content), 0644)
|
||||
require.NoError(t, err)
|
||||
err = fs.Chtimes(path, testTime, testTime)
|
||||
require.NoError(t, err)
|
||||
}
|
||||
|
||||
return fs
|
||||
}
|
||||
|
||||
func TestExcludePatterns_AnchoredPattern(t *testing.T) {
|
||||
// Pattern starting with / should only match from root of source dir
|
||||
fs := setupAnchoredTestFS(t)
|
||||
scanner, repos, cleanup := createTestScanner(t, fs, []string{"/projectname"})
|
||||
defer cleanup()
|
||||
require.NotNil(t, scanner)
|
||||
|
||||
ctx := context.Background()
|
||||
createSnapshotRecord(t, ctx, repos, "test-snapshot")
|
||||
|
||||
result, err := scanner.Scan(ctx, "/backup", "test-snapshot")
|
||||
require.NoError(t, err)
|
||||
|
||||
// /projectname should ONLY exclude /backup/projectname/file.txt (1 file)
|
||||
// /backup/otherproject/projectname/file.txt should NOT be excluded
|
||||
// Total files: 4, excluded: 1
|
||||
require.Equal(t, 3, result.FilesScanned, "Anchored pattern /projectname should only match at root of source dir")
|
||||
}
|
||||
|
||||
func TestExcludePatterns_UnanchoredPattern(t *testing.T) {
|
||||
// Pattern without leading / should match anywhere in path
|
||||
fs := setupAnchoredTestFS(t)
|
||||
scanner, repos, cleanup := createTestScanner(t, fs, []string{"projectname"})
|
||||
defer cleanup()
|
||||
require.NotNil(t, scanner)
|
||||
|
||||
ctx := context.Background()
|
||||
createSnapshotRecord(t, ctx, repos, "test-snapshot")
|
||||
|
||||
result, err := scanner.Scan(ctx, "/backup", "test-snapshot")
|
||||
require.NoError(t, err)
|
||||
|
||||
// projectname (without /) should exclude BOTH:
|
||||
// - /backup/projectname/file.txt
|
||||
// - /backup/otherproject/projectname/file.txt
|
||||
// Total files: 4, excluded: 2
|
||||
require.Equal(t, 2, result.FilesScanned, "Unanchored pattern should match anywhere in path")
|
||||
}
|
||||
|
||||
func TestExcludePatterns_AnchoredPatternWithGlob(t *testing.T) {
|
||||
// Anchored pattern with glob
|
||||
fs := setupAnchoredTestFS(t)
|
||||
scanner, repos, cleanup := createTestScanner(t, fs, []string{"/src/*.go"})
|
||||
defer cleanup()
|
||||
require.NotNil(t, scanner)
|
||||
|
||||
ctx := context.Background()
|
||||
createSnapshotRecord(t, ctx, repos, "test-snapshot")
|
||||
|
||||
result, err := scanner.Scan(ctx, "/backup", "test-snapshot")
|
||||
require.NoError(t, err)
|
||||
|
||||
// /src/*.go should exclude /backup/src/file.go
|
||||
// Total files: 4, excluded: 1
|
||||
require.Equal(t, 3, result.FilesScanned, "Anchored pattern with glob should work")
|
||||
}
|
||||
|
||||
func TestExcludePatterns_AnchoredPatternFile(t *testing.T) {
|
||||
// Anchored pattern for exact file at root
|
||||
fs := setupAnchoredTestFS(t)
|
||||
scanner, repos, cleanup := createTestScanner(t, fs, []string{"/file.txt"})
|
||||
defer cleanup()
|
||||
require.NotNil(t, scanner)
|
||||
|
||||
ctx := context.Background()
|
||||
createSnapshotRecord(t, ctx, repos, "test-snapshot")
|
||||
|
||||
result, err := scanner.Scan(ctx, "/backup", "test-snapshot")
|
||||
require.NoError(t, err)
|
||||
|
||||
// /file.txt should ONLY exclude /backup/file.txt
|
||||
// NOT /backup/projectname/file.txt or /backup/otherproject/projectname/file.txt
|
||||
// Total files: 4, excluded: 1
|
||||
require.Equal(t, 3, result.FilesScanned, "Anchored pattern for file should only match at root")
|
||||
}
|
||||
|
||||
func TestExcludePatterns_UnanchoredPatternFile(t *testing.T) {
|
||||
// Unanchored pattern for file should match anywhere
|
||||
fs := setupAnchoredTestFS(t)
|
||||
scanner, repos, cleanup := createTestScanner(t, fs, []string{"file.txt"})
|
||||
defer cleanup()
|
||||
require.NotNil(t, scanner)
|
||||
|
||||
ctx := context.Background()
|
||||
createSnapshotRecord(t, ctx, repos, "test-snapshot")
|
||||
|
||||
result, err := scanner.Scan(ctx, "/backup", "test-snapshot")
|
||||
require.NoError(t, err)
|
||||
|
||||
// file.txt should exclude ALL file.txt files:
|
||||
// - /backup/file.txt
|
||||
// - /backup/projectname/file.txt
|
||||
// - /backup/otherproject/projectname/file.txt
|
||||
// Total files: 4, excluded: 3
|
||||
require.Equal(t, 1, result.FilesScanned, "Unanchored pattern for file should match anywhere")
|
||||
}
|
||||
@@ -38,6 +38,7 @@ func provideScannerFactory(cfg *config.Config, repos *database.Repositories, sto
|
||||
CompressionLevel: cfg.CompressionLevel,
|
||||
AgeRecipients: cfg.AgeRecipients,
|
||||
EnableProgress: params.EnableProgress,
|
||||
Exclude: cfg.Exclude,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
@@ -3,8 +3,10 @@ package snapshot
|
||||
import (
|
||||
"context"
|
||||
"database/sql"
|
||||
"errors"
|
||||
"fmt"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
"sync"
|
||||
"time"
|
||||
@@ -15,6 +17,7 @@ import (
|
||||
"git.eeqj.de/sneak/vaultik/internal/log"
|
||||
"git.eeqj.de/sneak/vaultik/internal/storage"
|
||||
"github.com/dustin/go-humanize"
|
||||
"github.com/gobwas/glob"
|
||||
"github.com/google/uuid"
|
||||
"github.com/spf13/afero"
|
||||
)
|
||||
@@ -33,6 +36,13 @@ type pendingFileData struct {
|
||||
chunkFiles []database.ChunkFile
|
||||
}
|
||||
|
||||
// compiledPattern holds a compiled glob pattern and whether it's anchored
|
||||
type compiledPattern struct {
|
||||
pattern glob.Glob
|
||||
anchored bool // If true, only matches from root of source dir
|
||||
original string
|
||||
}
|
||||
|
||||
// Scanner scans directories and populates the database with file and chunk information
|
||||
type Scanner struct {
|
||||
fs afero.Fs
|
||||
@@ -43,7 +53,9 @@ type Scanner struct {
|
||||
maxBlobSize int64
|
||||
compressionLevel int
|
||||
ageRecipient string
|
||||
snapshotID string // Current snapshot being processed
|
||||
snapshotID string // Current snapshot being processed
|
||||
exclude []string // Glob patterns for files/directories to exclude
|
||||
compiledExclude []compiledPattern // Compiled glob patterns
|
||||
progress *ProgressReporter
|
||||
|
||||
// In-memory cache of known chunk hashes for fast existence checks
|
||||
@@ -77,6 +89,7 @@ type ScannerConfig struct {
|
||||
CompressionLevel int
|
||||
AgeRecipients []string // Optional, empty means no encryption
|
||||
EnableProgress bool // Enable progress reporting
|
||||
Exclude []string // Glob patterns for files/directories to exclude
|
||||
}
|
||||
|
||||
// ScanResult contains the results of a scan operation
|
||||
@@ -120,6 +133,9 @@ func NewScanner(cfg ScannerConfig) *Scanner {
|
||||
progress = NewProgressReporter()
|
||||
}
|
||||
|
||||
// Compile exclude patterns
|
||||
compiledExclude := compileExcludePatterns(cfg.Exclude)
|
||||
|
||||
return &Scanner{
|
||||
fs: cfg.FS,
|
||||
chunker: chunker.NewChunker(cfg.ChunkSize),
|
||||
@@ -129,6 +145,8 @@ func NewScanner(cfg ScannerConfig) *Scanner {
|
||||
maxBlobSize: cfg.MaxBlobSize,
|
||||
compressionLevel: cfg.CompressionLevel,
|
||||
ageRecipient: strings.Join(cfg.AgeRecipients, ","),
|
||||
exclude: cfg.Exclude,
|
||||
compiledExclude: compiledExclude,
|
||||
progress: progress,
|
||||
pendingChunkHashes: make(map[string]struct{}),
|
||||
}
|
||||
@@ -314,11 +332,14 @@ func (s *Scanner) addPendingChunkHash(hash string) {
|
||||
|
||||
// removePendingChunkHashes removes committed chunk hashes from the pending set
|
||||
func (s *Scanner) removePendingChunkHashes(hashes []string) {
|
||||
log.Debug("removePendingChunkHashes: starting", "count", len(hashes))
|
||||
start := time.Now()
|
||||
s.pendingChunkHashesMu.Lock()
|
||||
for _, hash := range hashes {
|
||||
delete(s.pendingChunkHashes, hash)
|
||||
}
|
||||
s.pendingChunkHashesMu.Unlock()
|
||||
log.Debug("removePendingChunkHashes: done", "count", len(hashes), "duration", time.Since(start))
|
||||
}
|
||||
|
||||
// isChunkPending returns true if the chunk is still pending (not yet committed to DB)
|
||||
@@ -395,12 +416,19 @@ func (s *Scanner) flushAllPending(ctx context.Context) error {
|
||||
// flushCompletedPendingFiles flushes only files whose chunks are all committed to DB
|
||||
// Files with pending chunks are kept in the queue for later flushing
|
||||
func (s *Scanner) flushCompletedPendingFiles(ctx context.Context) error {
|
||||
flushStart := time.Now()
|
||||
log.Debug("flushCompletedPendingFiles: starting")
|
||||
|
||||
log.Debug("flushCompletedPendingFiles: acquiring pendingFilesMu lock")
|
||||
s.pendingFilesMu.Lock()
|
||||
log.Debug("flushCompletedPendingFiles: acquired lock", "pending_files", len(s.pendingFiles))
|
||||
|
||||
// Separate files into complete (can flush) and incomplete (keep pending)
|
||||
var canFlush []pendingFileData
|
||||
var stillPending []pendingFileData
|
||||
|
||||
log.Debug("flushCompletedPendingFiles: checking which files can flush")
|
||||
checkStart := time.Now()
|
||||
for _, data := range s.pendingFiles {
|
||||
allChunksCommitted := true
|
||||
for _, fc := range data.fileChunks {
|
||||
@@ -415,11 +443,14 @@ func (s *Scanner) flushCompletedPendingFiles(ctx context.Context) error {
|
||||
stillPending = append(stillPending, data)
|
||||
}
|
||||
}
|
||||
log.Debug("flushCompletedPendingFiles: check done", "duration", time.Since(checkStart), "can_flush", len(canFlush), "still_pending", len(stillPending))
|
||||
|
||||
s.pendingFiles = stillPending
|
||||
s.pendingFilesMu.Unlock()
|
||||
log.Debug("flushCompletedPendingFiles: released lock")
|
||||
|
||||
if len(canFlush) == 0 {
|
||||
log.Debug("flushCompletedPendingFiles: nothing to flush")
|
||||
return nil
|
||||
}
|
||||
|
||||
@@ -427,43 +458,85 @@ func (s *Scanner) flushCompletedPendingFiles(ctx context.Context) error {
|
||||
"files_to_flush", len(canFlush),
|
||||
"files_still_pending", len(stillPending))
|
||||
|
||||
// Flush the complete files
|
||||
return s.repos.WithTx(ctx, func(txCtx context.Context, tx *sql.Tx) error {
|
||||
for _, data := range canFlush {
|
||||
// Create or update the file record
|
||||
if err := s.repos.Files.Create(txCtx, tx, data.file); err != nil {
|
||||
return fmt.Errorf("creating file record: %w", err)
|
||||
}
|
||||
// Collect all data for batch operations
|
||||
log.Debug("flushCompletedPendingFiles: collecting data for batch ops")
|
||||
collectStart := time.Now()
|
||||
var allFileChunks []database.FileChunk
|
||||
var allChunkFiles []database.ChunkFile
|
||||
var allFileIDs []string
|
||||
var allFiles []*database.File
|
||||
|
||||
// Delete any existing file_chunks and chunk_files for this file
|
||||
if err := s.repos.FileChunks.DeleteByFileID(txCtx, tx, data.file.ID); err != nil {
|
||||
return fmt.Errorf("deleting old file chunks: %w", err)
|
||||
}
|
||||
if err := s.repos.ChunkFiles.DeleteByFileID(txCtx, tx, data.file.ID); err != nil {
|
||||
return fmt.Errorf("deleting old chunk files: %w", err)
|
||||
}
|
||||
for _, data := range canFlush {
|
||||
allFileChunks = append(allFileChunks, data.fileChunks...)
|
||||
allChunkFiles = append(allChunkFiles, data.chunkFiles...)
|
||||
allFileIDs = append(allFileIDs, data.file.ID)
|
||||
allFiles = append(allFiles, data.file)
|
||||
}
|
||||
log.Debug("flushCompletedPendingFiles: collected data",
|
||||
"duration", time.Since(collectStart),
|
||||
"file_chunks", len(allFileChunks),
|
||||
"chunk_files", len(allChunkFiles),
|
||||
"files", len(allFiles))
|
||||
|
||||
// Create file-chunk mappings
|
||||
for i := range data.fileChunks {
|
||||
if err := s.repos.FileChunks.Create(txCtx, tx, &data.fileChunks[i]); err != nil {
|
||||
return fmt.Errorf("creating file chunk: %w", err)
|
||||
}
|
||||
}
|
||||
// Flush the complete files using batch operations
|
||||
log.Debug("flushCompletedPendingFiles: starting transaction")
|
||||
txStart := time.Now()
|
||||
err := s.repos.WithTx(ctx, func(txCtx context.Context, tx *sql.Tx) error {
|
||||
log.Debug("flushCompletedPendingFiles: inside transaction")
|
||||
|
||||
// Create chunk-file mappings
|
||||
for i := range data.chunkFiles {
|
||||
if err := s.repos.ChunkFiles.Create(txCtx, tx, &data.chunkFiles[i]); err != nil {
|
||||
return fmt.Errorf("creating chunk file: %w", err)
|
||||
}
|
||||
}
|
||||
|
||||
// Add file to snapshot
|
||||
if err := s.repos.Snapshots.AddFileByID(txCtx, tx, s.snapshotID, data.file.ID); err != nil {
|
||||
return fmt.Errorf("adding file to snapshot: %w", err)
|
||||
}
|
||||
// Batch delete old file_chunks and chunk_files
|
||||
log.Debug("flushCompletedPendingFiles: deleting old file_chunks")
|
||||
opStart := time.Now()
|
||||
if err := s.repos.FileChunks.DeleteByFileIDs(txCtx, tx, allFileIDs); err != nil {
|
||||
return fmt.Errorf("batch deleting old file chunks: %w", err)
|
||||
}
|
||||
log.Debug("flushCompletedPendingFiles: deleted file_chunks", "duration", time.Since(opStart))
|
||||
|
||||
log.Debug("flushCompletedPendingFiles: deleting old chunk_files")
|
||||
opStart = time.Now()
|
||||
if err := s.repos.ChunkFiles.DeleteByFileIDs(txCtx, tx, allFileIDs); err != nil {
|
||||
return fmt.Errorf("batch deleting old chunk files: %w", err)
|
||||
}
|
||||
log.Debug("flushCompletedPendingFiles: deleted chunk_files", "duration", time.Since(opStart))
|
||||
|
||||
// Batch create/update file records
|
||||
log.Debug("flushCompletedPendingFiles: creating files")
|
||||
opStart = time.Now()
|
||||
if err := s.repos.Files.CreateBatch(txCtx, tx, allFiles); err != nil {
|
||||
return fmt.Errorf("batch creating file records: %w", err)
|
||||
}
|
||||
log.Debug("flushCompletedPendingFiles: created files", "duration", time.Since(opStart))
|
||||
|
||||
// Batch insert file_chunks
|
||||
log.Debug("flushCompletedPendingFiles: inserting file_chunks")
|
||||
opStart = time.Now()
|
||||
if err := s.repos.FileChunks.CreateBatch(txCtx, tx, allFileChunks); err != nil {
|
||||
return fmt.Errorf("batch creating file chunks: %w", err)
|
||||
}
|
||||
log.Debug("flushCompletedPendingFiles: inserted file_chunks", "duration", time.Since(opStart))
|
||||
|
||||
// Batch insert chunk_files
|
||||
log.Debug("flushCompletedPendingFiles: inserting chunk_files")
|
||||
opStart = time.Now()
|
||||
if err := s.repos.ChunkFiles.CreateBatch(txCtx, tx, allChunkFiles); err != nil {
|
||||
return fmt.Errorf("batch creating chunk files: %w", err)
|
||||
}
|
||||
log.Debug("flushCompletedPendingFiles: inserted chunk_files", "duration", time.Since(opStart))
|
||||
|
||||
// Batch add files to snapshot
|
||||
log.Debug("flushCompletedPendingFiles: adding files to snapshot")
|
||||
opStart = time.Now()
|
||||
if err := s.repos.Snapshots.AddFilesByIDBatch(txCtx, tx, s.snapshotID, allFileIDs); err != nil {
|
||||
return fmt.Errorf("batch adding files to snapshot: %w", err)
|
||||
}
|
||||
log.Debug("flushCompletedPendingFiles: added files to snapshot", "duration", time.Since(opStart))
|
||||
|
||||
log.Debug("flushCompletedPendingFiles: transaction complete")
|
||||
return nil
|
||||
})
|
||||
log.Debug("flushCompletedPendingFiles: transaction done", "duration", time.Since(txStart))
|
||||
log.Debug("flushCompletedPendingFiles: total duration", "duration", time.Since(flushStart))
|
||||
return err
|
||||
}
|
||||
|
||||
// ScanPhaseResult contains the results of the scan phase
|
||||
@@ -504,6 +577,14 @@ func (s *Scanner) scanPhase(ctx context.Context, path string, result *ScanResult
|
||||
default:
|
||||
}
|
||||
|
||||
// Check exclude patterns - for directories, skip the entire subtree
|
||||
if s.shouldExclude(filePath, path) {
|
||||
if info.IsDir() {
|
||||
return filepath.SkipDir
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// Skip non-regular files for processing (but still count them)
|
||||
if !info.Mode().IsRegular() {
|
||||
return nil
|
||||
@@ -730,6 +811,12 @@ func (s *Scanner) processPhase(ctx context.Context, filesToProcess []*FileToProc
|
||||
|
||||
// Process file in streaming fashion
|
||||
if err := s.processFileStreaming(ctx, fileToProcess, result); err != nil {
|
||||
// Handle files that were deleted between scan and process phases
|
||||
if errors.Is(err, os.ErrNotExist) {
|
||||
log.Warn("File was deleted during backup, skipping", "path", fileToProcess.Path)
|
||||
result.FilesSkipped++
|
||||
continue
|
||||
}
|
||||
return fmt.Errorf("processing file %s: %w", fileToProcess.Path, err)
|
||||
}
|
||||
|
||||
@@ -939,14 +1026,19 @@ func (s *Scanner) handleBlobReady(blobWithReader *blob.BlobWithReader) error {
|
||||
}
|
||||
|
||||
// Chunks from this blob are now committed to DB - remove from pending set
|
||||
log.Debug("handleBlobReady: removing pending chunk hashes")
|
||||
s.removePendingChunkHashes(blobWithReader.InsertedChunkHashes)
|
||||
log.Debug("handleBlobReady: removed pending chunk hashes")
|
||||
|
||||
// Flush files whose chunks are now all committed
|
||||
// This maintains database consistency after each blob
|
||||
log.Debug("handleBlobReady: calling flushCompletedPendingFiles")
|
||||
if err := s.flushCompletedPendingFiles(dbCtx); err != nil {
|
||||
return fmt.Errorf("flushing completed files: %w", err)
|
||||
}
|
||||
log.Debug("handleBlobReady: flushCompletedPendingFiles returned")
|
||||
|
||||
log.Debug("handleBlobReady: complete")
|
||||
return nil
|
||||
}
|
||||
|
||||
@@ -1135,6 +1227,105 @@ func (s *Scanner) detectDeletedFilesFromMap(ctx context.Context, knownFiles map[
|
||||
return nil
|
||||
}
|
||||
|
||||
// compileExcludePatterns compiles the exclude patterns into glob matchers
|
||||
func compileExcludePatterns(patterns []string) []compiledPattern {
|
||||
var compiled []compiledPattern
|
||||
for _, p := range patterns {
|
||||
if p == "" {
|
||||
continue
|
||||
}
|
||||
|
||||
// Check if pattern is anchored (starts with /)
|
||||
anchored := strings.HasPrefix(p, "/")
|
||||
pattern := p
|
||||
if anchored {
|
||||
pattern = p[1:] // Remove leading /
|
||||
}
|
||||
|
||||
// Remove trailing slash if present (directory indicator)
|
||||
pattern = strings.TrimSuffix(pattern, "/")
|
||||
|
||||
// Compile the glob pattern
|
||||
// For patterns without path separators, we need to match them as components
|
||||
// e.g., ".git" should match ".git" anywhere in the path
|
||||
g, err := glob.Compile(pattern, '/')
|
||||
if err != nil {
|
||||
log.Warn("Invalid exclude pattern, skipping", "pattern", p, "error", err)
|
||||
continue
|
||||
}
|
||||
|
||||
compiled = append(compiled, compiledPattern{
|
||||
pattern: g,
|
||||
anchored: anchored,
|
||||
original: p,
|
||||
})
|
||||
}
|
||||
return compiled
|
||||
}
|
||||
|
||||
// shouldExclude checks if a path should be excluded based on exclude patterns
|
||||
// filePath is the full path to the file
|
||||
// rootPath is the root of the backup source directory
|
||||
func (s *Scanner) shouldExclude(filePath, rootPath string) bool {
|
||||
if len(s.compiledExclude) == 0 {
|
||||
return false
|
||||
}
|
||||
|
||||
// Get the relative path from root
|
||||
relPath, err := filepath.Rel(rootPath, filePath)
|
||||
if err != nil {
|
||||
return false
|
||||
}
|
||||
|
||||
// Never exclude the root directory itself
|
||||
if relPath == "." {
|
||||
return false
|
||||
}
|
||||
|
||||
// Normalize path separators
|
||||
relPath = filepath.ToSlash(relPath)
|
||||
|
||||
// Check each pattern
|
||||
for _, cp := range s.compiledExclude {
|
||||
if cp.anchored {
|
||||
// Anchored pattern: must match from the root
|
||||
// Match the relative path directly
|
||||
if cp.pattern.Match(relPath) {
|
||||
return true
|
||||
}
|
||||
// Also check if any prefix of the path matches (for directory patterns)
|
||||
parts := strings.Split(relPath, "/")
|
||||
for i := 1; i <= len(parts); i++ {
|
||||
prefix := strings.Join(parts[:i], "/")
|
||||
if cp.pattern.Match(prefix) {
|
||||
return true
|
||||
}
|
||||
}
|
||||
} else {
|
||||
// Unanchored pattern: can match anywhere in path
|
||||
// Check the full relative path
|
||||
if cp.pattern.Match(relPath) {
|
||||
return true
|
||||
}
|
||||
// Check each path component and subpath
|
||||
parts := strings.Split(relPath, "/")
|
||||
for i := range parts {
|
||||
// Match individual component (e.g., ".git" matches ".git" directory)
|
||||
if cp.pattern.Match(parts[i]) {
|
||||
return true
|
||||
}
|
||||
// Match subpath from this component onwards
|
||||
subpath := strings.Join(parts[i:], "/")
|
||||
if cp.pattern.Match(subpath) {
|
||||
return true
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return false
|
||||
}
|
||||
|
||||
// formatNumber formats a number with comma separators
|
||||
func formatNumber(n int) string {
|
||||
if n < 1000 {
|
||||
|
||||
@@ -46,6 +46,7 @@ import (
|
||||
"io"
|
||||
"os/exec"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"git.eeqj.de/sneak/vaultik/internal/blobgen"
|
||||
@@ -91,7 +92,12 @@ func (sm *SnapshotManager) SetFilesystem(fs afero.Fs) {
|
||||
|
||||
// CreateSnapshot creates a new snapshot record in the database at the start of a backup
|
||||
func (sm *SnapshotManager) CreateSnapshot(ctx context.Context, hostname, version, gitRevision string) (string, error) {
|
||||
snapshotID := fmt.Sprintf("%s-%s", hostname, time.Now().UTC().Format("20060102-150405Z"))
|
||||
// Use short hostname (strip domain if present)
|
||||
shortHostname := hostname
|
||||
if idx := strings.Index(hostname, "."); idx != -1 {
|
||||
shortHostname = hostname[:idx]
|
||||
}
|
||||
snapshotID := fmt.Sprintf("%s_%s", shortHostname, time.Now().UTC().Format("2006-01-02T15:04:05Z"))
|
||||
|
||||
snapshot := &database.Snapshot{
|
||||
ID: snapshotID,
|
||||
@@ -688,15 +694,16 @@ func (sm *SnapshotManager) deleteSnapshot(ctx context.Context, snapshotID string
|
||||
|
||||
// Clean up orphaned data
|
||||
log.Debug("Cleaning up orphaned records in main database")
|
||||
if err := sm.cleanupOrphanedData(ctx); err != nil {
|
||||
if err := sm.CleanupOrphanedData(ctx); err != nil {
|
||||
return fmt.Errorf("cleaning up orphaned data: %w", err)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// cleanupOrphanedData removes files, chunks, and blobs that are no longer referenced by any snapshot
|
||||
func (sm *SnapshotManager) cleanupOrphanedData(ctx context.Context) error {
|
||||
// CleanupOrphanedData removes files, chunks, and blobs that are no longer referenced by any snapshot.
|
||||
// This should be called periodically to clean up data from deleted or incomplete snapshots.
|
||||
func (sm *SnapshotManager) CleanupOrphanedData(ctx context.Context) error {
|
||||
// Order is important to respect foreign key constraints:
|
||||
// 1. Delete orphaned files (will cascade delete file_chunks)
|
||||
// 2. Delete orphaned blobs (will cascade delete blob_chunks for deleted blobs)
|
||||
|
||||
Reference in New Issue
Block a user