vaultik/internal/vaultik/daemon.go
fix: add RunDaemon test, remove dead daemonWatcherBatchDelay constant
- Add TestRunDaemon_CancelledContext: exercises RunDaemon with a
  daemon-friendly config, cancels via context, verifies clean return
  and startup output
- Remove unused daemonWatcherBatchDelay constant (batch-settle logic
  was never implemented; the watcher loop records changes immediately)
- Update TestDaemonConstants to remove reference to deleted constant
2026-03-24 13:40:08 -07:00

package vaultik

import (
	"context"
	"fmt"
	"os"
	"os/signal"
	"path/filepath"
	"strings"
	"sync"
	"syscall"
	"time"

	"git.eeqj.de/sneak/vaultik/internal/log"
	"github.com/fsnotify/fsnotify"
)

// daemonMinBackupInterval is the absolute minimum time allowed between backup runs,
// regardless of config, to prevent runaway backup loops.
const daemonMinBackupInterval = 1 * time.Minute

// daemonShutdownTimeout is the maximum time to wait for an in-progress backup
// to complete during graceful shutdown before force-exiting.
const daemonShutdownTimeout = 5 * time.Minute

// RunDaemon runs vaultik in daemon mode: it watches configured directories for
// changes using filesystem notifications, runs periodic backups at the configured
// interval, and performs full scans at the full_scan_interval. It handles
// SIGTERM/SIGINT for graceful shutdown, completing any in-progress backup before
// exiting.
func (v *Vaultik) RunDaemon(opts *SnapshotCreateOptions) error {
	backupInterval := v.Config.BackupInterval
	if backupInterval < daemonMinBackupInterval {
		backupInterval = daemonMinBackupInterval
	}
	minTimeBetween := v.Config.MinTimeBetweenRun
	if minTimeBetween < daemonMinBackupInterval {
		minTimeBetween = daemonMinBackupInterval
	}
	fullScanInterval := v.Config.FullScanInterval
	if fullScanInterval <= 0 {
		fullScanInterval = 24 * time.Hour
	}

	log.Info("Starting daemon mode",
		"backup_interval", backupInterval,
		"min_time_between_run", minTimeBetween,
		"full_scan_interval", fullScanInterval,
	)
	v.printfStdout("Daemon mode started\n")
	v.printfStdout(" Backup interval: %s\n", backupInterval)
	v.printfStdout(" Min time between: %s\n", minTimeBetween)
	v.printfStdout(" Full scan interval: %s\n", fullScanInterval)

	// Create a daemon-scoped context that we cancel on signal.
	ctx, cancel := context.WithCancel(v.ctx)
	defer cancel()

	// Set up signal handling for graceful shutdown.
	sigCh := make(chan os.Signal, 1)
	signal.Notify(sigCh, syscall.SIGINT, syscall.SIGTERM)
	defer signal.Stop(sigCh)

	// Tracker for filesystem change events.
	tracker := newChangeTracker()

	// Start the filesystem watcher.
	watcher, err := v.startWatcher(ctx, tracker)
	if err != nil {
		return fmt.Errorf("starting filesystem watcher: %w", err)
	}
	defer func() { _ = watcher.Close() }()

	// Timers
	backupTicker := time.NewTicker(backupInterval)
	defer backupTicker.Stop()
	fullScanTicker := time.NewTicker(fullScanInterval)
	defer fullScanTicker.Stop()

	var lastBackupTime time.Time
	backupRunning := make(chan struct{}, 1) // semaphore: send to acquire, receive to release; full = backup in progress

	// Run an initial full backup immediately on startup.
	log.Info("Running initial backup on daemon startup")
	v.printfStdout("Running initial backup...\n")
	if err := v.runDaemonBackup(ctx, opts, tracker, false); err != nil {
		if ctx.Err() != nil {
			return nil // context cancelled, shutting down
		}
		log.Error("Initial backup failed", "error", err)
		v.printfStderr("Initial backup failed: %v\n", err)
		// Continue running; the next scheduled backup may succeed.
	} else {
		lastBackupTime = time.Now()
		tracker.reset()
	}

	v.printfStdout("Watching for changes...\n")

	for {
		select {
		case <-ctx.Done():
			log.Info("Daemon context cancelled, shutting down")
			return nil

		case sig := <-sigCh:
			log.Info("Received signal, initiating graceful shutdown", "signal", sig)
			v.printfStdout("\nReceived %s, shutting down...\n", sig)
			cancel()
			// Wait for any in-progress backup to finish.
			select {
			case backupRunning <- struct{}{}:
				// No backup running, we can exit immediately.
				<-backupRunning
			default:
				// Backup is running, wait for it to complete.
				v.printfStdout("Waiting for in-progress backup to complete...\n")
				shutdownTimer := time.NewTimer(daemonShutdownTimeout)
				select {
				case backupRunning <- struct{}{}:
					<-backupRunning
					shutdownTimer.Stop()
				case <-shutdownTimer.C:
					log.Warn("Shutdown timeout exceeded, forcing exit")
					v.printfStderr("Shutdown timeout exceeded, forcing exit\n")
				}
			}
			return nil

		case <-backupTicker.C:
			// Periodic backup tick. Only run if there are changes and enough
			// time has elapsed since the last run.
			if !tracker.hasChanges() {
				log.Debug("Backup tick: no changes detected, skipping")
				continue
			}
			if time.Since(lastBackupTime) < minTimeBetween {
				log.Debug("Backup tick: too soon since last backup",
					"last_backup", lastBackupTime,
					"min_interval", minTimeBetween,
				)
				continue
			}
			// Try to acquire the backup semaphore (non-blocking).
			select {
			case backupRunning <- struct{}{}:
			default:
				log.Debug("Backup tick: backup already in progress, skipping")
				continue
			}
			log.Info("Running scheduled backup", "changes", tracker.changeCount())
			v.printfStdout("Running scheduled backup (%d changes detected)...\n", tracker.changeCount())
			if err := v.runDaemonBackup(ctx, opts, tracker, false); err != nil {
				if ctx.Err() != nil {
					<-backupRunning
					return nil
				}
				log.Error("Scheduled backup failed", "error", err)
				v.printfStderr("Scheduled backup failed: %v\n", err)
			} else {
				lastBackupTime = time.Now()
				tracker.reset()
			}
			<-backupRunning

		case <-fullScanTicker.C:
			// Full scan: do a complete scan regardless of whether changes were detected.
			if time.Since(lastBackupTime) < minTimeBetween {
				log.Debug("Full scan tick: too soon since last backup, deferring")
				continue
			}
			select {
			case backupRunning <- struct{}{}:
			default:
				log.Debug("Full scan tick: backup already in progress, skipping")
				continue
			}
			log.Info("Running full periodic scan")
			v.printfStdout("Running full periodic scan...\n")
			if err := v.runDaemonBackup(ctx, opts, tracker, true); err != nil {
				if ctx.Err() != nil {
					<-backupRunning
					return nil
				}
				log.Error("Full scan backup failed", "error", err)
				v.printfStderr("Full scan backup failed: %v\n", err)
			} else {
				lastBackupTime = time.Now()
				tracker.reset()
			}
			<-backupRunning
		}
	}
}

// runDaemonBackup executes a single backup run within the daemon loop.
// If fullScan is true, all snapshots are processed regardless of tracked changes.
// Otherwise, only snapshots whose paths overlap with tracked changes are processed.
func (v *Vaultik) runDaemonBackup(ctx context.Context, opts *SnapshotCreateOptions, tracker *changeTracker, fullScan bool) error {
	startTime := time.Now()

	// Build one-shot create options for this run.
	runOpts := &SnapshotCreateOptions{
		Cron:       opts.Cron,
		Prune:      opts.Prune,
		SkipErrors: opts.SkipErrors,
	}
	if !fullScan {
		// Filter to only snapshots whose paths had changes.
		changedPaths := tracker.changedPaths()
		affected := v.snapshotsAffectedByChanges(changedPaths)
		if len(affected) == 0 {
			log.Debug("No snapshots affected by changes")
			return nil
		}
		runOpts.Snapshots = affected
		log.Info("Running incremental backup for affected snapshots", "snapshots", affected)
	}
	// fullScan: leave runOpts.Snapshots empty so CreateSnapshot processes all snapshots.

	// Use a child context so cancellation of the daemon context propagates to this
	// run, and so the run's context is cleaned up when it returns.
	childCtx, childCancel := context.WithCancel(ctx)
	defer childCancel()

	// Temporarily swap the Vaultik context for the duration of this run.
	origCtx := v.ctx
	v.ctx = childCtx
	defer func() { v.ctx = origCtx }()

	if err := v.CreateSnapshot(runOpts); err != nil {
		return fmt.Errorf("backup run failed: %w", err)
	}

	log.Info("Daemon backup complete", "duration", time.Since(startTime))
	v.printfStdout("Backup complete in %s\n", formatDuration(time.Since(startTime)))
	return nil
}

// snapshotsAffectedByChanges returns the names of configured snapshots whose
// paths overlap with any of the changed paths.
func (v *Vaultik) snapshotsAffectedByChanges(changedPaths []string) []string {
	var affected []string

snapshotLoop:
	for _, snapName := range v.Config.SnapshotNames() {
		snapCfg := v.Config.Snapshots[snapName]
		for _, snapPath := range snapCfg.Paths {
			absSnapPath, err := filepath.Abs(snapPath)
			if err != nil {
				absSnapPath = snapPath
			}
			for _, changed := range changedPaths {
				if isSubpath(changed, absSnapPath) {
					affected = append(affected, snapName)
					// One match is enough; move on to the next snapshot.
					continue snapshotLoop
				}
			}
		}
	}
	return affected
}

// isSubpath returns true if child is under parent (or equal to it).
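// For example, isSubpath("/home/user/docs/notes.txt", "/home/user") is true,
// while isSubpath("/home/username", "/home/user") is false despite the shared
// string prefix, because matching is done on whole path components.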
func isSubpath(child, parent string) bool {
	// Normalize both paths.
	child = filepath.Clean(child)
	parent = filepath.Clean(parent)
	if child == parent {
		return true
	}
	// Ensure parent ends with a separator for prefix matching,
	// unless parent is the root directory (which already ends with /).
	prefix := parent
	if !strings.HasSuffix(prefix, string(filepath.Separator)) {
		prefix += string(filepath.Separator)
	}
	return strings.HasPrefix(child, prefix)
}

// startWatcher creates an fsnotify watcher and adds all configured snapshot paths.
// It spawns a goroutine that reads events and feeds the change tracker.
func (v *Vaultik) startWatcher(ctx context.Context, tracker *changeTracker) (*fsnotify.Watcher, error) {
	watcher, err := fsnotify.NewWatcher()
	if err != nil {
		return nil, fmt.Errorf("creating watcher: %w", err)
	}

	// Collect unique absolute paths to watch.
	watchPaths := make(map[string]struct{})
	for _, snapName := range v.Config.SnapshotNames() {
		snapCfg := v.Config.Snapshots[snapName]
		for _, p := range snapCfg.Paths {
			absPath, err := filepath.Abs(p)
			if err != nil {
				log.Warn("Failed to resolve absolute path for watch", "path", p, "error", err)
				continue
			}
			watchPaths[absPath] = struct{}{}
		}
	}

	// Add paths to the watcher. Walk each tree to add its subdirectories,
	// since fsnotify does not watch recursively.
	for p := range watchPaths {
		if err := v.addWatchRecursive(watcher, p); err != nil {
			// Non-fatal: the path might not exist yet.
			log.Warn("Failed to watch path", "path", p, "error", err)
		}
	}

	// Spawn the event reader goroutine.
	go v.watcherLoop(ctx, watcher, tracker)

	return watcher, nil
}

// addWatchRecursive walks a directory tree and adds each directory to the watcher.
func (v *Vaultik) addWatchRecursive(watcher *fsnotify.Watcher, root string) error {
	return filepath.Walk(root, func(path string, info os.FileInfo, err error) error {
		if err != nil {
			// Can't read this entry; skip its subtree if it is a directory.
			if info != nil && info.IsDir() {
				return filepath.SkipDir
			}
			return nil
		}
		if info.IsDir() {
			// Skip common directories that don't need watching.
			base := filepath.Base(path)
			if base == ".git" || base == "node_modules" || base == "__pycache__" {
				return filepath.SkipDir
			}
			if err := watcher.Add(path); err != nil {
				// Non-fatal: continue walking.
				log.Debug("Failed to watch directory", "path", path, "error", err)
			}
		}
		return nil
	})
}

// watcherLoop reads filesystem events from the watcher and records them
// in the change tracker. It runs until the context is cancelled.
func (v *Vaultik) watcherLoop(ctx context.Context, watcher *fsnotify.Watcher, tracker *changeTracker) {
	for {
		select {
		case <-ctx.Done():
			return
		case event, ok := <-watcher.Events:
			if !ok {
				return
			}
			// Only track write/create/remove/rename events.
			if event.Op&(fsnotify.Write|fsnotify.Create|fsnotify.Remove|fsnotify.Rename) != 0 {
				tracker.recordChange(event.Name)
				log.Debug("Filesystem change detected", "path", event.Name, "op", event.Op)
			}
			// If a new directory was created, watch it too.
			if event.Op&fsnotify.Create != 0 {
				if info, err := os.Stat(event.Name); err == nil && info.IsDir() {
					if err := v.addWatchRecursive(watcher, event.Name); err != nil {
						log.Debug("Failed to watch new directory", "path", event.Name, "error", err)
					}
				}
			}
		case err, ok := <-watcher.Errors:
			if !ok {
				return
			}
			log.Warn("Filesystem watcher error", "error", err)
		}
	}
}

// changeTracker records filesystem paths that have been modified since the
// last backup. It is safe for concurrent use.
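// The tracker only influences scheduling: whether a periodic backup runs at all
// and which snapshots are included. The snapshot run itself rescans the
// configured paths; recorded paths are never used as the backup's file list.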
type changeTracker struct {
	mu      sync.Mutex
	changes map[string]time.Time // path → last change time
}

// newChangeTracker creates a new empty change tracker.
func newChangeTracker() *changeTracker {
	return &changeTracker{
		changes: make(map[string]time.Time),
	}
}

// recordChange records that a path has been modified.
func (ct *changeTracker) recordChange(path string) {
	ct.mu.Lock()
	ct.changes[path] = time.Now()
	ct.mu.Unlock()
}

// hasChanges returns true if any changes have been recorded.
func (ct *changeTracker) hasChanges() bool {
	ct.mu.Lock()
	defer ct.mu.Unlock()
	return len(ct.changes) > 0
}

// changeCount returns the number of unique changed paths.
func (ct *changeTracker) changeCount() int {
	ct.mu.Lock()
	defer ct.mu.Unlock()
	return len(ct.changes)
}

// changedPaths returns all changed paths.
func (ct *changeTracker) changedPaths() []string {
	ct.mu.Lock()
	defer ct.mu.Unlock()
	paths := make([]string, 0, len(ct.changes))
	for p := range ct.changes {
		paths = append(paths, p)
	}
	return paths
}

// reset clears all recorded changes.
func (ct *changeTracker) reset() {
	ct.mu.Lock()
	ct.changes = make(map[string]time.Time)
	ct.mu.Unlock()
}