vaultik/internal/vaultik/daemon.go
fix: add RunDaemon test, remove dead daemonWatcherBatchDelay constant
- Add TestRunDaemon_CancelledContext: exercises RunDaemon with a
  daemon-friendly config, cancels via context, verifies clean return
  and startup output
- Remove unused daemonWatcherBatchDelay constant (batch-settle logic
  was never implemented; the watcher loop records changes immediately)
- Update TestDaemonConstants to remove reference to deleted constant
2026-03-24 13:40:08 -07:00

package vaultik

import (
	"context"
	"fmt"
	"os"
	"os/signal"
	"path/filepath"
	"strings"
	"sync"
	"syscall"
	"time"

	"git.eeqj.de/sneak/vaultik/internal/log"
	"github.com/fsnotify/fsnotify"
)

// daemonMinBackupInterval is the absolute minimum time allowed between backup runs,
// regardless of config, to prevent runaway backup loops.
const daemonMinBackupInterval = 1 * time.Minute

// daemonShutdownTimeout is the maximum time to wait for an in-progress backup
// to complete during graceful shutdown before force-exiting.
const daemonShutdownTimeout = 5 * time.Minute

// RunDaemon runs vaultik in daemon mode: it watches configured directories for
// changes using filesystem notifications, runs periodic backups at the configured
// interval, and performs full scans at the full_scan_interval. It handles
// SIGTERM/SIGINT for graceful shutdown, completing any in-progress backup before
// exiting.
func (v *Vaultik) RunDaemon(opts *SnapshotCreateOptions) error {
	backupInterval := v.Config.BackupInterval
	if backupInterval < daemonMinBackupInterval {
		backupInterval = daemonMinBackupInterval
	}
	minTimeBetween := v.Config.MinTimeBetweenRun
	if minTimeBetween < daemonMinBackupInterval {
		minTimeBetween = daemonMinBackupInterval
	}
	fullScanInterval := v.Config.FullScanInterval
	if fullScanInterval <= 0 {
		fullScanInterval = 24 * time.Hour
	}

	log.Info("Starting daemon mode",
		"backup_interval", backupInterval,
		"min_time_between_run", minTimeBetween,
		"full_scan_interval", fullScanInterval,
	)
	v.printfStdout("Daemon mode started\n")
	v.printfStdout(" Backup interval: %s\n", backupInterval)
	v.printfStdout(" Min time between: %s\n", minTimeBetween)
	v.printfStdout(" Full scan interval: %s\n", fullScanInterval)

	// Create a daemon-scoped context that we cancel on signal.
	ctx, cancel := context.WithCancel(v.ctx)
	defer cancel()

	// Set up signal handling for graceful shutdown.
	sigCh := make(chan os.Signal, 1)
	signal.Notify(sigCh, syscall.SIGINT, syscall.SIGTERM)
	defer signal.Stop(sigCh)

	// Tracker for filesystem change events.
	tracker := newChangeTracker()

	// Start the filesystem watcher.
	watcher, err := v.startWatcher(ctx, tracker)
	if err != nil {
		return fmt.Errorf("starting filesystem watcher: %w", err)
	}
	defer func() { _ = watcher.Close() }()

	// Timers
	backupTicker := time.NewTicker(backupInterval)
	defer backupTicker.Stop()
	fullScanTicker := time.NewTicker(fullScanInterval)
	defer fullScanTicker.Stop()

	var lastBackupTime time.Time
	backupRunning := make(chan struct{}, 1) // semaphore: send to acquire, receive to release; full = backup in progress

	// Run an initial full backup immediately on startup.
	log.Info("Running initial backup on daemon startup")
	v.printfStdout("Running initial backup...\n")
	if err := v.runDaemonBackup(ctx, opts, tracker, false); err != nil {
		if ctx.Err() != nil {
			return nil // context cancelled, shutting down
		}
		log.Error("Initial backup failed", "error", err)
		v.printfStderr("Initial backup failed: %v\n", err)
		// Continue running; the next scheduled backup may succeed.
	} else {
		lastBackupTime = time.Now()
		tracker.reset()
	}

	v.printfStdout("Watching for changes...\n")

	for {
		select {
		case <-ctx.Done():
			log.Info("Daemon context cancelled, shutting down")
			return nil

		case sig := <-sigCh:
			log.Info("Received signal, initiating graceful shutdown", "signal", sig)
			v.printfStdout("\nReceived %s, shutting down...\n", sig)
			cancel()
			// Wait for any in-progress backup to finish.
			select {
			case backupRunning <- struct{}{}:
				// No backup running, we can exit immediately.
				<-backupRunning
			default:
				// Backup is running, wait for it to complete.
				v.printfStdout("Waiting for in-progress backup to complete...\n")
				shutdownTimer := time.NewTimer(daemonShutdownTimeout)
				select {
				case backupRunning <- struct{}{}:
					<-backupRunning
					shutdownTimer.Stop()
				case <-shutdownTimer.C:
					log.Warn("Shutdown timeout exceeded, forcing exit")
					v.printfStderr("Shutdown timeout exceeded, forcing exit\n")
				}
			}
			return nil

		case <-backupTicker.C:
			// Periodic backup tick. Only run if there are changes and enough
			// time has elapsed since the last run.
			if !tracker.hasChanges() {
				log.Debug("Backup tick: no changes detected, skipping")
				continue
			}
			if time.Since(lastBackupTime) < minTimeBetween {
				log.Debug("Backup tick: too soon since last backup",
					"last_backup", lastBackupTime,
					"min_interval", minTimeBetween,
				)
				continue
			}
			// Try to acquire the backup semaphore (non-blocking).
			select {
			case backupRunning <- struct{}{}:
			default:
				log.Debug("Backup tick: backup already in progress, skipping")
				continue
			}
			log.Info("Running scheduled backup", "changes", tracker.changeCount())
			v.printfStdout("Running scheduled backup (%d changes detected)...\n", tracker.changeCount())
			if err := v.runDaemonBackup(ctx, opts, tracker, false); err != nil {
				if ctx.Err() != nil {
					<-backupRunning
					return nil
				}
				log.Error("Scheduled backup failed", "error", err)
				v.printfStderr("Scheduled backup failed: %v\n", err)
			} else {
				lastBackupTime = time.Now()
				tracker.reset()
			}
			<-backupRunning

		case <-fullScanTicker.C:
			// Full scan: do a complete scan regardless of whether changes were detected.
			if time.Since(lastBackupTime) < minTimeBetween {
				log.Debug("Full scan tick: too soon since last backup, deferring")
				continue
			}
			select {
			case backupRunning <- struct{}{}:
			default:
				log.Debug("Full scan tick: backup already in progress, skipping")
				continue
			}
			log.Info("Running full periodic scan")
			v.printfStdout("Running full periodic scan...\n")
			if err := v.runDaemonBackup(ctx, opts, tracker, true); err != nil {
				if ctx.Err() != nil {
					<-backupRunning
					return nil
				}
				log.Error("Full scan backup failed", "error", err)
				v.printfStderr("Full scan backup failed: %v\n", err)
			} else {
				lastBackupTime = time.Now()
				tracker.reset()
			}
			<-backupRunning
		}
	}
}

// runDaemonBackup executes a single backup run within the daemon loop.
// If fullScan is true, all snapshots are processed regardless of tracked changes.
// Otherwise, only snapshots whose paths overlap with tracked changes are processed.
func (v *Vaultik) runDaemonBackup(ctx context.Context, opts *SnapshotCreateOptions, tracker *changeTracker, fullScan bool) error {
	startTime := time.Now()

	// Build one-shot create options for this run.
	runOpts := &SnapshotCreateOptions{
		Cron:       opts.Cron,
		Prune:      opts.Prune,
		SkipErrors: opts.SkipErrors,
	}
	if !fullScan {
		// Filter to only snapshots whose paths had changes.
		changedPaths := tracker.changedPaths()
		affected := v.snapshotsAffectedByChanges(changedPaths)
		if len(affected) == 0 {
			log.Debug("No snapshots affected by changes")
			return nil
		}
		runOpts.Snapshots = affected
		log.Info("Running incremental backup for affected snapshots", "snapshots", affected)
	}
	// fullScan: leave runOpts.Snapshots empty so CreateSnapshot processes all snapshots.

	// Use a child context so cancellation of the daemon context propagates to this
	// run, and so the run's context is cleaned up when it returns.
	childCtx, childCancel := context.WithCancel(ctx)
	defer childCancel()

	// Temporarily swap the Vaultik context for the duration of this run.
	origCtx := v.ctx
	v.ctx = childCtx
	defer func() { v.ctx = origCtx }()

	if err := v.CreateSnapshot(runOpts); err != nil {
		return fmt.Errorf("backup run failed: %w", err)
	}

	log.Info("Daemon backup complete", "duration", time.Since(startTime))
	v.printfStdout("Backup complete in %s\n", formatDuration(time.Since(startTime)))
	return nil
}

// snapshotsAffectedByChanges returns the names of configured snapshots whose
// paths overlap with any of the changed paths.
func (v *Vaultik) snapshotsAffectedByChanges(changedPaths []string) []string {
	var affected []string

snapshotLoop:
	for _, snapName := range v.Config.SnapshotNames() {
		snapCfg := v.Config.Snapshots[snapName]
		for _, snapPath := range snapCfg.Paths {
			absSnapPath, err := filepath.Abs(snapPath)
			if err != nil {
				absSnapPath = snapPath
			}
			for _, changed := range changedPaths {
				if isSubpath(changed, absSnapPath) {
					affected = append(affected, snapName)
					// One match is enough; move on to the next snapshot.
					continue snapshotLoop
				}
			}
		}
	}
	return affected
}

// isSubpath returns true if child is under parent (or equal to it).
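// For example, isSubpath("/home/user/docs/notes.txt", "/home/user") is true,
// while isSubpath("/home/username", "/home/user") is false despite the shared
// string prefix, because matching is done on whole path components.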
func isSubpath(child, parent string) bool {
	// Normalize both paths.
	child = filepath.Clean(child)
	parent = filepath.Clean(parent)
	if child == parent {
		return true
	}
	// Ensure parent ends with a separator for prefix matching,
	// unless parent is the root directory (which already ends with /).
	prefix := parent
	if !strings.HasSuffix(prefix, string(filepath.Separator)) {
		prefix += string(filepath.Separator)
	}
	return strings.HasPrefix(child, prefix)
}

// startWatcher creates an fsnotify watcher and adds all configured snapshot paths.
// It spawns a goroutine that reads events and feeds the change tracker.
func (v *Vaultik) startWatcher(ctx context.Context, tracker *changeTracker) (*fsnotify.Watcher, error) {
	watcher, err := fsnotify.NewWatcher()
	if err != nil {
		return nil, fmt.Errorf("creating watcher: %w", err)
	}

	// Collect unique absolute paths to watch.
	watchPaths := make(map[string]struct{})
	for _, snapName := range v.Config.SnapshotNames() {
		snapCfg := v.Config.Snapshots[snapName]
		for _, p := range snapCfg.Paths {
			absPath, err := filepath.Abs(p)
			if err != nil {
				log.Warn("Failed to resolve absolute path for watch", "path", p, "error", err)
				continue
			}
			watchPaths[absPath] = struct{}{}
		}
	}

	// Add paths to the watcher. Walk each tree to add its subdirectories,
	// since fsnotify does not watch recursively.
	for p := range watchPaths {
		if err := v.addWatchRecursive(watcher, p); err != nil {
			// Non-fatal: the path might not exist yet.
			log.Warn("Failed to watch path", "path", p, "error", err)
		}
	}

	// Spawn the event reader goroutine.
	go v.watcherLoop(ctx, watcher, tracker)

	return watcher, nil
}

// addWatchRecursive walks a directory tree and adds each directory to the watcher.
func (v *Vaultik) addWatchRecursive(watcher *fsnotify.Watcher, root string) error {
	return filepath.Walk(root, func(path string, info os.FileInfo, err error) error {
		if err != nil {
			// Can't read this entry; skip its subtree if it is a directory.
			if info != nil && info.IsDir() {
				return filepath.SkipDir
			}
			return nil
		}
		if info.IsDir() {
			// Skip common directories that don't need watching.
			base := filepath.Base(path)
			if base == ".git" || base == "node_modules" || base == "__pycache__" {
				return filepath.SkipDir
			}
			if err := watcher.Add(path); err != nil {
				// Non-fatal: continue walking.
				log.Debug("Failed to watch directory", "path", path, "error", err)
			}
		}
		return nil
	})
}

// watcherLoop reads filesystem events from the watcher and records them
// in the change tracker. It runs until the context is cancelled.
func (v *Vaultik) watcherLoop(ctx context.Context, watcher *fsnotify.Watcher, tracker *changeTracker) {
	for {
		select {
		case <-ctx.Done():
			return
		case event, ok := <-watcher.Events:
			if !ok {
				return
			}
			// Only track write/create/remove/rename events.
			if event.Op&(fsnotify.Write|fsnotify.Create|fsnotify.Remove|fsnotify.Rename) != 0 {
				tracker.recordChange(event.Name)
				log.Debug("Filesystem change detected", "path", event.Name, "op", event.Op)
			}
			// If a new directory was created, watch it too.
			if event.Op&fsnotify.Create != 0 {
				if info, err := os.Stat(event.Name); err == nil && info.IsDir() {
					if err := v.addWatchRecursive(watcher, event.Name); err != nil {
						log.Debug("Failed to watch new directory", "path", event.Name, "error", err)
					}
				}
			}
		case err, ok := <-watcher.Errors:
			if !ok {
				return
			}
			log.Warn("Filesystem watcher error", "error", err)
		}
	}
}

// changeTracker records filesystem paths that have been modified since the
// last backup. It is safe for concurrent use.
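// The tracker only influences scheduling: whether a periodic backup runs at all
// and which snapshots are included. The snapshot run itself rescans the
// configured paths; recorded paths are never used as the backup's file list.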
type changeTracker struct {
	mu      sync.Mutex
	changes map[string]time.Time // path → last change time
}

// newChangeTracker creates a new empty change tracker.
func newChangeTracker() *changeTracker {
	return &changeTracker{
		changes: make(map[string]time.Time),
	}
}

// recordChange records that a path has been modified.
func (ct *changeTracker) recordChange(path string) {
	ct.mu.Lock()
	ct.changes[path] = time.Now()
	ct.mu.Unlock()
}

// hasChanges returns true if any changes have been recorded.
func (ct *changeTracker) hasChanges() bool {
	ct.mu.Lock()
	defer ct.mu.Unlock()
	return len(ct.changes) > 0
}

// changeCount returns the number of unique changed paths.
func (ct *changeTracker) changeCount() int {
	ct.mu.Lock()
	defer ct.mu.Unlock()
	return len(ct.changes)
}

// changedPaths returns all changed paths.
func (ct *changeTracker) changedPaths() []string {
	ct.mu.Lock()
	defer ct.mu.Unlock()
	paths := make([]string, 0, len(ct.changes))
	for p := range ct.changes {
		paths = append(paths, p)
	}
	return paths
}

// reset clears all recorded changes.
func (ct *changeTracker) reset() {
	ct.mu.Lock()
	ct.changes = make(map[string]time.Time)
	ct.mu.Unlock()
}