Refactor blob storage to use UUID primary keys and implement streaming chunking

- Changed blob table to use ID (UUID) as primary key instead of hash
- Blob records are now created at packing start, enabling immediate chunk associations
- Implemented streaming chunking to process large files without memory exhaustion
- Fixed blob manifest generation to include all referenced blobs
- Updated all foreign key references from blob_hash to blob_id
- Added progress reporting and improved error handling
- Enforced encryption requirement for all blob packing
- Updated tests to use test encryption keys
- Added Cyrillic transliteration to README
This commit is contained in:
2025-07-22 07:43:39 +02:00
parent 26db096913
commit 86b533d6ee
49 changed files with 5709 additions and 324 deletions

View File

@@ -3,17 +3,22 @@ package cli
import (
"context"
"fmt"
"os"
"os/signal"
"syscall"
"time"
"git.eeqj.de/sneak/vaultik/internal/config"
"git.eeqj.de/sneak/vaultik/internal/database"
"git.eeqj.de/sneak/vaultik/internal/globals"
"git.eeqj.de/sneak/vaultik/internal/log"
"go.uber.org/fx"
)
// AppOptions contains common options for creating the fx application.
type AppOptions struct {
	ConfigPath string         // path to the vaultik config file, forwarded via fx.Supply
	LogOptions log.LogOptions // logging flags (Verbose/Debug/Cron) supplied to the log module
	Modules    []fx.Option    // command-specific fx modules added on top of the base modules
	Invokes    []fx.Option    // command-specific fx.Invoke options run once the graph is built
}
@@ -32,9 +37,12 @@ func setupGlobals(lc fx.Lifecycle, g *globals.Globals) {
func NewApp(opts AppOptions) *fx.App {
baseModules := []fx.Option{
fx.Supply(config.ConfigPath(opts.ConfigPath)),
fx.Supply(opts.LogOptions),
fx.Provide(globals.New),
fx.Provide(log.New),
config.Module,
database.Module,
log.Module,
fx.Invoke(setupGlobals),
fx.NopLogger,
}
@@ -47,18 +55,50 @@ func NewApp(opts AppOptions) *fx.App {
// RunApp starts and stops the fx application within the given context
func RunApp(ctx context.Context, app *fx.App) error {
// Set up signal handling for graceful shutdown
sigChan := make(chan os.Signal, 1)
signal.Notify(sigChan, os.Interrupt, syscall.SIGTERM)
// Create a context that will be cancelled on signal
ctx, cancel := context.WithCancel(ctx)
defer cancel()
// Start the app
if err := app.Start(ctx); err != nil {
return fmt.Errorf("failed to start app: %w", err)
}
defer func() {
if err := app.Stop(ctx); err != nil {
fmt.Printf("error stopping app: %v\n", err)
// Handle shutdown
shutdownComplete := make(chan struct{})
go func() {
defer close(shutdownComplete)
<-sigChan
log.Notice("Received interrupt signal, shutting down gracefully...")
// Create a timeout context for shutdown
shutdownCtx, shutdownCancel := context.WithTimeout(context.Background(), 30*time.Second)
defer shutdownCancel()
if err := app.Stop(shutdownCtx); err != nil {
log.Error("Error during shutdown", "error", err)
}
}()
// Wait for context cancellation
<-ctx.Done()
return nil
// Wait for either the signal handler to complete shutdown or the app to request shutdown
select {
case <-shutdownComplete:
// Shutdown completed via signal
return nil
case <-ctx.Done():
// Context cancelled (shouldn't happen in normal operation)
if err := app.Stop(context.Background()); err != nil {
log.Error("Error stopping app", "error", err)
}
return ctx.Err()
case <-app.Done():
// App finished running (e.g., backup completed)
return nil
}
}
// RunWithApp is a helper that creates and runs an fx app with the given options

View File

@@ -4,10 +4,15 @@ import (
	"context"
	"errors"
	"fmt"
	"os"
	"path/filepath"

	"git.eeqj.de/sneak/vaultik/internal/backup"
	"git.eeqj.de/sneak/vaultik/internal/config"
	"git.eeqj.de/sneak/vaultik/internal/crypto"
	"git.eeqj.de/sneak/vaultik/internal/database"
	"git.eeqj.de/sneak/vaultik/internal/globals"
	"git.eeqj.de/sneak/vaultik/internal/log"
	"git.eeqj.de/sneak/vaultik/internal/s3"

	"github.com/spf13/cobra"
	"go.uber.org/fx"
)
@@ -20,6 +25,18 @@ type BackupOptions struct {
Prune bool
}
// BackupApp contains all dependencies needed for running backups
// BackupApp contains all dependencies needed for running backups. It exists
// so the fx.Invoke hook in runBackup can receive everything as a single
// constructed value instead of a long parameter list.
type BackupApp struct {
	Globals        *globals.Globals       // build metadata (Version, Commit) logged at startup
	Config         *config.Config         // parsed vaultik configuration (source dirs, index path, recipients)
	Repositories   *database.Repositories // repository layer handed to the snapshot manager
	ScannerFactory backup.ScannerFactory  // constructs directory scanners (progress toggled per run)
	S3Client       *s3.Client             // object-storage client for snapshot uploads
	DB             *database.DB           // database handle; not used directly in this file — TODO confirm why it is carried
	Lifecycle      fx.Lifecycle           // fx lifecycle, for registering start/stop hooks
	Shutdowner     fx.Shutdowner          // used to stop the fx app once the backup run finishes
}
// NewBackupCommand creates the backup command
func NewBackupCommand() *cobra.Command {
opts := &BackupOptions{}
@@ -59,25 +76,212 @@ a path using --config or by setting VAULTIK_CONFIG to a path.`,
}
func runBackup(ctx context.Context, opts *BackupOptions) error {
rootFlags := GetRootFlags()
return RunWithApp(ctx, AppOptions{
ConfigPath: opts.ConfigPath,
LogOptions: log.LogOptions{
Verbose: rootFlags.Verbose,
Debug: rootFlags.Debug,
Cron: opts.Cron,
},
Modules: []fx.Option{
backup.Module,
s3.Module,
fx.Provide(fx.Annotate(
func(g *globals.Globals, cfg *config.Config, repos *database.Repositories,
scannerFactory backup.ScannerFactory, s3Client *s3.Client, db *database.DB,
lc fx.Lifecycle, shutdowner fx.Shutdowner) *BackupApp {
return &BackupApp{
Globals: g,
Config: cfg,
Repositories: repos,
ScannerFactory: scannerFactory,
S3Client: s3Client,
DB: db,
Lifecycle: lc,
Shutdowner: shutdowner,
}
},
)),
},
Invokes: []fx.Option{
fx.Invoke(func(g *globals.Globals, cfg *config.Config, repos *database.Repositories) error {
// TODO: Implement backup logic
fmt.Printf("Running backup with config: %s\n", opts.ConfigPath)
fmt.Printf("Version: %s, Commit: %s\n", g.Version, g.Commit)
fmt.Printf("Index path: %s\n", cfg.IndexPath)
if opts.Daemon {
fmt.Println("Running in daemon mode")
}
if opts.Cron {
fmt.Println("Running in cron mode")
}
if opts.Prune {
fmt.Println("Pruning enabled - will delete old snapshots after backup")
}
return nil
fx.Invoke(func(app *BackupApp, lc fx.Lifecycle) {
// Create a cancellable context for the backup
backupCtx, backupCancel := context.WithCancel(context.Background())
lc.Append(fx.Hook{
OnStart: func(ctx context.Context) error {
// Start the backup in a goroutine
go func() {
// Run the backup
if err := app.runBackup(backupCtx, opts); err != nil {
if err != context.Canceled {
log.Error("Backup failed", "error", err)
}
}
// Shutdown the app when backup completes
if err := app.Shutdowner.Shutdown(); err != nil {
log.Error("Failed to shutdown", "error", err)
}
}()
return nil
},
OnStop: func(ctx context.Context) error {
log.Debug("Stopping backup")
// Cancel the backup context
backupCancel()
return nil
},
})
}),
},
})
}
// runBackup executes the backup operation
// runBackup executes a single backup run: it resolves the configured source
// directories, creates a snapshot record, scans each directory, aggregates
// the per-directory statistics, marks the snapshot complete, and exports the
// snapshot metadata to the configured index path. It returns the context's
// error if the run is cancelled mid-scan. Daemon mode is not implemented yet
// and returns an error immediately.
func (app *BackupApp) runBackup(ctx context.Context, opts *BackupOptions) error {
	log.Info("Starting backup",
		"config", opts.ConfigPath,
		"version", app.Globals.Version,
		"commit", app.Globals.Commit,
		"index_path", app.Config.IndexPath,
	)
	if opts.Daemon {
		log.Info("Running in daemon mode")
		// TODO: Implement daemon mode with inotify
		return fmt.Errorf("daemon mode not yet implemented")
	}
	// Resolve source directories to absolute paths so the scanner and the
	// snapshot records always see canonical locations.
	resolvedDirs := make([]string, 0, len(app.Config.SourceDirs))
	for _, dir := range app.Config.SourceDirs {
		absPath, err := filepath.Abs(dir)
		if err != nil {
			return fmt.Errorf("failed to resolve absolute path for %s: %w", dir, err)
		}
		// Resolve symlinks to their targets.
		resolvedPath, err := filepath.EvalSymlinks(absPath)
		if err != nil {
			// If the path doesn't exist yet, fall back to the absolute path
			// rather than failing the whole backup.
			if os.IsNotExist(err) {
				resolvedPath = absPath
			} else {
				return fmt.Errorf("failed to resolve symlinks for %s: %w", absPath, err)
			}
		}
		resolvedDirs = append(resolvedDirs, resolvedPath)
	}
	// Create scanner with progress enabled (disabled in cron mode to keep
	// scheduled-run output quiet).
	scanner := app.ScannerFactory(backup.ScannerParams{
		EnableProgress: !opts.Cron,
	})
	// Perform a single backup run.
	log.Notice("Starting backup", "source_dirs", len(resolvedDirs))
	for i, dir := range resolvedDirs {
		log.Info("Source directory", "index", i+1, "path", dir)
	}
	// Running totals accumulated across all scanned directories.
	totalFiles := 0
	totalBytes := int64(0)
	totalChunks := 0
	totalBlobs := 0
	// Create a new snapshot at the beginning of backup. Prefer the
	// configured hostname; otherwise fall back to the OS hostname.
	hostname := app.Config.Hostname
	if hostname == "" {
		// NOTE(review): the os.Hostname error is deliberately ignored — an
		// empty hostname is apparently tolerated downstream; confirm.
		hostname, _ = os.Hostname()
	}
	// Create an encryptor only if age recipients are configured; otherwise a
	// nil Encryptor is passed to the snapshot manager.
	var encryptor backup.Encryptor
	if len(app.Config.AgeRecipients) > 0 {
		cryptoEncryptor, err := crypto.NewEncryptor(app.Config.AgeRecipients)
		if err != nil {
			return fmt.Errorf("creating encryptor: %w", err)
		}
		encryptor = cryptoEncryptor
	}
	snapshotManager := backup.NewSnapshotManager(app.Repositories, app.S3Client, encryptor)
	snapshotID, err := snapshotManager.CreateSnapshot(ctx, hostname, app.Globals.Version)
	if err != nil {
		return fmt.Errorf("creating snapshot: %w", err)
	}
	log.Info("Created snapshot", "snapshot_id", snapshotID)
	for _, dir := range resolvedDirs {
		// Check for cancellation between directories; Scan is also given ctx
		// so it can abort mid-directory.
		select {
		case <-ctx.Done():
			log.Info("Backup cancelled")
			return ctx.Err()
		default:
		}
		log.Info("Scanning directory", "path", dir)
		result, err := scanner.Scan(ctx, dir, snapshotID)
		if err != nil {
			return fmt.Errorf("failed to scan %s: %w", dir, err)
		}
		totalFiles += result.FilesScanned
		totalBytes += result.BytesScanned
		totalChunks += result.ChunksCreated
		totalBlobs += result.BlobsCreated
		log.Info("Directory scan complete",
			"path", dir,
			"files", result.FilesScanned,
			"files_skipped", result.FilesSkipped,
			"bytes", result.BytesScanned,
			"bytes_skipped", result.BytesSkipped,
			"chunks", result.ChunksCreated,
			"blobs", result.BlobsCreated,
			"duration", result.EndTime.Sub(result.StartTime))
	}
	// Update snapshot statistics with the aggregated totals.
	stats := backup.BackupStats{
		FilesScanned:  totalFiles,
		BytesScanned:  totalBytes,
		ChunksCreated: totalChunks,
		BlobsCreated:  totalBlobs,
		BytesUploaded: totalBytes, // TODO: Track actual uploaded bytes
	}
	if err := snapshotManager.UpdateSnapshotStats(ctx, snapshotID, stats); err != nil {
		return fmt.Errorf("updating snapshot stats: %w", err)
	}
	// Mark snapshot as complete.
	if err := snapshotManager.CompleteSnapshot(ctx, snapshotID); err != nil {
		return fmt.Errorf("completing snapshot: %w", err)
	}
	// Export snapshot metadata without closing the database — the export
	// function is expected to handle its own database connection.
	if err := snapshotManager.ExportSnapshotMetadata(ctx, app.Config.IndexPath, snapshotID); err != nil {
		return fmt.Errorf("exporting snapshot metadata: %w", err)
	}
	log.Notice("Backup complete",
		"snapshot_id", snapshotID,
		"total_files", totalFiles,
		"total_bytes", totalBytes,
		"total_chunks", totalChunks,
		"total_blobs", totalBlobs)
	if opts.Prune {
		log.Info("Pruning enabled - will delete old snapshots after backup")
		// TODO: Implement pruning
	}
	return nil
}

View File

@@ -4,6 +4,14 @@ import (
"github.com/spf13/cobra"
)
// RootFlags holds the global flags shared by every subcommand.
type RootFlags struct {
	Verbose bool // set by the persistent --verbose / -v flag
	Debug   bool // set by the persistent --debug flag
}

// rootFlags is the package-level storage cobra binds the persistent flags
// to; read it via GetRootFlags.
var rootFlags RootFlags
// NewRootCommand creates the root cobra command
func NewRootCommand() *cobra.Command {
cmd := &cobra.Command{
@@ -15,6 +23,10 @@ on the source system.`,
SilenceUsage: true,
}
// Add global flags
cmd.PersistentFlags().BoolVarP(&rootFlags.Verbose, "verbose", "v", false, "Enable verbose output")
cmd.PersistentFlags().BoolVar(&rootFlags.Debug, "debug", false, "Enable debug output")
// Add subcommands
cmd.AddCommand(
NewBackupCommand(),
@@ -27,3 +39,8 @@ on the source system.`,
return cmd
}
// GetRootFlags reports the global flag values that cobra populated from the
// persistent command-line flags. The struct is returned by value, so callers
// receive a copy.
func GetRootFlags() RootFlags {
	flags := rootFlags
	return flags
}