vaultik/internal/backup/snapshot.go

package backup
// Snapshot Metadata Export Process
// ================================
//
// The snapshot metadata contains all information needed to restore a backup.
// Instead of creating a custom format, we use a trimmed copy of the SQLite
// database containing only data relevant to the current snapshot.
//
// Process Overview:
// 1. After all files/chunks/blobs are backed up, create a snapshot record
// 2. Close the main database to ensure consistency
// 3. Copy the entire database to a temporary file
// 4. Open the temporary database
// 5. Delete all snapshots except the current one
// 6. Delete all orphaned records:
// - Files not referenced by any remaining snapshot
// - Chunks not referenced by any remaining files
// - Blobs not containing any remaining chunks
// - All related mapping tables (file_chunks, chunk_files, blob_chunks)
// 7. Close the temporary database
// 8. Use sqlite3 to dump the cleaned database to SQL
// 9. Delete the temporary database file
// 10. Compress the SQL dump with zstd
// 11. Encrypt the compressed dump with age (if encryption is enabled)
// 12. Upload to S3 as: metadata/{snapshot-id}/db.zst[.age], alongside a
//     blob manifest at metadata/{snapshot-id}/manifest.json.zst
// 13. Reopen the main database
//
// Advantages of this approach:
// - No custom metadata format needed
// - Reuses existing database schema and relationships
// - SQL dumps are portable and compress well
// - Restore process can simply execute the SQL
// - Atomic and consistent snapshot of all metadata
//
// TODO: Future improvements:
// - Implement incremental snapshots that reference previous snapshots
// - Add snapshot manifest with additional metadata (size, chunk count, etc.)
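//
// For illustration, the restore side can reverse steps 10-12 with the same
// primitives; a minimal sketch, assuming the metadata object has already
// been downloaded and age-decrypted to dump.zst:
//
//	in, err := os.Open("dump.zst")
//	if err != nil { /* handle */ }
//	r, err := zstd.NewReader(in)
//	if err != nil { /* handle */ }
//	defer r.Close()
//	cmd := exec.Command("sqlite3", "restored.db")
//	cmd.Stdin = r // stream the SQL dump straight into a fresh database
//	err = cmd.Run()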
import (
"bytes"
"context"
"database/sql"
"encoding/json"
"fmt"
"io"
"os"
"os/exec"
"path/filepath"
"runtime"
"time"
"git.eeqj.de/sneak/vaultik/internal/database"
"git.eeqj.de/sneak/vaultik/internal/log"
"github.com/klauspost/compress/zstd"
)
// SnapshotManager handles snapshot creation and metadata export
type SnapshotManager struct {
repos *database.Repositories
s3Client S3Client
encryptor Encryptor
}
// Encryptor interface for snapshot encryption
type Encryptor interface {
Encrypt(data []byte) ([]byte, error)
}
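
// A minimal sketch of an Encryptor backed by filippo.io/age (illustrative
// only; vaultik's real implementation lives elsewhere):
//
//	type ageEncryptor struct {
//		recipients []age.Recipient
//	}
//
//	func (e *ageEncryptor) Encrypt(data []byte) ([]byte, error) {
//		var buf bytes.Buffer
//		w, err := age.Encrypt(&buf, e.recipients...)
//		if err != nil {
//			return nil, err
//		}
//		if _, err := w.Write(data); err != nil {
//			_ = w.Close()
//			return nil, err
//		}
//		if err := w.Close(); err != nil {
//			return nil, err
//		}
//		return buf.Bytes(), nil
//	}
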
// NewSnapshotManager creates a new snapshot manager
func NewSnapshotManager(repos *database.Repositories, s3Client S3Client, encryptor Encryptor) *SnapshotManager {
return &SnapshotManager{
repos: repos,
s3Client: s3Client,
encryptor: encryptor,
}
}
// CreateSnapshot creates a new snapshot record in the database at the start
// of a backup. Snapshot IDs have the form {hostname}-{YYYYMMDD-HHMMSS},
// e.g. "myhost-20250722-074339".
func (sm *SnapshotManager) CreateSnapshot(ctx context.Context, hostname, version string) (string, error) {
snapshotID := fmt.Sprintf("%s-%s", hostname, time.Now().Format("20060102-150405"))
snapshot := &database.Snapshot{
ID: snapshotID,
Hostname: hostname,
VaultikVersion: version,
StartedAt: time.Now(),
CompletedAt: nil, // Not completed yet
FileCount: 0,
ChunkCount: 0,
BlobCount: 0,
TotalSize: 0,
BlobSize: 0,
CompressionRatio: 1.0,
}
err := sm.repos.WithTx(ctx, func(ctx context.Context, tx *sql.Tx) error {
return sm.repos.Snapshots.Create(ctx, tx, snapshot)
})
if err != nil {
return "", fmt.Errorf("creating snapshot: %w", err)
}
log.Info("Created snapshot", "snapshot_id", snapshotID)
return snapshotID, nil
}
// UpdateSnapshotStats updates the statistics for a snapshot during backup
func (sm *SnapshotManager) UpdateSnapshotStats(ctx context.Context, snapshotID string, stats BackupStats) error {
err := sm.repos.WithTx(ctx, func(ctx context.Context, tx *sql.Tx) error {
return sm.repos.Snapshots.UpdateCounts(ctx, tx, snapshotID,
int64(stats.FilesScanned),
int64(stats.ChunksCreated),
int64(stats.BlobsCreated),
stats.BytesScanned,
stats.BytesUploaded,
)
})
if err != nil {
return fmt.Errorf("updating snapshot stats: %w", err)
}
return nil
}
// CompleteSnapshot marks a snapshot as completed. Exporting its metadata
// to S3 is a separate step; see ExportSnapshotMetadata.
func (sm *SnapshotManager) CompleteSnapshot(ctx context.Context, snapshotID string) error {
// Mark the snapshot as completed
err := sm.repos.WithTx(ctx, func(ctx context.Context, tx *sql.Tx) error {
return sm.repos.Snapshots.MarkComplete(ctx, tx, snapshotID)
})
if err != nil {
return fmt.Errorf("marking snapshot complete: %w", err)
}
log.Info("Completed snapshot", "snapshot_id", snapshotID)
return nil
}
// ExportSnapshotMetadata exports snapshot metadata to S3
//
// This method executes the complete snapshot metadata export process:
// 1. Creates a temporary directory for working files
// 2. Copies the main database to preserve its state
// 3. Cleans the copy to contain only current snapshot data
// 4. Dumps the cleaned database to SQL
// 5. Compresses the SQL dump with zstd
// 6. Encrypts the compressed data (if encryption is enabled)
// 7. Generates a blob manifest from the cleaned database
// 8. Uploads to S3 at: metadata/{snapshot-id}/db.zst[.age], plus the
//    manifest at metadata/{snapshot-id}/manifest.json.zst
//
// The caller is responsible for:
// - Ensuring the main database is closed before calling this method
// - Reopening the main database after this method returns
//
// This ensures database consistency during the copy operation.
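//
// A sketch of the full call order from the backup driver (surrounding
// variable names are illustrative, not vaultik's actual code):
//
//	id, err := sm.CreateSnapshot(ctx, hostname, version)
//	// ... back up files, chunks, and blobs ...
//	err = sm.UpdateSnapshotStats(ctx, id, stats)
//	err = sm.CompleteSnapshot(ctx, id)
//	err = db.Close() // caller closes the main database first
//	err = sm.ExportSnapshotMetadata(ctx, dbPath, id)
//	db, err = database.New(ctx, dbPath) // then reopens it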
func (sm *SnapshotManager) ExportSnapshotMetadata(ctx context.Context, dbPath string, snapshotID string) error {
log.Info("Exporting snapshot metadata", "snapshot_id", snapshotID)
// Create temp directory for all temporary files
tempDir, err := os.MkdirTemp("", "vaultik-snapshot-*")
if err != nil {
return fmt.Errorf("creating temp dir: %w", err)
}
defer func() {
if err := os.RemoveAll(tempDir); err != nil {
log.Debug("Failed to remove temp dir", "path", tempDir, "error", err)
}
}()
// Step 1: Copy database to temp file
// The main database should be closed at this point
tempDBPath := filepath.Join(tempDir, "snapshot.db")
if err := copyFile(dbPath, tempDBPath); err != nil {
return fmt.Errorf("copying database: %w", err)
}
// Step 2: Clean the temp database to only contain current snapshot data
if err := sm.cleanSnapshotDB(ctx, tempDBPath, snapshotID); err != nil {
return fmt.Errorf("cleaning snapshot database: %w", err)
}
// Step 3: Dump the cleaned database to SQL
dumpPath := filepath.Join(tempDir, "snapshot.sql")
if err := sm.dumpDatabase(tempDBPath, dumpPath); err != nil {
return fmt.Errorf("dumping database: %w", err)
}
// Step 4: Compress the SQL dump
compressedPath := filepath.Join(tempDir, "snapshot.sql.zst")
if err := sm.compressDump(dumpPath, compressedPath); err != nil {
return fmt.Errorf("compressing dump: %w", err)
}
// Step 5: Read compressed data for encryption/upload
compressedData, err := os.ReadFile(compressedPath)
if err != nil {
return fmt.Errorf("reading compressed dump: %w", err)
}
// Step 6: Encrypt if encryptor is available
finalData := compressedData
if sm.encryptor != nil {
encrypted, err := sm.encryptor.Encrypt(compressedData)
if err != nil {
return fmt.Errorf("encrypting snapshot: %w", err)
}
finalData = encrypted
}
	// Step 7: Generate blob manifest from the cleaned temp database
	// (generateBlobManifest opens and closes its own handle on it)
blobManifest, err := sm.generateBlobManifest(ctx, tempDBPath, snapshotID)
if err != nil {
return fmt.Errorf("generating blob manifest: %w", err)
}
	// Step 8: Upload to S3 under the snapshot's metadata prefix
	// Upload database backup (encrypted when an encryptor is configured)
dbKey := fmt.Sprintf("metadata/%s/db.zst", snapshotID)
if sm.encryptor != nil {
dbKey += ".age"
}
if err := sm.s3Client.PutObject(ctx, dbKey, bytes.NewReader(finalData)); err != nil {
return fmt.Errorf("uploading snapshot database: %w", err)
}
// Upload blob manifest (unencrypted, compressed)
manifestKey := fmt.Sprintf("metadata/%s/manifest.json.zst", snapshotID)
if err := sm.s3Client.PutObject(ctx, manifestKey, bytes.NewReader(blobManifest)); err != nil {
return fmt.Errorf("uploading blob manifest: %w", err)
}
log.Info("Uploaded snapshot metadata",
"snapshot_id", snapshotID,
"db_size", len(finalData),
"manifest_size", len(blobManifest))
return nil
}
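
// The resulting S3 layout for one snapshot (hostname and timestamp are
// illustrative):
//
//	metadata/myhost-20250722-074339/db.zst.age
//	metadata/myhost-20250722-074339/manifest.json.zst
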
// cleanSnapshotDB removes all data except for the specified snapshot.
//
// All deletions run inside a single transaction, in dependency order to
// maintain referential integrity:
//
// 1. Delete all snapshots except the current one
// 2. Delete files not referenced by snapshot_files for this snapshot
// 3. file_chunks rows for deleted files are removed via ON DELETE CASCADE
// 4. Delete chunk_files rows pointing at deleted files
// 5. Delete chunks with no remaining file_chunks references
// 6. Delete blob_chunks rows pointing at deleted chunks
// 7. Delete blobs not referenced by snapshot_blobs for this snapshot
// 8. Delete snapshot_files and snapshot_blobs rows for other snapshots
func (sm *SnapshotManager) cleanSnapshotDB(ctx context.Context, dbPath string, snapshotID string) error {
// Open the temp database
db, err := database.New(ctx, dbPath)
if err != nil {
return fmt.Errorf("opening temp database: %w", err)
}
defer func() {
if err := db.Close(); err != nil {
log.Debug("Failed to close temp database", "error", err)
}
}()
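
	// Note: step 3 below relies on ON DELETE CASCADE for file_chunks,
	// which SQLite only enforces when the connection has foreign keys
	// enabled. database.New is assumed to take care of this; if it did
	// not, it would have to happen here, outside the transaction:
	//
	//	if _, err := db.ExecContext(ctx, "PRAGMA foreign_keys = ON"); err != nil {
	//		return fmt.Errorf("enabling foreign keys: %w", err)
	//	}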
// Start a transaction
tx, err := db.BeginTx(ctx, nil)
if err != nil {
return fmt.Errorf("beginning transaction: %w", err)
}
defer func() {
if rbErr := tx.Rollback(); rbErr != nil && rbErr != sql.ErrTxDone {
log.Debug("Failed to rollback transaction", "error", rbErr)
}
}()
// Step 1: Delete all other snapshots
_, err = tx.ExecContext(ctx, "DELETE FROM snapshots WHERE id != ?", snapshotID)
if err != nil {
return fmt.Errorf("deleting other snapshots: %w", err)
}
// Step 2: Delete files not in this snapshot
_, err = tx.ExecContext(ctx, `
DELETE FROM files
WHERE path NOT IN (
SELECT file_path FROM snapshot_files WHERE snapshot_id = ?
)`, snapshotID)
if err != nil {
return fmt.Errorf("deleting orphaned files: %w", err)
}
// Step 3: file_chunks will be deleted via CASCADE from files
// Step 4: Delete chunk_files for deleted files
_, err = tx.ExecContext(ctx, `
DELETE FROM chunk_files
WHERE file_path NOT IN (
SELECT path FROM files
)`)
if err != nil {
return fmt.Errorf("deleting orphaned chunk_files: %w", err)
}
// Step 5: Delete chunks with no remaining file references
_, err = tx.ExecContext(ctx, `
DELETE FROM chunks
WHERE chunk_hash NOT IN (
SELECT DISTINCT chunk_hash FROM file_chunks
)`)
if err != nil {
return fmt.Errorf("deleting orphaned chunks: %w", err)
}
// Step 6: Delete blob_chunks for deleted chunks
_, err = tx.ExecContext(ctx, `
DELETE FROM blob_chunks
WHERE chunk_hash NOT IN (
SELECT chunk_hash FROM chunks
)`)
if err != nil {
return fmt.Errorf("deleting orphaned blob_chunks: %w", err)
}
// Step 7: Delete blobs not in this snapshot
_, err = tx.ExecContext(ctx, `
DELETE FROM blobs
WHERE blob_hash NOT IN (
SELECT blob_hash FROM snapshot_blobs WHERE snapshot_id = ?
)`, snapshotID)
if err != nil {
return fmt.Errorf("deleting orphaned blobs: %w", err)
}
// Step 8: Delete orphaned snapshot_files and snapshot_blobs
_, err = tx.ExecContext(ctx, "DELETE FROM snapshot_files WHERE snapshot_id != ?", snapshotID)
if err != nil {
return fmt.Errorf("deleting orphaned snapshot_files: %w", err)
}
_, err = tx.ExecContext(ctx, "DELETE FROM snapshot_blobs WHERE snapshot_id != ?", snapshotID)
if err != nil {
return fmt.Errorf("deleting orphaned snapshot_blobs: %w", err)
}
// Commit transaction
if err := tx.Commit(); err != nil {
return fmt.Errorf("committing transaction: %w", err)
}
return nil
}
// dumpDatabase creates a SQL dump of the database using the sqlite3 CLI
func (sm *SnapshotManager) dumpDatabase(dbPath, dumpPath string) error {
	cmd := exec.Command("sqlite3", dbPath, ".dump")
	var stderr bytes.Buffer
	cmd.Stderr = &stderr
	output, err := cmd.Output()
	if err != nil {
		return fmt.Errorf("running sqlite3 dump: %w (stderr: %q)", err, stderr.String())
	}
	if err := os.WriteFile(dumpPath, output, 0644); err != nil {
		return fmt.Errorf("writing dump file: %w", err)
	}
	return nil
}
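
// For very large metadata databases the dump could be streamed to disk
// rather than buffered in memory; a sketch using the same sqlite3 CLI:
//
//	f, err := os.Create(dumpPath)
//	if err != nil {
//		return err
//	}
//	defer func() { _ = f.Close() }()
//	cmd := exec.Command("sqlite3", dbPath, ".dump")
//	cmd.Stdout = f
//	if err := cmd.Run(); err != nil {
//		return fmt.Errorf("running sqlite3 dump: %w", err)
//	}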
// compressDump compresses the SQL dump using zstd
func (sm *SnapshotManager) compressDump(inputPath, outputPath string) error {
input, err := os.Open(inputPath)
if err != nil {
return fmt.Errorf("opening input file: %w", err)
}
defer func() {
if err := input.Close(); err != nil {
log.Debug("Failed to close input file", "error", err)
}
}()
output, err := os.Create(outputPath)
if err != nil {
return fmt.Errorf("creating output file: %w", err)
}
defer func() {
if err := output.Close(); err != nil {
log.Debug("Failed to close output file", "error", err)
}
}()
// Create zstd encoder with good compression and multithreading
zstdWriter, err := zstd.NewWriter(output,
zstd.WithEncoderLevel(zstd.SpeedBetterCompression),
zstd.WithEncoderConcurrency(runtime.NumCPU()),
zstd.WithWindowSize(4<<20), // 4MB window for metadata files
)
if err != nil {
return fmt.Errorf("creating zstd writer: %w", err)
}
	if _, err := io.Copy(zstdWriter, input); err != nil {
		_ = zstdWriter.Close()
		return fmt.Errorf("compressing data: %w", err)
	}
	// Close explicitly rather than in a defer: Close flushes the final
	// zstd frame, so a failure here means truncated output and must be
	// surfaced to the caller.
	if err := zstdWriter.Close(); err != nil {
		return fmt.Errorf("closing zstd writer: %w", err)
	}
	return nil
}
// copyFile copies a file from src to dst
func copyFile(src, dst string) error {
sourceFile, err := os.Open(src)
if err != nil {
return err
}
defer func() {
if err := sourceFile.Close(); err != nil {
log.Debug("Failed to close source file", "error", err)
}
}()
	destFile, err := os.Create(dst)
	if err != nil {
		return err
	}
	if _, err := io.Copy(destFile, sourceFile); err != nil {
		_ = destFile.Close()
		return err
	}
	// Report Close errors on the destination: a failed close can mean
	// the copy is incomplete on disk.
	return destFile.Close()
}
// generateBlobManifest creates a compressed JSON list of all blobs in the snapshot
func (sm *SnapshotManager) generateBlobManifest(ctx context.Context, dbPath string, snapshotID string) ([]byte, error) {
// Open the cleaned database using the database package
db, err := database.New(ctx, dbPath)
if err != nil {
return nil, fmt.Errorf("opening database: %w", err)
}
	defer func() {
		if err := db.Close(); err != nil {
			log.Debug("Failed to close manifest database", "error", err)
		}
	}()
// Create repositories to access the data
repos := database.NewRepositories(db)
// Get all blobs for this snapshot
blobs, err := repos.Snapshots.GetBlobHashes(ctx, snapshotID)
if err != nil {
return nil, fmt.Errorf("getting snapshot blobs: %w", err)
}
// Create manifest structure
manifest := struct {
SnapshotID string `json:"snapshot_id"`
Timestamp string `json:"timestamp"`
BlobCount int `json:"blob_count"`
Blobs []string `json:"blobs"`
}{
SnapshotID: snapshotID,
Timestamp: time.Now().UTC().Format(time.RFC3339),
BlobCount: len(blobs),
Blobs: blobs,
}
// Marshal to JSON
jsonData, err := json.MarshalIndent(manifest, "", " ")
if err != nil {
return nil, fmt.Errorf("marshaling manifest: %w", err)
}
// Compress with zstd
compressed, err := compressData(jsonData)
if err != nil {
return nil, fmt.Errorf("compressing manifest: %w", err)
}
log.Info("Generated blob manifest",
"snapshot_id", snapshotID,
"blob_count", len(blobs),
"json_size", len(jsonData),
"compressed_size", len(compressed))
return compressed, nil
}
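
// The manifest's JSON shape, with illustrative values:
//
//	{
//	  "snapshot_id": "myhost-20250722-074339",
//	  "timestamp": "2025-07-22T05:43:39Z",
//	  "blob_count": 2,
//	  "blobs": ["<hash-1>", "<hash-2>"]
//	}
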
// compressData compresses data using zstd
func compressData(data []byte) ([]byte, error) {
var buf bytes.Buffer
w, err := zstd.NewWriter(&buf,
zstd.WithEncoderLevel(zstd.SpeedBetterCompression),
)
if err != nil {
return nil, err
}
if _, err := w.Write(data); err != nil {
_ = w.Close()
return nil, err
}
if err := w.Close(); err != nil {
return nil, err
}
return buf.Bytes(), nil
}
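
// The matching decompression for restore tooling, as a sketch (zstd's
// Decoder.Close returns nothing, so a bare defer is fine):
//
//	func decompressData(data []byte) ([]byte, error) {
//		r, err := zstd.NewReader(bytes.NewReader(data))
//		if err != nil {
//			return nil, err
//		}
//		defer r.Close()
//		return io.ReadAll(r)
//	}
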
// BackupStats contains statistics from a backup operation
type BackupStats struct {
FilesScanned int
BytesScanned int64
ChunksCreated int
BlobsCreated int
BytesUploaded int64
}