Fix manifest generation to not encrypt manifests

- Manifests are now only compressed (not encrypted) so pruning operations can work without private keys
- Updated generateBlobManifest to use zstd compression directly
- Updated prune command to handle unencrypted manifests
- Updated snapshot list command to handle new manifest format
- Updated documentation to reflect manifest.json.zst (not .age)
- Removed unnecessary VAULTIK_PRIVATE_KEY check from prune command
This commit is contained in:
2025-07-26 02:54:52 +02:00
parent 1d027bde57
commit fb220685a2
4 changed files with 352 additions and 34 deletions

View File

@@ -2,8 +2,9 @@ package cli
import (
"context"
"encoding/json"
"fmt"
"os"
"strings"
"git.eeqj.de/sneak/vaultik/internal/backup"
"git.eeqj.de/sneak/vaultik/internal/config"
@@ -11,6 +12,8 @@ import (
"git.eeqj.de/sneak/vaultik/internal/globals"
"git.eeqj.de/sneak/vaultik/internal/log"
"git.eeqj.de/sneak/vaultik/internal/s3"
"github.com/dustin/go-humanize"
"github.com/klauspost/compress/zstd"
"github.com/spf13/cobra"
"go.uber.org/fx"
)
@@ -40,20 +43,14 @@ func NewPruneCommand() *cobra.Command {
Long: `Delete blobs that are no longer referenced by any snapshot.
This command will:
1. Download all snapshot metadata from S3
2. Build a list of all referenced blobs
3. List all blobs in S3
4. Delete any blobs not referenced by any snapshot
1. Download the manifest from the last successful snapshot
2. List all blobs in S3
3. Delete any blobs not referenced in the manifest
Config is located at /etc/vaultik/config.yml by default, but can be overridden by
specifying a path using --config or by setting VAULTIK_CONFIG to a path.`,
Args: cobra.NoArgs,
RunE: func(cmd *cobra.Command, args []string) error {
// Check for private key
if os.Getenv("VAULTIK_PRIVATE_KEY") == "" {
return fmt.Errorf("VAULTIK_PRIVATE_KEY environment variable must be set")
}
// Use unified config resolution
configPath, err := ResolveConfigPath()
if err != nil {
@@ -129,19 +126,188 @@ func (app *PruneApp) runPrune(ctx context.Context, opts *PruneOptions) error {
"dry_run", opts.DryRun,
)
// TODO: Implement the actual prune logic
// 1. Download all snapshot metadata
// 2. Build set of referenced blobs
// 3. List all blobs in S3
// 4. Delete unreferenced blobs
fmt.Printf("Pruning bucket %s with prefix %s\n", app.Config.S3.Bucket, app.Config.S3.Prefix)
if opts.DryRun {
fmt.Println("Running in dry-run mode")
// Step 1: Get the latest complete snapshot from the database
log.Info("Getting latest snapshot from database")
snapshots, err := app.Repositories.Snapshots.ListRecent(ctx, 1)
if err != nil {
return fmt.Errorf("listing snapshots: %w", err)
}
// For now, just show we're using the config properly
log.Info("Prune operation completed successfully")
if len(snapshots) == 0 {
return fmt.Errorf("no snapshots found in database")
}
latestSnapshot := snapshots[0]
if latestSnapshot.CompletedAt == nil {
return fmt.Errorf("latest snapshot %s is incomplete", latestSnapshot.ID)
}
log.Info("Found latest snapshot",
"id", latestSnapshot.ID,
"completed_at", latestSnapshot.CompletedAt.Format("2006-01-02 15:04:05"))
// Step 2: Find and download the manifest from the last successful snapshot in S3
log.Info("Finding last successful snapshot in S3")
metadataPrefix := "metadata/"
// List all snapshots in S3
var s3Snapshots []string
objectCh := app.S3Client.ListObjectsStream(ctx, metadataPrefix, false)
for obj := range objectCh {
if obj.Err != nil {
return fmt.Errorf("listing metadata objects: %w", obj.Err)
}
// Extract snapshot ID from path like "metadata/hostname-20240115-143052Z/manifest.json.zst"
parts := strings.Split(obj.Key, "/")
if len(parts) >= 2 && strings.HasSuffix(obj.Key, "/manifest.json.zst") {
s3Snapshots = append(s3Snapshots, parts[1])
}
}
if len(s3Snapshots) == 0 {
return fmt.Errorf("no snapshot manifests found in S3")
}
// Find the most recent snapshot (they're named with timestamps)
var lastS3Snapshot string
for _, snap := range s3Snapshots {
if lastS3Snapshot == "" || snap > lastS3Snapshot {
lastS3Snapshot = snap
}
}
log.Info("Found last S3 snapshot", "id", lastS3Snapshot)
// Step 3: Verify the last S3 snapshot matches the latest DB snapshot
if lastS3Snapshot != latestSnapshot.ID {
return fmt.Errorf("latest snapshot in database (%s) does not match last successful snapshot in S3 (%s)",
latestSnapshot.ID, lastS3Snapshot)
}
// Step 4: Download and parse the manifest
log.Info("Downloading manifest", "snapshot_id", lastS3Snapshot)
manifest, err := app.downloadManifest(ctx, lastS3Snapshot)
if err != nil {
return fmt.Errorf("downloading manifest: %w", err)
}
log.Info("Manifest loaded", "blob_count", len(manifest.Blobs))
// Step 5: Build set of referenced blobs
referencedBlobs := make(map[string]bool)
for _, blobHash := range manifest.Blobs {
referencedBlobs[blobHash] = true
}
// Step 6: List all blobs in S3
log.Info("Listing all blobs in S3")
blobPrefix := "blobs/"
var totalBlobs int
var unreferencedBlobs []s3.ObjectInfo
var unreferencedSize int64
objectCh = app.S3Client.ListObjectsStream(ctx, blobPrefix, true)
for obj := range objectCh {
if obj.Err != nil {
return fmt.Errorf("listing blobs: %w", obj.Err)
}
totalBlobs++
// Extract blob hash from path like "blobs/ca/fe/cafebabe..."
parts := strings.Split(obj.Key, "/")
if len(parts) == 4 {
blobHash := parts[3]
if !referencedBlobs[blobHash] {
unreferencedBlobs = append(unreferencedBlobs, obj)
unreferencedSize += obj.Size
}
}
}
log.Info("Blob scan complete",
"total_blobs", totalBlobs,
"referenced_blobs", len(referencedBlobs),
"unreferenced_blobs", len(unreferencedBlobs),
"unreferenced_size", humanize.Bytes(uint64(unreferencedSize)))
// Step 7: Delete or report unreferenced blobs
if opts.DryRun {
fmt.Printf("\nDry run mode - would delete %d unreferenced blobs\n", len(unreferencedBlobs))
fmt.Printf("Total size of blobs to delete: %s\n", humanize.Bytes(uint64(unreferencedSize)))
if len(unreferencedBlobs) > 0 {
log.Debug("Unreferenced blobs found", "count", len(unreferencedBlobs))
for _, obj := range unreferencedBlobs {
log.Debug("Would delete blob", "key", obj.Key, "size", humanize.Bytes(uint64(obj.Size)))
}
}
} else {
if len(unreferencedBlobs) == 0 {
fmt.Println("No unreferenced blobs to delete")
return nil
}
fmt.Printf("\nDeleting %d unreferenced blobs (%s)...\n",
len(unreferencedBlobs), humanize.Bytes(uint64(unreferencedSize)))
deletedCount := 0
deletedSize := int64(0)
for _, obj := range unreferencedBlobs {
if err := app.S3Client.RemoveObject(ctx, obj.Key); err != nil {
log.Error("Failed to delete blob", "key", obj.Key, "error", err)
continue
}
deletedCount++
deletedSize += obj.Size
// Show progress every 100 blobs
if deletedCount%100 == 0 {
fmt.Printf(" Deleted %d/%d blobs (%s)...\n",
deletedCount, len(unreferencedBlobs),
humanize.Bytes(uint64(deletedSize)))
}
}
fmt.Printf("\nDeleted %d blobs (%s)\n", deletedCount, humanize.Bytes(uint64(deletedSize)))
}
log.Info("Prune operation completed successfully")
return nil
}
}
// BlobManifest is the JSON document stored (zstd-compressed) alongside a
// snapshot's metadata. The prune command treats Blobs as the complete set of
// blobs to keep.
type BlobManifest struct {
	// SnapshotID is serialized under the "snapshot_id" key.
	SnapshotID string `json:"snapshot_id"`
	// Timestamp is serialized under the "timestamp" key; it is kept as an
	// opaque string here (format not enforced by this type).
	Timestamp string `json:"timestamp"`
	// BlobCount is serialized under the "blob_count" key.
	// NOTE(review): presumably equal to len(Blobs) — confirm against the writer.
	BlobCount int `json:"blob_count"`
	// Blobs lists blob hashes; prune compares each against the final path
	// component of the objects under "blobs/".
	Blobs []string `json:"blobs"`
}
// downloadManifest fetches and decodes the blob manifest of a snapshot.
//
// The manifest is read from "metadata/<snapshotID>/manifest.json.zst",
// decompressed with zstd and parsed as JSON into a BlobManifest. No decryption
// step is performed, so no private key is needed.
//
// A non-nil error is returned when the object cannot be fetched, the zstd
// reader cannot be created, or the JSON cannot be decoded; in those cases the
// returned manifest is nil.
func (app *PruneApp) downloadManifest(ctx context.Context, snapshotID string) (*BlobManifest, error) {
	manifestPath := fmt.Sprintf("metadata/%s/manifest.json.zst", snapshotID)

	// Report the object key here instead of "downloading manifest": the
	// caller already wraps this function's error with that phrase, so
	// repeating it would yield a stuttering message that never names the key.
	reader, err := app.S3Client.GetObject(ctx, manifestPath)
	if err != nil {
		return nil, fmt.Errorf("fetching %q: %w", manifestPath, err)
	}
	// The stream is only read from, so a Close error cannot lose data and is
	// deliberately ignored.
	defer func() { _ = reader.Close() }()

	// Decompress using zstd.
	zr, err := zstd.NewReader(reader)
	if err != nil {
		return nil, fmt.Errorf("creating zstd reader: %w", err)
	}
	defer zr.Close()

	// Decode the JSON manifest straight from the decompressed stream.
	var manifest BlobManifest
	if err := json.NewDecoder(zr).Decode(&manifest); err != nil {
		return nil, fmt.Errorf("decoding manifest: %w", err)
	}

	return &manifest, nil
}