package vaultik

import (
	"encoding/json"
	"fmt"
	"strings"

	"github.com/dustin/go-humanize"
	"sneak.berlin/go/vaultik/internal/log"
)

// PruneOptions contains options for the prune command.
type PruneOptions struct {
	// Force skips the interactive confirmation prompt.
	Force bool
	// JSON emits the result as JSON instead of human-readable text. It also
	// suppresses the confirmation prompt, since prompt text would corrupt
	// the JSON output.
	JSON bool
}

// PruneBlobsResult contains the result of a blob prune operation.
type PruneBlobsResult struct {
	// BlobsFound is the number of unreferenced blobs discovered.
	BlobsFound int `json:"blobs_found"`
	// BlobsDeleted is the number of blobs successfully deleted.
	BlobsDeleted int `json:"blobs_deleted"`
	// BlobsFailed is the number of unreferenced blobs that could not be deleted.
	BlobsFailed int `json:"blobs_failed,omitempty"`
	// BytesFreed is the summed listed size of the deleted blobs.
	BytesFreed int64 `json:"bytes_freed"`
}

// PruneBlobs removes unreferenced blobs from storage.
//
// It builds the set of blob hashes referenced by remote manifests, lists
// every blob under "blobs/", and deletes those not in the referenced set.
// Unless opts.Force or opts.JSON is set, the user is asked to confirm before
// anything is deleted. A nil opts is treated as the zero-value options.
//
// An error is returned when listing or manifest retrieval fails, or when the
// JSON result cannot be encoded. Individual deletion failures are counted in
// the result and reported, but do not cause an error return.
func (v *Vaultik) PruneBlobs(opts *PruneOptions) error {
	if opts == nil {
		// Treat a missing options struct as "interactive, text output".
		opts = &PruneOptions{}
	}

	log.Info("Starting prune operation")

	allBlobsReferenced, err := v.collectReferencedBlobs()
	if err != nil {
		return err
	}

	allBlobs, err := v.listAllRemoteBlobs()
	if err != nil {
		return err
	}

	unreferencedBlobs, totalSize := v.findUnreferencedBlobs(allBlobs, allBlobsReferenced)

	result := &PruneBlobsResult{BlobsFound: len(unreferencedBlobs)}

	if len(unreferencedBlobs) == 0 {
		log.Info("No unreferenced blobs found")
		if opts.JSON {
			return v.outputPruneBlobsJSON(result)
		}
		v.printlnStdout("No unreferenced blobs to remove.")
		return nil
	}

	log.Info("Found unreferenced blobs",
		"count", len(unreferencedBlobs),
		"total_size", humanize.Bytes(uint64(totalSize)))
	if !opts.JSON {
		v.printfStdout("Found %d unreferenced blob(s) totaling %s\n",
			len(unreferencedBlobs), humanize.Bytes(uint64(totalSize)))
	}

	if !opts.Force && !opts.JSON {
		v.printfStdout("\nDelete %d unreferenced blob(s)? [y/N] ", len(unreferencedBlobs))
		var confirm string
		if _, err := v.scanStdin(&confirm); err != nil {
			// No readable answer is treated the same as "no".
			v.printlnStdout("Cancelled")
			return nil
		}
		if !strings.EqualFold(confirm, "y") {
			v.printlnStdout("Cancelled")
			return nil
		}
	}

	v.deleteUnreferencedBlobs(unreferencedBlobs, allBlobs, result)

	if opts.JSON {
		return v.outputPruneBlobsJSON(result)
	}

	v.printfStdout("\nDeleted %d blob(s) totaling %s\n",
		result.BlobsDeleted, humanize.Bytes(uint64(result.BytesFreed)))
	if result.BlobsFailed > 0 {
		v.printfStdout("Failed to delete %d blob(s)\n", result.BlobsFailed)
	}

	return nil
}

// collectReferencedBlobs downloads all manifests and returns the set of
// referenced blob hashes.
//
// Any manifest that cannot be downloaded aborts the operation with an error.
// Skipping it would make every blob referenced only by that manifest look
// unreferenced, and the caller would then delete live data.
func (v *Vaultik) collectReferencedBlobs() (map[string]bool, error) {
	log.Info("Listing remote snapshots")

	snapshotIDs, err := v.listUniqueSnapshotIDs()
	if err != nil {
		return nil, fmt.Errorf("listing snapshot IDs: %w", err)
	}
	log.Info("Found manifests in remote storage", "count", len(snapshotIDs))

	allBlobsReferenced := make(map[string]bool)
	for _, snapshotID := range snapshotIDs {
		log.Debug("Processing manifest", "snapshot_id", snapshotID)

		manifest, err := v.downloadManifest(snapshotID)
		if err != nil {
			return nil, fmt.Errorf("downloading manifest for snapshot %q: %w", snapshotID, err)
		}
		for _, blob := range manifest.Blobs {
			allBlobsReferenced[blob.Hash] = true
		}
	}

	log.Info("Processed manifests",
		"count", len(snapshotIDs),
		"unique_blobs_referenced", len(allBlobsReferenced))
	return allBlobsReferenced, nil
}

// listUniqueSnapshotIDs returns deduplicated snapshot IDs from remote metadata.
//
// A snapshot ID is the second path segment of a key under "metadata/". Only
// keys that end in "/" or contain "/manifest.json.zst" contribute an ID. IDs
// are returned in the order they are first seen in the listing.
func (v *Vaultik) listUniqueSnapshotIDs() ([]string, error) {
	objectCh := v.Storage.ListStream(v.ctx, "metadata/")

	seen := make(map[string]bool)
	var snapshotIDs []string
	for object := range objectCh {
		if object.Err != nil {
			return nil, fmt.Errorf("listing metadata objects: %w", object.Err)
		}

		parts := strings.Split(object.Key, "/")
		if len(parts) < 2 || parts[0] != "metadata" || parts[1] == "" {
			continue
		}
		if !strings.HasSuffix(object.Key, "/") && !strings.Contains(object.Key, "/manifest.json.zst") {
			continue
		}

		snapshotID := parts[1]
		if seen[snapshotID] {
			continue
		}
		seen[snapshotID] = true
		snapshotIDs = append(snapshotIDs, snapshotID)
	}

	return snapshotIDs, nil
}

// listAllRemoteBlobs returns a map of all blob hashes to their sizes in
// remote storage.
//
// Only keys with exactly four "/"-separated segments whose first segment is
// "blobs" are counted; the fourth segment is taken as the blob hash.
func (v *Vaultik) listAllRemoteBlobs() (map[string]int64, error) {
	log.Info("Listing all blobs in storage")

	allBlobs := make(map[string]int64)
	blobObjectCh := v.Storage.ListStream(v.ctx, "blobs/")
	for object := range blobObjectCh {
		if object.Err != nil {
			return nil, fmt.Errorf("listing blobs: %w", object.Err)
		}

		parts := strings.Split(object.Key, "/")
		if len(parts) == 4 && parts[0] == "blobs" {
			allBlobs[parts[3]] = object.Size
		}
	}

	log.Info("Found blobs in storage", "count", len(allBlobs))
	return allBlobs, nil
}

// findUnreferencedBlobs returns blob hashes not referenced by any manifest
// and their total size.
//
// The order of the returned hashes follows map iteration and is therefore
// not deterministic.
func (v *Vaultik) findUnreferencedBlobs(allBlobs map[string]int64, referenced map[string]bool) ([]string, int64) {
	var unreferenced []string
	var totalSize int64
	for hash, size := range allBlobs {
		if !referenced[hash] {
			unreferenced = append(unreferenced, hash)
			totalSize += size
		}
	}
	return unreferenced, totalSize
}

// deleteUnreferencedBlobs deletes the given blobs from storage and populates
// the result.
//
// Each blob is deleted at "blobs/<hash[:2]>/<hash[2:4]>/<hash>". Failures are
// logged and counted in result.BlobsFailed; they do not stop the loop.
// allBlobs supplies the size added to result.BytesFreed for each deleted blob.
func (v *Vaultik) deleteUnreferencedBlobs(unreferencedBlobs []string, allBlobs map[string]int64, result *PruneBlobsResult) {
	log.Info("Deleting unreferenced blobs")

	total := len(unreferencedBlobs)
	for i, hash := range unreferencedBlobs {
		// The storage path is derived from the first four bytes of the hash;
		// a shorter name cannot be mapped to a path and would panic on slicing.
		if len(hash) < 4 {
			log.Error("Failed to delete blob", "hash", hash,
				"error", fmt.Errorf("hash %q too short to derive storage path", hash))
			continue
		}

		blobPath := fmt.Sprintf("blobs/%s/%s/%s", hash[:2], hash[2:4], hash)
		if err := v.Storage.Delete(v.ctx, blobPath); err != nil {
			log.Error("Failed to delete blob", "hash", hash, "error", err)
			continue
		}

		result.BlobsDeleted++
		result.BytesFreed += allBlobs[hash]

		if (i+1)%100 == 0 || i == total-1 {
			// "deleted" is the number actually removed; "percent" tracks how
			// far through the candidate list the loop has progressed.
			log.Info("Deletion progress",
				"deleted", result.BlobsDeleted,
				"total", total,
				"percent", fmt.Sprintf("%.1f%%", float64(i+1)/float64(total)*100),
			)
		}
	}

	result.BlobsFailed = total - result.BlobsDeleted

	log.Info("Prune complete",
		"deleted_count", result.BlobsDeleted,
		"deleted_size", humanize.Bytes(uint64(result.BytesFreed)),
		"failed", result.BlobsFailed,
	)
}

// outputPruneBlobsJSON outputs the prune result as JSON.
//
// The result is written to v.Stdout, indented with two spaces, and any
// encoding error is returned.
func (v *Vaultik) outputPruneBlobsJSON(result *PruneBlobsResult) error {
	encoder := json.NewEncoder(v.Stdout)
	encoder.SetIndent("", "  ")
	return encoder.Encode(result)
}