vaultik/internal/vaultik/info.go
sneak 470bf648c4 Add deterministic deduplication, rclone backend, and database purge command
- Implement deterministic blob hashing using double SHA256 of uncompressed
  plaintext data, enabling deduplication even after local DB is cleared
- Add Stat() check before blob upload to skip existing blobs in storage
- Add rclone storage backend for additional remote storage options
- Add 'vaultik database purge' command to erase local state DB
- Add 'vaultik remote check' command to verify remote connectivity
- Show configured snapshots in 'vaultik snapshot list' output
- Skip macOS resource fork files (._*) when listing remote snapshots
- Use multi-threaded zstd compression (CPUs - 2 threads)
- Add writer tests for double hashing behavior
2026-01-28 15:50:17 -08:00

349 lines
10 KiB
Go

package vaultik
import (
"encoding/json"
"fmt"
"runtime"
"sort"
"strings"
"git.eeqj.de/sneak/vaultik/internal/log"
"git.eeqj.de/sneak/vaultik/internal/snapshot"
"github.com/dustin/go-humanize"
)
// ShowInfo prints a human-readable summary of the running build, the
// configured storage and backup settings, and the state of the local index
// database to standard output.
//
// The report is best-effort: a failure to stat the index file or to run one of
// the count queries is shown in the output rather than returned, so ShowInfo
// always returns nil.
func (v *Vaultik) ShowInfo() error {
	// System Information
	fmt.Printf("=== System Information ===\n")
	fmt.Printf("OS/Architecture: %s/%s\n", runtime.GOOS, runtime.GOARCH)
	fmt.Printf("Version: %s\n", v.Globals.Version)
	fmt.Printf("Commit: %s\n", v.Globals.Commit)
	fmt.Printf("Go Version: %s\n", runtime.Version())
	fmt.Println()

	// Storage Configuration
	fmt.Printf("=== Storage Configuration ===\n")
	fmt.Printf("S3 Bucket: %s\n", v.Config.S3.Bucket)
	if v.Config.S3.Prefix != "" {
		fmt.Printf("S3 Prefix: %s\n", v.Config.S3.Prefix)
	}
	fmt.Printf("S3 Endpoint: %s\n", v.Config.S3.Endpoint)
	fmt.Printf("S3 Region: %s\n", v.Config.S3.Region)
	fmt.Println()

	// Backup Settings
	fmt.Printf("=== Backup Settings ===\n")

	// Configured snapshots, in the order SnapshotNames returns them.
	fmt.Printf("Snapshots:\n")
	for _, name := range v.Config.SnapshotNames() {
		snap := v.Config.Snapshots[name]
		fmt.Printf(" %s:\n", name)
		for _, path := range snap.Paths {
			fmt.Printf(" - %s\n", path)
		}
		if len(snap.Exclude) > 0 {
			fmt.Printf(" exclude: %s\n", strings.Join(snap.Exclude, ", "))
		}
	}

	// Global exclude patterns
	if len(v.Config.Exclude) > 0 {
		fmt.Printf("Global Exclude: %s\n", strings.Join(v.Config.Exclude, ", "))
	}
	fmt.Printf("Compression: zstd level %d\n", v.Config.CompressionLevel)
	fmt.Printf("Chunk Size: %s\n", humanize.Bytes(uint64(v.Config.ChunkSize)))
	fmt.Printf("Blob Size Limit: %s\n", humanize.Bytes(uint64(v.Config.BlobSizeLimit)))
	fmt.Println()

	// Encryption Configuration
	fmt.Printf("=== Encryption Configuration ===\n")
	fmt.Printf("Recipients:\n")
	for _, recipient := range v.Config.AgeRecipients {
		fmt.Printf(" - %s\n", recipient)
	}
	fmt.Println()

	// Daemon Settings are shown only when at least one of them is set.
	if v.Config.BackupInterval > 0 || v.Config.MinTimeBetweenRun > 0 {
		fmt.Printf("=== Daemon Settings ===\n")
		if v.Config.BackupInterval > 0 {
			fmt.Printf("Backup Interval: %s\n", v.Config.BackupInterval)
		}
		if v.Config.MinTimeBetweenRun > 0 {
			fmt.Printf("Minimum Time: %s\n", v.Config.MinTimeBetweenRun)
		}
		fmt.Println()
	}

	// Local Database
	fmt.Printf("=== Local Database ===\n")
	fmt.Printf("Index Path: %s\n", v.Config.IndexPath)

	// Any Stat error (not only "does not exist") is reported as "not created";
	// the database is not queried in that case.
	info, err := v.Fs.Stat(v.Config.IndexPath)
	if err != nil {
		fmt.Printf("Index Size: (not created)\n")
		return nil
	}
	fmt.Printf("Index Size: %s\n", humanize.Bytes(uint64(info.Size())))

	// Row counts from the index database. A failed query is reported inline
	// instead of being dropped, so a missing line cannot be mistaken for an
	// empty table.
	counts := []struct {
		label string
		query string
	}{
		{"Snapshots", `SELECT COUNT(*) FROM snapshots WHERE completed_at IS NOT NULL`},
		{"Blobs", `SELECT COUNT(*) FROM blobs`},
		{"Files", `SELECT COUNT(*) FROM files`},
	}
	for _, c := range counts {
		var n int
		if err := v.DB.Conn().QueryRowContext(v.ctx, c.query).Scan(&n); err != nil {
			fmt.Printf("%s: (unavailable: %v)\n", c.label, err)
			continue
		}
		fmt.Printf("%s: %d\n", c.label, n)
	}

	return nil
}
// SnapshotMetadataInfo describes the remote metadata objects of one snapshot
// together with the blob totals read from that snapshot's manifest.
// RemoteInfo fills one of these per snapshot ID found under "metadata/".
type SnapshotMetadataInfo struct {
// SnapshotID is the second path segment of the metadata object key
// (metadata/<snapshot-id>/<filename>).
SnapshotID string `json:"snapshot_id"`
// ManifestSize is the stored size of the metadata object whose filename
// starts with "manifest".
ManifestSize int64 `json:"manifest_size"`
// DatabaseSize is the stored size of the metadata object whose filename
// starts with "db".
DatabaseSize int64 `json:"database_size"`
// TotalSize is ManifestSize + DatabaseSize.
TotalSize int64 `json:"total_size"`
// BlobCount is the blob count recorded in the decoded manifest; it stays 0
// when the manifest could not be fetched or decoded.
BlobCount int `json:"blob_count"`
// BlobsSize is the sum of the compressed sizes of the blobs listed in the
// decoded manifest; it stays 0 when the manifest could not be read.
BlobsSize int64 `json:"blobs_size"`
}
// RemoteInfoResult is the complete report produced by RemoteInfo. It is what
// gets JSON-encoded when JSON output is requested.
type RemoteInfoResult struct {
// Storage info, copied from the storage backend's Info() result.
StorageType string `json:"storage_type"`
StorageLocation string `json:"storage_location"`
// Snapshot metadata: one entry per snapshot ID, sorted by ID.
Snapshots []SnapshotMetadataInfo `json:"snapshots"`
// TotalMetadataSize is the sum of TotalSize over Snapshots.
TotalMetadataSize int64 `json:"total_metadata_size"`
// TotalMetadataCount is the number of snapshot IDs found.
TotalMetadataCount int `json:"total_metadata_count"`
// All blobs on remote: every object listed under "blobs/" whose key has at
// least four path segments, with sizes taken from the storage listing.
TotalBlobCount int `json:"total_blob_count"`
TotalBlobSize int64 `json:"total_blob_size"`
// Referenced blobs (from manifests): distinct blob hashes seen in the
// manifests that were decoded, sized by the manifest's compressed size.
ReferencedBlobCount int `json:"referenced_blob_count"`
ReferencedBlobSize int64 `json:"referenced_blob_size"`
// Orphaned blobs: blobs present on the remote whose hash appears in no
// decoded manifest, sized by the storage listing.
OrphanedBlobCount int `json:"orphaned_blob_count"`
OrphanedBlobSize int64 `json:"orphaned_blob_size"`
}
// RemoteInfo inspects the remote storage and reports, per snapshot, the size
// of its metadata objects and the blobs its manifest references, followed by
// totals for all blobs on the remote, blobs referenced by at least one
// manifest, and orphaned blobs (present on the remote but referenced by none).
//
// When jsonOutput is true the result is written to v.Stdout as indented JSON
// and no progress or table output is printed. Otherwise progress messages and
// a formatted report are printed to standard output.
//
// Listing errors abort the call and are returned wrapped. A manifest that
// cannot be fetched or decoded is logged and skipped; because the blobs of a
// skipped manifest are then counted as orphaned, the human-readable report
// prints a warning and withholds the prune suggestion in that case.
func (v *Vaultik) RemoteInfo(jsonOutput bool) error {
	result := &RemoteInfoResult{}

	// Get storage info
	storageInfo := v.Storage.Info()
	result.StorageType = storageInfo.Type
	result.StorageLocation = storageInfo.Location

	if !jsonOutput {
		fmt.Printf("=== Remote Storage ===\n")
		fmt.Printf("Type: %s\n", storageInfo.Type)
		fmt.Printf("Location: %s\n", storageInfo.Location)
		fmt.Println()
		fmt.Printf("Scanning snapshot metadata...\n")
	}

	// Collect metadata object sizes, grouped by snapshot ID.
	snapshotMetadata := make(map[string]*SnapshotMetadataInfo)
	for obj := range v.Storage.ListStream(v.ctx, "metadata/") {
		if obj.Err != nil {
			return fmt.Errorf("listing metadata: %w", obj.Err)
		}

		// Parse key: metadata/<snapshot-id>/<filename>
		parts := strings.Split(obj.Key, "/")
		if len(parts) < 3 {
			continue
		}
		snapshotID, filename := parts[1], parts[2]

		info, exists := snapshotMetadata[snapshotID]
		if !exists {
			info = &SnapshotMetadataInfo{SnapshotID: snapshotID}
			snapshotMetadata[snapshotID] = info
		}

		if strings.HasPrefix(filename, "manifest") {
			info.ManifestSize = obj.Size
		} else if strings.HasPrefix(filename, "db") {
			info.DatabaseSize = obj.Size
		}
		info.TotalSize = info.ManifestSize + info.DatabaseSize
	}

	// Sort snapshots by ID for consistent output
	snapshotIDs := make([]string, 0, len(snapshotMetadata))
	for id := range snapshotMetadata {
		snapshotIDs = append(snapshotIDs, id)
	}
	sort.Strings(snapshotIDs)

	// Download and parse all manifests to get referenced blobs
	if !jsonOutput {
		fmt.Printf("Downloading %d manifest(s)...\n", len(snapshotIDs))
	}

	referencedBlobs := make(map[string]int64) // hash -> compressed size
	failedManifests := 0                      // manifests skipped; their blobs look orphaned
	for _, snapshotID := range snapshotIDs {
		manifestKey := fmt.Sprintf("metadata/%s/manifest.json.zst", snapshotID)
		reader, err := v.Storage.Get(v.ctx, manifestKey)
		if err != nil {
			log.Warn("Failed to get manifest", "snapshot", snapshotID, "error", err)
			failedManifests++
			continue
		}

		manifest, err := snapshot.DecodeManifest(reader)
		// The stream was only read from, so a Close error carries no data-loss risk.
		_ = reader.Close()
		if err != nil {
			log.Warn("Failed to decode manifest", "snapshot", snapshotID, "error", err)
			failedManifests++
			continue
		}

		// Record blob info from manifest
		info := snapshotMetadata[snapshotID]
		info.BlobCount = manifest.BlobCount
		var blobsSize int64
		for _, blob := range manifest.Blobs {
			referencedBlobs[blob.Hash] = blob.CompressedSize
			blobsSize += blob.CompressedSize
		}
		info.BlobsSize = blobsSize
	}

	// Build result snapshots. The slice is allocated even when empty so the
	// JSON form is "snapshots": [] rather than null.
	result.Snapshots = make([]SnapshotMetadataInfo, 0, len(snapshotIDs))
	for _, id := range snapshotIDs {
		info := snapshotMetadata[id]
		result.Snapshots = append(result.Snapshots, *info)
		result.TotalMetadataSize += info.TotalSize
	}
	result.TotalMetadataCount = len(snapshotIDs)

	// Calculate referenced blob stats (each distinct hash counted once)
	result.ReferencedBlobCount = len(referencedBlobs)
	for _, size := range referencedBlobs {
		result.ReferencedBlobSize += size
	}

	// List all blobs on remote
	if !jsonOutput {
		fmt.Printf("Scanning blobs...\n")
	}

	allBlobs := make(map[string]int64) // hash -> size from storage
	for obj := range v.Storage.ListStream(v.ctx, "blobs/") {
		if obj.Err != nil {
			return fmt.Errorf("listing blobs: %w", obj.Err)
		}

		// Extract hash from key: blobs/xx/yy/hash
		parts := strings.Split(obj.Key, "/")
		if len(parts) < 4 {
			continue
		}
		allBlobs[parts[3]] = obj.Size
		result.TotalBlobCount++
		result.TotalBlobSize += obj.Size
	}

	// Calculate orphaned blobs
	for hash, size := range allBlobs {
		if _, referenced := referencedBlobs[hash]; !referenced {
			result.OrphanedBlobCount++
			result.OrphanedBlobSize += size
		}
	}

	// Output results
	if jsonOutput {
		enc := json.NewEncoder(v.Stdout)
		enc.SetIndent("", " ")
		return enc.Encode(result)
	}

	// Human-readable output
	fmt.Printf("\n=== Snapshot Metadata ===\n")
	if len(result.Snapshots) == 0 {
		fmt.Printf("No snapshots found\n")
	} else {
		const rowFormat = "%-45s %12s %12s %12s %10s %12s\n"
		wide := strings.Repeat("-", 12)
		divider := fmt.Sprintf(rowFormat, strings.Repeat("-", 45), wide, wide, wide, strings.Repeat("-", 10), wide)

		fmt.Printf(rowFormat, "SNAPSHOT", "MANIFEST", "DATABASE", "TOTAL", "BLOBS", "BLOB SIZE")
		fmt.Print(divider)
		for _, info := range result.Snapshots {
			fmt.Printf(rowFormat,
				truncateString(info.SnapshotID, 45),
				humanize.Bytes(uint64(info.ManifestSize)),
				humanize.Bytes(uint64(info.DatabaseSize)),
				humanize.Bytes(uint64(info.TotalSize)),
				humanize.Comma(int64(info.BlobCount)),
				humanize.Bytes(uint64(info.BlobsSize)),
			)
		}
		fmt.Print(divider)
		fmt.Printf("%-45s %12s %12s %12s\n", fmt.Sprintf("Total (%d snapshots)", result.TotalMetadataCount), "", "", humanize.Bytes(uint64(result.TotalMetadataSize)))
	}

	fmt.Printf("\n=== Blob Storage ===\n")
	fmt.Printf("Total blobs on remote: %s (%s)\n",
		humanize.Comma(int64(result.TotalBlobCount)),
		humanize.Bytes(uint64(result.TotalBlobSize)))
	fmt.Printf("Referenced by snapshots: %s (%s)\n",
		humanize.Comma(int64(result.ReferencedBlobCount)),
		humanize.Bytes(uint64(result.ReferencedBlobSize)))
	fmt.Printf("Orphaned (unreferenced): %s (%s)\n",
		humanize.Comma(int64(result.OrphanedBlobCount)),
		humanize.Bytes(uint64(result.OrphanedBlobSize)))

	switch {
	case failedManifests > 0:
		// Blobs referenced only by an unreadable manifest were counted as
		// orphaned above, so suggesting a prune here would be misleading.
		fmt.Printf("\nWarning: %d manifest(s) could not be read; the orphaned figures may include blobs those snapshots still reference.\n", failedManifests)
	case result.OrphanedBlobCount > 0:
		fmt.Printf("\nRun 'vaultik prune --remote' to remove orphaned blobs.\n")
	}

	return nil
}
// truncateString shortens s to at most maxLen characters (Unicode code
// points), replacing the tail with "..." when there is room for it.
//
// Strings that already fit are returned unchanged. When maxLen is 3 or less
// there is no room for the ellipsis, so the string is simply cut. A maxLen of
// zero or less yields the empty string.
//
// Length is counted in runes rather than bytes so a multi-byte character is
// never split and the result lines up with fmt's width handling, which also
// counts code points.
func truncateString(s string, maxLen int) string {
	if maxLen <= 0 {
		return ""
	}
	// Fast path: the byte length bounds the rune count from above.
	if len(s) <= maxLen {
		return s
	}

	runes := []rune(s)
	if len(runes) <= maxLen {
		return s
	}
	if maxLen <= 3 {
		return string(runes[:maxLen])
	}
	return string(runes[:maxLen-3]) + "..."
}