Add deterministic deduplication, rclone backend, and database purge command
- Implement deterministic blob hashing using double SHA256 of uncompressed plaintext data, enabling deduplication even after local DB is cleared
- Add Stat() check before blob upload to skip existing blobs in storage
- Add rclone storage backend for additional remote storage options
- Add 'vaultik database purge' command to erase local state DB
- Add 'vaultik remote check' command to verify remote connectivity
- Show configured snapshots in 'vaultik snapshot list' output
- Skip macOS resource fork files (._*) when listing remote snapshots
- Use multi-threaded zstd compression (CPUs - 2 threads)
- Add writer tests for double hashing behavior
This commit is contained in:
@@ -7,6 +7,7 @@ import (
|
||||
"fmt"
|
||||
"io"
|
||||
"os"
|
||||
"time"
|
||||
|
||||
"git.eeqj.de/sneak/vaultik/internal/log"
|
||||
"git.eeqj.de/sneak/vaultik/internal/snapshot"
|
||||
@@ -36,22 +37,44 @@ type VerifyResult struct {
|
||||
|
||||
// RunDeepVerify executes deep verification operation
|
||||
func (v *Vaultik) RunDeepVerify(snapshotID string, opts *VerifyOptions) error {
|
||||
result := &VerifyResult{
|
||||
SnapshotID: snapshotID,
|
||||
Mode: "deep",
|
||||
}
|
||||
|
||||
// Check for decryption capability
|
||||
if !v.CanDecrypt() {
|
||||
return fmt.Errorf("age_secret_key missing from config - required for deep verification")
|
||||
result.Status = "failed"
|
||||
result.ErrorMessage = "VAULTIK_AGE_SECRET_KEY environment variable not set - required for deep verification"
|
||||
if opts.JSON {
|
||||
return v.outputVerifyJSON(result)
|
||||
}
|
||||
return fmt.Errorf("VAULTIK_AGE_SECRET_KEY environment variable not set - required for deep verification")
|
||||
}
|
||||
|
||||
log.Info("Starting snapshot verification",
|
||||
"snapshot_id", snapshotID,
|
||||
"mode", map[bool]string{true: "deep", false: "shallow"}[opts.Deep],
|
||||
"mode", "deep",
|
||||
)
|
||||
|
||||
if !opts.JSON {
|
||||
v.Outputf("Deep verification of snapshot: %s\n\n", snapshotID)
|
||||
}
|
||||
|
||||
// Step 1: Download manifest
|
||||
manifestPath := fmt.Sprintf("metadata/%s/manifest.json.zst", snapshotID)
|
||||
log.Info("Downloading manifest", "path", manifestPath)
|
||||
if !opts.JSON {
|
||||
v.Outputf("Downloading manifest...\n")
|
||||
}
|
||||
|
||||
manifestReader, err := v.Storage.Get(v.ctx, manifestPath)
|
||||
if err != nil {
|
||||
result.Status = "failed"
|
||||
result.ErrorMessage = fmt.Sprintf("failed to download manifest: %v", err)
|
||||
if opts.JSON {
|
||||
return v.outputVerifyJSON(result)
|
||||
}
|
||||
return fmt.Errorf("failed to download manifest: %w", err)
|
||||
}
|
||||
defer func() { _ = manifestReader.Close() }()
|
||||
@@ -59,20 +82,36 @@ func (v *Vaultik) RunDeepVerify(snapshotID string, opts *VerifyOptions) error {
|
||||
// Decompress manifest
|
||||
manifest, err := snapshot.DecodeManifest(manifestReader)
|
||||
if err != nil {
|
||||
result.Status = "failed"
|
||||
result.ErrorMessage = fmt.Sprintf("failed to decode manifest: %v", err)
|
||||
if opts.JSON {
|
||||
return v.outputVerifyJSON(result)
|
||||
}
|
||||
return fmt.Errorf("failed to decode manifest: %w", err)
|
||||
}
|
||||
|
||||
log.Info("Manifest loaded",
|
||||
"blob_count", manifest.BlobCount,
|
||||
"total_size", humanize.Bytes(uint64(manifest.TotalCompressedSize)),
|
||||
"manifest_blob_count", manifest.BlobCount,
|
||||
"manifest_total_size", humanize.Bytes(uint64(manifest.TotalCompressedSize)),
|
||||
)
|
||||
if !opts.JSON {
|
||||
v.Outputf("Manifest loaded: %d blobs (%s)\n", manifest.BlobCount, humanize.Bytes(uint64(manifest.TotalCompressedSize)))
|
||||
}
|
||||
|
||||
// Step 2: Download and decrypt database
|
||||
// Step 2: Download and decrypt database (authoritative source)
|
||||
dbPath := fmt.Sprintf("metadata/%s/db.zst.age", snapshotID)
|
||||
log.Info("Downloading encrypted database", "path", dbPath)
|
||||
if !opts.JSON {
|
||||
v.Outputf("Downloading and decrypting database...\n")
|
||||
}
|
||||
|
||||
dbReader, err := v.Storage.Get(v.ctx, dbPath)
|
||||
if err != nil {
|
||||
result.Status = "failed"
|
||||
result.ErrorMessage = fmt.Sprintf("failed to download database: %v", err)
|
||||
if opts.JSON {
|
||||
return v.outputVerifyJSON(result)
|
||||
}
|
||||
return fmt.Errorf("failed to download database: %w", err)
|
||||
}
|
||||
defer func() { _ = dbReader.Close() }()
|
||||
@@ -80,6 +119,11 @@ func (v *Vaultik) RunDeepVerify(snapshotID string, opts *VerifyOptions) error {
|
||||
// Decrypt and decompress database
|
||||
tempDB, err := v.decryptAndLoadDatabase(dbReader, v.Config.AgeSecretKey)
|
||||
if err != nil {
|
||||
result.Status = "failed"
|
||||
result.ErrorMessage = fmt.Sprintf("failed to decrypt database: %v", err)
|
||||
if opts.JSON {
|
||||
return v.outputVerifyJSON(result)
|
||||
}
|
||||
return fmt.Errorf("failed to decrypt database: %w", err)
|
||||
}
|
||||
defer func() {
|
||||
@@ -88,28 +132,90 @@ func (v *Vaultik) RunDeepVerify(snapshotID string, opts *VerifyOptions) error {
|
||||
}
|
||||
}()
|
||||
|
||||
// Step 3: Compare blob lists
|
||||
if err := v.verifyBlobLists(snapshotID, manifest, tempDB.DB); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// Step 4: Verify blob existence
|
||||
if err := v.verifyBlobExistence(manifest); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// Step 5: Deep verification if requested
|
||||
if opts.Deep {
|
||||
if err := v.performDeepVerification(manifest, tempDB.DB); err != nil {
|
||||
return err
|
||||
// Step 3: Get authoritative blob list from database
|
||||
dbBlobs, err := v.getBlobsFromDatabase(snapshotID, tempDB.DB)
|
||||
if err != nil {
|
||||
result.Status = "failed"
|
||||
result.ErrorMessage = fmt.Sprintf("failed to get blobs from database: %v", err)
|
||||
if opts.JSON {
|
||||
return v.outputVerifyJSON(result)
|
||||
}
|
||||
return fmt.Errorf("failed to get blobs from database: %w", err)
|
||||
}
|
||||
|
||||
result.BlobCount = len(dbBlobs)
|
||||
var totalSize int64
|
||||
for _, blob := range dbBlobs {
|
||||
totalSize += blob.CompressedSize
|
||||
}
|
||||
result.TotalSize = totalSize
|
||||
|
||||
log.Info("Database loaded",
|
||||
"db_blob_count", len(dbBlobs),
|
||||
"db_total_size", humanize.Bytes(uint64(totalSize)),
|
||||
)
|
||||
if !opts.JSON {
|
||||
v.Outputf("Database loaded: %d blobs (%s)\n", len(dbBlobs), humanize.Bytes(uint64(totalSize)))
|
||||
v.Outputf("Verifying manifest against database...\n")
|
||||
}
|
||||
|
||||
// Step 4: Verify manifest matches database
|
||||
if err := v.verifyManifestAgainstDatabase(manifest, dbBlobs); err != nil {
|
||||
result.Status = "failed"
|
||||
result.ErrorMessage = err.Error()
|
||||
if opts.JSON {
|
||||
return v.outputVerifyJSON(result)
|
||||
}
|
||||
return err
|
||||
}
|
||||
|
||||
// Step 5: Verify all blobs exist in S3 (using database as source)
|
||||
if !opts.JSON {
|
||||
v.Outputf("Manifest verified.\n")
|
||||
v.Outputf("Checking blob existence in remote storage...\n")
|
||||
}
|
||||
if err := v.verifyBlobExistenceFromDB(dbBlobs); err != nil {
|
||||
result.Status = "failed"
|
||||
result.ErrorMessage = err.Error()
|
||||
if opts.JSON {
|
||||
return v.outputVerifyJSON(result)
|
||||
}
|
||||
return err
|
||||
}
|
||||
|
||||
// Step 6: Deep verification - download and verify blob contents
|
||||
if !opts.JSON {
|
||||
v.Outputf("All blobs exist.\n")
|
||||
v.Outputf("Downloading and verifying blob contents (%d blobs, %s)...\n", len(dbBlobs), humanize.Bytes(uint64(totalSize)))
|
||||
}
|
||||
if err := v.performDeepVerificationFromDB(dbBlobs, tempDB.DB, opts); err != nil {
|
||||
result.Status = "failed"
|
||||
result.ErrorMessage = err.Error()
|
||||
if opts.JSON {
|
||||
return v.outputVerifyJSON(result)
|
||||
}
|
||||
return err
|
||||
}
|
||||
|
||||
// Success
|
||||
result.Status = "ok"
|
||||
result.Verified = len(dbBlobs)
|
||||
|
||||
if opts.JSON {
|
||||
return v.outputVerifyJSON(result)
|
||||
}
|
||||
|
||||
log.Info("✓ Verification completed successfully",
|
||||
"snapshot_id", snapshotID,
|
||||
"mode", map[bool]string{true: "deep", false: "shallow"}[opts.Deep],
|
||||
"mode", "deep",
|
||||
"blobs_verified", len(dbBlobs),
|
||||
)
|
||||
|
||||
v.Outputf("\n✓ Verification completed successfully\n")
|
||||
v.Outputf(" Snapshot: %s\n", snapshotID)
|
||||
v.Outputf(" Blobs verified: %d\n", len(dbBlobs))
|
||||
v.Outputf(" Total size: %s\n", humanize.Bytes(uint64(totalSize)))
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
@@ -125,7 +231,7 @@ func (t *tempDB) Close() error {
|
||||
return err
|
||||
}
|
||||
|
||||
// decryptAndLoadDatabase decrypts and loads the database from the encrypted stream
|
||||
// decryptAndLoadDatabase decrypts and loads the binary SQLite database from the encrypted stream
|
||||
func (v *Vaultik) decryptAndLoadDatabase(reader io.ReadCloser, secretKey string) (*tempDB, error) {
|
||||
// Get decryptor
|
||||
decryptor, err := v.GetDecryptor()
|
||||
@@ -139,32 +245,31 @@ func (v *Vaultik) decryptAndLoadDatabase(reader io.ReadCloser, secretKey string)
|
||||
return nil, fmt.Errorf("failed to decrypt database: %w", err)
|
||||
}
|
||||
|
||||
// Decompress the database
|
||||
// Decompress the binary database
|
||||
decompressor, err := zstd.NewReader(decryptedReader)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to create decompressor: %w", err)
|
||||
}
|
||||
defer decompressor.Close()
|
||||
|
||||
// Create temporary file for database
|
||||
// Create temporary file for the database
|
||||
tempFile, err := os.CreateTemp("", "vaultik-verify-*.db")
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to create temp file: %w", err)
|
||||
}
|
||||
tempPath := tempFile.Name()
|
||||
|
||||
// Copy decompressed data to temp file
|
||||
if _, err := io.Copy(tempFile, decompressor); err != nil {
|
||||
// Stream decompress directly to file
|
||||
log.Info("Decompressing database...")
|
||||
written, err := io.Copy(tempFile, decompressor)
|
||||
if err != nil {
|
||||
_ = tempFile.Close()
|
||||
_ = os.Remove(tempPath)
|
||||
return nil, fmt.Errorf("failed to write database: %w", err)
|
||||
return nil, fmt.Errorf("failed to decompress database: %w", err)
|
||||
}
|
||||
_ = tempFile.Close()
|
||||
|
||||
// Close temp file before opening with sqlite
|
||||
if err := tempFile.Close(); err != nil {
|
||||
_ = os.Remove(tempPath)
|
||||
return nil, fmt.Errorf("failed to close temp file: %w", err)
|
||||
}
|
||||
log.Info("Database decompressed", "size", humanize.Bytes(uint64(written)))
|
||||
|
||||
// Open the database
|
||||
db, err := sql.Open("sqlite3", tempPath)
|
||||
@@ -179,137 +284,10 @@ func (v *Vaultik) decryptAndLoadDatabase(reader io.ReadCloser, secretKey string)
|
||||
}, nil
|
||||
}
|
||||
|
||||
// verifyBlobLists compares the blob lists between manifest and database
|
||||
func (v *Vaultik) verifyBlobLists(snapshotID string, manifest *snapshot.Manifest, db *sql.DB) error {
|
||||
log.Info("Verifying blob lists match between manifest and database")
|
||||
|
||||
// Get blobs from database
|
||||
query := `
|
||||
SELECT b.blob_hash, b.compressed_size
|
||||
FROM snapshot_blobs sb
|
||||
JOIN blobs b ON sb.blob_hash = b.blob_hash
|
||||
WHERE sb.snapshot_id = ?
|
||||
ORDER BY b.blob_hash
|
||||
`
|
||||
rows, err := db.QueryContext(v.ctx, query, snapshotID)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to query snapshot blobs: %w", err)
|
||||
}
|
||||
defer func() { _ = rows.Close() }()
|
||||
|
||||
// Build map of database blobs
|
||||
dbBlobs := make(map[string]int64)
|
||||
for rows.Next() {
|
||||
var hash string
|
||||
var size int64
|
||||
if err := rows.Scan(&hash, &size); err != nil {
|
||||
return fmt.Errorf("failed to scan blob row: %w", err)
|
||||
}
|
||||
dbBlobs[hash] = size
|
||||
}
|
||||
|
||||
// Build map of manifest blobs
|
||||
manifestBlobs := make(map[string]int64)
|
||||
for _, blob := range manifest.Blobs {
|
||||
manifestBlobs[blob.Hash] = blob.CompressedSize
|
||||
}
|
||||
|
||||
// Compare counts
|
||||
if len(dbBlobs) != len(manifestBlobs) {
|
||||
return fmt.Errorf("blob count mismatch: database has %d blobs, manifest has %d blobs",
|
||||
len(dbBlobs), len(manifestBlobs))
|
||||
}
|
||||
|
||||
// Check each blob exists in both
|
||||
for hash, dbSize := range dbBlobs {
|
||||
manifestSize, exists := manifestBlobs[hash]
|
||||
if !exists {
|
||||
return fmt.Errorf("blob %s exists in database but not in manifest", hash)
|
||||
}
|
||||
if dbSize != manifestSize {
|
||||
return fmt.Errorf("blob %s size mismatch: database has %d bytes, manifest has %d bytes",
|
||||
hash, dbSize, manifestSize)
|
||||
}
|
||||
}
|
||||
|
||||
for hash := range manifestBlobs {
|
||||
if _, exists := dbBlobs[hash]; !exists {
|
||||
return fmt.Errorf("blob %s exists in manifest but not in database", hash)
|
||||
}
|
||||
}
|
||||
|
||||
log.Info("✓ Blob lists match", "blob_count", len(dbBlobs))
|
||||
return nil
|
||||
}
|
||||
|
||||
// verifyBlobExistence checks that all blobs exist in S3
|
||||
func (v *Vaultik) verifyBlobExistence(manifest *snapshot.Manifest) error {
|
||||
log.Info("Verifying blob existence in S3", "blob_count", len(manifest.Blobs))
|
||||
|
||||
for i, blob := range manifest.Blobs {
|
||||
// Construct blob path
|
||||
blobPath := fmt.Sprintf("blobs/%s/%s/%s", blob.Hash[:2], blob.Hash[2:4], blob.Hash)
|
||||
|
||||
// Check blob exists
|
||||
stat, err := v.Storage.Stat(v.ctx, blobPath)
|
||||
if err != nil {
|
||||
return fmt.Errorf("blob %s missing from storage: %w", blob.Hash, err)
|
||||
}
|
||||
|
||||
// Verify size matches
|
||||
if stat.Size != blob.CompressedSize {
|
||||
return fmt.Errorf("blob %s size mismatch: S3 has %d bytes, manifest has %d bytes",
|
||||
blob.Hash, stat.Size, blob.CompressedSize)
|
||||
}
|
||||
|
||||
// Progress update every 100 blobs
|
||||
if (i+1)%100 == 0 || i == len(manifest.Blobs)-1 {
|
||||
log.Info("Blob existence check progress",
|
||||
"checked", i+1,
|
||||
"total", len(manifest.Blobs),
|
||||
"percent", fmt.Sprintf("%.1f%%", float64(i+1)/float64(len(manifest.Blobs))*100),
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
log.Info("✓ All blobs exist in storage")
|
||||
return nil
|
||||
}
|
||||
|
||||
// performDeepVerification downloads and verifies the content of each blob
|
||||
func (v *Vaultik) performDeepVerification(manifest *snapshot.Manifest, db *sql.DB) error {
|
||||
log.Info("Starting deep verification - downloading and verifying all blobs")
|
||||
|
||||
totalBytes := int64(0)
|
||||
for i, blobInfo := range manifest.Blobs {
|
||||
// Verify individual blob
|
||||
if err := v.verifyBlob(blobInfo, db); err != nil {
|
||||
return fmt.Errorf("blob %s verification failed: %w", blobInfo.Hash, err)
|
||||
}
|
||||
|
||||
totalBytes += blobInfo.CompressedSize
|
||||
|
||||
// Progress update
|
||||
log.Info("Deep verification progress",
|
||||
"blob", fmt.Sprintf("%d/%d", i+1, len(manifest.Blobs)),
|
||||
"total_downloaded", humanize.Bytes(uint64(totalBytes)),
|
||||
"percent", fmt.Sprintf("%.1f%%", float64(i+1)/float64(len(manifest.Blobs))*100),
|
||||
)
|
||||
}
|
||||
|
||||
log.Info("✓ Deep verification completed successfully",
|
||||
"blobs_verified", len(manifest.Blobs),
|
||||
"total_size", humanize.Bytes(uint64(totalBytes)),
|
||||
)
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// verifyBlob downloads and verifies a single blob
|
||||
func (v *Vaultik) verifyBlob(blobInfo snapshot.BlobInfo, db *sql.DB) error {
|
||||
// Download blob
|
||||
blobPath := fmt.Sprintf("blobs/%s/%s/%s", blobInfo.Hash[:2], blobInfo.Hash[2:4], blobInfo.Hash)
|
||||
reader, err := v.Storage.Get(v.ctx, blobPath)
|
||||
// Download blob using shared fetch method
|
||||
reader, _, err := v.FetchBlob(v.ctx, blobInfo.Hash, blobInfo.CompressedSize)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to download: %w", err)
|
||||
}
|
||||
@@ -321,8 +299,12 @@ func (v *Vaultik) verifyBlob(blobInfo snapshot.BlobInfo, db *sql.DB) error {
|
||||
return fmt.Errorf("failed to get decryptor: %w", err)
|
||||
}
|
||||
|
||||
// Decrypt blob
|
||||
decryptedReader, err := decryptor.DecryptStream(reader)
|
||||
// Hash the encrypted blob data as it streams through to decryption
|
||||
blobHasher := sha256.New()
|
||||
teeReader := io.TeeReader(reader, blobHasher)
|
||||
|
||||
// Decrypt blob (reading through teeReader to hash encrypted data)
|
||||
decryptedReader, err := decryptor.DecryptStream(teeReader)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to decrypt: %w", err)
|
||||
}
|
||||
@@ -400,11 +382,209 @@ func (v *Vaultik) verifyBlob(blobInfo snapshot.BlobInfo, db *sql.DB) error {
|
||||
return fmt.Errorf("error iterating blob chunks: %w", err)
|
||||
}
|
||||
|
||||
log.Debug("Blob verified",
|
||||
"hash", blobInfo.Hash,
|
||||
// Verify no remaining data in blob - if chunk list is accurate, blob should be fully consumed
|
||||
remaining, err := io.Copy(io.Discard, decompressor)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to check for remaining blob data: %w", err)
|
||||
}
|
||||
if remaining > 0 {
|
||||
return fmt.Errorf("blob has %d unexpected trailing bytes not covered by chunk list", remaining)
|
||||
}
|
||||
|
||||
// Verify blob hash matches the encrypted data we downloaded
|
||||
calculatedBlobHash := hex.EncodeToString(blobHasher.Sum(nil))
|
||||
if calculatedBlobHash != blobInfo.Hash {
|
||||
return fmt.Errorf("blob hash mismatch: calculated %s, expected %s",
|
||||
calculatedBlobHash, blobInfo.Hash)
|
||||
}
|
||||
|
||||
log.Info("Blob verified",
|
||||
"hash", blobInfo.Hash[:16]+"...",
|
||||
"chunks", chunkCount,
|
||||
"size", humanize.Bytes(uint64(blobInfo.CompressedSize)),
|
||||
)
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// getBlobsFromDatabase gets all blobs for the snapshot from the database
|
||||
func (v *Vaultik) getBlobsFromDatabase(snapshotID string, db *sql.DB) ([]snapshot.BlobInfo, error) {
|
||||
query := `
|
||||
SELECT b.blob_hash, b.compressed_size
|
||||
FROM snapshot_blobs sb
|
||||
JOIN blobs b ON sb.blob_hash = b.blob_hash
|
||||
WHERE sb.snapshot_id = ?
|
||||
ORDER BY b.blob_hash
|
||||
`
|
||||
rows, err := db.QueryContext(v.ctx, query, snapshotID)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to query snapshot blobs: %w", err)
|
||||
}
|
||||
defer func() { _ = rows.Close() }()
|
||||
|
||||
var blobs []snapshot.BlobInfo
|
||||
for rows.Next() {
|
||||
var hash string
|
||||
var size int64
|
||||
if err := rows.Scan(&hash, &size); err != nil {
|
||||
return nil, fmt.Errorf("failed to scan blob row: %w", err)
|
||||
}
|
||||
blobs = append(blobs, snapshot.BlobInfo{
|
||||
Hash: hash,
|
||||
CompressedSize: size,
|
||||
})
|
||||
}
|
||||
|
||||
if err := rows.Err(); err != nil {
|
||||
return nil, fmt.Errorf("error iterating blobs: %w", err)
|
||||
}
|
||||
|
||||
return blobs, nil
|
||||
}
|
||||
|
||||
// verifyManifestAgainstDatabase verifies the manifest matches the authoritative database
|
||||
func (v *Vaultik) verifyManifestAgainstDatabase(manifest *snapshot.Manifest, dbBlobs []snapshot.BlobInfo) error {
|
||||
log.Info("Verifying manifest against database")
|
||||
|
||||
// Build map of database blobs
|
||||
dbBlobMap := make(map[string]int64)
|
||||
for _, blob := range dbBlobs {
|
||||
dbBlobMap[blob.Hash] = blob.CompressedSize
|
||||
}
|
||||
|
||||
// Build map of manifest blobs
|
||||
manifestBlobMap := make(map[string]int64)
|
||||
for _, blob := range manifest.Blobs {
|
||||
manifestBlobMap[blob.Hash] = blob.CompressedSize
|
||||
}
|
||||
|
||||
// Check counts match
|
||||
if len(dbBlobMap) != len(manifestBlobMap) {
|
||||
log.Warn("Manifest blob count mismatch",
|
||||
"database_blobs", len(dbBlobMap),
|
||||
"manifest_blobs", len(manifestBlobMap),
|
||||
)
|
||||
// This is a warning, not an error - database is authoritative
|
||||
}
|
||||
|
||||
// Check each manifest blob exists in database with correct size
|
||||
for hash, manifestSize := range manifestBlobMap {
|
||||
dbSize, exists := dbBlobMap[hash]
|
||||
if !exists {
|
||||
return fmt.Errorf("manifest contains blob %s not in database", hash)
|
||||
}
|
||||
if dbSize != manifestSize {
|
||||
return fmt.Errorf("blob %s size mismatch: database has %d bytes, manifest has %d bytes",
|
||||
hash, dbSize, manifestSize)
|
||||
}
|
||||
}
|
||||
|
||||
log.Info("✓ Manifest verified against database",
|
||||
"manifest_blobs", len(manifestBlobMap),
|
||||
"database_blobs", len(dbBlobMap),
|
||||
)
|
||||
return nil
|
||||
}
|
||||
|
||||
// verifyBlobExistenceFromDB checks that all blobs from database exist in S3
|
||||
func (v *Vaultik) verifyBlobExistenceFromDB(blobs []snapshot.BlobInfo) error {
|
||||
log.Info("Verifying blob existence in S3", "blob_count", len(blobs))
|
||||
|
||||
for i, blob := range blobs {
|
||||
// Construct blob path
|
||||
blobPath := fmt.Sprintf("blobs/%s/%s/%s", blob.Hash[:2], blob.Hash[2:4], blob.Hash)
|
||||
|
||||
// Check blob exists
|
||||
stat, err := v.Storage.Stat(v.ctx, blobPath)
|
||||
if err != nil {
|
||||
return fmt.Errorf("blob %s missing from storage: %w", blob.Hash, err)
|
||||
}
|
||||
|
||||
// Verify size matches
|
||||
if stat.Size != blob.CompressedSize {
|
||||
return fmt.Errorf("blob %s size mismatch: S3 has %d bytes, database has %d bytes",
|
||||
blob.Hash, stat.Size, blob.CompressedSize)
|
||||
}
|
||||
|
||||
// Progress update every 100 blobs
|
||||
if (i+1)%100 == 0 || i == len(blobs)-1 {
|
||||
log.Info("Blob existence check progress",
|
||||
"checked", i+1,
|
||||
"total", len(blobs),
|
||||
"percent", fmt.Sprintf("%.1f%%", float64(i+1)/float64(len(blobs))*100),
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
log.Info("✓ All blobs exist in storage")
|
||||
return nil
|
||||
}
|
||||
|
||||
// performDeepVerificationFromDB downloads and verifies the content of each blob using database as source
|
||||
func (v *Vaultik) performDeepVerificationFromDB(blobs []snapshot.BlobInfo, db *sql.DB, opts *VerifyOptions) error {
|
||||
// Calculate total bytes for ETA
|
||||
var totalBytesExpected int64
|
||||
for _, b := range blobs {
|
||||
totalBytesExpected += b.CompressedSize
|
||||
}
|
||||
|
||||
log.Info("Starting deep verification - downloading and verifying all blobs",
|
||||
"blob_count", len(blobs),
|
||||
"total_size", humanize.Bytes(uint64(totalBytesExpected)),
|
||||
)
|
||||
|
||||
startTime := time.Now()
|
||||
bytesProcessed := int64(0)
|
||||
|
||||
for i, blobInfo := range blobs {
|
||||
// Verify individual blob
|
||||
if err := v.verifyBlob(blobInfo, db); err != nil {
|
||||
return fmt.Errorf("blob %s verification failed: %w", blobInfo.Hash, err)
|
||||
}
|
||||
|
||||
bytesProcessed += blobInfo.CompressedSize
|
||||
elapsed := time.Since(startTime)
|
||||
remaining := len(blobs) - (i + 1)
|
||||
|
||||
// Calculate ETA based on bytes processed
|
||||
var eta time.Duration
|
||||
if bytesProcessed > 0 {
|
||||
bytesPerSec := float64(bytesProcessed) / elapsed.Seconds()
|
||||
bytesRemaining := totalBytesExpected - bytesProcessed
|
||||
if bytesPerSec > 0 {
|
||||
eta = time.Duration(float64(bytesRemaining)/bytesPerSec) * time.Second
|
||||
}
|
||||
}
|
||||
|
||||
log.Info("Verification progress",
|
||||
"blobs_done", i+1,
|
||||
"blobs_total", len(blobs),
|
||||
"blobs_remaining", remaining,
|
||||
"bytes_done", bytesProcessed,
|
||||
"bytes_done_human", humanize.Bytes(uint64(bytesProcessed)),
|
||||
"bytes_total", totalBytesExpected,
|
||||
"bytes_total_human", humanize.Bytes(uint64(totalBytesExpected)),
|
||||
"elapsed", elapsed.Round(time.Second),
|
||||
"eta", eta.Round(time.Second),
|
||||
)
|
||||
|
||||
if !opts.JSON {
|
||||
v.Outputf(" Verified %d/%d blobs (%d remaining) - %s/%s - elapsed %s, eta %s\n",
|
||||
i+1, len(blobs), remaining,
|
||||
humanize.Bytes(uint64(bytesProcessed)),
|
||||
humanize.Bytes(uint64(totalBytesExpected)),
|
||||
elapsed.Round(time.Second),
|
||||
eta.Round(time.Second))
|
||||
}
|
||||
}
|
||||
|
||||
totalElapsed := time.Since(startTime)
|
||||
log.Info("✓ Deep verification completed successfully",
|
||||
"blobs_verified", len(blobs),
|
||||
"total_bytes", bytesProcessed,
|
||||
"total_bytes_human", humanize.Bytes(uint64(bytesProcessed)),
|
||||
"duration", totalElapsed.Round(time.Second),
|
||||
)
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user