Add deterministic deduplication, rclone backend, and database purge command

- Implement deterministic blob hashing using double SHA256 of uncompressed
  plaintext data, enabling deduplication even after local DB is cleared
- Add Stat() check before blob upload to skip existing blobs in storage
- Add rclone storage backend for additional remote storage options
- Add 'vaultik database purge' command to erase local state DB
- Add 'vaultik remote check' command to verify remote connectivity
- Show configured snapshots in 'vaultik snapshot list' output
- Skip macOS resource fork files (._*) when listing remote snapshots
- Use multi-threaded zstd compression (CPUs - 2 threads)
- Add writer tests for double hashing behavior
Jeffrey Paul 2026-01-28 15:50:17 -08:00
parent bdaaadf990
commit 470bf648c4
26 changed files with 2966 additions and 777 deletions
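
Taken together, the double hashing and the pre-upload Stat() check are what let deduplication survive a purged local database. The sketch below is illustrative only and is not code from this commit: the `Storer` interface and `shouldUpload` helper are hypothetical stand-ins, while the double-SHA256 derivation and the sharded `blobs/xx/yy/<hash>` layout come from the writer and scanner changes further down.

```go
package example

import (
	"context"
	"crypto/sha256"
	"encoding/hex"
	"fmt"
)

// Storer is a hypothetical stand-in for vaultik's storage backend interface.
type Storer interface {
	Stat(ctx context.Context, key string) (int64, error) // error when the key is absent
}

// blobID derives the deterministic blob name: SHA256(SHA256(plaintext)).
// Because it depends only on the uncompressed plaintext, identical content
// always maps to the same key, even after the local state DB is purged.
func blobID(plaintext []byte) string {
	first := sha256.Sum256(plaintext)
	second := sha256.Sum256(first[:])
	return hex.EncodeToString(second[:])
}

// blobKey shards the ID into the blobs/ca/fe/cafebabe... layout used by the scanner.
func blobKey(id string) string {
	return fmt.Sprintf("blobs/%s/%s/%s", id[:2], id[2:4], id)
}

// shouldUpload is the Stat()-before-upload check: if the key already exists
// remotely, the upload is skipped and the blob is only re-recorded locally.
func shouldUpload(ctx context.Context, s Storer, plaintext []byte) (string, bool) {
	key := blobKey(blobID(plaintext))
	if _, err := s.Stat(ctx, key); err == nil {
		return key, false // already present in remote storage
	}
	return key, true
}
```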


@ -64,3 +64,6 @@ test-integration:
local:
	VAULTIK_CONFIG=$(HOME)/etc/vaultik/config.yml ./vaultik snapshot --debug list 2>&1
	VAULTIK_CONFIG=$(HOME)/etc/vaultik/config.yml ./vaultik snapshot --debug create 2>&1

install: vaultik
	cp ./vaultik $(HOME)/bin/

TODO.md

@ -2,6 +2,37 @@
Linear list of tasks to complete before 1.0 release.

## Rclone Storage Backend (Complete)

Add rclone as a storage backend via Go library import, allowing vaultik to use any of rclone's 70+ supported cloud storage providers.

**Configuration:**

```yaml
storage_url: "rclone://myremote/path/to/backups"
```

User must have rclone configured separately (via `rclone config`).

**Implementation Steps:**

1. [x] Add rclone dependency to go.mod
2. [x] Create `internal/storage/rclone.go` implementing `Storer` interface
   - `NewRcloneStorer(remote, path)` - init with `configfile.Install()` and `fs.NewFs()`
   - `Put` / `PutWithProgress` - use `operations.Rcat()`
   - `Get` - use `fs.NewObject()` then `obj.Open()`
   - `Stat` - use `fs.NewObject()` for size/metadata
   - `Delete` - use `obj.Remove()`
   - `List` / `ListStream` - use `operations.ListFn()`
   - `Info` - return remote name
3. [x] Update `internal/storage/url.go` - parse `rclone://remote/path` URLs
4. [x] Update `internal/storage/module.go` - add rclone case to `storerFromURL()`
5. [x] Test with real rclone remote

**Error Mapping:**

- `fs.ErrorObjectNotFound` → `ErrNotFound`
- `fs.ErrorDirNotFound` → `ErrNotFound`
- `fs.ErrorNotFoundInConfigFile` → `ErrRemoteNotFound` (new)

---

## CLI Polish (Priority)
1. Improve error messages throughout
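
Step 3 of the rclone checklist above is small enough to sketch. This is not the commit's actual `internal/storage/url.go` code, just one plausible shape for it; the `parseRcloneURL` name is hypothetical, and the `storerFromURL` dispatch it would feed is the one named in step 4.

```go
package example

import (
	"fmt"
	"strings"
)

// parseRcloneURL splits "rclone://remote/path/to/dir" into the rclone remote
// name and the path inside it, e.g. ("myremote", "path/to/backups").
// The remote must already exist in the user's rclone config; vaultik would
// then hand "remote:path" to the rclone library (fs.NewFs) as described above.
func parseRcloneURL(raw string) (remote, path string, err error) {
	const scheme = "rclone://"
	if !strings.HasPrefix(raw, scheme) {
		return "", "", fmt.Errorf("not an rclone URL: %q", raw)
	}
	rest := strings.TrimPrefix(raw, scheme)
	parts := strings.SplitN(rest, "/", 2)
	if parts[0] == "" {
		return "", "", fmt.Errorf("missing remote name in %q", raw)
	}
	remote = parts[0]
	if len(parts) == 2 {
		path = parts[1]
	}
	return remote, path, nil
}
```

With the example config later in this commit, `rclone://las1stor1//srv/pool.2024.04/backups/heraklion` would yield remote `las1stor1` and path `/srv/pool.2024.04/backups/heraklion`, the doubled slash presumably marking an absolute path on that remote.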


@ -11,9 +11,84 @@ age_recipients:
# Named snapshots - each snapshot can contain multiple paths
# Each snapshot gets its own ID and can have snapshot-specific excludes
snapshots:
testing:
paths:
- ~/dev/vaultik
apps:
paths:
- /Applications
exclude:
- "/App Store.app"
- "/Apps.app"
- "/Automator.app"
- "/Books.app"
- "/Calculator.app"
- "/Calendar.app"
- "/Chess.app"
- "/Clock.app"
- "/Contacts.app"
- "/Dictionary.app"
- "/FaceTime.app"
- "/FindMy.app"
- "/Font Book.app"
- "/Freeform.app"
- "/Games.app"
- "/GarageBand.app"
- "/Home.app"
- "/Image Capture.app"
- "/Image Playground.app"
- "/Journal.app"
- "/Keynote.app"
- "/Mail.app"
- "/Maps.app"
- "/Messages.app"
- "/Mission Control.app"
- "/Music.app"
- "/News.app"
- "/Notes.app"
- "/Numbers.app"
- "/Pages.app"
- "/Passwords.app"
- "/Phone.app"
- "/Photo Booth.app"
- "/Photos.app"
- "/Podcasts.app"
- "/Preview.app"
- "/QuickTime Player.app"
- "/Reminders.app"
- "/Safari.app"
- "/Shortcuts.app"
- "/Siri.app"
- "/Stickies.app"
- "/Stocks.app"
- "/System Settings.app"
- "/TV.app"
- "/TextEdit.app"
- "/Time Machine.app"
- "/Tips.app"
- "/Utilities/Activity Monitor.app"
- "/Utilities/AirPort Utility.app"
- "/Utilities/Audio MIDI Setup.app"
- "/Utilities/Bluetooth File Exchange.app"
- "/Utilities/Boot Camp Assistant.app"
- "/Utilities/ColorSync Utility.app"
- "/Utilities/Console.app"
- "/Utilities/Digital Color Meter.app"
- "/Utilities/Disk Utility.app"
- "/Utilities/Grapher.app"
- "/Utilities/Magnifier.app"
- "/Utilities/Migration Assistant.app"
- "/Utilities/Print Center.app"
- "/Utilities/Screen Sharing.app"
- "/Utilities/Screenshot.app"
- "/Utilities/Script Editor.app"
- "/Utilities/System Information.app"
- "/Utilities/Terminal.app"
- "/Utilities/VoiceOver Utility.app"
- "/VoiceMemos.app"
- "/Weather.app"
- "/iMovie.app"
- "/iPhone Mirroring.app"
home:
paths:
- "~"
@ -180,37 +255,41 @@ snapshots:
exclude:
- "*.tmp"
# Storage URL - use either this OR the s3 section below
# Supports: s3://bucket/prefix, file:///path, rclone://remote/path
storage_url: "rclone://las1stor1//srv/pool.2024.04/backups/heraklion"
# S3-compatible storage configuration
-s3:
+#s3:
-  # S3-compatible endpoint URL
+#  # S3-compatible endpoint URL
-  # Examples: https://s3.amazonaws.com, https://storage.googleapis.com
+#  # Examples: https://s3.amazonaws.com, https://storage.googleapis.com
-  endpoint: http://10.100.205.122:8333
+#  endpoint: http://10.100.205.122:8333
+#
-  # Bucket name where backups will be stored
+#  # Bucket name where backups will be stored
-  bucket: testbucket
+#  bucket: testbucket
+#
-  # Prefix (folder) within the bucket for this host's backups
+#  # Prefix (folder) within the bucket for this host's backups
-  # Useful for organizing backups from multiple hosts
+#  # Useful for organizing backups from multiple hosts
-  # Default: empty (root of bucket)
+#  # Default: empty (root of bucket)
-  #prefix: "hosts/myserver/"
+#  #prefix: "hosts/myserver/"
+#
-  # S3 access credentials
+#  # S3 access credentials
-  access_key_id: Z9GT22M9YFU08WRMC5D4
+#  access_key_id: Z9GT22M9YFU08WRMC5D4
-  secret_access_key: Pi0tPKjFbN4rZlRhcA4zBtEkib04yy2WcIzI+AXk
+#  secret_access_key: Pi0tPKjFbN4rZlRhcA4zBtEkib04yy2WcIzI+AXk
+#
-  # S3 region
+#  # S3 region
-  # Default: us-east-1
+#  # Default: us-east-1
-  #region: us-east-1
+#  #region: us-east-1
+#
-  # Use SSL/TLS for S3 connections
+#  # Use SSL/TLS for S3 connections
-  # Default: true
+#  # Default: true
-  #use_ssl: true
+#  #use_ssl: true
+#
-  # Part size for multipart uploads
+#  # Part size for multipart uploads
-  # Minimum 5MB, affects memory usage during upload
+#  # Minimum 5MB, affects memory usage during upload
-  # Supports: 5MB, 10M, 100MiB, etc.
+#  # Supports: 5MB, 10M, 100MiB, etc.
-  # Default: 5MB
+#  # Default: 5MB
-  #part_size: 5MB
+#  #part_size: 5MB
# How often to run backups in daemon mode
# Format: 1h, 30m, 24h, etc
@ -248,7 +327,6 @@ s3:
# Higher = better compression but slower
# Default: 3
compression_level: 5
# Hostname to use in backup metadata
# Default: system hostname
#hostname: myserver

go.mod

@ -6,78 +6,141 @@ require (
	filippo.io/age v1.2.1
	git.eeqj.de/sneak/smartconfig v1.0.0
	github.com/adrg/xdg v0.5.3
-	github.com/aws/aws-sdk-go-v2 v1.36.6
+	github.com/aws/aws-sdk-go-v2 v1.39.6
-	github.com/aws/aws-sdk-go-v2/config v1.29.18
+	github.com/aws/aws-sdk-go-v2/config v1.31.17
-	github.com/aws/aws-sdk-go-v2/credentials v1.17.71
+	github.com/aws/aws-sdk-go-v2/credentials v1.18.21
-	github.com/aws/aws-sdk-go-v2/feature/s3/manager v1.17.85
+	github.com/aws/aws-sdk-go-v2/feature/s3/manager v1.20.4
-	github.com/aws/aws-sdk-go-v2/service/s3 v1.84.1
+	github.com/aws/aws-sdk-go-v2/service/s3 v1.90.0
-	github.com/aws/smithy-go v1.22.4
+	github.com/aws/smithy-go v1.23.2
	github.com/dustin/go-humanize v1.0.1
	github.com/gobwas/glob v0.2.3
	github.com/google/uuid v1.6.0
	github.com/johannesboyne/gofakes3 v0.0.0-20250603205740-ed9094be7668
-	github.com/klauspost/compress v1.18.0
+	github.com/klauspost/compress v1.18.1
	github.com/mattn/go-sqlite3 v1.14.29
+	github.com/rclone/rclone v1.72.1
	github.com/schollz/progressbar/v3 v3.19.0
-	github.com/spf13/afero v1.14.0
+	github.com/spf13/afero v1.15.0
-	github.com/spf13/cobra v1.9.1
+	github.com/spf13/cobra v1.10.1
-	github.com/stretchr/testify v1.10.0
+	github.com/stretchr/testify v1.11.1
	go.uber.org/fx v1.24.0
-	golang.org/x/term v0.33.0
+	golang.org/x/term v0.37.0
	gopkg.in/yaml.v3 v3.0.1
	modernc.org/sqlite v1.38.0
)
require ( require (
cloud.google.com/go/auth v0.16.2 // indirect cloud.google.com/go/auth v0.17.0 // indirect
cloud.google.com/go/auth/oauth2adapt v0.2.8 // indirect cloud.google.com/go/auth/oauth2adapt v0.2.8 // indirect
cloud.google.com/go/compute/metadata v0.7.0 // indirect cloud.google.com/go/compute/metadata v0.9.0 // indirect
cloud.google.com/go/iam v1.5.2 // indirect cloud.google.com/go/iam v1.5.2 // indirect
cloud.google.com/go/secretmanager v1.15.0 // indirect cloud.google.com/go/secretmanager v1.15.0 // indirect
github.com/Azure/azure-sdk-for-go/sdk/azcore v1.18.0 // indirect github.com/Azure/azure-sdk-for-go/sdk/azcore v1.20.0 // indirect
github.com/Azure/azure-sdk-for-go/sdk/azidentity v1.10.1 // indirect github.com/Azure/azure-sdk-for-go/sdk/azidentity v1.13.0 // indirect
github.com/Azure/azure-sdk-for-go/sdk/internal v1.11.1 // indirect github.com/Azure/azure-sdk-for-go/sdk/internal v1.11.2 // indirect
github.com/Azure/azure-sdk-for-go/sdk/keyvault/azsecrets v0.12.0 // indirect github.com/Azure/azure-sdk-for-go/sdk/keyvault/azsecrets v0.12.0 // indirect
github.com/Azure/azure-sdk-for-go/sdk/keyvault/internal v0.7.1 // indirect github.com/Azure/azure-sdk-for-go/sdk/keyvault/internal v0.7.1 // indirect
github.com/AzureAD/microsoft-authentication-library-for-go v1.4.2 // indirect github.com/Azure/azure-sdk-for-go/sdk/storage/azblob v1.6.3 // indirect
github.com/Azure/azure-sdk-for-go/sdk/storage/azfile v1.5.3 // indirect
github.com/Azure/go-ntlmssp v0.0.2-0.20251110135918-10b7b7e7cd26 // indirect
github.com/AzureAD/microsoft-authentication-library-for-go v1.6.0 // indirect
github.com/Files-com/files-sdk-go/v3 v3.2.264 // indirect
github.com/IBM/go-sdk-core/v5 v5.21.0 // indirect
github.com/Max-Sum/base32768 v0.0.0-20230304063302-18e6ce5945fd // indirect
github.com/Microsoft/go-winio v0.6.2 // indirect
github.com/ProtonMail/bcrypt v0.0.0-20211005172633-e235017c1baf // indirect
github.com/ProtonMail/gluon v0.17.1-0.20230724134000-308be39be96e // indirect
github.com/ProtonMail/go-crypto v1.3.0 // indirect
github.com/ProtonMail/go-mime v0.0.0-20230322103455-7d82a3887f2f // indirect
github.com/ProtonMail/go-srp v0.0.7 // indirect
github.com/ProtonMail/gopenpgp/v2 v2.9.0 // indirect
github.com/PuerkitoBio/goquery v1.10.3 // indirect
github.com/a1ex3/zstd-seekable-format-go/pkg v0.10.0 // indirect
github.com/abbot/go-http-auth v0.4.0 // indirect
github.com/anchore/go-lzo v0.1.0 // indirect
github.com/andybalholm/cascadia v1.3.3 // indirect
github.com/appscode/go-querystring v0.0.0-20170504095604-0126cfb3f1dc // indirect
github.com/armon/go-metrics v0.4.1 // indirect github.com/armon/go-metrics v0.4.1 // indirect
github.com/aws/aws-sdk-go v1.44.256 // indirect github.com/aws/aws-sdk-go v1.44.256 // indirect
github.com/aws/aws-sdk-go-v2/aws/protocol/eventstream v1.6.11 // indirect github.com/aws/aws-sdk-go-v2/aws/protocol/eventstream v1.7.3 // indirect
github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.16.33 // indirect github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.18.13 // indirect
github.com/aws/aws-sdk-go-v2/internal/configsources v1.3.37 // indirect github.com/aws/aws-sdk-go-v2/internal/configsources v1.4.13 // indirect
github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.6.37 // indirect github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.7.13 // indirect
github.com/aws/aws-sdk-go-v2/internal/ini v1.8.3 // indirect github.com/aws/aws-sdk-go-v2/internal/ini v1.8.4 // indirect
github.com/aws/aws-sdk-go-v2/internal/v4a v1.3.37 // indirect github.com/aws/aws-sdk-go-v2/internal/v4a v1.4.13 // indirect
github.com/aws/aws-sdk-go-v2/service/internal/accept-encoding v1.12.4 // indirect github.com/aws/aws-sdk-go-v2/service/internal/accept-encoding v1.13.3 // indirect
github.com/aws/aws-sdk-go-v2/service/internal/checksum v1.7.5 // indirect github.com/aws/aws-sdk-go-v2/service/internal/checksum v1.9.4 // indirect
github.com/aws/aws-sdk-go-v2/service/internal/presigned-url v1.12.18 // indirect github.com/aws/aws-sdk-go-v2/service/internal/presigned-url v1.13.13 // indirect
github.com/aws/aws-sdk-go-v2/service/internal/s3shared v1.18.18 // indirect github.com/aws/aws-sdk-go-v2/service/internal/s3shared v1.19.13 // indirect
github.com/aws/aws-sdk-go-v2/service/secretsmanager v1.35.8 // indirect github.com/aws/aws-sdk-go-v2/service/secretsmanager v1.35.8 // indirect
github.com/aws/aws-sdk-go-v2/service/sso v1.25.6 // indirect github.com/aws/aws-sdk-go-v2/service/sso v1.30.1 // indirect
github.com/aws/aws-sdk-go-v2/service/ssooidc v1.30.4 // indirect github.com/aws/aws-sdk-go-v2/service/ssooidc v1.35.5 // indirect
github.com/aws/aws-sdk-go-v2/service/sts v1.34.1 // indirect github.com/aws/aws-sdk-go-v2/service/sts v1.39.1 // indirect
github.com/bahlo/generic-list-go v0.2.0 // indirect
github.com/beorn7/perks v1.0.1 // indirect
github.com/boombuler/barcode v1.1.0 // indirect
github.com/bradenaw/juniper v0.15.3 // indirect
github.com/bradfitz/iter v0.0.0-20191230175014-e8f45d346db8 // indirect
github.com/buengese/sgzip v0.1.1 // indirect
github.com/buger/jsonparser v1.1.1 // indirect
github.com/calebcase/tmpfile v1.0.3 // indirect
github.com/cenkalti/backoff/v4 v4.3.0 // indirect github.com/cenkalti/backoff/v4 v4.3.0 // indirect
github.com/cespare/xxhash/v2 v2.3.0 // indirect
github.com/chilts/sid v0.0.0-20190607042430-660e94789ec9 // indirect
github.com/clipperhouse/stringish v0.1.1 // indirect
github.com/clipperhouse/uax29/v2 v2.3.0 // indirect
github.com/cloudflare/circl v1.6.1 // indirect
github.com/cloudinary/cloudinary-go/v2 v2.13.0 // indirect
github.com/cloudsoda/go-smb2 v0.0.0-20250228001242-d4c70e6251cc // indirect
github.com/cloudsoda/sddl v0.0.0-20250224235906-926454e91efc // indirect
github.com/colinmarc/hdfs/v2 v2.4.0 // indirect
github.com/coreos/go-semver v0.3.1 // indirect github.com/coreos/go-semver v0.3.1 // indirect
github.com/coreos/go-systemd/v22 v22.5.0 // indirect github.com/coreos/go-systemd/v22 v22.6.0 // indirect
github.com/creasty/defaults v1.8.0 // indirect
github.com/cronokirby/saferith v0.33.0 // indirect
github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc // indirect github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc // indirect
github.com/diskfs/go-diskfs v1.7.0 // indirect
github.com/dropbox/dropbox-sdk-go-unofficial/v6 v6.0.5 // indirect
github.com/ebitengine/purego v0.9.1 // indirect
github.com/emersion/go-message v0.18.2 // indirect
github.com/emersion/go-vcard v0.0.0-20241024213814-c9703dde27ff // indirect
github.com/emicklei/go-restful/v3 v3.11.0 // indirect github.com/emicklei/go-restful/v3 v3.11.0 // indirect
github.com/fatih/color v1.16.0 // indirect github.com/fatih/color v1.16.0 // indirect
github.com/felixge/httpsnoop v1.0.4 // indirect github.com/felixge/httpsnoop v1.0.4 // indirect
github.com/flynn/noise v1.1.0 // indirect
github.com/fxamacker/cbor/v2 v2.7.0 // indirect github.com/fxamacker/cbor/v2 v2.7.0 // indirect
github.com/go-jose/go-jose/v4 v4.0.5 // indirect github.com/gabriel-vasile/mimetype v1.4.11 // indirect
github.com/go-logr/logr v1.4.2 // indirect github.com/geoffgarside/ber v1.2.0 // indirect
github.com/go-chi/chi/v5 v5.2.3 // indirect
github.com/go-darwin/apfs v0.0.0-20211011131704-f84b94dbf348 // indirect
github.com/go-git/go-billy/v5 v5.6.2 // indirect
github.com/go-jose/go-jose/v4 v4.1.2 // indirect
github.com/go-logr/logr v1.4.3 // indirect
github.com/go-logr/stdr v1.2.2 // indirect github.com/go-logr/stdr v1.2.2 // indirect
github.com/go-ole/go-ole v1.3.0 // indirect
github.com/go-openapi/errors v0.22.4 // indirect
github.com/go-openapi/jsonpointer v0.21.0 // indirect github.com/go-openapi/jsonpointer v0.21.0 // indirect
github.com/go-openapi/jsonreference v0.20.2 // indirect github.com/go-openapi/jsonreference v0.20.2 // indirect
github.com/go-openapi/strfmt v0.25.0 // indirect
github.com/go-openapi/swag v0.23.0 // indirect github.com/go-openapi/swag v0.23.0 // indirect
github.com/go-playground/locales v0.14.1 // indirect
github.com/go-playground/universal-translator v0.18.1 // indirect
github.com/go-playground/validator/v10 v10.28.0 // indirect
github.com/go-resty/resty/v2 v2.16.5 // indirect
github.com/go-viper/mapstructure/v2 v2.4.0 // indirect
github.com/gofrs/flock v0.13.0 // indirect
github.com/gogo/protobuf v1.3.2 // indirect github.com/gogo/protobuf v1.3.2 // indirect
github.com/golang-jwt/jwt/v5 v5.2.2 // indirect github.com/golang-jwt/jwt/v4 v4.5.2 // indirect
github.com/golang-jwt/jwt/v5 v5.3.0 // indirect
github.com/golang/protobuf v1.5.4 // indirect github.com/golang/protobuf v1.5.4 // indirect
github.com/google/btree v1.1.3 // indirect
github.com/google/gnostic-models v0.6.9 // indirect github.com/google/gnostic-models v0.6.9 // indirect
github.com/google/go-cmp v0.7.0 // indirect github.com/google/go-cmp v0.7.0 // indirect
github.com/google/s2a-go v0.1.9 // indirect github.com/google/s2a-go v0.1.9 // indirect
github.com/googleapis/enterprise-certificate-proxy v0.3.6 // indirect github.com/googleapis/enterprise-certificate-proxy v0.3.7 // indirect
github.com/googleapis/gax-go/v2 v2.14.2 // indirect github.com/googleapis/gax-go/v2 v2.15.0 // indirect
github.com/gopherjs/gopherjs v1.17.2 // indirect
github.com/gorilla/schema v1.4.1 // indirect
github.com/grpc-ecosystem/grpc-gateway/v2 v2.26.3 // indirect github.com/grpc-ecosystem/grpc-gateway/v2 v2.26.3 // indirect
github.com/hashicorp/consul/api v1.32.1 // indirect github.com/hashicorp/consul/api v1.32.1 // indirect
github.com/hashicorp/errwrap v1.1.0 // indirect github.com/hashicorp/errwrap v1.1.0 // indirect
@ -85,22 +148,44 @@ require (
github.com/hashicorp/go-hclog v1.6.3 // indirect github.com/hashicorp/go-hclog v1.6.3 // indirect
github.com/hashicorp/go-immutable-radix v1.3.1 // indirect github.com/hashicorp/go-immutable-radix v1.3.1 // indirect
github.com/hashicorp/go-multierror v1.1.1 // indirect github.com/hashicorp/go-multierror v1.1.1 // indirect
github.com/hashicorp/go-retryablehttp v0.7.7 // indirect github.com/hashicorp/go-retryablehttp v0.7.8 // indirect
github.com/hashicorp/go-rootcerts v1.0.2 // indirect github.com/hashicorp/go-rootcerts v1.0.2 // indirect
github.com/hashicorp/go-secure-stdlib/parseutil v0.1.6 // indirect github.com/hashicorp/go-secure-stdlib/parseutil v0.1.6 // indirect
github.com/hashicorp/go-secure-stdlib/strutil v0.1.2 // indirect github.com/hashicorp/go-secure-stdlib/strutil v0.1.2 // indirect
github.com/hashicorp/go-sockaddr v1.0.2 // indirect github.com/hashicorp/go-sockaddr v1.0.2 // indirect
github.com/hashicorp/go-uuid v1.0.3 // indirect
github.com/hashicorp/golang-lru v0.5.4 // indirect github.com/hashicorp/golang-lru v0.5.4 // indirect
github.com/hashicorp/hcl v1.0.1-vault-7 // indirect github.com/hashicorp/hcl v1.0.1-vault-7 // indirect
github.com/hashicorp/serf v0.10.1 // indirect github.com/hashicorp/serf v0.10.1 // indirect
github.com/hashicorp/vault/api v1.20.0 // indirect github.com/hashicorp/vault/api v1.20.0 // indirect
github.com/henrybear327/Proton-API-Bridge v1.0.0 // indirect
github.com/henrybear327/go-proton-api v1.0.0 // indirect
github.com/inconshreveable/mousetrap v1.1.0 // indirect github.com/inconshreveable/mousetrap v1.1.0 // indirect
github.com/jcmturner/aescts/v2 v2.0.0 // indirect
github.com/jcmturner/dnsutils/v2 v2.0.0 // indirect
github.com/jcmturner/gofork v1.7.6 // indirect
github.com/jcmturner/goidentity/v6 v6.0.1 // indirect
github.com/jcmturner/gokrb5/v8 v8.4.4 // indirect
github.com/jcmturner/rpc/v2 v2.0.3 // indirect
github.com/jlaffaye/ftp v0.2.1-0.20240918233326-1b970516f5d3 // indirect
github.com/josharian/intern v1.0.0 // indirect github.com/josharian/intern v1.0.0 // indirect
github.com/json-iterator/go v1.1.12 // indirect github.com/json-iterator/go v1.1.12 // indirect
github.com/jtolds/gls v4.20.0+incompatible // indirect
github.com/jtolio/noiseconn v0.0.0-20231127013910-f6d9ecbf1de7 // indirect
github.com/jzelinskie/whirlpool v0.0.0-20201016144138-0675e54bb004 // indirect
github.com/klauspost/cpuid/v2 v2.3.0 // indirect
github.com/koofr/go-httpclient v0.0.0-20240520111329-e20f8f203988 // indirect
github.com/koofr/go-koofrclient v0.0.0-20221207135200-cbd7fc9ad6a6 // indirect
github.com/kr/fs v0.1.0 // indirect
github.com/kylelemons/godebug v1.1.0 // indirect github.com/kylelemons/godebug v1.1.0 // indirect
github.com/mailru/easyjson v0.7.7 // indirect github.com/lanrat/extsort v1.4.2 // indirect
github.com/mattn/go-colorable v0.1.13 // indirect github.com/leodido/go-urn v1.4.0 // indirect
github.com/lpar/date v1.0.0 // indirect
github.com/lufia/plan9stats v0.0.0-20251013123823-9fd1530e3ec3 // indirect
github.com/mailru/easyjson v0.9.1 // indirect
github.com/mattn/go-colorable v0.1.14 // indirect
github.com/mattn/go-isatty v0.0.20 // indirect github.com/mattn/go-isatty v0.0.20 // indirect
github.com/mattn/go-runewidth v0.0.19 // indirect
github.com/mitchellh/colorstring v0.0.0-20190213212951-d06e56a500db // indirect github.com/mitchellh/colorstring v0.0.0-20190213212951-d06e56a500db // indirect
github.com/mitchellh/go-homedir v1.1.0 // indirect github.com/mitchellh/go-homedir v1.1.0 // indirect
github.com/mitchellh/mapstructure v1.5.0 // indirect github.com/mitchellh/mapstructure v1.5.0 // indirect
@ -108,48 +193,95 @@ require (
github.com/modern-go/reflect2 v1.0.2 // indirect github.com/modern-go/reflect2 v1.0.2 // indirect
github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect
github.com/ncruces/go-strftime v0.1.9 // indirect github.com/ncruces/go-strftime v0.1.9 // indirect
github.com/ncw/swift/v2 v2.0.5 // indirect
github.com/oklog/ulid v1.3.1 // indirect
github.com/onsi/ginkgo/v2 v2.23.3 // indirect
github.com/oracle/oci-go-sdk/v65 v65.104.0 // indirect
github.com/panjf2000/ants/v2 v2.11.3 // indirect
github.com/patrickmn/go-cache v2.1.0+incompatible // indirect
github.com/pengsrc/go-shared v0.2.1-0.20190131101655-1999055a4a14 // indirect
github.com/peterh/liner v1.2.2 // indirect
github.com/pierrec/lz4/v4 v4.1.22 // indirect
github.com/pkg/browser v0.0.0-20240102092130-5ac0b6a4141c // indirect github.com/pkg/browser v0.0.0-20240102092130-5ac0b6a4141c // indirect
github.com/pkg/errors v0.9.1 // indirect github.com/pkg/errors v0.9.1 // indirect
github.com/pkg/sftp v1.13.10 // indirect
github.com/pkg/xattr v0.4.12 // indirect
github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 // indirect github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 // indirect
github.com/power-devops/perfstat v0.0.0-20240221224432-82ca36839d55 // indirect
github.com/pquerna/otp v1.5.0 // indirect
github.com/prometheus/client_golang v1.23.2 // indirect
github.com/prometheus/client_model v0.6.2 // indirect
github.com/prometheus/common v0.67.2 // indirect
github.com/prometheus/procfs v0.19.2 // indirect
github.com/putdotio/go-putio/putio v0.0.0-20200123120452-16d982cac2b8 // indirect
github.com/relvacode/iso8601 v1.7.0 // indirect
github.com/remyoudompheng/bigfft v0.0.0-20230129092748-24d4a6f8daec // indirect github.com/remyoudompheng/bigfft v0.0.0-20230129092748-24d4a6f8daec // indirect
github.com/rfjakob/eme v1.1.2 // indirect
github.com/rivo/uniseg v0.4.7 // indirect github.com/rivo/uniseg v0.4.7 // indirect
github.com/ryanuber/go-glob v1.0.0 // indirect github.com/ryanuber/go-glob v1.0.0 // indirect
github.com/ryszard/goskiplist v0.0.0-20150312221310-2dfbae5fcf46 // indirect github.com/ryszard/goskiplist v0.0.0-20150312221310-2dfbae5fcf46 // indirect
github.com/spf13/pflag v1.0.6 // indirect github.com/sabhiram/go-gitignore v0.0.0-20210923224102-525f6e181f06 // indirect
github.com/samber/lo v1.52.0 // indirect
github.com/shirou/gopsutil/v4 v4.25.10 // indirect
github.com/sirupsen/logrus v1.9.4-0.20230606125235-dd1b4c2e81af // indirect
github.com/skratchdot/open-golang v0.0.0-20200116055534-eef842397966 // indirect
github.com/smarty/assertions v1.16.0 // indirect
github.com/sony/gobreaker v1.0.0 // indirect
github.com/spacemonkeygo/monkit/v3 v3.0.25-0.20251022131615-eb24eb109368 // indirect
github.com/spf13/pflag v1.0.10 // indirect
github.com/t3rm1n4l/go-mega v0.0.0-20251031123324-a804aaa87491 // indirect
github.com/tidwall/gjson v1.18.0 // indirect github.com/tidwall/gjson v1.18.0 // indirect
github.com/tidwall/match v1.1.1 // indirect github.com/tidwall/match v1.1.1 // indirect
github.com/tidwall/pretty v1.2.0 // indirect github.com/tidwall/pretty v1.2.0 // indirect
github.com/tklauser/go-sysconf v0.3.15 // indirect
github.com/tklauser/numcpus v0.10.0 // indirect
github.com/ulikunitz/xz v0.5.15 // indirect
github.com/unknwon/goconfig v1.0.0 // indirect
github.com/wk8/go-ordered-map/v2 v2.1.8 // indirect
github.com/x448/float16 v0.8.4 // indirect github.com/x448/float16 v0.8.4 // indirect
github.com/xanzy/ssh-agent v0.3.3 // indirect
github.com/youmark/pkcs8 v0.0.0-20240726163527-a2c0da244d78 // indirect
github.com/yunify/qingstor-sdk-go/v3 v3.2.0 // indirect
github.com/yusufpapurcu/wmi v1.2.4 // indirect
github.com/zeebo/blake3 v0.2.4 // indirect
github.com/zeebo/errs v1.4.0 // indirect
github.com/zeebo/xxh3 v1.0.2 // indirect
go.etcd.io/bbolt v1.4.3 // indirect
go.etcd.io/etcd/api/v3 v3.6.2 // indirect go.etcd.io/etcd/api/v3 v3.6.2 // indirect
go.etcd.io/etcd/client/pkg/v3 v3.6.2 // indirect go.etcd.io/etcd/client/pkg/v3 v3.6.2 // indirect
go.etcd.io/etcd/client/v3 v3.6.2 // indirect go.etcd.io/etcd/client/v3 v3.6.2 // indirect
go.opentelemetry.io/auto/sdk v1.1.0 // indirect go.mongodb.org/mongo-driver v1.17.6 // indirect
go.opentelemetry.io/auto/sdk v1.2.1 // indirect
go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.61.0 // indirect go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.61.0 // indirect
go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.61.0 // indirect go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.63.0 // indirect
go.opentelemetry.io/otel v1.36.0 // indirect go.opentelemetry.io/otel v1.38.0 // indirect
go.opentelemetry.io/otel/metric v1.36.0 // indirect go.opentelemetry.io/otel/metric v1.38.0 // indirect
go.opentelemetry.io/otel/trace v1.36.0 // indirect go.opentelemetry.io/otel/trace v1.38.0 // indirect
go.shabbyrobe.org/gocovmerge v0.0.0-20230507111327-fa4f82cfbf4d // indirect go.shabbyrobe.org/gocovmerge v0.0.0-20230507111327-fa4f82cfbf4d // indirect
go.uber.org/dig v1.19.0 // indirect go.uber.org/dig v1.19.0 // indirect
go.uber.org/multierr v1.11.0 // indirect go.uber.org/multierr v1.11.0 // indirect
go.uber.org/zap v1.27.0 // indirect go.uber.org/zap v1.27.0 // indirect
golang.org/x/crypto v0.39.0 // indirect go.yaml.in/yaml/v2 v2.4.3 // indirect
golang.org/x/exp v0.0.0-20250408133849-7e4ce0ab07d0 // indirect golang.org/x/crypto v0.45.0 // indirect
golang.org/x/net v0.41.0 // indirect golang.org/x/exp v0.0.0-20251023183803-a4bb9ffd2546 // indirect
golang.org/x/oauth2 v0.30.0 // indirect golang.org/x/net v0.47.0 // indirect
golang.org/x/sync v0.15.0 // indirect golang.org/x/oauth2 v0.33.0 // indirect
golang.org/x/sys v0.34.0 // indirect golang.org/x/sync v0.18.0 // indirect
golang.org/x/text v0.26.0 // indirect golang.org/x/sys v0.38.0 // indirect
golang.org/x/time v0.12.0 // indirect golang.org/x/text v0.31.0 // indirect
golang.org/x/tools v0.33.0 // indirect golang.org/x/time v0.14.0 // indirect
google.golang.org/api v0.237.0 // indirect golang.org/x/tools v0.38.0 // indirect
google.golang.org/genproto v0.0.0-20250505200425-f936aa4a68b2 // indirect google.golang.org/api v0.255.0 // indirect
google.golang.org/genproto/googleapis/api v0.0.0-20250603155806-513f23925822 // indirect google.golang.org/genproto v0.0.0-20250603155806-513f23925822 // indirect
google.golang.org/genproto/googleapis/rpc v0.0.0-20250603155806-513f23925822 // indirect google.golang.org/genproto/googleapis/api v0.0.0-20250804133106-a7a43d27e69b // indirect
google.golang.org/grpc v1.73.0 // indirect google.golang.org/genproto/googleapis/rpc v0.0.0-20251103181224-f26f9409b101 // indirect
google.golang.org/protobuf v1.36.6 // indirect google.golang.org/grpc v1.76.0 // indirect
google.golang.org/protobuf v1.36.10 // indirect
gopkg.in/evanphx/json-patch.v4 v4.12.0 // indirect gopkg.in/evanphx/json-patch.v4 v4.12.0 // indirect
gopkg.in/inf.v0 v0.9.1 // indirect gopkg.in/inf.v0 v0.9.1 // indirect
gopkg.in/natefinch/lumberjack.v2 v2.2.1 // indirect
gopkg.in/validator.v2 v2.0.1 // indirect
gopkg.in/yaml.v2 v2.4.0 // indirect
k8s.io/api v0.33.3 // indirect k8s.io/api v0.33.3 // indirect
k8s.io/apimachinery v0.33.3 // indirect k8s.io/apimachinery v0.33.3 // indirect
k8s.io/client-go v0.33.3 // indirect k8s.io/client-go v0.33.3 // indirect
@ -159,8 +291,15 @@ require (
modernc.org/libc v1.65.10 // indirect modernc.org/libc v1.65.10 // indirect
modernc.org/mathutil v1.7.1 // indirect modernc.org/mathutil v1.7.1 // indirect
modernc.org/memory v1.11.0 // indirect modernc.org/memory v1.11.0 // indirect
moul.io/http2curl/v2 v2.3.0 // indirect
sigs.k8s.io/json v0.0.0-20241010143419-9aa6b5e7a4b3 // indirect sigs.k8s.io/json v0.0.0-20241010143419-9aa6b5e7a4b3 // indirect
sigs.k8s.io/randfill v1.0.0 // indirect sigs.k8s.io/randfill v1.0.0 // indirect
sigs.k8s.io/structured-merge-diff/v4 v4.6.0 // indirect sigs.k8s.io/structured-merge-diff/v4 v4.6.0 // indirect
sigs.k8s.io/yaml v1.4.0 // indirect sigs.k8s.io/yaml v1.6.0 // indirect
storj.io/common v0.0.0-20251107171817-6221ae45072c // indirect
storj.io/drpc v0.0.35-0.20250513201419-f7819ea69b55 // indirect
storj.io/eventkit v0.0.0-20250410172343-61f26d3de156 // indirect
storj.io/infectious v0.0.2 // indirect
storj.io/picobuf v0.0.4 // indirect
storj.io/uplink v1.13.1 // indirect
) )

go.sum

File diff suppressed because it is too large.


@ -5,30 +5,33 @@ import (
 	"fmt"
 	"hash"
 	"io"
+	"runtime"

 	"filippo.io/age"
 	"github.com/klauspost/compress/zstd"
 )

-// Writer wraps compression and encryption with SHA256 hashing
+// Writer wraps compression and encryption with SHA256 hashing.
+// Data flows: input -> tee(hasher, compressor -> encryptor -> destination)
+// The hash is computed on the uncompressed input for deterministic content-addressing.
 type Writer struct {
-	writer           io.Writer      // Final destination
+	teeWriter        io.Writer      // Tee to hasher and compressor
 	compressor       *zstd.Encoder  // Compression layer
 	encryptor        io.WriteCloser // Encryption layer
-	hasher           hash.Hash      // SHA256 hasher
-	teeWriter        io.Writer      // Tees data to hasher
+	hasher           hash.Hash      // SHA256 hasher (on uncompressed input)
 	compressionLevel int
 	bytesWritten     int64
 }

-// NewWriter creates a new Writer that compresses, encrypts, and hashes data
+// NewWriter creates a new Writer that compresses, encrypts, and hashes data.
+// The hash is computed on the uncompressed input for deterministic content-addressing.
 func NewWriter(w io.Writer, compressionLevel int, recipients []string) (*Writer, error) {
 	// Validate compression level
 	if err := validateCompressionLevel(compressionLevel); err != nil {
 		return nil, err
 	}

-	// Create SHA256 hasher
+	// Create SHA256 hasher for the uncompressed input
 	hasher := sha256.New()

 	// Parse recipients
@ -41,31 +44,36 @@ func NewWriter(w io.Writer, compressionLevel int, recipients []string) (*Writer,
 		ageRecipients = append(ageRecipients, r)
 	}

-	// Create encryption writer
+	// Create encryption writer that outputs to destination
 	encWriter, err := age.Encrypt(w, ageRecipients...)
 	if err != nil {
 		return nil, fmt.Errorf("creating encryption writer: %w", err)
 	}

+	// Calculate compression concurrency: CPUs - 2, minimum 1
+	concurrency := runtime.NumCPU() - 2
+	if concurrency < 1 {
+		concurrency = 1
+	}
+
 	// Create compression writer with encryption as destination
 	compressor, err := zstd.NewWriter(encWriter,
 		zstd.WithEncoderLevel(zstd.EncoderLevelFromZstd(compressionLevel)),
-		zstd.WithEncoderConcurrency(1), // Use single thread for streaming
+		zstd.WithEncoderConcurrency(concurrency),
 	)
 	if err != nil {
 		_ = encWriter.Close()
 		return nil, fmt.Errorf("creating compression writer: %w", err)
 	}

-	// Create tee writer that writes to both compressor and hasher
-	teeWriter := io.MultiWriter(compressor, hasher)
+	// Create tee writer: input goes to both hasher and compressor
+	teeWriter := io.MultiWriter(hasher, compressor)

 	return &Writer{
-		writer:           w,
+		teeWriter:        teeWriter,
 		compressor:       compressor,
 		encryptor:        encWriter,
 		hasher:           hasher,
-		teeWriter:        teeWriter,
 		compressionLevel: compressionLevel,
 	}, nil
 }
@ -92,9 +100,16 @@ func (w *Writer) Close() error {
 	return nil
 }

-// Sum256 returns the SHA256 hash of all data written
+// Sum256 returns the double SHA256 hash of the uncompressed input data.
+// Double hashing (SHA256(SHA256(data))) prevents information leakage about
+// the plaintext - an attacker cannot confirm existence of known content
+// by computing its hash and checking for a matching blob filename.
 func (w *Writer) Sum256() []byte {
-	return w.hasher.Sum(nil)
+	// First hash: SHA256(plaintext)
+	firstHash := w.hasher.Sum(nil)
+	// Second hash: SHA256(firstHash) - this is the blob ID
+	secondHash := sha256.Sum256(firstHash)
+	return secondHash[:]
 }

 // BytesWritten returns the number of uncompressed bytes written
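
Since the blob ID now commits to the plaintext rather than the ciphertext, a downloader can re-derive it after peeling off the age and zstd layers. The check below is a sketch of that idea, not code from this commit; it assumes the caller already holds an age identity and the expected blob ID.

```go
package example

import (
	"crypto/sha256"
	"encoding/hex"
	"fmt"
	"io"

	"filippo.io/age"
	"github.com/klauspost/compress/zstd"
)

// verifyBlob decrypts and decompresses a downloaded blob, then checks that
// SHA256(SHA256(plaintext)) matches the blob's expected ID.
func verifyBlob(encrypted io.Reader, identity age.Identity, expectedID string) error {
	dec, err := age.Decrypt(encrypted, identity) // remove the age layer
	if err != nil {
		return fmt.Errorf("decrypting blob: %w", err)
	}
	zr, err := zstd.NewReader(dec) // remove the zstd layer
	if err != nil {
		return fmt.Errorf("creating zstd reader: %w", err)
	}
	defer zr.Close()

	h := sha256.New()
	if _, err := io.Copy(h, zr); err != nil { // hash the uncompressed plaintext
		return fmt.Errorf("reading blob contents: %w", err)
	}
	first := h.Sum(nil)
	second := sha256.Sum256(first) // same double hash as Writer.Sum256
	if got := hex.EncodeToString(second[:]); got != expectedID {
		return fmt.Errorf("blob hash mismatch: got %s, want %s", got, expectedID)
	}
	return nil
}
```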


@ -0,0 +1,105 @@
package blobgen
import (
"bytes"
"crypto/rand"
"crypto/sha256"
"encoding/hex"
"testing"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)
// TestWriterHashIsDoubleHash verifies that Writer.Sum256() returns
// the double hash SHA256(SHA256(plaintext)) for security.
// Double hashing prevents attackers from confirming existence of known content.
func TestWriterHashIsDoubleHash(t *testing.T) {
// Test data - random data that doesn't compress well
testData := make([]byte, 1024*1024) // 1MB
_, err := rand.Read(testData)
require.NoError(t, err)
// Test recipient (generated with age-keygen)
testRecipient := "age1cplgrwj77ta54dnmydvvmzn64ltk83ankxl5sww04mrtmu62kv3s89gmvv"
// Create a buffer to capture the encrypted output
var encryptedBuf bytes.Buffer
// Create blobgen writer
writer, err := NewWriter(&encryptedBuf, 3, []string{testRecipient})
require.NoError(t, err)
// Write test data
n, err := writer.Write(testData)
require.NoError(t, err)
assert.Equal(t, len(testData), n)
// Close to flush all data
err = writer.Close()
require.NoError(t, err)
// Get the hash from the writer
writerHash := hex.EncodeToString(writer.Sum256())
// Calculate the expected double hash: SHA256(SHA256(plaintext))
firstHash := sha256.Sum256(testData)
secondHash := sha256.Sum256(firstHash[:])
expectedDoubleHash := hex.EncodeToString(secondHash[:])
// Also compute single hash to verify it's different
singleHashStr := hex.EncodeToString(firstHash[:])
t.Logf("Input size: %d bytes", len(testData))
t.Logf("Single hash (SHA256(data)): %s", singleHashStr)
t.Logf("Double hash (SHA256(SHA256(data))): %s", expectedDoubleHash)
t.Logf("Writer hash: %s", writerHash)
// The writer hash should match the double hash
assert.Equal(t, expectedDoubleHash, writerHash,
"Writer.Sum256() should return SHA256(SHA256(plaintext)) for security")
// Verify it's NOT the single hash (would leak information)
assert.NotEqual(t, singleHashStr, writerHash,
"Writer hash should not be single hash (would allow content confirmation attacks)")
}
// TestWriterDeterministicHash verifies that the same input always produces
// the same hash, even with non-deterministic encryption.
func TestWriterDeterministicHash(t *testing.T) {
// Test data
testData := []byte("Hello, World! This is test data for deterministic hashing.")
// Test recipient
testRecipient := "age1cplgrwj77ta54dnmydvvmzn64ltk83ankxl5sww04mrtmu62kv3s89gmvv"
// Create two writers and verify they produce the same hash
var buf1, buf2 bytes.Buffer
writer1, err := NewWriter(&buf1, 3, []string{testRecipient})
require.NoError(t, err)
_, err = writer1.Write(testData)
require.NoError(t, err)
require.NoError(t, writer1.Close())
writer2, err := NewWriter(&buf2, 3, []string{testRecipient})
require.NoError(t, err)
_, err = writer2.Write(testData)
require.NoError(t, err)
require.NoError(t, writer2.Close())
hash1 := hex.EncodeToString(writer1.Sum256())
hash2 := hex.EncodeToString(writer2.Sum256())
// Hashes should be identical (deterministic)
assert.Equal(t, hash1, hash2, "Same input should produce same hash")
// Encrypted outputs should be different (non-deterministic encryption)
assert.NotEqual(t, buf1.Bytes(), buf2.Bytes(),
"Encrypted outputs should differ due to non-deterministic encryption")
t.Logf("Hash 1: %s", hash1)
t.Logf("Hash 2: %s", hash2)
t.Logf("Encrypted size 1: %d bytes", buf1.Len())
t.Logf("Encrypted size 2: %d bytes", buf2.Len())
}

internal/cli/database.go

@ -0,0 +1,102 @@
package cli
import (
"fmt"
"os"
"git.eeqj.de/sneak/vaultik/internal/config"
"git.eeqj.de/sneak/vaultik/internal/log"
"github.com/spf13/cobra"
)
// NewDatabaseCommand creates the database command group
func NewDatabaseCommand() *cobra.Command {
cmd := &cobra.Command{
Use: "database",
Short: "Manage the local state database",
Long: `Commands for managing the local SQLite state database.`,
}
cmd.AddCommand(
newDatabasePurgeCommand(),
)
return cmd
}
// newDatabasePurgeCommand creates the database purge command
func newDatabasePurgeCommand() *cobra.Command {
var force bool
cmd := &cobra.Command{
Use: "purge",
Short: "Delete the local state database",
Long: `Completely removes the local SQLite state database.
This will erase all local tracking of:
- File metadata and change detection state
- Chunk and blob mappings
- Local snapshot records
The remote storage is NOT affected. After purging, the next backup will
perform a full scan and re-deduplicate against existing remote blobs.
Use --force to skip the confirmation prompt.`,
Args: cobra.NoArgs,
RunE: func(cmd *cobra.Command, args []string) error {
// Resolve config path
configPath, err := ResolveConfigPath()
if err != nil {
return err
}
// Load config to get database path
cfg, err := config.Load(configPath)
if err != nil {
return fmt.Errorf("failed to load config: %w", err)
}
dbPath := cfg.IndexPath
// Check if database exists
if _, err := os.Stat(dbPath); os.IsNotExist(err) {
fmt.Printf("Database does not exist: %s\n", dbPath)
return nil
}
// Confirm unless --force
if !force {
fmt.Printf("This will delete the local state database at:\n %s\n\n", dbPath)
fmt.Print("Are you sure? Type 'yes' to confirm: ")
var confirm string
if _, err := fmt.Scanln(&confirm); err != nil || confirm != "yes" {
fmt.Println("Aborted.")
return nil
}
}
// Delete the database file
if err := os.Remove(dbPath); err != nil {
return fmt.Errorf("failed to delete database: %w", err)
}
// Also delete WAL and SHM files if they exist
walPath := dbPath + "-wal"
shmPath := dbPath + "-shm"
_ = os.Remove(walPath) // Ignore errors - files may not exist
_ = os.Remove(shmPath)
rootFlags := GetRootFlags()
if !rootFlags.Quiet {
fmt.Printf("Database purged: %s\n", dbPath)
}
log.Info("Local state database purged", "path", dbPath)
return nil
},
}
cmd.Flags().BoolVar(&force, "force", false, "Skip confirmation prompt")
return cmd
}

internal/cli/remote.go

@ -0,0 +1,89 @@
package cli
import (
"context"
"os"
"git.eeqj.de/sneak/vaultik/internal/log"
"git.eeqj.de/sneak/vaultik/internal/vaultik"
"github.com/spf13/cobra"
"go.uber.org/fx"
)
// NewRemoteCommand creates the remote command and subcommands
func NewRemoteCommand() *cobra.Command {
cmd := &cobra.Command{
Use: "remote",
Short: "Remote storage management commands",
Long: "Commands for inspecting and managing remote storage",
}
// Add subcommands
cmd.AddCommand(newRemoteInfoCommand())
return cmd
}
// newRemoteInfoCommand creates the 'remote info' subcommand
func newRemoteInfoCommand() *cobra.Command {
var jsonOutput bool
cmd := &cobra.Command{
Use: "info",
Short: "Display remote storage information",
Long: `Shows detailed information about remote storage, including:
- Size of all snapshot metadata (per snapshot and total)
- Count and total size of all blobs
- Count and size of referenced blobs (from all manifests)
- Count and size of orphaned blobs (not referenced by any manifest)`,
Args: cobra.NoArgs,
RunE: func(cmd *cobra.Command, args []string) error {
// Use unified config resolution
configPath, err := ResolveConfigPath()
if err != nil {
return err
}
rootFlags := GetRootFlags()
return RunWithApp(cmd.Context(), AppOptions{
ConfigPath: configPath,
LogOptions: log.LogOptions{
Verbose: rootFlags.Verbose,
Debug: rootFlags.Debug,
Quiet: rootFlags.Quiet || jsonOutput,
},
Modules: []fx.Option{},
Invokes: []fx.Option{
fx.Invoke(func(v *vaultik.Vaultik, lc fx.Lifecycle) {
lc.Append(fx.Hook{
OnStart: func(ctx context.Context) error {
go func() {
if err := v.RemoteInfo(jsonOutput); err != nil {
if err != context.Canceled {
if !jsonOutput {
log.Error("Failed to get remote info", "error", err)
}
os.Exit(1)
}
}
if err := v.Shutdowner.Shutdown(); err != nil {
log.Error("Failed to shutdown", "error", err)
}
}()
return nil
},
OnStop: func(ctx context.Context) error {
v.Cancel()
return nil
},
})
}),
},
})
},
}
cmd.Flags().BoolVar(&jsonOutput, "json", false, "Output in JSON format")
return cmd
}


@ -46,6 +46,8 @@ on the source system.`,
		NewSnapshotCommand(),
		NewInfoCommand(),
		NewVersionCommand(),
		NewRemoteCommand(),
		NewDatabaseCommand(),
	)

	return cmd


@ -112,6 +112,7 @@ func newSnapshotListCommand() *cobra.Command {
	cmd := &cobra.Command{
		Use:     "list",
		Aliases: []string{"ls"},
		Short:   "List all snapshots",
		Long:    "Lists all snapshots with their ID, timestamp, and compressed size",
		Args:    cobra.NoArgs,
@ -242,7 +243,16 @@ func newSnapshotVerifyCommand() *cobra.Command {
 		Use:   "verify <snapshot-id>",
 		Short: "Verify snapshot integrity",
 		Long:  "Verifies that all blobs referenced in a snapshot exist",
-		Args:  cobra.ExactArgs(1),
+		Args: func(cmd *cobra.Command, args []string) error {
+			if len(args) != 1 {
+				_ = cmd.Help()
+				if len(args) == 0 {
+					return fmt.Errorf("snapshot ID required")
+				}
+				return fmt.Errorf("expected 1 argument, got %d", len(args))
+			}
+			return nil
+		},
 		RunE: func(cmd *cobra.Command, args []string) error {
 			snapshotID := args[0]
@ -266,7 +276,13 @@ func newSnapshotVerifyCommand() *cobra.Command {
 			lc.Append(fx.Hook{
 				OnStart: func(ctx context.Context) error {
 					go func() {
-						if err := v.VerifySnapshotWithOptions(snapshotID, opts); err != nil {
+						var err error
+						if opts.Deep {
+							err = v.RunDeepVerify(snapshotID, opts)
+						} else {
+							err = v.VerifySnapshotWithOptions(snapshotID, opts)
+						}
+						if err != nil {
 							if err != context.Canceled {
 								if !opts.JSON {
 									log.Error("Verification failed", "error", err)
@ -302,17 +318,37 @@ func newSnapshotRemoveCommand() *cobra.Command {
 	opts := &vaultik.RemoveOptions{}

 	cmd := &cobra.Command{
-		Use:     "remove <snapshot-id>",
+		Use:     "remove [snapshot-id]",
 		Aliases: []string{"rm"},
-		Short:   "Remove a snapshot and its orphaned blobs",
-		Long: `Removes a snapshot and any blobs that are no longer referenced by other snapshots.
-
-This command downloads manifests from all other snapshots to determine which blobs
-are still in use, then deletes any blobs that would become orphaned.`,
-		Args: cobra.ExactArgs(1),
+		Short:   "Remove a snapshot from the local database",
+		Long: `Removes a snapshot from the local database.
+
+By default, only removes from the local database. Use --remote to also remove
+the snapshot metadata from remote storage.
+
+Note: This does NOT remove blobs. Use 'vaultik prune' to remove orphaned blobs
+after removing snapshots.
+
+Use --all --force to remove all snapshots.`,
+		Args: func(cmd *cobra.Command, args []string) error {
+			all, _ := cmd.Flags().GetBool("all")
+			if all {
+				if len(args) > 0 {
+					_ = cmd.Help()
+					return fmt.Errorf("--all cannot be used with a snapshot ID")
+				}
+				return nil
+			}
+			if len(args) != 1 {
+				_ = cmd.Help()
+				if len(args) == 0 {
+					return fmt.Errorf("snapshot ID required (or use --all --force)")
+				}
+				return fmt.Errorf("expected 1 argument, got %d", len(args))
+			}
+			return nil
+		},
 		RunE: func(cmd *cobra.Command, args []string) error {
-			snapshotID := args[0]
-
 			// Use unified config resolution
 			configPath, err := ResolveConfigPath()
 			if err != nil {
@ -333,7 +369,13 @@ are still in use, then deletes any blobs that would become orphaned.`,
 			lc.Append(fx.Hook{
 				OnStart: func(ctx context.Context) error {
 					go func() {
-						if _, err := v.RemoveSnapshot(snapshotID, opts); err != nil {
+						var err error
+						if opts.All {
+							_, err = v.RemoveAllSnapshots(opts)
+						} else {
+							_, err = v.RemoveSnapshot(args[0], opts)
+						}
+						if err != nil {
 							if err != context.Canceled {
 								if !opts.JSON {
 									log.Error("Failed to remove snapshot", "error", err)
@ -359,8 +401,10 @@ are still in use, then deletes any blobs that would become orphaned.`,
 	}

 	cmd.Flags().BoolVarP(&opts.Force, "force", "f", false, "Skip confirmation prompt")
-	cmd.Flags().BoolVar(&opts.DryRun, "dry-run", false, "Show what would be deleted without deleting")
-	cmd.Flags().BoolVar(&opts.JSON, "json", false, "Output deletion stats as JSON")
+	cmd.Flags().BoolVar(&opts.DryRun, "dry-run", false, "Show what would be removed without removing")
+	cmd.Flags().BoolVar(&opts.JSON, "json", false, "Output result as JSON")
+	cmd.Flags().BoolVar(&opts.Remote, "remote", false, "Also remove snapshot metadata from remote storage")
+	cmd.Flags().BoolVar(&opts.All, "all", false, "Remove all snapshots (requires --force)")

 	return cmd
} }


@ -23,7 +23,7 @@ func NewStoreCommand() *cobra.Command {
 	cmd := &cobra.Command{
 		Use:   "store",
 		Short: "Storage information commands",
-		Long:  "Commands for viewing information about the S3 storage backend",
+		Long:  "Commands for viewing information about the storage backend",
 	}
// Add subcommands // Add subcommands
@ -37,7 +37,7 @@ func newStoreInfoCommand() *cobra.Command {
 	return &cobra.Command{
 		Use:   "info",
 		Short: "Display storage information",
-		Long:  "Shows S3 bucket configuration and storage statistics including snapshots and blobs",
+		Long:  "Shows storage configuration and statistics including snapshots and blobs",
 		RunE: func(cmd *cobra.Command, args []string) error {
 			return runWithApp(cmd.Context(), func(app *StoreApp) error {
 				return app.Info(cmd.Context())


@ -297,7 +297,11 @@ func (c *Config) validateStorage() error {
 		}
 		return nil
 	}
-	return fmt.Errorf("storage_url must start with s3:// or file://")
+	if strings.HasPrefix(c.StorageURL, "rclone://") {
+		// Rclone storage uses rclone's own config
+		return nil
+	}
+	return fmt.Errorf("storage_url must start with s3://, file://, or rclone://")
 }

 // Legacy S3 configuration


@ -10,6 +10,7 @@ import (
 	"github.com/aws/aws-sdk-go-v2/credentials"
 	"github.com/aws/aws-sdk-go-v2/feature/s3/manager"
 	"github.com/aws/aws-sdk-go-v2/service/s3"
+	"github.com/aws/smithy-go/logging"
 )

 // Client wraps the AWS S3 client for vaultik operations.
@ -35,12 +36,18 @@ type Config struct {
 	Region          string
 }

+// nopLogger is a logger that discards all output.
+// Used to suppress SDK warnings about checksums.
+type nopLogger struct{}
+
+func (nopLogger) Logf(classification logging.Classification, format string, v ...interface{}) {}
+
 // NewClient creates a new S3 client with the provided configuration.
 // It establishes a connection to the S3-compatible storage service and
 // validates the credentials. The client uses static credentials and
 // path-style URLs for compatibility with various S3-compatible services.
 func NewClient(ctx context.Context, cfg Config) (*Client, error) {
-	// Create AWS config
+	// Create AWS config with a nop logger to suppress SDK warnings
 	awsCfg, err := config.LoadDefaultConfig(ctx,
 		config.WithRegion(cfg.Region),
 		config.WithCredentialsProvider(credentials.NewStaticCredentialsProvider(
@ -48,6 +55,7 @@ func NewClient(ctx context.Context, cfg Config) (*Client, error) {
 			cfg.SecretAccessKey,
 			"",
 		)),
+		config.WithLogger(nopLogger{}),
 	)
 	if err != nil {
 		return nil, err


@ -39,7 +39,7 @@ type ProgressStats struct {
 	BlobsCreated     atomic.Int64
 	BlobsUploaded    atomic.Int64
 	BytesUploaded    atomic.Int64
-	UploadDurationMs atomic.Int64 // Total milliseconds spent uploading to S3
+	UploadDurationMs atomic.Int64 // Total milliseconds spent uploading
 	CurrentFile      atomic.Value // stores string
 	TotalSize        atomic.Int64 // Total size to process (set after scan phase)
 	TotalFiles       atomic.Int64 // Total files to process in phase 2
@ -273,7 +273,7 @@ func (pr *ProgressReporter) printDetailedStatus() {
 		"created", blobsCreated,
 		"uploaded", blobsUploaded,
 		"pending", blobsCreated-blobsUploaded)
-	log.Info("Total uploaded to S3",
+	log.Info("Total uploaded to remote",
 		"uploaded", humanize.Bytes(uint64(bytesUploaded)),
 		"compression_ratio", formatRatio(bytesUploaded, bytesScanned))
 	if currentFile != "" {
@ -336,7 +336,7 @@ func (pr *ProgressReporter) ReportUploadStart(blobHash string, size int64) {
 	pr.stats.CurrentUpload.Store(info)

 	// Log the start of upload
-	log.Info("Starting blob upload to S3",
+	log.Info("Starting blob upload",
 		"hash", blobHash[:8]+"...",
 		"size", humanize.Bytes(uint64(size)))
 }


@ -625,11 +625,21 @@ func (s *Scanner) scanPhase(ctx context.Context, path string, result *ScanResult
	// Update result stats
	if needsProcessing {
		result.BytesScanned += info.Size()
		if s.progress != nil {
			s.progress.GetStats().BytesScanned.Add(info.Size())
		}
	} else {
		result.FilesSkipped++
		result.BytesSkipped += info.Size()
		if s.progress != nil {
			s.progress.GetStats().FilesSkipped.Add(1)
			s.progress.GetStats().BytesSkipped.Add(info.Size())
		}
	}
	result.FilesScanned++
	if s.progress != nil {
		s.progress.GetStats().FilesScanned.Add(1)
	}

	// Output periodic status
	if time.Since(lastStatusTime) >= statusInterval {
@ -921,9 +931,10 @@ func (s *Scanner) handleBlobReady(blobWithReader *blob.BlobWithReader) error {
 	startTime := time.Now().UTC()
 	finishedBlob := blobWithReader.FinishedBlob

-	// Report upload start
+	// Report upload start and increment blobs created
 	if s.progress != nil {
 		s.progress.ReportUploadStart(finishedBlob.Hash, finishedBlob.Compressed)
+		s.progress.GetStats().BlobsCreated.Add(1)
 	}

 	// Upload to storage first (without holding any locks)
@ -964,6 +975,20 @@ func (s *Scanner) handleBlobReady(blobWithReader *blob.BlobWithReader) error {
	// Create sharded path: blobs/ca/fe/cafebabe...
	blobPath := fmt.Sprintf("blobs/%s/%s/%s", finishedBlob.Hash[:2], finishedBlob.Hash[2:4], finishedBlob.Hash)

	// Check if blob already exists in remote storage (deduplication after restart)
	blobExists := false
	if _, err := s.storage.Stat(ctx, blobPath); err == nil {
		blobExists = true
		log.Info("Blob already exists in storage, skipping upload",
			"hash", finishedBlob.Hash,
			"size", humanize.Bytes(uint64(finishedBlob.Compressed)))
		fmt.Printf("Blob exists: %s (%s, skipped upload)\n",
			finishedBlob.Hash[:12]+"...",
			humanize.Bytes(uint64(finishedBlob.Compressed)))
	}

	if !blobExists {
		if err := s.storage.PutWithProgress(ctx, blobPath, blobWithReader.Reader, finishedBlob.Compressed, progressCallback); err != nil {
			return fmt.Errorf("uploading blob %s to storage: %w", finishedBlob.Hash, err)
		}
@ -993,12 +1018,12 @@ func (s *Scanner) handleBlobReady(blobWithReader *blob.BlobWithReader) error {
 		s.progress.ReportUploadComplete(finishedBlob.Hash, finishedBlob.Compressed, uploadDuration)
 	}

-	// Update progress
+	// Update progress after upload completes
 	if s.progress != nil {
 		stats := s.progress.GetStats()
 		stats.BlobsUploaded.Add(1)
 		stats.BytesUploaded.Add(finishedBlob.Compressed)
-		stats.BlobsCreated.Add(1)
 	}
+	}
// Store metadata in database (after upload is complete) // Store metadata in database (after upload is complete)
@ -1013,6 +1038,9 @@ func (s *Scanner) handleBlobReady(blobWithReader *blob.BlobWithReader) error {
return fmt.Errorf("parsing finished blob ID: %w", err) return fmt.Errorf("parsing finished blob ID: %w", err)
} }
// Track upload duration (0 if blob already existed)
uploadDuration := time.Since(startTime)
err = s.repos.WithTx(dbCtx, func(ctx context.Context, tx *sql.Tx) error { err = s.repos.WithTx(dbCtx, func(ctx context.Context, tx *sql.Tx) error {
// Update blob upload timestamp // Update blob upload timestamp
if err := s.repos.Blobs.UpdateUploaded(ctx, tx, finishedBlob.ID); err != nil { if err := s.repos.Blobs.UpdateUploaded(ctx, tx, finishedBlob.ID); err != nil {
@ -1024,7 +1052,8 @@ func (s *Scanner) handleBlobReady(blobWithReader *blob.BlobWithReader) error {
return fmt.Errorf("adding blob to snapshot: %w", err) return fmt.Errorf("adding blob to snapshot: %w", err)
} }
// Record upload metrics // Record upload metrics (only for actual uploads, not deduplicated blobs)
if !blobExists {
upload := &database.Upload{ upload := &database.Upload{
BlobHash: finishedBlob.Hash, BlobHash: finishedBlob.Hash,
SnapshotID: s.snapshotID, SnapshotID: s.snapshotID,
@ -1035,6 +1064,7 @@ func (s *Scanner) handleBlobReady(blobWithReader *blob.BlobWithReader) error {
if err := s.repos.Uploads.Create(ctx, tx, upload); err != nil { if err := s.repos.Uploads.Create(ctx, tx, upload); err != nil {
return fmt.Errorf("recording upload metrics: %w", err) return fmt.Errorf("recording upload metrics: %w", err)
} }
}
return nil return nil
}) })
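
For reference, the deduplication added above reduces to a Stat-before-Put check on the deterministic, sharded blob key. A minimal sketch, assuming it lives inside the vaultik module; `uploadBlobIfMissing` is a hypothetical helper name, not part of this commit.

```go
package example

import (
	"context"
	"fmt"
	"io"

	"git.eeqj.de/sneak/vaultik/internal/storage"
)

// uploadBlobIfMissing skips the upload when the blob is already present
// remotely. Because blob hashes are deterministic, a Stat hit on the sharded
// key means the exact same content was uploaded before (possibly by an
// earlier run whose local DB has since been cleared).
func uploadBlobIfMissing(ctx context.Context, store storage.Storer, hash string, data io.Reader) error {
	key := fmt.Sprintf("blobs/%s/%s/%s", hash[:2], hash[2:4], hash)
	if _, err := store.Stat(ctx, key); err == nil {
		return nil // already in storage; deduplicated
	}
	return store.Put(ctx, key, data)
}
```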


@ -19,24 +19,19 @@ package snapshot
// - Blobs not containing any remaining chunks // - Blobs not containing any remaining chunks
// - All related mapping tables (file_chunks, chunk_files, blob_chunks) // - All related mapping tables (file_chunks, chunk_files, blob_chunks)
// 7. Close the temporary database // 7. Close the temporary database
// 8. Use sqlite3 to dump the cleaned database to SQL // 8. VACUUM the database to remove deleted data and compact (security critical)
// 9. Delete the temporary database file // 9. Compress the binary database with zstd
// 10. Compress the SQL dump with zstd // 10. Encrypt the compressed database with age (if encryption is enabled)
// 11. Encrypt the compressed dump with age (if encryption is enabled) // 11. Upload to S3 as: metadata/{snapshot-id}/db.zst.age
// 12. Upload to S3 as: snapshots/{snapshot-id}.sql.zst[.age] // 12. Reopen the main database
// 13. Reopen the main database
// //
// Advantages of this approach: // Advantages of this approach:
// - No custom metadata format needed // - No custom metadata format needed
// - Reuses existing database schema and relationships // - Reuses existing database schema and relationships
// - SQL dumps are portable and compress well // - Binary SQLite files are portable and compress well
// - Restore process can simply execute the SQL // - Fast restore - just decompress and open (no SQL parsing)
// - VACUUM ensures no deleted data leaks
// - Atomic and consistent snapshot of all metadata // - Atomic and consistent snapshot of all metadata
//
// TODO: Future improvements:
// - Add snapshot-file relationships to track which files belong to which snapshot
// - Implement incremental snapshots that reference previous snapshots
// - Add snapshot manifest with additional metadata (size, chunk count, etc.)
import ( import (
"bytes" "bytes"
@ -257,20 +252,20 @@ func (sm *SnapshotManager) ExportSnapshotMetadata(ctx context.Context, dbPath st
"total_uncompressed_size", humanize.Bytes(uint64(stats.UncompressedSize)), "total_uncompressed_size", humanize.Bytes(uint64(stats.UncompressedSize)),
"compression_ratio", fmt.Sprintf("%.2fx", float64(stats.UncompressedSize)/float64(stats.CompressedSize))) "compression_ratio", fmt.Sprintf("%.2fx", float64(stats.UncompressedSize)/float64(stats.CompressedSize)))
// Step 3: Dump the cleaned database to SQL // Step 3: VACUUM the database to remove deleted data and compact
dumpPath := filepath.Join(tempDir, "snapshot.sql") // This is critical for security - ensures no stale/deleted data is uploaded
if err := sm.dumpDatabase(tempDBPath, dumpPath); err != nil { if err := sm.vacuumDatabase(tempDBPath); err != nil {
return fmt.Errorf("dumping database: %w", err) return fmt.Errorf("vacuuming database: %w", err)
} }
log.Debug("SQL dump complete", "size", humanize.Bytes(uint64(sm.getFileSize(dumpPath)))) log.Debug("Database vacuumed", "size", humanize.Bytes(uint64(sm.getFileSize(tempDBPath))))
// Step 4: Compress and encrypt the SQL dump // Step 4: Compress and encrypt the binary database file
compressedPath := filepath.Join(tempDir, "snapshot.sql.zst.age") compressedPath := filepath.Join(tempDir, "db.zst.age")
if err := sm.compressDump(dumpPath, compressedPath); err != nil { if err := sm.compressFile(tempDBPath, compressedPath); err != nil {
return fmt.Errorf("compressing dump: %w", err) return fmt.Errorf("compressing database: %w", err)
} }
log.Debug("Compression complete", log.Debug("Compression complete",
"original_size", humanize.Bytes(uint64(sm.getFileSize(dumpPath))), "original_size", humanize.Bytes(uint64(sm.getFileSize(tempDBPath))),
"compressed_size", humanize.Bytes(uint64(sm.getFileSize(compressedPath)))) "compressed_size", humanize.Bytes(uint64(sm.getFileSize(compressedPath))))
// Step 5: Read compressed and encrypted data for upload // Step 5: Read compressed and encrypted data for upload
@ -295,7 +290,7 @@ func (sm *SnapshotManager) ExportSnapshotMetadata(ctx context.Context, dbPath st
} }
dbUploadDuration := time.Since(dbUploadStart) dbUploadDuration := time.Since(dbUploadStart)
dbUploadSpeed := float64(len(finalData)) * 8 / dbUploadDuration.Seconds() // bits per second dbUploadSpeed := float64(len(finalData)) * 8 / dbUploadDuration.Seconds() // bits per second
log.Info("Uploaded snapshot database to S3", log.Info("Uploaded snapshot database",
"path", dbKey, "path", dbKey,
"size", humanize.Bytes(uint64(len(finalData))), "size", humanize.Bytes(uint64(len(finalData))),
"duration", dbUploadDuration, "duration", dbUploadDuration,
@ -309,7 +304,7 @@ func (sm *SnapshotManager) ExportSnapshotMetadata(ctx context.Context, dbPath st
} }
manifestUploadDuration := time.Since(manifestUploadStart) manifestUploadDuration := time.Since(manifestUploadStart)
manifestUploadSpeed := float64(len(blobManifest)) * 8 / manifestUploadDuration.Seconds() // bits per second manifestUploadSpeed := float64(len(blobManifest)) * 8 / manifestUploadDuration.Seconds() // bits per second
log.Info("Uploaded blob manifest to S3", log.Info("Uploaded blob manifest",
"path", manifestKey, "path", manifestKey,
"size", humanize.Bytes(uint64(len(blobManifest))), "size", humanize.Bytes(uint64(len(blobManifest))),
"duration", manifestUploadDuration, "duration", manifestUploadDuration,
@ -438,26 +433,21 @@ func (sm *SnapshotManager) cleanSnapshotDB(ctx context.Context, dbPath string, s
return stats, nil return stats, nil
} }
// dumpDatabase creates a SQL dump of the database // vacuumDatabase runs VACUUM on the database to remove deleted data and compact
func (sm *SnapshotManager) dumpDatabase(dbPath, dumpPath string) error { // This is critical for security - ensures no stale/deleted data pages are uploaded
log.Debug("Running sqlite3 dump command", "source", dbPath, "destination", dumpPath) func (sm *SnapshotManager) vacuumDatabase(dbPath string) error {
cmd := exec.Command("sqlite3", dbPath, ".dump") log.Debug("Running VACUUM on database", "path", dbPath)
cmd := exec.Command("sqlite3", dbPath, "VACUUM;")
output, err := cmd.Output() if output, err := cmd.CombinedOutput(); err != nil {
if err != nil { return fmt.Errorf("running VACUUM: %w (output: %s)", err, string(output))
return fmt.Errorf("running sqlite3 dump: %w", err)
}
log.Debug("SQL dump generated", "size", humanize.Bytes(uint64(len(output))))
if err := afero.WriteFile(sm.fs, dumpPath, output, 0644); err != nil {
return fmt.Errorf("writing dump file: %w", err)
} }
return nil return nil
} }
// compressDump compresses the SQL dump using zstd // compressFile compresses a file using zstd and encrypts with age
func (sm *SnapshotManager) compressDump(inputPath, outputPath string) error { func (sm *SnapshotManager) compressFile(inputPath, outputPath string) error {
input, err := sm.fs.Open(inputPath) input, err := sm.fs.Open(inputPath)
if err != nil { if err != nil {
return fmt.Errorf("opening input file: %w", err) return fmt.Errorf("opening input file: %w", err)
@ -676,7 +666,7 @@ func (sm *SnapshotManager) CleanupIncompleteSnapshots(ctx context.Context, hostn
} else { } else {
// Metadata exists - this snapshot was completed but database wasn't updated // Metadata exists - this snapshot was completed but database wasn't updated
// This shouldn't happen in normal operation, but mark it complete // This shouldn't happen in normal operation, but mark it complete
log.Warn("Found snapshot with S3 metadata but incomplete in database", "snapshot_id", snapshot.ID) log.Warn("Found snapshot with remote metadata but incomplete in database", "snapshot_id", snapshot.ID)
if err := sm.repos.Snapshots.MarkComplete(ctx, nil, snapshot.ID.String()); err != nil { if err := sm.repos.Snapshots.MarkComplete(ctx, nil, snapshot.ID.String()); err != nil {
log.Error("Failed to mark snapshot as complete in database", "snapshot_id", snapshot.ID, "error", err) log.Error("Failed to mark snapshot as complete in database", "snapshot_id", snapshot.ID, "error", err)
} }


@ -73,6 +73,9 @@ func storerFromURL(rawURL string, cfg *config.Config) (Storer, error) {
} }
return NewS3Storer(client), nil return NewS3Storer(client), nil
case "rclone":
return NewRcloneStorer(context.Background(), parsed.RcloneRemote, parsed.Prefix)
default: default:
return nil, fmt.Errorf("unsupported storage scheme: %s", parsed.Scheme) return nil, fmt.Errorf("unsupported storage scheme: %s", parsed.Scheme)
} }

internal/storage/rclone.go (new file, 236 lines)

@ -0,0 +1,236 @@
package storage
import (
"bytes"
"context"
"errors"
"fmt"
"io"
"strings"
"time"
"github.com/rclone/rclone/fs"
"github.com/rclone/rclone/fs/config/configfile"
"github.com/rclone/rclone/fs/operations"
// Import all rclone backends
_ "github.com/rclone/rclone/backend/all"
)
// ErrRemoteNotFound is returned when an rclone remote is not configured.
var ErrRemoteNotFound = errors.New("rclone remote not found in config")
// RcloneStorer implements Storer using rclone's filesystem abstraction.
// This allows vaultik to use any of rclone's 70+ supported storage providers.
type RcloneStorer struct {
fsys fs.Fs // rclone filesystem
remote string // remote name (for Info())
path string // path within remote (for Info())
}
// NewRcloneStorer creates a new rclone storage backend.
// The remote parameter is the rclone remote name (as configured via `rclone config`).
// The path parameter is the path within the remote.
func NewRcloneStorer(ctx context.Context, remote, path string) (*RcloneStorer, error) {
// Install the default config file handler
configfile.Install()
// Build the rclone path string (e.g., "myremote:path/to/backups")
rclonePath := remote + ":"
if path != "" {
rclonePath += path
}
// Create the rclone filesystem
fsys, err := fs.NewFs(ctx, rclonePath)
if err != nil {
// Check for remote not found error
if strings.Contains(err.Error(), "didn't find section in config file") ||
strings.Contains(err.Error(), "failed to find remote") {
return nil, fmt.Errorf("%w: %s", ErrRemoteNotFound, remote)
}
return nil, fmt.Errorf("creating rclone filesystem: %w", err)
}
return &RcloneStorer{
fsys: fsys,
remote: remote,
path: path,
}, nil
}
// Put stores data at the specified key.
func (r *RcloneStorer) Put(ctx context.Context, key string, data io.Reader) error {
// Read all data into memory to get size (required by rclone)
buf, err := io.ReadAll(data)
if err != nil {
return fmt.Errorf("reading data: %w", err)
}
// Upload the object
_, err = operations.Rcat(ctx, r.fsys, key, io.NopCloser(bytes.NewReader(buf)), time.Now(), nil)
if err != nil {
return fmt.Errorf("uploading object: %w", err)
}
return nil
}
// PutWithProgress stores data with progress reporting.
func (r *RcloneStorer) PutWithProgress(ctx context.Context, key string, data io.Reader, size int64, progress ProgressCallback) error {
// Wrap reader with progress tracking
pr := &progressReader{
reader: data,
callback: progress,
}
// Upload the object
_, err := operations.Rcat(ctx, r.fsys, key, io.NopCloser(pr), time.Now(), nil)
if err != nil {
return fmt.Errorf("uploading object: %w", err)
}
return nil
}
// Get retrieves data from the specified key.
func (r *RcloneStorer) Get(ctx context.Context, key string) (io.ReadCloser, error) {
// Get the object
obj, err := r.fsys.NewObject(ctx, key)
if err != nil {
if errors.Is(err, fs.ErrorObjectNotFound) {
return nil, ErrNotFound
}
if errors.Is(err, fs.ErrorDirNotFound) {
return nil, ErrNotFound
}
return nil, fmt.Errorf("getting object: %w", err)
}
// Open the object for reading
reader, err := obj.Open(ctx)
if err != nil {
return nil, fmt.Errorf("opening object: %w", err)
}
return reader, nil
}
// Stat returns metadata about an object without retrieving its contents.
func (r *RcloneStorer) Stat(ctx context.Context, key string) (*ObjectInfo, error) {
obj, err := r.fsys.NewObject(ctx, key)
if err != nil {
if errors.Is(err, fs.ErrorObjectNotFound) {
return nil, ErrNotFound
}
if errors.Is(err, fs.ErrorDirNotFound) {
return nil, ErrNotFound
}
return nil, fmt.Errorf("getting object: %w", err)
}
return &ObjectInfo{
Key: key,
Size: obj.Size(),
}, nil
}
// Delete removes an object.
func (r *RcloneStorer) Delete(ctx context.Context, key string) error {
obj, err := r.fsys.NewObject(ctx, key)
if err != nil {
if errors.Is(err, fs.ErrorObjectNotFound) {
return nil // Match S3 behavior: no error if doesn't exist
}
if errors.Is(err, fs.ErrorDirNotFound) {
return nil
}
return fmt.Errorf("getting object: %w", err)
}
if err := obj.Remove(ctx); err != nil {
return fmt.Errorf("removing object: %w", err)
}
return nil
}
// List returns all keys with the given prefix.
func (r *RcloneStorer) List(ctx context.Context, prefix string) ([]string, error) {
var keys []string
err := operations.ListFn(ctx, r.fsys, func(obj fs.Object) {
key := obj.Remote()
if prefix == "" || strings.HasPrefix(key, prefix) {
keys = append(keys, key)
}
})
if err != nil {
return nil, fmt.Errorf("listing objects: %w", err)
}
return keys, nil
}
// ListStream returns a channel of ObjectInfo for large result sets.
func (r *RcloneStorer) ListStream(ctx context.Context, prefix string) <-chan ObjectInfo {
ch := make(chan ObjectInfo)
go func() {
defer close(ch)
err := operations.ListFn(ctx, r.fsys, func(obj fs.Object) {
// Check context cancellation
select {
case <-ctx.Done():
return
default:
}
key := obj.Remote()
if prefix == "" || strings.HasPrefix(key, prefix) {
ch <- ObjectInfo{
Key: key,
Size: obj.Size(),
}
}
})
if err != nil {
ch <- ObjectInfo{Err: fmt.Errorf("listing objects: %w", err)}
}
}()
return ch
}
// Info returns human-readable storage location information.
func (r *RcloneStorer) Info() StorageInfo {
location := r.remote
if r.path != "" {
location += ":" + r.path
}
return StorageInfo{
Type: "rclone",
Location: location,
}
}
// progressReader wraps an io.Reader to track read progress.
type progressReader struct {
reader io.Reader
read int64
callback ProgressCallback
}
func (pr *progressReader) Read(p []byte) (int, error) {
n, err := pr.reader.Read(p)
if n > 0 {
pr.read += int64(n)
if pr.callback != nil {
if callbackErr := pr.callback(pr.read); callbackErr != nil {
return n, callbackErr
}
}
}
return n, err
}
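
For context, a minimal usage sketch of the new backend through the Storer interface. This is illustrative only: `myremote` is a placeholder for a remote already configured via `rclone config`, and the snippet assumes it runs inside the vaultik module (the storage package is internal).

```go
package main

import (
	"context"
	"fmt"
	"strings"

	"git.eeqj.de/sneak/vaultik/internal/storage"
)

func main() {
	ctx := context.Background()

	// "myremote" must already exist in the user's rclone config.
	store, err := storage.NewRcloneStorer(ctx, "myremote", "backups/vaultik")
	if err != nil {
		panic(err)
	}

	// Round-trip a small object through the generic Storer methods.
	if err := store.Put(ctx, "hello.txt", strings.NewReader("hello")); err != nil {
		panic(err)
	}
	info, err := store.Stat(ctx, "hello.txt")
	if err != nil {
		panic(err)
	}
	fmt.Println(info.Key, info.Size)
}
```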


@ -8,18 +8,20 @@ import (
// StorageURL represents a parsed storage URL. // StorageURL represents a parsed storage URL.
type StorageURL struct { type StorageURL struct {
Scheme string // "s3" or "file" Scheme string // "s3", "file", or "rclone"
Bucket string // S3 bucket name (empty for file) Bucket string // S3 bucket name (empty for file/rclone)
Prefix string // Path within bucket or filesystem base path Prefix string // Path within bucket or filesystem base path
Endpoint string // S3 endpoint (optional, default AWS) Endpoint string // S3 endpoint (optional, default AWS)
Region string // S3 region (optional) Region string // S3 region (optional)
UseSSL bool // Use HTTPS for S3 (default true) UseSSL bool // Use HTTPS for S3 (default true)
RcloneRemote string // rclone remote name (for rclone:// URLs)
} }
// ParseStorageURL parses a storage URL string. // ParseStorageURL parses a storage URL string.
// Supported formats: // Supported formats:
// - s3://bucket/prefix?endpoint=host&region=us-east-1&ssl=true // - s3://bucket/prefix?endpoint=host&region=us-east-1&ssl=true
// - file:///absolute/path/to/backup // - file:///absolute/path/to/backup
// - rclone://remote/path/to/backups
func ParseStorageURL(rawURL string) (*StorageURL, error) { func ParseStorageURL(rawURL string) (*StorageURL, error) {
if rawURL == "" { if rawURL == "" {
return nil, fmt.Errorf("storage URL is empty") return nil, fmt.Errorf("storage URL is empty")
@ -67,7 +69,28 @@ func ParseStorageURL(rawURL string) (*StorageURL, error) {
}, nil }, nil
} }
return nil, fmt.Errorf("unsupported URL scheme: must start with s3:// or file://") // Handle rclone:// URLs
if strings.HasPrefix(rawURL, "rclone://") {
u, err := url.Parse(rawURL)
if err != nil {
return nil, fmt.Errorf("invalid URL: %w", err)
}
remote := u.Host
if remote == "" {
return nil, fmt.Errorf("rclone URL missing remote name")
}
path := strings.TrimPrefix(u.Path, "/")
return &StorageURL{
Scheme: "rclone",
Prefix: path,
RcloneRemote: remote,
}, nil
}
return nil, fmt.Errorf("unsupported URL scheme: must start with s3://, file://, or rclone://")
} }
// String returns a human-readable representation of the storage URL. // String returns a human-readable representation of the storage URL.
@ -84,6 +107,11 @@ func (u *StorageURL) String() string {
return fmt.Sprintf("s3://%s/%s (endpoint: %s)", u.Bucket, u.Prefix, endpoint) return fmt.Sprintf("s3://%s/%s (endpoint: %s)", u.Bucket, u.Prefix, endpoint)
} }
return fmt.Sprintf("s3://%s (endpoint: %s)", u.Bucket, endpoint) return fmt.Sprintf("s3://%s (endpoint: %s)", u.Bucket, endpoint)
case "rclone":
if u.Prefix != "" {
return fmt.Sprintf("rclone://%s/%s", u.RcloneRemote, u.Prefix)
}
return fmt.Sprintf("rclone://%s", u.RcloneRemote)
default: default:
return fmt.Sprintf("%s://?", u.Scheme) return fmt.Sprintf("%s://?", u.Scheme)
} }
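
A hypothetical test (not part of this commit) showing the parse result the new rclone branch above should produce; it would live in the storage package and import only `testing`.

```go
func TestParseStorageURLRclone(t *testing.T) {
	u, err := ParseStorageURL("rclone://myremote/path/to/backups")
	if err != nil {
		t.Fatal(err)
	}
	if u.Scheme != "rclone" || u.RcloneRemote != "myremote" || u.Prefix != "path/to/backups" {
		t.Fatalf("unexpected parse: %+v", u)
	}
	if got := u.String(); got != "rclone://myremote/path/to/backups" {
		t.Fatalf("unexpected String(): %s", got)
	}
}
```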


@ -1,10 +1,14 @@
package vaultik package vaultik
import ( import (
"encoding/json"
"fmt" "fmt"
"runtime" "runtime"
"sort"
"strings" "strings"
"git.eeqj.de/sneak/vaultik/internal/log"
"git.eeqj.de/sneak/vaultik/internal/snapshot"
"github.com/dustin/go-humanize" "github.com/dustin/go-humanize"
) )
@ -108,3 +112,237 @@ func (v *Vaultik) ShowInfo() error {
return nil return nil
} }
// SnapshotMetadataInfo contains information about a single snapshot's metadata
type SnapshotMetadataInfo struct {
SnapshotID string `json:"snapshot_id"`
ManifestSize int64 `json:"manifest_size"`
DatabaseSize int64 `json:"database_size"`
TotalSize int64 `json:"total_size"`
BlobCount int `json:"blob_count"`
BlobsSize int64 `json:"blobs_size"`
}
// RemoteInfoResult contains all remote storage information
type RemoteInfoResult struct {
// Storage info
StorageType string `json:"storage_type"`
StorageLocation string `json:"storage_location"`
// Snapshot metadata
Snapshots []SnapshotMetadataInfo `json:"snapshots"`
TotalMetadataSize int64 `json:"total_metadata_size"`
TotalMetadataCount int `json:"total_metadata_count"`
// All blobs on remote
TotalBlobCount int `json:"total_blob_count"`
TotalBlobSize int64 `json:"total_blob_size"`
// Referenced blobs (from manifests)
ReferencedBlobCount int `json:"referenced_blob_count"`
ReferencedBlobSize int64 `json:"referenced_blob_size"`
// Orphaned blobs
OrphanedBlobCount int `json:"orphaned_blob_count"`
OrphanedBlobSize int64 `json:"orphaned_blob_size"`
}
// RemoteInfo displays information about remote storage
func (v *Vaultik) RemoteInfo(jsonOutput bool) error {
result := &RemoteInfoResult{}
// Get storage info
storageInfo := v.Storage.Info()
result.StorageType = storageInfo.Type
result.StorageLocation = storageInfo.Location
if !jsonOutput {
fmt.Printf("=== Remote Storage ===\n")
fmt.Printf("Type: %s\n", storageInfo.Type)
fmt.Printf("Location: %s\n", storageInfo.Location)
fmt.Println()
}
// List all snapshot metadata
if !jsonOutput {
fmt.Printf("Scanning snapshot metadata...\n")
}
snapshotMetadata := make(map[string]*SnapshotMetadataInfo)
// Collect metadata files
metadataCh := v.Storage.ListStream(v.ctx, "metadata/")
for obj := range metadataCh {
if obj.Err != nil {
return fmt.Errorf("listing metadata: %w", obj.Err)
}
// Parse key: metadata/<snapshot-id>/<filename>
parts := strings.Split(obj.Key, "/")
if len(parts) < 3 {
continue
}
snapshotID := parts[1]
if _, exists := snapshotMetadata[snapshotID]; !exists {
snapshotMetadata[snapshotID] = &SnapshotMetadataInfo{
SnapshotID: snapshotID,
}
}
info := snapshotMetadata[snapshotID]
filename := parts[2]
if strings.HasPrefix(filename, "manifest") {
info.ManifestSize = obj.Size
} else if strings.HasPrefix(filename, "db") {
info.DatabaseSize = obj.Size
}
info.TotalSize = info.ManifestSize + info.DatabaseSize
}
// Sort snapshots by ID for consistent output
var snapshotIDs []string
for id := range snapshotMetadata {
snapshotIDs = append(snapshotIDs, id)
}
sort.Strings(snapshotIDs)
// Download and parse all manifests to get referenced blobs
if !jsonOutput {
fmt.Printf("Downloading %d manifest(s)...\n", len(snapshotIDs))
}
referencedBlobs := make(map[string]int64) // hash -> compressed size
for _, snapshotID := range snapshotIDs {
manifestKey := fmt.Sprintf("metadata/%s/manifest.json.zst", snapshotID)
reader, err := v.Storage.Get(v.ctx, manifestKey)
if err != nil {
log.Warn("Failed to get manifest", "snapshot", snapshotID, "error", err)
continue
}
manifest, err := snapshot.DecodeManifest(reader)
_ = reader.Close()
if err != nil {
log.Warn("Failed to decode manifest", "snapshot", snapshotID, "error", err)
continue
}
// Record blob info from manifest
info := snapshotMetadata[snapshotID]
info.BlobCount = manifest.BlobCount
var blobsSize int64
for _, blob := range manifest.Blobs {
referencedBlobs[blob.Hash] = blob.CompressedSize
blobsSize += blob.CompressedSize
}
info.BlobsSize = blobsSize
}
// Build result snapshots
var totalMetadataSize int64
for _, id := range snapshotIDs {
info := snapshotMetadata[id]
result.Snapshots = append(result.Snapshots, *info)
totalMetadataSize += info.TotalSize
}
result.TotalMetadataSize = totalMetadataSize
result.TotalMetadataCount = len(snapshotIDs)
// Calculate referenced blob stats
for _, size := range referencedBlobs {
result.ReferencedBlobCount++
result.ReferencedBlobSize += size
}
// List all blobs on remote
if !jsonOutput {
fmt.Printf("Scanning blobs...\n")
}
allBlobs := make(map[string]int64) // hash -> size from storage
blobCh := v.Storage.ListStream(v.ctx, "blobs/")
for obj := range blobCh {
if obj.Err != nil {
return fmt.Errorf("listing blobs: %w", obj.Err)
}
// Extract hash from key: blobs/xx/yy/hash
parts := strings.Split(obj.Key, "/")
if len(parts) < 4 {
continue
}
hash := parts[3]
allBlobs[hash] = obj.Size
result.TotalBlobCount++
result.TotalBlobSize += obj.Size
}
// Calculate orphaned blobs
for hash, size := range allBlobs {
if _, referenced := referencedBlobs[hash]; !referenced {
result.OrphanedBlobCount++
result.OrphanedBlobSize += size
}
}
// Output results
if jsonOutput {
enc := json.NewEncoder(v.Stdout)
enc.SetIndent("", " ")
return enc.Encode(result)
}
// Human-readable output
fmt.Printf("\n=== Snapshot Metadata ===\n")
if len(result.Snapshots) == 0 {
fmt.Printf("No snapshots found\n")
} else {
fmt.Printf("%-45s %12s %12s %12s %10s %12s\n", "SNAPSHOT", "MANIFEST", "DATABASE", "TOTAL", "BLOBS", "BLOB SIZE")
fmt.Printf("%-45s %12s %12s %12s %10s %12s\n", strings.Repeat("-", 45), strings.Repeat("-", 12), strings.Repeat("-", 12), strings.Repeat("-", 12), strings.Repeat("-", 10), strings.Repeat("-", 12))
for _, info := range result.Snapshots {
fmt.Printf("%-45s %12s %12s %12s %10s %12s\n",
truncateString(info.SnapshotID, 45),
humanize.Bytes(uint64(info.ManifestSize)),
humanize.Bytes(uint64(info.DatabaseSize)),
humanize.Bytes(uint64(info.TotalSize)),
humanize.Comma(int64(info.BlobCount)),
humanize.Bytes(uint64(info.BlobsSize)),
)
}
fmt.Printf("%-45s %12s %12s %12s %10s %12s\n", strings.Repeat("-", 45), strings.Repeat("-", 12), strings.Repeat("-", 12), strings.Repeat("-", 12), strings.Repeat("-", 10), strings.Repeat("-", 12))
fmt.Printf("%-45s %12s %12s %12s\n", fmt.Sprintf("Total (%d snapshots)", result.TotalMetadataCount), "", "", humanize.Bytes(uint64(result.TotalMetadataSize)))
}
fmt.Printf("\n=== Blob Storage ===\n")
fmt.Printf("Total blobs on remote: %s (%s)\n",
humanize.Comma(int64(result.TotalBlobCount)),
humanize.Bytes(uint64(result.TotalBlobSize)))
fmt.Printf("Referenced by snapshots: %s (%s)\n",
humanize.Comma(int64(result.ReferencedBlobCount)),
humanize.Bytes(uint64(result.ReferencedBlobSize)))
fmt.Printf("Orphaned (unreferenced): %s (%s)\n",
humanize.Comma(int64(result.OrphanedBlobCount)),
humanize.Bytes(uint64(result.OrphanedBlobSize)))
if result.OrphanedBlobCount > 0 {
fmt.Printf("\nRun 'vaultik prune --remote' to remove orphaned blobs.\n")
}
return nil
}
// truncateString truncates a string to maxLen, adding "..." if truncated
func truncateString(s string, maxLen int) string {
if len(s) <= maxLen {
return s
}
if maxLen <= 3 {
return s[:maxLen]
}
return s[:maxLen-3] + "..."
}


@ -5,6 +5,8 @@ import (
"context" "context"
"database/sql" "database/sql"
"io" "io"
"os"
"path/filepath"
"sync" "sync"
"testing" "testing"
"time" "time"
@ -15,6 +17,7 @@ import (
"git.eeqj.de/sneak/vaultik/internal/snapshot" "git.eeqj.de/sneak/vaultik/internal/snapshot"
"git.eeqj.de/sneak/vaultik/internal/storage" "git.eeqj.de/sneak/vaultik/internal/storage"
"git.eeqj.de/sneak/vaultik/internal/types" "git.eeqj.de/sneak/vaultik/internal/types"
"git.eeqj.de/sneak/vaultik/internal/vaultik"
"github.com/spf13/afero" "github.com/spf13/afero"
"github.com/stretchr/testify/assert" "github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require" "github.com/stretchr/testify/require"
@ -403,3 +406,138 @@ func TestBackupAndVerify(t *testing.T) {
t.Logf("Backup and verify test completed successfully") t.Logf("Backup and verify test completed successfully")
} }
// TestBackupAndRestore tests the full backup and restore workflow
// This test verifies that the restore code correctly handles the binary SQLite
// database format that is exported by the snapshot manager.
func TestBackupAndRestore(t *testing.T) {
// Initialize logger
log.Initialize(log.Config{})
// Create real temp directory for the database (SQLite needs real filesystem)
realTempDir, err := os.MkdirTemp("", "vaultik-test-")
require.NoError(t, err)
defer func() { _ = os.RemoveAll(realTempDir) }()
// Use real OS filesystem for this test
fs := afero.NewOsFs()
// Create test directory structure and files
dataDir := filepath.Join(realTempDir, "data")
testFiles := map[string]string{
filepath.Join(dataDir, "file1.txt"): "This is file 1 content",
filepath.Join(dataDir, "file2.txt"): "This is file 2 content with more data",
filepath.Join(dataDir, "subdir", "file3.txt"): "This is file 3 in a subdirectory",
}
// Create directories and files
for path, content := range testFiles {
dir := filepath.Dir(path)
if err := fs.MkdirAll(dir, 0755); err != nil {
t.Fatalf("failed to create directory %s: %v", dir, err)
}
if err := afero.WriteFile(fs, path, []byte(content), 0644); err != nil {
t.Fatalf("failed to create test file %s: %v", path, err)
}
}
ctx := context.Background()
// Create mock storage
mockStorage := NewMockStorer()
// Test keypair
agePublicKey := "age1ezrjmfpwsc95svdg0y54mums3zevgzu0x0ecq2f7tp8a05gl0sjq9q9wjg"
ageSecretKey := "AGE-SECRET-KEY-19CR5YSFW59HM4TLD6GXVEDMZFTVVF7PPHKUT68TXSFPK7APHXA2QS2NJA5"
// Create database file
dbPath := filepath.Join(realTempDir, "test.db")
db, err := database.New(ctx, dbPath)
require.NoError(t, err)
defer func() { _ = db.Close() }()
repos := database.NewRepositories(db)
// Create config for snapshot manager
cfg := &config.Config{
AgeSecretKey: ageSecretKey,
AgeRecipients: []string{agePublicKey},
CompressionLevel: 3,
}
// Create snapshot manager
sm := snapshot.NewSnapshotManager(snapshot.SnapshotManagerParams{
Repos: repos,
Storage: mockStorage,
Config: cfg,
})
sm.SetFilesystem(fs)
// Create scanner
scanner := snapshot.NewScanner(snapshot.ScannerConfig{
FS: fs,
Storage: mockStorage,
ChunkSize: int64(16 * 1024),
MaxBlobSize: int64(100 * 1024),
CompressionLevel: 3,
AgeRecipients: []string{agePublicKey},
Repositories: repos,
})
// Create a snapshot
snapshotID, err := sm.CreateSnapshot(ctx, "test-host", "test-version", "test-git")
require.NoError(t, err)
t.Logf("Created snapshot: %s", snapshotID)
// Run the backup (scan)
result, err := scanner.Scan(ctx, dataDir, snapshotID)
require.NoError(t, err)
t.Logf("Scan complete: %d files, %d blobs", result.FilesScanned, result.BlobsCreated)
// Complete the snapshot
err = sm.CompleteSnapshot(ctx, snapshotID)
require.NoError(t, err)
// Export snapshot metadata (this uploads db.zst.age and manifest.json.zst)
err = sm.ExportSnapshotMetadata(ctx, dbPath, snapshotID)
require.NoError(t, err)
t.Logf("Exported snapshot metadata")
// Verify metadata was uploaded
keys, err := mockStorage.List(ctx, "metadata/")
require.NoError(t, err)
t.Logf("Metadata keys: %v", keys)
assert.GreaterOrEqual(t, len(keys), 2, "Should have at least db.zst.age and manifest.json.zst")
// Close the source database
err = db.Close()
require.NoError(t, err)
// Create Vaultik instance for restore
vaultikApp := &vaultik.Vaultik{
Config: cfg,
Storage: mockStorage,
Fs: fs,
Stdout: io.Discard,
Stderr: io.Discard,
}
vaultikApp.SetContext(ctx)
// Try to restore - this should work with binary SQLite format
restoreDir := filepath.Join(realTempDir, "restored")
err = vaultikApp.Restore(&vaultik.RestoreOptions{
SnapshotID: snapshotID,
TargetDir: restoreDir,
})
require.NoError(t, err, "Restore should succeed with binary SQLite database format")
// Verify restored files match originals
for origPath, expectedContent := range testFiles {
restoredPath := filepath.Join(restoreDir, origPath)
restoredContent, err := afero.ReadFile(fs, restoredPath)
require.NoError(t, err, "Should be able to read restored file: %s", restoredPath)
assert.Equal(t, expectedContent, string(restoredContent), "Restored content should match original for: %s", origPath)
}
t.Log("Backup and restore test completed successfully")
}


@ -8,7 +8,6 @@ import (
"fmt" "fmt"
"io" "io"
"os" "os"
"os/exec"
"path/filepath" "path/filepath"
"time" "time"
@ -173,7 +172,7 @@ func (v *Vaultik) Restore(opts *RestoreOptions) error {
// downloadSnapshotDB downloads and decrypts the snapshot metadata database // downloadSnapshotDB downloads and decrypts the snapshot metadata database
func (v *Vaultik) downloadSnapshotDB(snapshotID string, identity age.Identity) (*database.DB, error) { func (v *Vaultik) downloadSnapshotDB(snapshotID string, identity age.Identity) (*database.DB, error) {
// Download encrypted database from S3 // Download encrypted database from storage
dbKey := fmt.Sprintf("metadata/%s/db.zst.age", snapshotID) dbKey := fmt.Sprintf("metadata/%s/db.zst.age", snapshotID)
reader, err := v.Storage.Get(v.ctx, dbKey) reader, err := v.Storage.Get(v.ctx, dbKey)
@ -196,43 +195,30 @@ func (v *Vaultik) downloadSnapshotDB(snapshotID string, identity age.Identity) (
} }
defer func() { _ = blobReader.Close() }() defer func() { _ = blobReader.Close() }()
// Read the SQL dump // Read the binary SQLite database
sqlDump, err := io.ReadAll(blobReader) dbData, err := io.ReadAll(blobReader)
if err != nil { if err != nil {
return nil, fmt.Errorf("decrypting and decompressing: %w", err) return nil, fmt.Errorf("decrypting and decompressing: %w", err)
} }
log.Debug("Decrypted database SQL dump", "size", humanize.Bytes(uint64(len(sqlDump)))) log.Debug("Decrypted database", "size", humanize.Bytes(uint64(len(dbData))))
// Create a temporary database file // Create a temporary database file and write the binary SQLite data directly
tempFile, err := afero.TempFile(v.Fs, "", "vaultik-restore-*.db") tempFile, err := afero.TempFile(v.Fs, "", "vaultik-restore-*.db")
if err != nil { if err != nil {
return nil, fmt.Errorf("creating temp file: %w", err) return nil, fmt.Errorf("creating temp file: %w", err)
} }
tempPath := tempFile.Name() tempPath := tempFile.Name()
// Write the binary SQLite database directly
if _, err := tempFile.Write(dbData); err != nil {
_ = tempFile.Close()
_ = v.Fs.Remove(tempPath)
return nil, fmt.Errorf("writing database file: %w", err)
}
if err := tempFile.Close(); err != nil { if err := tempFile.Close(); err != nil {
_ = v.Fs.Remove(tempPath)
return nil, fmt.Errorf("closing temp file: %w", err) return nil, fmt.Errorf("closing temp file: %w", err)
} }
// Write SQL to a temp file for sqlite3 to read
sqlTempFile, err := afero.TempFile(v.Fs, "", "vaultik-restore-*.sql")
if err != nil {
return nil, fmt.Errorf("creating SQL temp file: %w", err)
}
sqlTempPath := sqlTempFile.Name()
if _, err := sqlTempFile.Write(sqlDump); err != nil {
_ = sqlTempFile.Close()
return nil, fmt.Errorf("writing SQL dump: %w", err)
}
if err := sqlTempFile.Close(); err != nil {
return nil, fmt.Errorf("closing SQL temp file: %w", err)
}
defer func() { _ = v.Fs.Remove(sqlTempPath) }()
// Execute the SQL dump to create the database
cmd := exec.Command("sqlite3", tempPath, ".read "+sqlTempPath)
if output, err := cmd.CombinedOutput(); err != nil {
return nil, fmt.Errorf("executing SQL dump: %w\nOutput: %s", err, output)
}
log.Debug("Created restore database", "path", tempPath) log.Debug("Created restore database", "path", tempPath)
// Open the database // Open the database
@ -433,13 +419,13 @@ func (v *Vaultik) restoreRegularFile(
blobHashStr := blob.Hash.String() blobHashStr := blob.Hash.String()
blobData, ok := blobCache[blobHashStr] blobData, ok := blobCache[blobHashStr]
if !ok { if !ok {
blobData, err = v.downloadBlob(ctx, blobHashStr, identity) blobData, err = v.downloadBlob(ctx, blobHashStr, blob.CompressedSize, identity)
if err != nil { if err != nil {
return fmt.Errorf("downloading blob %s: %w", blobHashStr[:16], err) return fmt.Errorf("downloading blob %s: %w", blobHashStr[:16], err)
} }
blobCache[blobHashStr] = blobData blobCache[blobHashStr] = blobData
result.BlobsDownloaded++ result.BlobsDownloaded++
result.BytesDownloaded += int64(len(blobData)) result.BytesDownloaded += blob.CompressedSize
} }
// Extract chunk from blob // Extract chunk from blob
@ -488,41 +474,12 @@ func (v *Vaultik) restoreRegularFile(
} }
// downloadBlob downloads and decrypts a blob // downloadBlob downloads and decrypts a blob
func (v *Vaultik) downloadBlob(ctx context.Context, blobHash string, identity age.Identity) ([]byte, error) { func (v *Vaultik) downloadBlob(ctx context.Context, blobHash string, expectedSize int64, identity age.Identity) ([]byte, error) {
// Construct blob path with sharding result, err := v.FetchAndDecryptBlob(ctx, blobHash, expectedSize, identity)
blobPath := fmt.Sprintf("blobs/%s/%s/%s", blobHash[:2], blobHash[2:4], blobHash)
reader, err := v.Storage.Get(ctx, blobPath)
if err != nil { if err != nil {
return nil, fmt.Errorf("downloading blob: %w", err) return nil, err
} }
defer func() { _ = reader.Close() }() return result.Data, nil
// Read encrypted data
encryptedData, err := io.ReadAll(reader)
if err != nil {
return nil, fmt.Errorf("reading blob data: %w", err)
}
// Decrypt and decompress
blobReader, err := blobgen.NewReader(bytes.NewReader(encryptedData), identity)
if err != nil {
return nil, fmt.Errorf("creating decryption reader: %w", err)
}
defer func() { _ = blobReader.Close() }()
data, err := io.ReadAll(blobReader)
if err != nil {
return nil, fmt.Errorf("decrypting blob: %w", err)
}
log.Debug("Downloaded and decrypted blob",
"hash", blobHash[:16],
"encrypted_size", humanize.Bytes(uint64(len(encryptedData))),
"decrypted_size", humanize.Bytes(uint64(len(data))),
)
return data, nil
} }
// verifyRestoredFiles verifies that all restored files match their expected chunk hashes // verifyRestoredFiles verifies that all restored files match their expected chunk hashes


@ -327,6 +327,10 @@ func (v *Vaultik) ListSnapshots(jsonOutput bool) error {
// Extract snapshot ID from paths like metadata/hostname-20240115-143052Z/ // Extract snapshot ID from paths like metadata/hostname-20240115-143052Z/
parts := strings.Split(object.Key, "/") parts := strings.Split(object.Key, "/")
if len(parts) >= 2 && parts[0] == "metadata" && parts[1] != "" { if len(parts) >= 2 && parts[0] == "metadata" && parts[1] != "" {
// Skip macOS resource fork files (._*) and other hidden files
if strings.HasPrefix(parts[1], ".") {
continue
}
remoteSnapshots[parts[1]] = true remoteSnapshots[parts[1]] = true
} }
} }
@ -425,6 +429,32 @@ func (v *Vaultik) ListSnapshots(jsonOutput bool) error {
// Table output // Table output
w := tabwriter.NewWriter(os.Stdout, 0, 0, 3, ' ', 0) w := tabwriter.NewWriter(os.Stdout, 0, 0, 3, ' ', 0)
// Show configured snapshots from config file
if _, err := fmt.Fprintln(w, "CONFIGURED SNAPSHOTS:"); err != nil {
return err
}
if _, err := fmt.Fprintln(w, "NAME\tPATHS"); err != nil {
return err
}
if _, err := fmt.Fprintln(w, "────\t─────"); err != nil {
return err
}
for _, name := range v.Config.SnapshotNames() {
snap := v.Config.Snapshots[name]
paths := strings.Join(snap.Paths, ", ")
if _, err := fmt.Fprintf(w, "%s\t%s\n", name, paths); err != nil {
return err
}
}
if _, err := fmt.Fprintln(w); err != nil {
return err
}
// Show remote snapshots
if _, err := fmt.Fprintln(w, "REMOTE SNAPSHOTS:"); err != nil {
return err
}
if _, err := fmt.Fprintln(w, "SNAPSHOT ID\tTIMESTAMP\tCOMPRESSED SIZE"); err != nil { if _, err := fmt.Fprintln(w, "SNAPSHOT ID\tTIMESTAMP\tCOMPRESSED SIZE"); err != nil {
return err return err
} }
@ -527,11 +557,15 @@ func (v *Vaultik) PurgeSnapshots(keepLatest bool, olderThan string, force bool)
fmt.Printf("\nDeleting %d snapshot(s) (--force specified)\n", len(toDelete)) fmt.Printf("\nDeleting %d snapshot(s) (--force specified)\n", len(toDelete))
} }
// Delete snapshots // Delete snapshots (both local and remote)
for _, snap := range toDelete { for _, snap := range toDelete {
log.Info("Deleting snapshot", "id", snap.ID) snapshotID := snap.ID.String()
if err := v.deleteSnapshot(snap.ID.String()); err != nil { log.Info("Deleting snapshot", "id", snapshotID)
return fmt.Errorf("deleting snapshot %s: %w", snap.ID, err) if err := v.deleteSnapshotFromLocalDB(snapshotID); err != nil {
log.Error("Failed to delete from local database", "snapshot_id", snapshotID, "error", err)
}
if err := v.deleteSnapshotFromRemote(snapshotID); err != nil {
return fmt.Errorf("deleting snapshot %s from remote: %w", snapshotID, err)
} }
} }
@ -722,49 +756,6 @@ func (v *Vaultik) downloadManifest(snapshotID string) (*snapshot.Manifest, error
return manifest, nil return manifest, nil
} }
func (v *Vaultik) deleteSnapshot(snapshotID string) error {
// First, delete from storage
// List all objects under metadata/{snapshotID}/
prefix := fmt.Sprintf("metadata/%s/", snapshotID)
objectCh := v.Storage.ListStream(v.ctx, prefix)
var objectsToDelete []string
for object := range objectCh {
if object.Err != nil {
return fmt.Errorf("listing objects: %w", object.Err)
}
objectsToDelete = append(objectsToDelete, object.Key)
}
// Delete all objects
for _, key := range objectsToDelete {
if err := v.Storage.Delete(v.ctx, key); err != nil {
return fmt.Errorf("removing %s: %w", key, err)
}
}
// Then, delete from local database (if we have a local database)
if v.Repositories != nil {
// Delete related records first to avoid foreign key constraints
if err := v.Repositories.Snapshots.DeleteSnapshotFiles(v.ctx, snapshotID); err != nil {
log.Error("Failed to delete snapshot files", "snapshot_id", snapshotID, "error", err)
}
if err := v.Repositories.Snapshots.DeleteSnapshotBlobs(v.ctx, snapshotID); err != nil {
log.Error("Failed to delete snapshot blobs", "snapshot_id", snapshotID, "error", err)
}
if err := v.Repositories.Snapshots.DeleteSnapshotUploads(v.ctx, snapshotID); err != nil {
log.Error("Failed to delete snapshot uploads", "snapshot_id", snapshotID, "error", err)
}
// Now delete the snapshot itself
if err := v.Repositories.Snapshots.Delete(v.ctx, snapshotID); err != nil {
return fmt.Errorf("deleting snapshot from database: %w", err)
}
}
return nil
}
func (v *Vaultik) syncWithRemote() error { func (v *Vaultik) syncWithRemote() error {
log.Info("Syncing with remote snapshots") log.Info("Syncing with remote snapshots")
@ -780,6 +771,10 @@ func (v *Vaultik) syncWithRemote() error {
// Extract snapshot ID from paths like metadata/hostname-20240115-143052Z/ // Extract snapshot ID from paths like metadata/hostname-20240115-143052Z/
parts := strings.Split(object.Key, "/") parts := strings.Split(object.Key, "/")
if len(parts) >= 2 && parts[0] == "metadata" && parts[1] != "" { if len(parts) >= 2 && parts[0] == "metadata" && parts[1] != "" {
// Skip macOS resource fork files (._*) and other hidden files
if strings.HasPrefix(parts[1], ".") {
continue
}
remoteSnapshots[parts[1]] = true remoteSnapshots[parts[1]] = true
} }
} }
@ -818,137 +813,47 @@ type RemoveOptions struct {
Force bool Force bool
DryRun bool DryRun bool
JSON bool JSON bool
Remote bool // Also remove metadata from remote storage
All bool // Remove all snapshots (requires Force)
} }
// RemoveResult contains the result of a snapshot removal // RemoveResult contains the result of a snapshot removal
type RemoveResult struct { type RemoveResult struct {
SnapshotID string `json:"snapshot_id"` SnapshotID string `json:"snapshot_id,omitempty"`
BlobsDeleted int `json:"blobs_deleted"` SnapshotsRemoved []string `json:"snapshots_removed,omitempty"`
BytesFreed int64 `json:"bytes_freed"` RemoteRemoved bool `json:"remote_removed,omitempty"`
BlobsFailed int `json:"blobs_failed,omitempty"`
DryRun bool `json:"dry_run,omitempty"` DryRun bool `json:"dry_run,omitempty"`
} }
// RemoveSnapshot removes a snapshot and any blobs that become orphaned // RemoveSnapshot removes a snapshot from the local database and optionally from remote storage
// Note: This does NOT remove blobs. Use 'vaultik prune' to remove orphaned blobs.
func (v *Vaultik) RemoveSnapshot(snapshotID string, opts *RemoveOptions) (*RemoveResult, error) { func (v *Vaultik) RemoveSnapshot(snapshotID string, opts *RemoveOptions) (*RemoveResult, error) {
log.Info("Starting snapshot removal", "snapshot_id", snapshotID)
result := &RemoveResult{ result := &RemoveResult{
SnapshotID: snapshotID, SnapshotID: snapshotID,
} }
// Step 1: List all snapshots in storage
log.Info("Listing remote snapshots")
objectCh := v.Storage.ListStream(v.ctx, "metadata/")
var allSnapshotIDs []string
targetExists := false
for object := range objectCh {
if object.Err != nil {
return nil, fmt.Errorf("listing remote snapshots: %w", object.Err)
}
// Extract snapshot ID from paths like metadata/hostname-20240115-143052Z/
parts := strings.Split(object.Key, "/")
if len(parts) >= 2 && parts[0] == "metadata" && parts[1] != "" {
if strings.HasSuffix(object.Key, "/") || strings.Contains(object.Key, "/manifest.json.zst") {
sid := parts[1]
// Only add unique snapshot IDs
found := false
for _, id := range allSnapshotIDs {
if id == sid {
found = true
break
}
}
if !found {
allSnapshotIDs = append(allSnapshotIDs, sid)
if sid == snapshotID {
targetExists = true
}
}
}
}
}
if !targetExists {
return nil, fmt.Errorf("snapshot not found: %s", snapshotID)
}
log.Info("Found snapshots", "total", len(allSnapshotIDs))
// Step 2: Download target snapshot's manifest
log.Info("Downloading target manifest", "snapshot_id", snapshotID)
targetManifest, err := v.downloadManifest(snapshotID)
if err != nil {
return nil, fmt.Errorf("downloading target manifest: %w", err)
}
// Build set of target blob hashes with sizes
targetBlobs := make(map[string]int64) // hash -> size
for _, blob := range targetManifest.Blobs {
targetBlobs[blob.Hash] = blob.CompressedSize
}
log.Info("Target snapshot has blobs", "count", len(targetBlobs))
// Step 3: Download manifests from all OTHER snapshots to build "in-use" set
inUseBlobs := make(map[string]bool)
otherCount := 0
for _, sid := range allSnapshotIDs {
if sid == snapshotID {
continue // Skip target snapshot
}
log.Debug("Processing manifest", "snapshot_id", sid)
manifest, err := v.downloadManifest(sid)
if err != nil {
log.Error("Failed to download manifest", "snapshot_id", sid, "error", err)
continue
}
for _, blob := range manifest.Blobs {
inUseBlobs[blob.Hash] = true
}
otherCount++
}
log.Info("Processed other manifests", "count", otherCount, "in_use_blobs", len(inUseBlobs))
// Step 4: Find orphaned blobs (in target but not in use by others)
var orphanedBlobs []string
var totalSize int64
for hash, size := range targetBlobs {
if !inUseBlobs[hash] {
orphanedBlobs = append(orphanedBlobs, hash)
totalSize += size
}
}
log.Info("Found orphaned blobs",
"count", len(orphanedBlobs),
"total_size", humanize.Bytes(uint64(totalSize)),
)
// Show summary (unless JSON mode)
if !opts.JSON {
_, _ = fmt.Fprintf(v.Stdout, "\nSnapshot: %s\n", snapshotID)
_, _ = fmt.Fprintf(v.Stdout, "Blobs in snapshot: %d\n", len(targetBlobs))
_, _ = fmt.Fprintf(v.Stdout, "Orphaned blobs to delete: %d (%s)\n", len(orphanedBlobs), humanize.Bytes(uint64(totalSize)))
}
if opts.DryRun { if opts.DryRun {
result.DryRun = true result.DryRun = true
if !opts.JSON {
_, _ = fmt.Fprintf(v.Stdout, "Would remove snapshot: %s\n", snapshotID)
if opts.Remote {
_, _ = fmt.Fprintln(v.Stdout, "Would also remove from remote storage")
}
_, _ = fmt.Fprintln(v.Stdout, "[Dry run - no changes made]")
}
if opts.JSON { if opts.JSON {
return result, v.outputRemoveJSON(result) return result, v.outputRemoveJSON(result)
} }
_, _ = fmt.Fprintln(v.Stdout, "\n[Dry run - no changes made]")
return result, nil return result, nil
} }
// Confirm unless --force is used (skip in JSON mode - require --force) // Confirm unless --force is used (skip in JSON mode - require --force)
if !opts.Force && !opts.JSON { if !opts.Force && !opts.JSON {
_, _ = fmt.Fprintf(v.Stdout, "\nDelete snapshot and %d orphaned blob(s)? [y/N] ", len(orphanedBlobs)) if opts.Remote {
_, _ = fmt.Fprintf(v.Stdout, "Remove snapshot '%s' from local database and remote storage? [y/N] ", snapshotID)
} else {
_, _ = fmt.Fprintf(v.Stdout, "Remove snapshot '%s' from local database? [y/N] ", snapshotID)
}
var confirm string var confirm string
if _, err := fmt.Fscanln(v.Stdin, &confirm); err != nil { if _, err := fmt.Fscanln(v.Stdin, &confirm); err != nil {
_, _ = fmt.Fprintln(v.Stdout, "Cancelled") _, _ = fmt.Fprintln(v.Stdout, "Cancelled")
@ -960,36 +865,20 @@ func (v *Vaultik) RemoveSnapshot(snapshotID string, opts *RemoveOptions) (*Remov
} }
} }
// Step 5: Delete orphaned blobs log.Info("Removing snapshot from local database", "snapshot_id", snapshotID)
if len(orphanedBlobs) > 0 {
log.Info("Deleting orphaned blobs")
for i, hash := range orphanedBlobs {
blobPath := fmt.Sprintf("blobs/%s/%s/%s", hash[:2], hash[2:4], hash)
if err := v.Storage.Delete(v.ctx, blobPath); err != nil { // Remove from local database
log.Error("Failed to delete blob", "hash", hash, "error", err) if err := v.deleteSnapshotFromLocalDB(snapshotID); err != nil {
result.BlobsFailed++ return result, fmt.Errorf("removing from local database: %w", err)
continue
} }
result.BlobsDeleted++ // If --remote, also remove from remote storage
result.BytesFreed += targetBlobs[hash] if opts.Remote {
log.Info("Removing snapshot metadata from remote storage", "snapshot_id", snapshotID)
// Progress update every 100 blobs if err := v.deleteSnapshotFromRemote(snapshotID); err != nil {
if (i+1)%100 == 0 || i == len(orphanedBlobs)-1 { return result, fmt.Errorf("removing from remote storage: %w", err)
log.Info("Deletion progress",
"deleted", i+1,
"total", len(orphanedBlobs),
"percent", fmt.Sprintf("%.1f%%", float64(i+1)/float64(len(orphanedBlobs))*100),
)
} }
} result.RemoteRemoved = true
}
// Step 6: Delete snapshot metadata
log.Info("Deleting snapshot metadata")
if err := v.deleteSnapshot(snapshotID); err != nil {
return result, fmt.Errorf("deleting snapshot metadata: %w", err)
} }
// Output result // Output result
@ -998,16 +887,165 @@ func (v *Vaultik) RemoveSnapshot(snapshotID string, opts *RemoveOptions) (*Remov
} }
// Print summary // Print summary
_, _ = fmt.Fprintf(v.Stdout, "\nRemoved snapshot %s\n", snapshotID) _, _ = fmt.Fprintf(v.Stdout, "Removed snapshot '%s' from local database\n", snapshotID)
_, _ = fmt.Fprintf(v.Stdout, " Blobs deleted: %d\n", result.BlobsDeleted) if opts.Remote {
_, _ = fmt.Fprintf(v.Stdout, " Storage freed: %s\n", humanize.Bytes(uint64(result.BytesFreed))) _, _ = fmt.Fprintln(v.Stdout, "Removed snapshot metadata from remote storage")
if result.BlobsFailed > 0 { _, _ = fmt.Fprintln(v.Stdout, "\nNote: Blobs were not removed. Run 'vaultik prune' to remove orphaned blobs.")
_, _ = fmt.Fprintf(v.Stdout, " Blobs failed: %d\n", result.BlobsFailed)
} }
return result, nil return result, nil
} }
// RemoveAllSnapshots removes all snapshots from local database and optionally from remote
func (v *Vaultik) RemoveAllSnapshots(opts *RemoveOptions) (*RemoveResult, error) {
result := &RemoveResult{}
// List all snapshots
log.Info("Listing all snapshots")
objectCh := v.Storage.ListStream(v.ctx, "metadata/")
var snapshotIDs []string
for object := range objectCh {
if object.Err != nil {
return nil, fmt.Errorf("listing remote snapshots: %w", object.Err)
}
parts := strings.Split(object.Key, "/")
if len(parts) >= 2 && parts[0] == "metadata" && parts[1] != "" {
// Skip macOS resource fork files (._*) and other hidden files
if strings.HasPrefix(parts[1], ".") {
continue
}
if strings.HasSuffix(object.Key, "/") || strings.Contains(object.Key, "/manifest.json.zst") {
sid := parts[1]
found := false
for _, id := range snapshotIDs {
if id == sid {
found = true
break
}
}
if !found {
snapshotIDs = append(snapshotIDs, sid)
}
}
}
}
if len(snapshotIDs) == 0 {
if !opts.JSON {
_, _ = fmt.Fprintln(v.Stdout, "No snapshots found")
}
return result, nil
}
if opts.DryRun {
result.DryRun = true
result.SnapshotsRemoved = snapshotIDs
if !opts.JSON {
_, _ = fmt.Fprintf(v.Stdout, "Would remove %d snapshot(s):\n", len(snapshotIDs))
for _, id := range snapshotIDs {
_, _ = fmt.Fprintf(v.Stdout, " %s\n", id)
}
if opts.Remote {
_, _ = fmt.Fprintln(v.Stdout, "Would also remove from remote storage")
}
_, _ = fmt.Fprintln(v.Stdout, "[Dry run - no changes made]")
}
if opts.JSON {
return result, v.outputRemoveJSON(result)
}
return result, nil
}
// --all requires --force
if !opts.Force {
return nil, fmt.Errorf("--all requires --force")
}
log.Info("Removing all snapshots", "count", len(snapshotIDs))
for _, snapshotID := range snapshotIDs {
log.Info("Removing snapshot", "snapshot_id", snapshotID)
if err := v.deleteSnapshotFromLocalDB(snapshotID); err != nil {
log.Error("Failed to remove from local database", "snapshot_id", snapshotID, "error", err)
continue
}
if opts.Remote {
if err := v.deleteSnapshotFromRemote(snapshotID); err != nil {
log.Error("Failed to remove from remote", "snapshot_id", snapshotID, "error", err)
continue
}
}
result.SnapshotsRemoved = append(result.SnapshotsRemoved, snapshotID)
}
if opts.Remote {
result.RemoteRemoved = true
}
if opts.JSON {
return result, v.outputRemoveJSON(result)
}
_, _ = fmt.Fprintf(v.Stdout, "Removed %d snapshot(s)\n", len(result.SnapshotsRemoved))
if opts.Remote {
_, _ = fmt.Fprintln(v.Stdout, "Removed snapshot metadata from remote storage")
_, _ = fmt.Fprintln(v.Stdout, "\nNote: Blobs were not removed. Run 'vaultik prune' to remove orphaned blobs.")
}
return result, nil
}
// deleteSnapshotFromLocalDB removes a snapshot from the local database only
func (v *Vaultik) deleteSnapshotFromLocalDB(snapshotID string) error {
if v.Repositories == nil {
return nil // No local database
}
// Delete related records first to avoid foreign key constraints
if err := v.Repositories.Snapshots.DeleteSnapshotFiles(v.ctx, snapshotID); err != nil {
log.Error("Failed to delete snapshot files", "snapshot_id", snapshotID, "error", err)
}
if err := v.Repositories.Snapshots.DeleteSnapshotBlobs(v.ctx, snapshotID); err != nil {
log.Error("Failed to delete snapshot blobs", "snapshot_id", snapshotID, "error", err)
}
if err := v.Repositories.Snapshots.DeleteSnapshotUploads(v.ctx, snapshotID); err != nil {
log.Error("Failed to delete snapshot uploads", "snapshot_id", snapshotID, "error", err)
}
if err := v.Repositories.Snapshots.Delete(v.ctx, snapshotID); err != nil {
log.Error("Failed to delete snapshot record", "snapshot_id", snapshotID, "error", err)
}
return nil
}
// deleteSnapshotFromRemote removes snapshot metadata files from remote storage
func (v *Vaultik) deleteSnapshotFromRemote(snapshotID string) error {
prefix := fmt.Sprintf("metadata/%s/", snapshotID)
objectCh := v.Storage.ListStream(v.ctx, prefix)
var objectsToDelete []string
for object := range objectCh {
if object.Err != nil {
return fmt.Errorf("listing objects: %w", object.Err)
}
objectsToDelete = append(objectsToDelete, object.Key)
}
for _, key := range objectsToDelete {
if err := v.Storage.Delete(v.ctx, key); err != nil {
return fmt.Errorf("removing %s: %w", key, err)
}
log.Debug("Deleted remote object", "key", key)
}
return nil
}
// outputRemoveJSON outputs the removal result as JSON // outputRemoveJSON outputs the removal result as JSON
func (v *Vaultik) outputRemoveJSON(result *RemoveResult) error { func (v *Vaultik) outputRemoveJSON(result *RemoveResult) error {
encoder := json.NewEncoder(os.Stdout) encoder := json.NewEncoder(os.Stdout)
@ -1027,7 +1065,7 @@ type PruneResult struct {
// and blobs from the local database. This ensures database consistency // and blobs from the local database. This ensures database consistency
// before starting a new backup or on-demand via the prune command. // before starting a new backup or on-demand via the prune command.
func (v *Vaultik) PruneDatabase() (*PruneResult, error) { func (v *Vaultik) PruneDatabase() (*PruneResult, error) {
log.Info("Pruning database: removing incomplete snapshots and orphaned data") log.Info("Pruning local database: removing incomplete snapshots and orphaned data")
result := &PruneResult{} result := &PruneResult{}
@ -1076,7 +1114,7 @@ func (v *Vaultik) PruneDatabase() (*PruneResult, error) {
result.ChunksDeleted = chunkCountBefore - chunkCountAfter result.ChunksDeleted = chunkCountBefore - chunkCountAfter
result.BlobsDeleted = blobCountBefore - blobCountAfter result.BlobsDeleted = blobCountBefore - blobCountAfter
log.Info("Prune complete", log.Info("Local database prune complete",
"incomplete_snapshots", result.SnapshotsDeleted, "incomplete_snapshots", result.SnapshotsDeleted,
"orphaned_files", result.FilesDeleted, "orphaned_files", result.FilesDeleted,
"orphaned_chunks", result.ChunksDeleted, "orphaned_chunks", result.ChunksDeleted,
@ -1084,7 +1122,7 @@ func (v *Vaultik) PruneDatabase() (*PruneResult, error) {
) )
// Print summary // Print summary
_, _ = fmt.Fprintf(v.Stdout, "Prune complete:\n") _, _ = fmt.Fprintf(v.Stdout, "Local database prune complete:\n")
_, _ = fmt.Fprintf(v.Stdout, " Incomplete snapshots removed: %d\n", result.SnapshotsDeleted) _, _ = fmt.Fprintf(v.Stdout, " Incomplete snapshots removed: %d\n", result.SnapshotsDeleted)
_, _ = fmt.Fprintf(v.Stdout, " Orphaned files removed: %d\n", result.FilesDeleted) _, _ = fmt.Fprintf(v.Stdout, " Orphaned files removed: %d\n", result.FilesDeleted)
_, _ = fmt.Fprintf(v.Stdout, " Orphaned chunks removed: %d\n", result.ChunksDeleted) _, _ = fmt.Fprintf(v.Stdout, " Orphaned chunks removed: %d\n", result.ChunksDeleted)


@ -91,6 +91,11 @@ func (v *Vaultik) Context() context.Context {
return v.ctx return v.ctx
} }
// SetContext sets the Vaultik's context (primarily for testing)
func (v *Vaultik) SetContext(ctx context.Context) {
v.ctx = ctx
}
// Cancel cancels the Vaultik's context // Cancel cancels the Vaultik's context
func (v *Vaultik) Cancel() { func (v *Vaultik) Cancel() {
v.cancel() v.cancel()
@ -124,6 +129,12 @@ func (v *Vaultik) GetFilesystem() afero.Fs {
return v.Fs return v.Fs
} }
// Outputf writes formatted output to stdout for user-facing messages.
// This should be used for all non-log user output.
func (v *Vaultik) Outputf(format string, args ...any) {
_, _ = fmt.Fprintf(v.Stdout, format, args...)
}
// TestVaultik wraps a Vaultik with captured stdout/stderr for testing // TestVaultik wraps a Vaultik with captured stdout/stderr for testing
type TestVaultik struct { type TestVaultik struct {
*Vaultik *Vaultik
View File
@ -7,6 +7,7 @@ import (
"fmt" "fmt"
"io" "io"
"os" "os"
"time"
"git.eeqj.de/sneak/vaultik/internal/log" "git.eeqj.de/sneak/vaultik/internal/log"
"git.eeqj.de/sneak/vaultik/internal/snapshot" "git.eeqj.de/sneak/vaultik/internal/snapshot"
@ -36,22 +37,44 @@ type VerifyResult struct {
// RunDeepVerify executes deep verification operation // RunDeepVerify executes deep verification operation
func (v *Vaultik) RunDeepVerify(snapshotID string, opts *VerifyOptions) error { func (v *Vaultik) RunDeepVerify(snapshotID string, opts *VerifyOptions) error {
result := &VerifyResult{
SnapshotID: snapshotID,
Mode: "deep",
}
// Check for decryption capability // Check for decryption capability
if !v.CanDecrypt() { if !v.CanDecrypt() {
return fmt.Errorf("age_secret_key missing from config - required for deep verification") result.Status = "failed"
result.ErrorMessage = "VAULTIK_AGE_SECRET_KEY environment variable not set - required for deep verification"
if opts.JSON {
return v.outputVerifyJSON(result)
}
return fmt.Errorf("VAULTIK_AGE_SECRET_KEY environment variable not set - required for deep verification")
} }
log.Info("Starting snapshot verification", log.Info("Starting snapshot verification",
"snapshot_id", snapshotID, "snapshot_id", snapshotID,
"mode", map[bool]string{true: "deep", false: "shallow"}[opts.Deep], "mode", "deep",
) )
if !opts.JSON {
v.Outputf("Deep verification of snapshot: %s\n\n", snapshotID)
}
// Step 1: Download manifest // Step 1: Download manifest
manifestPath := fmt.Sprintf("metadata/%s/manifest.json.zst", snapshotID) manifestPath := fmt.Sprintf("metadata/%s/manifest.json.zst", snapshotID)
log.Info("Downloading manifest", "path", manifestPath) log.Info("Downloading manifest", "path", manifestPath)
if !opts.JSON {
v.Outputf("Downloading manifest...\n")
}
manifestReader, err := v.Storage.Get(v.ctx, manifestPath) manifestReader, err := v.Storage.Get(v.ctx, manifestPath)
if err != nil { if err != nil {
result.Status = "failed"
result.ErrorMessage = fmt.Sprintf("failed to download manifest: %v", err)
if opts.JSON {
return v.outputVerifyJSON(result)
}
return fmt.Errorf("failed to download manifest: %w", err) return fmt.Errorf("failed to download manifest: %w", err)
} }
defer func() { _ = manifestReader.Close() }() defer func() { _ = manifestReader.Close() }()
@ -59,20 +82,36 @@ func (v *Vaultik) RunDeepVerify(snapshotID string, opts *VerifyOptions) error {
// Decompress manifest // Decompress manifest
manifest, err := snapshot.DecodeManifest(manifestReader) manifest, err := snapshot.DecodeManifest(manifestReader)
if err != nil { if err != nil {
result.Status = "failed"
result.ErrorMessage = fmt.Sprintf("failed to decode manifest: %v", err)
if opts.JSON {
return v.outputVerifyJSON(result)
}
return fmt.Errorf("failed to decode manifest: %w", err) return fmt.Errorf("failed to decode manifest: %w", err)
} }
log.Info("Manifest loaded", log.Info("Manifest loaded",
"blob_count", manifest.BlobCount, "manifest_blob_count", manifest.BlobCount,
"total_size", humanize.Bytes(uint64(manifest.TotalCompressedSize)), "manifest_total_size", humanize.Bytes(uint64(manifest.TotalCompressedSize)),
) )
if !opts.JSON {
v.Outputf("Manifest loaded: %d blobs (%s)\n", manifest.BlobCount, humanize.Bytes(uint64(manifest.TotalCompressedSize)))
}
// Step 2: Download and decrypt database // Step 2: Download and decrypt database (authoritative source)
dbPath := fmt.Sprintf("metadata/%s/db.zst.age", snapshotID) dbPath := fmt.Sprintf("metadata/%s/db.zst.age", snapshotID)
log.Info("Downloading encrypted database", "path", dbPath) log.Info("Downloading encrypted database", "path", dbPath)
if !opts.JSON {
v.Outputf("Downloading and decrypting database...\n")
}
dbReader, err := v.Storage.Get(v.ctx, dbPath) dbReader, err := v.Storage.Get(v.ctx, dbPath)
if err != nil { if err != nil {
result.Status = "failed"
result.ErrorMessage = fmt.Sprintf("failed to download database: %v", err)
if opts.JSON {
return v.outputVerifyJSON(result)
}
return fmt.Errorf("failed to download database: %w", err) return fmt.Errorf("failed to download database: %w", err)
} }
defer func() { _ = dbReader.Close() }() defer func() { _ = dbReader.Close() }()
@ -80,6 +119,11 @@ func (v *Vaultik) RunDeepVerify(snapshotID string, opts *VerifyOptions) error {
// Decrypt and decompress database // Decrypt and decompress database
tempDB, err := v.decryptAndLoadDatabase(dbReader, v.Config.AgeSecretKey) tempDB, err := v.decryptAndLoadDatabase(dbReader, v.Config.AgeSecretKey)
if err != nil { if err != nil {
result.Status = "failed"
result.ErrorMessage = fmt.Sprintf("failed to decrypt database: %v", err)
if opts.JSON {
return v.outputVerifyJSON(result)
}
return fmt.Errorf("failed to decrypt database: %w", err) return fmt.Errorf("failed to decrypt database: %w", err)
} }
defer func() { defer func() {
@ -88,28 +132,90 @@ func (v *Vaultik) RunDeepVerify(snapshotID string, opts *VerifyOptions) error {
} }
}() }()
// Step 3: Compare blob lists // Step 3: Get authoritative blob list from database
if err := v.verifyBlobLists(snapshotID, manifest, tempDB.DB); err != nil { dbBlobs, err := v.getBlobsFromDatabase(snapshotID, tempDB.DB)
if err != nil {
result.Status = "failed"
result.ErrorMessage = fmt.Sprintf("failed to get blobs from database: %v", err)
if opts.JSON {
return v.outputVerifyJSON(result)
}
return fmt.Errorf("failed to get blobs from database: %w", err)
}
result.BlobCount = len(dbBlobs)
var totalSize int64
for _, blob := range dbBlobs {
totalSize += blob.CompressedSize
}
result.TotalSize = totalSize
log.Info("Database loaded",
"db_blob_count", len(dbBlobs),
"db_total_size", humanize.Bytes(uint64(totalSize)),
)
if !opts.JSON {
v.Outputf("Database loaded: %d blobs (%s)\n", len(dbBlobs), humanize.Bytes(uint64(totalSize)))
v.Outputf("Verifying manifest against database...\n")
}
// Step 4: Verify manifest matches database
if err := v.verifyManifestAgainstDatabase(manifest, dbBlobs); err != nil {
result.Status = "failed"
result.ErrorMessage = err.Error()
if opts.JSON {
return v.outputVerifyJSON(result)
}
return err return err
} }
// Step 4: Verify blob existence // Step 5: Verify all blobs exist in S3 (using database as source)
if err := v.verifyBlobExistence(manifest); err != nil { if !opts.JSON {
v.Outputf("Manifest verified.\n")
v.Outputf("Checking blob existence in remote storage...\n")
}
if err := v.verifyBlobExistenceFromDB(dbBlobs); err != nil {
result.Status = "failed"
result.ErrorMessage = err.Error()
if opts.JSON {
return v.outputVerifyJSON(result)
}
return err return err
} }
// Step 5: Deep verification if requested // Step 6: Deep verification - download and verify blob contents
if opts.Deep { if !opts.JSON {
if err := v.performDeepVerification(manifest, tempDB.DB); err != nil { v.Outputf("All blobs exist.\n")
v.Outputf("Downloading and verifying blob contents (%d blobs, %s)...\n", len(dbBlobs), humanize.Bytes(uint64(totalSize)))
}
if err := v.performDeepVerificationFromDB(dbBlobs, tempDB.DB, opts); err != nil {
result.Status = "failed"
result.ErrorMessage = err.Error()
if opts.JSON {
return v.outputVerifyJSON(result)
}
return err return err
} }
// Success
result.Status = "ok"
result.Verified = len(dbBlobs)
if opts.JSON {
return v.outputVerifyJSON(result)
} }
log.Info("✓ Verification completed successfully", log.Info("✓ Verification completed successfully",
"snapshot_id", snapshotID, "snapshot_id", snapshotID,
"mode", map[bool]string{true: "deep", false: "shallow"}[opts.Deep], "mode", "deep",
"blobs_verified", len(dbBlobs),
) )
v.Outputf("\n✓ Verification completed successfully\n")
v.Outputf(" Snapshot: %s\n", snapshotID)
v.Outputf(" Blobs verified: %d\n", len(dbBlobs))
v.Outputf(" Total size: %s\n", humanize.Bytes(uint64(totalSize)))
return nil return nil
} }
@ -125,7 +231,7 @@ func (t *tempDB) Close() error {
return err return err
} }
// decryptAndLoadDatabase decrypts and loads the database from the encrypted stream // decryptAndLoadDatabase decrypts and loads the binary SQLite database from the encrypted stream
func (v *Vaultik) decryptAndLoadDatabase(reader io.ReadCloser, secretKey string) (*tempDB, error) { func (v *Vaultik) decryptAndLoadDatabase(reader io.ReadCloser, secretKey string) (*tempDB, error) {
// Get decryptor // Get decryptor
decryptor, err := v.GetDecryptor() decryptor, err := v.GetDecryptor()
@ -139,32 +245,31 @@ func (v *Vaultik) decryptAndLoadDatabase(reader io.ReadCloser, secretKey string)
return nil, fmt.Errorf("failed to decrypt database: %w", err) return nil, fmt.Errorf("failed to decrypt database: %w", err)
} }
// Decompress the database // Decompress the binary database
decompressor, err := zstd.NewReader(decryptedReader) decompressor, err := zstd.NewReader(decryptedReader)
if err != nil { if err != nil {
return nil, fmt.Errorf("failed to create decompressor: %w", err) return nil, fmt.Errorf("failed to create decompressor: %w", err)
} }
defer decompressor.Close() defer decompressor.Close()
// Create temporary file for database // Create temporary file for the database
tempFile, err := os.CreateTemp("", "vaultik-verify-*.db") tempFile, err := os.CreateTemp("", "vaultik-verify-*.db")
if err != nil { if err != nil {
return nil, fmt.Errorf("failed to create temp file: %w", err) return nil, fmt.Errorf("failed to create temp file: %w", err)
} }
tempPath := tempFile.Name() tempPath := tempFile.Name()
// Copy decompressed data to temp file // Stream decompress directly to file
if _, err := io.Copy(tempFile, decompressor); err != nil { log.Info("Decompressing database...")
written, err := io.Copy(tempFile, decompressor)
if err != nil {
_ = tempFile.Close() _ = tempFile.Close()
_ = os.Remove(tempPath) _ = os.Remove(tempPath)
return nil, fmt.Errorf("failed to write database: %w", err) return nil, fmt.Errorf("failed to decompress database: %w", err)
} }
_ = tempFile.Close()
// Close temp file before opening with sqlite log.Info("Database decompressed", "size", humanize.Bytes(uint64(written)))
if err := tempFile.Close(); err != nil {
_ = os.Remove(tempPath)
return nil, fmt.Errorf("failed to close temp file: %w", err)
}
// Open the database // Open the database
db, err := sql.Open("sqlite3", tempPath) db, err := sql.Open("sqlite3", tempPath)
@ -179,137 +284,10 @@ func (v *Vaultik) decryptAndLoadDatabase(reader io.ReadCloser, secretKey string)
}, nil }, nil
} }
// verifyBlobLists compares the blob lists between manifest and database
func (v *Vaultik) verifyBlobLists(snapshotID string, manifest *snapshot.Manifest, db *sql.DB) error {
log.Info("Verifying blob lists match between manifest and database")
// Get blobs from database
query := `
SELECT b.blob_hash, b.compressed_size
FROM snapshot_blobs sb
JOIN blobs b ON sb.blob_hash = b.blob_hash
WHERE sb.snapshot_id = ?
ORDER BY b.blob_hash
`
rows, err := db.QueryContext(v.ctx, query, snapshotID)
if err != nil {
return fmt.Errorf("failed to query snapshot blobs: %w", err)
}
defer func() { _ = rows.Close() }()
// Build map of database blobs
dbBlobs := make(map[string]int64)
for rows.Next() {
var hash string
var size int64
if err := rows.Scan(&hash, &size); err != nil {
return fmt.Errorf("failed to scan blob row: %w", err)
}
dbBlobs[hash] = size
}
// Build map of manifest blobs
manifestBlobs := make(map[string]int64)
for _, blob := range manifest.Blobs {
manifestBlobs[blob.Hash] = blob.CompressedSize
}
// Compare counts
if len(dbBlobs) != len(manifestBlobs) {
return fmt.Errorf("blob count mismatch: database has %d blobs, manifest has %d blobs",
len(dbBlobs), len(manifestBlobs))
}
// Check each blob exists in both
for hash, dbSize := range dbBlobs {
manifestSize, exists := manifestBlobs[hash]
if !exists {
return fmt.Errorf("blob %s exists in database but not in manifest", hash)
}
if dbSize != manifestSize {
return fmt.Errorf("blob %s size mismatch: database has %d bytes, manifest has %d bytes",
hash, dbSize, manifestSize)
}
}
for hash := range manifestBlobs {
if _, exists := dbBlobs[hash]; !exists {
return fmt.Errorf("blob %s exists in manifest but not in database", hash)
}
}
log.Info("✓ Blob lists match", "blob_count", len(dbBlobs))
return nil
}
// verifyBlobExistence checks that all blobs exist in S3
func (v *Vaultik) verifyBlobExistence(manifest *snapshot.Manifest) error {
log.Info("Verifying blob existence in S3", "blob_count", len(manifest.Blobs))
for i, blob := range manifest.Blobs {
// Construct blob path
blobPath := fmt.Sprintf("blobs/%s/%s/%s", blob.Hash[:2], blob.Hash[2:4], blob.Hash)
// Check blob exists
stat, err := v.Storage.Stat(v.ctx, blobPath)
if err != nil {
return fmt.Errorf("blob %s missing from storage: %w", blob.Hash, err)
}
// Verify size matches
if stat.Size != blob.CompressedSize {
return fmt.Errorf("blob %s size mismatch: S3 has %d bytes, manifest has %d bytes",
blob.Hash, stat.Size, blob.CompressedSize)
}
// Progress update every 100 blobs
if (i+1)%100 == 0 || i == len(manifest.Blobs)-1 {
log.Info("Blob existence check progress",
"checked", i+1,
"total", len(manifest.Blobs),
"percent", fmt.Sprintf("%.1f%%", float64(i+1)/float64(len(manifest.Blobs))*100),
)
}
}
log.Info("✓ All blobs exist in storage")
return nil
}
// performDeepVerification downloads and verifies the content of each blob
func (v *Vaultik) performDeepVerification(manifest *snapshot.Manifest, db *sql.DB) error {
log.Info("Starting deep verification - downloading and verifying all blobs")
totalBytes := int64(0)
for i, blobInfo := range manifest.Blobs {
// Verify individual blob
if err := v.verifyBlob(blobInfo, db); err != nil {
return fmt.Errorf("blob %s verification failed: %w", blobInfo.Hash, err)
}
totalBytes += blobInfo.CompressedSize
// Progress update
log.Info("Deep verification progress",
"blob", fmt.Sprintf("%d/%d", i+1, len(manifest.Blobs)),
"total_downloaded", humanize.Bytes(uint64(totalBytes)),
"percent", fmt.Sprintf("%.1f%%", float64(i+1)/float64(len(manifest.Blobs))*100),
)
}
log.Info("✓ Deep verification completed successfully",
"blobs_verified", len(manifest.Blobs),
"total_size", humanize.Bytes(uint64(totalBytes)),
)
return nil
}
// verifyBlob downloads and verifies a single blob // verifyBlob downloads and verifies a single blob
func (v *Vaultik) verifyBlob(blobInfo snapshot.BlobInfo, db *sql.DB) error { func (v *Vaultik) verifyBlob(blobInfo snapshot.BlobInfo, db *sql.DB) error {
// Download blob // Download blob using shared fetch method
blobPath := fmt.Sprintf("blobs/%s/%s/%s", blobInfo.Hash[:2], blobInfo.Hash[2:4], blobInfo.Hash) reader, _, err := v.FetchBlob(v.ctx, blobInfo.Hash, blobInfo.CompressedSize)
reader, err := v.Storage.Get(v.ctx, blobPath)
if err != nil { if err != nil {
return fmt.Errorf("failed to download: %w", err) return fmt.Errorf("failed to download: %w", err)
} }
@ -321,8 +299,12 @@ func (v *Vaultik) verifyBlob(blobInfo snapshot.BlobInfo, db *sql.DB) error {
return fmt.Errorf("failed to get decryptor: %w", err) return fmt.Errorf("failed to get decryptor: %w", err)
} }
// Decrypt blob // Hash the encrypted blob data as it streams through to decryption
decryptedReader, err := decryptor.DecryptStream(reader) blobHasher := sha256.New()
teeReader := io.TeeReader(reader, blobHasher)
// Decrypt blob (reading through teeReader to hash encrypted data)
decryptedReader, err := decryptor.DecryptStream(teeReader)
if err != nil { if err != nil {
return fmt.Errorf("failed to decrypt: %w", err) return fmt.Errorf("failed to decrypt: %w", err)
} }
@ -400,11 +382,209 @@ func (v *Vaultik) verifyBlob(blobInfo snapshot.BlobInfo, db *sql.DB) error {
return fmt.Errorf("error iterating blob chunks: %w", err) return fmt.Errorf("error iterating blob chunks: %w", err)
} }
log.Debug("Blob verified", // Verify no remaining data in blob - if chunk list is accurate, blob should be fully consumed
"hash", blobInfo.Hash, remaining, err := io.Copy(io.Discard, decompressor)
if err != nil {
return fmt.Errorf("failed to check for remaining blob data: %w", err)
}
if remaining > 0 {
return fmt.Errorf("blob has %d unexpected trailing bytes not covered by chunk list", remaining)
}
// Verify blob hash matches the encrypted data we downloaded
calculatedBlobHash := hex.EncodeToString(blobHasher.Sum(nil))
if calculatedBlobHash != blobInfo.Hash {
return fmt.Errorf("blob hash mismatch: calculated %s, expected %s",
calculatedBlobHash, blobInfo.Hash)
}
log.Info("Blob verified",
"hash", blobInfo.Hash[:16]+"...",
"chunks", chunkCount, "chunks", chunkCount,
"size", humanize.Bytes(uint64(blobInfo.CompressedSize)), "size", humanize.Bytes(uint64(blobInfo.CompressedSize)),
) )
return nil return nil
} }
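Ignoring the per-chunk checks, the blob-level integrity check above can be shown in isolation. A minimal sketch (hypothetical helper, not part of the codebase), assuming as the code above does that a blob's name is the hex-encoded SHA-256 of the encrypted bytes exactly as stored:

```go
package main

import (
	"crypto/sha256"
	"encoding/hex"
	"fmt"
	"io"
)

// checkBlobHash re-hashes the downloaded (still encrypted) blob bytes and
// compares the result to the blob's content-addressed name.
func checkBlobHash(r io.Reader, blobHash string) error {
	h := sha256.New()
	if _, err := io.Copy(h, r); err != nil {
		return fmt.Errorf("reading blob: %w", err)
	}
	if got := hex.EncodeToString(h.Sum(nil)); got != blobHash {
		return fmt.Errorf("blob hash mismatch: got %s, want %s", got, blobHash)
	}
	return nil
}
```

In verifyBlob the same comparison is done with an io.TeeReader so the ciphertext is hashed in the same pass that feeds decryption and chunk verification.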
// getBlobsFromDatabase gets all blobs for the snapshot from the database
func (v *Vaultik) getBlobsFromDatabase(snapshotID string, db *sql.DB) ([]snapshot.BlobInfo, error) {
query := `
SELECT b.blob_hash, b.compressed_size
FROM snapshot_blobs sb
JOIN blobs b ON sb.blob_hash = b.blob_hash
WHERE sb.snapshot_id = ?
ORDER BY b.blob_hash
`
rows, err := db.QueryContext(v.ctx, query, snapshotID)
if err != nil {
return nil, fmt.Errorf("failed to query snapshot blobs: %w", err)
}
defer func() { _ = rows.Close() }()
var blobs []snapshot.BlobInfo
for rows.Next() {
var hash string
var size int64
if err := rows.Scan(&hash, &size); err != nil {
return nil, fmt.Errorf("failed to scan blob row: %w", err)
}
blobs = append(blobs, snapshot.BlobInfo{
Hash: hash,
CompressedSize: size,
})
}
if err := rows.Err(); err != nil {
return nil, fmt.Errorf("error iterating blobs: %w", err)
}
return blobs, nil
}
// verifyManifestAgainstDatabase verifies the manifest matches the authoritative database
func (v *Vaultik) verifyManifestAgainstDatabase(manifest *snapshot.Manifest, dbBlobs []snapshot.BlobInfo) error {
log.Info("Verifying manifest against database")
// Build map of database blobs
dbBlobMap := make(map[string]int64)
for _, blob := range dbBlobs {
dbBlobMap[blob.Hash] = blob.CompressedSize
}
// Build map of manifest blobs
manifestBlobMap := make(map[string]int64)
for _, blob := range manifest.Blobs {
manifestBlobMap[blob.Hash] = blob.CompressedSize
}
// Check counts match
if len(dbBlobMap) != len(manifestBlobMap) {
log.Warn("Manifest blob count mismatch",
"database_blobs", len(dbBlobMap),
"manifest_blobs", len(manifestBlobMap),
)
// This is a warning, not an error - database is authoritative
}
// Check each manifest blob exists in database with correct size
for hash, manifestSize := range manifestBlobMap {
dbSize, exists := dbBlobMap[hash]
if !exists {
return fmt.Errorf("manifest contains blob %s not in database", hash)
}
if dbSize != manifestSize {
return fmt.Errorf("blob %s size mismatch: database has %d bytes, manifest has %d bytes",
hash, dbSize, manifestSize)
}
}
log.Info("✓ Manifest verified against database",
"manifest_blobs", len(manifestBlobMap),
"database_blobs", len(dbBlobMap),
)
return nil
}
// verifyBlobExistenceFromDB checks that all blobs recorded in the database exist in remote storage
func (v *Vaultik) verifyBlobExistenceFromDB(blobs []snapshot.BlobInfo) error {
log.Info("Verifying blob existence in S3", "blob_count", len(blobs))
for i, blob := range blobs {
// Construct blob path
blobPath := fmt.Sprintf("blobs/%s/%s/%s", blob.Hash[:2], blob.Hash[2:4], blob.Hash)
// Check blob exists
stat, err := v.Storage.Stat(v.ctx, blobPath)
if err != nil {
return fmt.Errorf("blob %s missing from storage: %w", blob.Hash, err)
}
// Verify size matches
if stat.Size != blob.CompressedSize {
return fmt.Errorf("blob %s size mismatch: S3 has %d bytes, database has %d bytes",
blob.Hash, stat.Size, blob.CompressedSize)
}
// Progress update every 100 blobs
if (i+1)%100 == 0 || i == len(blobs)-1 {
log.Info("Blob existence check progress",
"checked", i+1,
"total", len(blobs),
"percent", fmt.Sprintf("%.1f%%", float64(i+1)/float64(len(blobs))*100),
)
}
}
log.Info("✓ All blobs exist in storage")
return nil
}
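The blob key layout used for the Stat calls fans objects out on the first two bytes of the hash: an illustrative hash beginning `d2b2…` lives at `blobs/d2/b2/<full hash>`. A hypothetical helper equivalent to the inline Sprintf above:

```go
// blobKey builds the storage key for a blob, mirroring the inline
// fmt.Sprintf used during verification: blobs/<hash[0:2]>/<hash[2:4]>/<hash>.
func blobKey(hash string) string {
	return fmt.Sprintf("blobs/%s/%s/%s", hash[:2], hash[2:4], hash)
}
```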
// performDeepVerificationFromDB downloads and verifies the content of each blob, using the database as the authoritative source
func (v *Vaultik) performDeepVerificationFromDB(blobs []snapshot.BlobInfo, db *sql.DB, opts *VerifyOptions) error {
// Calculate total bytes for ETA
var totalBytesExpected int64
for _, b := range blobs {
totalBytesExpected += b.CompressedSize
}
log.Info("Starting deep verification - downloading and verifying all blobs",
"blob_count", len(blobs),
"total_size", humanize.Bytes(uint64(totalBytesExpected)),
)
startTime := time.Now()
bytesProcessed := int64(0)
for i, blobInfo := range blobs {
// Verify individual blob
if err := v.verifyBlob(blobInfo, db); err != nil {
return fmt.Errorf("blob %s verification failed: %w", blobInfo.Hash, err)
}
bytesProcessed += blobInfo.CompressedSize
elapsed := time.Since(startTime)
remaining := len(blobs) - (i + 1)
// Calculate ETA based on bytes processed
var eta time.Duration
if bytesProcessed > 0 {
bytesPerSec := float64(bytesProcessed) / elapsed.Seconds()
bytesRemaining := totalBytesExpected - bytesProcessed
if bytesPerSec > 0 {
eta = time.Duration(float64(bytesRemaining)/bytesPerSec) * time.Second
}
}
log.Info("Verification progress",
"blobs_done", i+1,
"blobs_total", len(blobs),
"blobs_remaining", remaining,
"bytes_done", bytesProcessed,
"bytes_done_human", humanize.Bytes(uint64(bytesProcessed)),
"bytes_total", totalBytesExpected,
"bytes_total_human", humanize.Bytes(uint64(totalBytesExpected)),
"elapsed", elapsed.Round(time.Second),
"eta", eta.Round(time.Second),
)
if !opts.JSON {
v.Outputf(" Verified %d/%d blobs (%d remaining) - %s/%s - elapsed %s, eta %s\n",
i+1, len(blobs), remaining,
humanize.Bytes(uint64(bytesProcessed)),
humanize.Bytes(uint64(totalBytesExpected)),
elapsed.Round(time.Second),
eta.Round(time.Second))
}
}
totalElapsed := time.Since(startTime)
log.Info("✓ Deep verification completed successfully",
"blobs_verified", len(blobs),
"total_bytes", bytesProcessed,
"total_bytes_human", humanize.Bytes(uint64(bytesProcessed)),
"duration", totalElapsed.Round(time.Second),
)
return nil
}
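As a worked example of the ETA arithmetic above: if 2 GiB of an 8 GiB snapshot have been verified after 60 seconds, throughput is about 34 MiB/s, so the remaining 6 GiB yields an ETA of roughly 180 seconds.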