Compare commits
8 Commits
b250ddfa94
...
4a3e61f8e1
| Author | SHA1 | Date | |
|---|---|---|---|
| 4a3e61f8e1 | |||
| 6fbcac0cd8 | |||
| 34f73f72d8 | |||
| ee240faa32 | |||
| f719ab3adc | |||
| 1a8baf7491 | |||
| 7d5d3fa598 | |||
| ac5d2f4a0d |
@@ -53,8 +53,8 @@ The database tracks five primary entities and their relationships:
|
|||||||
### Entity Descriptions
|
### Entity Descriptions
|
||||||
|
|
||||||
#### File (`database.File`)
|
#### File (`database.File`)
|
||||||
Represents a file or directory in the backup system. Stores metadata needed for restoration:
|
Represents a file, directory, or symlink in the backup system. Stores metadata needed for restoration:
|
||||||
- Path, mtime
|
- Path, source_path (for restore path stripping), mtime
|
||||||
- Size, mode, ownership (uid, gid)
|
- Size, mode, ownership (uid, gid)
|
||||||
- Symlink target (if applicable)
|
- Symlink target (if applicable)
|
||||||
|
|
||||||
@@ -95,7 +95,7 @@ Maps chunks to their position within blobs:
|
|||||||
|
|
||||||
#### Snapshot (`database.Snapshot`)
|
#### Snapshot (`database.Snapshot`)
|
||||||
Represents a point-in-time backup:
|
Represents a point-in-time backup:
|
||||||
- `ID`: Format is `{hostname}-{YYYYMMDD}-{HHMMSS}Z`
|
- `ID`: Format is `{hostname}_{snapshot-name}_{RFC3339}` (e.g. `server1_home_2025-06-01T12:00:00Z`)
|
||||||
- Tracks file count, chunk count, blob count, sizes, compression ratio
|
- Tracks file count, chunk count, blob count, sizes, compression ratio
|
||||||
- `CompletedAt`: Null until snapshot finishes successfully
|
- `CompletedAt`: Null until snapshot finishes successfully
|
||||||
|
|
||||||
@@ -127,7 +127,7 @@ fx.New(
|
|||||||
config.Module, // 5. Config
|
config.Module, // 5. Config
|
||||||
database.Module, // 6. Database + Repositories
|
database.Module, // 6. Database + Repositories
|
||||||
log.Module, // 7. Logger initialization
|
log.Module, // 7. Logger initialization
|
||||||
s3.Module, // 8. S3 client
|
storage.Module, // 8. Storage backend (S3/file/rclone)
|
||||||
snapshot.Module, // 9. SnapshotManager + ScannerFactory
|
snapshot.Module, // 9. SnapshotManager + ScannerFactory
|
||||||
fx.Provide(vaultik.New), // 10. Vaultik orchestrator
|
fx.Provide(vaultik.New), // 10. Vaultik orchestrator
|
||||||
)
|
)
|
||||||
@@ -161,7 +161,7 @@ type Vaultik struct {
|
|||||||
Config *config.Config
|
Config *config.Config
|
||||||
DB *database.DB
|
DB *database.DB
|
||||||
Repositories *database.Repositories
|
Repositories *database.Repositories
|
||||||
S3Client *s3.Client
|
Storage storage.Storer
|
||||||
ScannerFactory snapshot.ScannerFactory
|
ScannerFactory snapshot.ScannerFactory
|
||||||
SnapshotManager *snapshot.SnapshotManager
|
SnapshotManager *snapshot.SnapshotManager
|
||||||
Shutdowner fx.Shutdowner
|
Shutdowner fx.Shutdowner
|
||||||
@@ -341,12 +341,11 @@ CreateSnapshot(opts)
|
|||||||
└─► SnapshotManager.ExportSnapshotMetadata()
|
└─► SnapshotManager.ExportSnapshotMetadata()
|
||||||
│
|
│
|
||||||
├─► Copy database to temp file
|
├─► Copy database to temp file
|
||||||
├─► Clean to only current snapshot data
|
├─► Clean to only current snapshot data (VACUUM)
|
||||||
├─► Dump to SQL
|
├─► Compress binary SQLite with zstd
|
||||||
├─► Compress with zstd
|
|
||||||
├─► Encrypt with age
|
├─► Encrypt with age
|
||||||
├─► Upload db.zst.age to S3
|
├─► Upload db.zst.age to storage
|
||||||
└─► Upload manifest.json.zst to S3
|
└─► Upload manifest.json.zst to storage
|
||||||
```
|
```
|
||||||
|
|
||||||
## Deduplication Strategy
|
## Deduplication Strategy
|
||||||
@@ -368,8 +367,8 @@ bucket/
|
|||||||
│
|
│
|
||||||
└── metadata/
|
└── metadata/
|
||||||
└── {snapshot-id}/
|
└── {snapshot-id}/
|
||||||
├── db.zst.age # Encrypted database dump
|
├── db.zst.age # Encrypted binary SQLite database
|
||||||
└── manifest.json.zst # Blob list (for verification)
|
└── manifest.json.zst # Blob list (for pruning/verification)
|
||||||
```
|
```
|
||||||
|
|
||||||
## Thread Safety
|
## Thread Safety
|
||||||
|
|||||||
457
README.md
457
README.md
@@ -1,43 +1,35 @@
|
|||||||
# vaultik (ваултик)
|
# vaultik (ваултик)
|
||||||
|
|
||||||
WIP: pre-1.0, some functions may not be fully implemented yet
|
|
||||||
|
|
||||||
`vaultik` is an incremental backup tool written in Go. It encrypts data
|
`vaultik` is an incremental backup tool written in Go. It encrypts data
|
||||||
using an `age` public key and uploads each encrypted blob directly to a
|
using an `age` public key and uploads each encrypted blob directly to a
|
||||||
remote S3-compatible object store. It requires no private keys, secrets, or
|
remote S3-compatible object store. It requires no private keys, secrets, or
|
||||||
credentials (other than those required to PUT to encrypted object storage,
|
credentials (other than those required to PUT to encrypted object storage,
|
||||||
such as S3 API keys) stored on the backed-up system.
|
such as S3 API keys) stored on the backed-up system.
|
||||||
|
|
||||||
It includes table-stakes features such as:
|
Features:
|
||||||
|
|
||||||
* modern encryption (the excellent `age`)
|
* modern encryption ([age](https://age-encryption.org/), X25519 + XChaCha20-Poly1305)
|
||||||
* deduplication
|
* content-defined chunking with deduplication (FastCDC)
|
||||||
* incremental backups
|
* incremental backups (only changed files are re-chunked)
|
||||||
* modern multithreaded zstd compression with configurable levels
|
* multithreaded zstd compression at configurable levels
|
||||||
* content-addressed immutable storage
|
* content-addressed immutable storage
|
||||||
* local state tracking in standard SQLite database, enables write-only
|
* local state tracking in SQLite (enables write-only incremental backups)
|
||||||
incremental backups to destination
|
|
||||||
* no mutable remote metadata
|
* no mutable remote metadata
|
||||||
* no plaintext file paths or metadata stored in remote
|
* no plaintext file paths or metadata in remote storage
|
||||||
* does not create huge numbers of small files (to keep S3 operation counts
|
* packs small files into large blobs (keeps S3 operation counts down)
|
||||||
down) even if the source system has many small files
|
* backs up regular files, symlinks, empty directories, and file permissions
|
||||||
|
* pluggable storage backends: S3, local filesystem, rclone (70+ providers)
|
||||||
|
* pure Go (no CGO), cross-compiles to linux/darwin × amd64/arm64
|
||||||
|
|
||||||
## why
|
## why
|
||||||
|
|
||||||
Existing backup software fails under one or more of these conditions:
|
|
||||||
|
|
||||||
* Requires secrets (passwords, private keys) on the source system, which
|
|
||||||
compromises encrypted backups in the case of host system compromise
|
|
||||||
* Depends on symmetric encryption unsuitable for zero-trust environments
|
|
||||||
* Creates one-blob-per-file, which results in excessive S3 operation counts
|
|
||||||
* is slow
|
|
||||||
|
|
||||||
Other backup tools like `restic`, `borg`, and `duplicity` are designed for
|
Other backup tools like `restic`, `borg`, and `duplicity` are designed for
|
||||||
environments where the source host can store secrets and has access to
|
environments where the source host can store secrets and has access to
|
||||||
decryption keys. I don't want to store backup decryption keys on my hosts,
|
decryption keys. `vaultik` is for environments where you don't want to
|
||||||
only public keys for encryption.
|
store backup decryption keys on your hosts — only public keys for
|
||||||
|
encryption.
|
||||||
|
|
||||||
My requirements are:
|
Requirements that no existing tool meets:
|
||||||
|
|
||||||
* open source
|
* open source
|
||||||
* no passphrases or private keys on the source host
|
* no passphrases or private keys on the source host
|
||||||
@@ -46,40 +38,13 @@ My requirements are:
|
|||||||
* encrypted
|
* encrypted
|
||||||
* s3 compatible without an intermediate step or tool
|
* s3 compatible without an intermediate step or tool
|
||||||
|
|
||||||
Surprisingly, no existing tool meets these requirements, so I wrote `vaultik`.
|
## install
|
||||||
|
|
||||||
## design goals
|
```sh
|
||||||
|
go install git.eeqj.de/sneak/vaultik@latest
|
||||||
|
```
|
||||||
|
|
||||||
1. Backups must require only a public key on the source host.
|
## quick start
|
||||||
1. No secrets or private keys may exist on the source system.
|
|
||||||
1. Restore must be possible using **only** the backup bucket and a private key.
|
|
||||||
1. Prune must be possible (requires private key, done on different hosts).
|
|
||||||
1. All encryption uses [`age`](https://age-encryption.org/) (X25519, XChaCha20-Poly1305).
|
|
||||||
1. Compression uses `zstd` at a configurable level.
|
|
||||||
1. Files are chunked, and multiple chunks are packed into encrypted blobs
|
|
||||||
to reduce object count for filesystems with many small files.
|
|
||||||
1. All metadata (snapshots) is stored remotely as encrypted SQLite DBs.
|
|
||||||
|
|
||||||
## what
|
|
||||||
|
|
||||||
`vaultik` walks a set of configured directories and builds a
|
|
||||||
content-addressable chunk map of changed files using deterministic chunking.
|
|
||||||
Each chunk is streamed into a blob packer. Blobs are compressed with `zstd`,
|
|
||||||
encrypted with `age`, and uploaded directly to remote storage under a
|
|
||||||
content-addressed S3 path. At the end, a pruned snapshot-specific sqlite
|
|
||||||
database of metadata is created, encrypted, and uploaded alongside the
|
|
||||||
blobs.
|
|
||||||
|
|
||||||
No plaintext file contents ever hit disk. No private key or secret
|
|
||||||
passphrase is needed or stored locally.
|
|
||||||
|
|
||||||
## how
|
|
||||||
|
|
||||||
1. **install**
|
|
||||||
|
|
||||||
```sh
|
|
||||||
go install git.eeqj.de/sneak/vaultik@latest
|
|
||||||
```
|
|
||||||
|
|
||||||
1. **generate keypair**
|
1. **generate keypair**
|
||||||
|
|
||||||
@@ -88,23 +53,21 @@ passphrase is needed or stored locally.
|
|||||||
grep 'public key:' agekey.txt
|
grep 'public key:' agekey.txt
|
||||||
```
|
```
|
||||||
|
|
||||||
1. **write config**
|
2. **write config** (see `config.example.yml` for all options)
|
||||||
|
|
||||||
```yaml
|
```yaml
|
||||||
# Named snapshots - each snapshot can contain multiple paths
|
|
||||||
snapshots:
|
snapshots:
|
||||||
system:
|
system:
|
||||||
paths:
|
paths:
|
||||||
- /etc
|
- /etc
|
||||||
- /var/lib
|
- /var/lib
|
||||||
exclude:
|
exclude:
|
||||||
- '*.cache' # Snapshot-specific exclusions
|
- '*.cache'
|
||||||
home:
|
home:
|
||||||
paths:
|
paths:
|
||||||
- /home/user/documents
|
- /home/user/documents
|
||||||
- /home/user/photos
|
- /home/user/photos
|
||||||
|
|
||||||
# Global exclusions (apply to all snapshots)
|
|
||||||
exclude:
|
exclude:
|
||||||
- '*.log'
|
- '*.log'
|
||||||
- '*.tmp'
|
- '*.tmp'
|
||||||
@@ -112,29 +75,36 @@ passphrase is needed or stored locally.
|
|||||||
- 'node_modules'
|
- 'node_modules'
|
||||||
|
|
||||||
age_recipients:
|
age_recipients:
|
||||||
- age1278m9q7dp3chsh2dcy82qk27v047zywyvtxwnj4cvt0z65jw6a7q5dqhfj
|
- age1YOUR_PUBLIC_KEY_HERE
|
||||||
|
|
||||||
|
# Storage backend (pick one):
|
||||||
|
storage_url: "s3://mybucket/backups?endpoint=s3.example.com&region=us-east-1"
|
||||||
|
# storage_url: "file:///mnt/backups"
|
||||||
|
# storage_url: "rclone://myremote/path/to/backups"
|
||||||
|
|
||||||
|
# For s3:// URLs, credentials are still required:
|
||||||
s3:
|
s3:
|
||||||
endpoint: https://s3.example.com
|
|
||||||
bucket: vaultik-data
|
|
||||||
prefix: host1/
|
|
||||||
access_key_id: ...
|
access_key_id: ...
|
||||||
secret_access_key: ...
|
secret_access_key: ...
|
||||||
region: us-east-1
|
|
||||||
chunk_size: 10MB
|
|
||||||
blob_size_limit: 1GB
|
|
||||||
```
|
```
|
||||||
|
|
||||||
1. **run**
|
3. **run**
|
||||||
|
|
||||||
```sh
|
```sh
|
||||||
# Create all configured snapshots
|
# Back up all configured snapshots
|
||||||
vaultik --config /etc/vaultik.yaml snapshot create
|
vaultik --config /etc/vaultik.yml snapshot create
|
||||||
|
|
||||||
# Create specific snapshots by name
|
# Back up specific snapshots by name
|
||||||
vaultik --config /etc/vaultik.yaml snapshot create home system
|
vaultik --config /etc/vaultik.yml snapshot create home system
|
||||||
|
|
||||||
# Silent mode for cron
|
# Silent mode for cron
|
||||||
vaultik --config /etc/vaultik.yaml snapshot create --cron
|
vaultik --config /etc/vaultik.yml snapshot create --cron
|
||||||
|
|
||||||
|
# Back up and clean up old snapshots + orphan blobs in one shot
|
||||||
|
vaultik --config /etc/vaultik.yml snapshot create --prune
|
||||||
|
|
||||||
|
# Daily cron: back up, keep last 4 weeks of snapshots
|
||||||
|
vaultik --config /etc/vaultik.yml snapshot create --cron --prune --keep-newer-than 4w
|
||||||
```
|
```
|
||||||
|
|
||||||
---
|
---
|
||||||
@@ -144,7 +114,7 @@ passphrase is needed or stored locally.
|
|||||||
### commands
|
### commands
|
||||||
|
|
||||||
```sh
|
```sh
|
||||||
vaultik [--config <path>] snapshot create [snapshot-names...] [--cron] [--prune] [--skip-errors]
|
vaultik [--config <path>] snapshot create [snapshot-names...] [--cron] [--prune] [--keep-newer-than <duration>] [--skip-errors]
|
||||||
vaultik [--config <path>] snapshot list [--json]
|
vaultik [--config <path>] snapshot list [--json]
|
||||||
vaultik [--config <path>] snapshot verify <snapshot-id> [--deep] [--json]
|
vaultik [--config <path>] snapshot verify <snapshot-id> [--deep] [--json]
|
||||||
vaultik [--config <path>] snapshot purge [--keep-latest | --older-than <duration>] [--snapshot <name>...] [--force]
|
vaultik [--config <path>] snapshot purge [--keep-latest | --older-than <duration>] [--snapshot <name>...] [--force]
|
||||||
@@ -159,245 +129,244 @@ vaultik [--config <path>] database purge [--force]
|
|||||||
vaultik version
|
vaultik version
|
||||||
```
|
```
|
||||||
|
|
||||||
### environment
|
### global flags
|
||||||
|
|
||||||
* `VAULTIK_AGE_SECRET_KEY`: Required for `restore` and deep `verify`. Contains the age private key for decryption.
|
* `--config <path>`: Path to config file (default: `$VAULTIK_CONFIG` or `/etc/vaultik/config.yml`)
|
||||||
* `VAULTIK_CONFIG`: Optional path to config file.
|
* `--verbose`, `-v`: Enable verbose output
|
||||||
|
* `--debug`: Enable debug output
|
||||||
|
* `--quiet`, `-q`: Suppress non-error output
|
||||||
|
|
||||||
|
### environment variables
|
||||||
|
|
||||||
|
* `VAULTIK_AGE_SECRET_KEY`: Age private key for decryption (required for `restore` and `verify --deep`)
|
||||||
|
* `VAULTIK_CONFIG`: Path to config file (overridden by `--config`)
|
||||||
|
* `VAULTIK_INDEX_PATH`: Override local SQLite index path
|
||||||
|
|
||||||
### command details
|
### command details
|
||||||
|
|
||||||
**snapshot create**: Perform incremental backup of configured snapshots
|
**snapshot create**: Perform incremental backup of configured snapshots.
|
||||||
* Config is located at `/etc/vaultik/config.yml` by default
|
|
||||||
* Optional snapshot names argument to create specific snapshots (default: all)
|
* Optional snapshot names argument to create specific snapshots (default: all)
|
||||||
* `--cron`: Silent unless error (for crontab)
|
* `--cron`: Silent unless error (for crontab)
|
||||||
* `--prune`: After backup, drop older snapshots of each backed-up name (keeping
|
* `--prune`: After backup, drop older snapshots of each backed-up name and
|
||||||
only the latest) and remove orphaned blobs from remote storage
|
remove orphaned blobs from remote storage. By default keeps only the latest
|
||||||
|
snapshot per name; use `--keep-newer-than` for a rolling window.
|
||||||
|
* `--keep-newer-than <duration>`: With `--prune`, keep snapshots newer than
|
||||||
|
this duration instead of only the latest (e.g. `4w`, `30d`, `6mo`, `1y`)
|
||||||
* `--skip-errors`: Skip file read errors (log them loudly but continue)
|
* `--skip-errors`: Skip file read errors (log them loudly but continue)
|
||||||
|
|
||||||
**snapshot list**: List all snapshots with their timestamps and sizes
|
**snapshot list**: List all snapshots with their timestamps and sizes.
|
||||||
* `--json`: Output in JSON format
|
* `--json`: Output in JSON format
|
||||||
|
|
||||||
**snapshot verify**: Verify snapshot integrity
|
**snapshot verify**: Verify snapshot integrity.
|
||||||
* `--deep`: Download and verify blob contents (not just existence)
|
* Default (shallow): checks that all blobs referenced in the manifest exist in storage
|
||||||
|
* `--deep`: Downloads and decrypts each blob, verifies chunk hashes against the
|
||||||
|
encrypted metadata database
|
||||||
|
* `--json`: Output results as JSON
|
||||||
|
|
||||||
**snapshot purge**: Remove old snapshots based on criteria. Retention is
|
**snapshot purge**: Remove old snapshots based on criteria. Retention is
|
||||||
applied per-snapshot-name (e.g. `--keep-latest` keeps the latest of each
|
per-snapshot-name (`--keep-latest` keeps the latest of each name, not the
|
||||||
configured name, not the latest globally).
|
latest globally).
|
||||||
* `--keep-latest`: Keep only the most recent snapshot of each name
|
* `--keep-latest`: Keep only the most recent snapshot of each name
|
||||||
* `--older-than`: Remove snapshots older than duration (e.g., 30d, 6mo, 1y)
|
* `--older-than <duration>`: Remove snapshots older than duration (e.g. `30d`, `6mo`, `1y`)
|
||||||
* `--snapshot <name>`: Restrict to specific snapshot names (repeat for multiple)
|
* `--snapshot <name>`: Restrict to specific snapshot names (repeat for multiple)
|
||||||
* `--force`: Skip confirmation prompt
|
* `--force`: Skip confirmation prompt
|
||||||
|
|
||||||
**snapshot remove**: Remove a specific snapshot
|
**snapshot remove**: Remove a specific snapshot from the local database.
|
||||||
|
* `--remote`: Also remove snapshot metadata from remote storage
|
||||||
|
* `--all`: Remove all snapshots (requires `--force`)
|
||||||
* `--dry-run`: Show what would be deleted without deleting
|
* `--dry-run`: Show what would be deleted without deleting
|
||||||
* `--force`: Skip confirmation prompt
|
* `--force`: Skip confirmation prompt
|
||||||
|
* `--json`: Output result as JSON
|
||||||
|
|
||||||
**snapshot prune**: Clean orphaned data from local database
|
**snapshot prune**: Clean orphaned data from the local database (files,
|
||||||
|
chunks, blobs not referenced by any snapshot).
|
||||||
|
|
||||||
**restore**: Restore snapshot to target directory
|
**restore**: Restore files from a backup snapshot.
|
||||||
* Requires `VAULTIK_AGE_SECRET_KEY` environment variable with age private key
|
* Requires `VAULTIK_AGE_SECRET_KEY` environment variable
|
||||||
* Optional path arguments to restore specific files/directories (default: all)
|
* Optional path arguments to restore specific files/directories (default: all)
|
||||||
* Downloads and decrypts metadata, fetches required blobs, reconstructs files
|
* Preserves file permissions, timestamps, ownership (ownership requires root),
|
||||||
* Preserves file permissions, timestamps, and ownership (ownership requires root)
|
symlinks, and empty directories
|
||||||
* Handles symlinks and directories
|
* `--verify`: After restoring, verify every file's chunk hashes match
|
||||||
|
|
||||||
**prune**: Remove unreferenced blobs from remote storage
|
**prune**: Remove unreferenced blobs from remote storage.
|
||||||
* Scans all snapshots for referenced blobs
|
* Scans all snapshot manifests for referenced blobs and deletes any blob that no manifest references
|
||||||
* Deletes orphaned blobs
|
* `--force`: Skip confirmation prompt
|
||||||
|
* `--json`: Output stats as JSON
|
||||||
|
|
||||||
**info**: Display system and configuration information
|
**info**: Display system configuration, storage settings, encryption
|
||||||
|
recipients, and local database statistics.
|
||||||
|
|
||||||
**store info**: Display S3 bucket configuration and storage statistics
|
**remote info**: Show detailed remote storage information including per-snapshot
|
||||||
|
metadata sizes, blob counts, and orphaned blob detection.
|
||||||
|
* `--json`: Output as JSON
|
||||||
|
|
||||||
|
**store info**: Display storage backend type and statistics.
|
||||||
|
|
||||||
|
**database purge**: Delete the local SQLite state database entirely. Remote
|
||||||
|
storage is unaffected; the next backup will do a full scan and re-deduplicate
|
||||||
|
against existing remote blobs.
|
||||||
|
* `--force`: Skip confirmation prompt
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## storage backends
|
||||||
|
|
||||||
|
vaultik supports three storage backends, selected via the `storage_url` config field:
|
||||||
|
|
||||||
|
**S3** (`s3://bucket/prefix?endpoint=host&region=us-east-1`): Any S3-compatible
|
||||||
|
object store. Credentials are read from `s3.access_key_id` and
|
||||||
|
`s3.secret_access_key` in the config file.
|
||||||
|
|
||||||
|
**Local filesystem** (`file:///path/to/backup`): Stores blobs and metadata on
|
||||||
|
a local or mounted filesystem. Useful for testing or backing up to a NAS.
|
||||||
|
|
||||||
|
**Rclone** (`rclone://remote/path`): Uses rclone's 70+ supported cloud
|
||||||
|
providers. Requires rclone to be configured separately (`rclone config`).
|
||||||
|
|
||||||
|
Legacy S3 configuration via `s3.*` fields (endpoint, bucket, prefix, etc.) is
|
||||||
|
still supported for backward compatibility. `storage_url` takes precedence if
|
||||||
|
both are set.
|
||||||
|
|
||||||
---
|
---
|
||||||
|
|
||||||
## architecture
|
## architecture
|
||||||
|
|
||||||
### s3 bucket layout
|
### remote storage layout
|
||||||
|
|
||||||
```
|
```
|
||||||
s3://<bucket>/<prefix>/
|
<bucket>/<prefix>/
|
||||||
├── blobs/
|
├── blobs/
|
||||||
│ └── <aa>/<bb>/<full_blob_hash>
|
│ └── <aa>/<bb>/<full_blob_hash>
|
||||||
└── metadata/
|
└── metadata/
|
||||||
├── <snapshot_id>/
|
└── <snapshot_id>/
|
||||||
│ ├── db.zst.age
|
├── db.zst.age # Encrypted binary SQLite database
|
||||||
│ └── manifest.json.zst
|
└── manifest.json.zst # Unencrypted blob list (for pruning)
|
||||||
```
|
```
|
||||||
|
|
||||||
* `blobs/<aa>/<bb>/...`: Two-level directory sharding using first 4 hex chars of blob hash
|
* Blobs are sharded into two directory levels using the first 4 hex chars of the blob hash
|
||||||
* `metadata/<snapshot_id>/db.zst.age`: Encrypted, compressed SQLite database
|
* `db.zst.age` is a binary SQLite database (zstd compressed, age encrypted)
|
||||||
* `metadata/<snapshot_id>/manifest.json.zst`: Unencrypted blob list for pruning
|
containing all file metadata, chunk mappings, and relationships for the snapshot
|
||||||
|
* `manifest.json.zst` is an unencrypted compressed JSON blob list, enabling
|
||||||
|
pruning without the private key
|
||||||
|
|
||||||
### blob manifest format
|
Snapshot IDs follow the format `<hostname>_<snapshot-name>_<RFC3339-timestamp>`
|
||||||
|
(e.g. `server1_home_2025-06-01T12:00:00Z`).
|
||||||
The `manifest.json.zst` file is unencrypted (compressed JSON) to enable pruning without decryption:
|
|
||||||
|
|
||||||
```json
|
|
||||||
{
|
|
||||||
"snapshot_id": "hostname_snapshotname_2025-01-01T12:00:00Z",
|
|
||||||
"blob_hashes": [
|
|
||||||
"aa1234567890abcdef...",
|
|
||||||
"bb2345678901bcdef0..."
|
|
||||||
]
|
|
||||||
}
|
|
||||||
```
|
|
||||||
|
|
||||||
Snapshot IDs follow the format `<hostname>_<snapshot-name>_<timestamp>` (e.g., `server1_home_2025-01-01T12:00:00Z`).
|
|
||||||
|
|
||||||
### local sqlite schema
|
|
||||||
|
|
||||||
```sql
|
|
||||||
CREATE TABLE files (
|
|
||||||
id TEXT PRIMARY KEY,
|
|
||||||
path TEXT NOT NULL UNIQUE,
|
|
||||||
mtime INTEGER NOT NULL,
|
|
||||||
size INTEGER NOT NULL,
|
|
||||||
mode INTEGER NOT NULL,
|
|
||||||
uid INTEGER NOT NULL,
|
|
||||||
gid INTEGER NOT NULL
|
|
||||||
);
|
|
||||||
|
|
||||||
CREATE TABLE file_chunks (
|
|
||||||
file_id TEXT NOT NULL,
|
|
||||||
idx INTEGER NOT NULL,
|
|
||||||
chunk_hash TEXT NOT NULL,
|
|
||||||
PRIMARY KEY (file_id, idx),
|
|
||||||
FOREIGN KEY (file_id) REFERENCES files(id) ON DELETE CASCADE
|
|
||||||
);
|
|
||||||
|
|
||||||
CREATE TABLE chunks (
|
|
||||||
chunk_hash TEXT PRIMARY KEY,
|
|
||||||
size INTEGER NOT NULL
|
|
||||||
);
|
|
||||||
|
|
||||||
CREATE TABLE blobs (
|
|
||||||
id TEXT PRIMARY KEY,
|
|
||||||
blob_hash TEXT NOT NULL UNIQUE,
|
|
||||||
uncompressed INTEGER NOT NULL,
|
|
||||||
compressed INTEGER NOT NULL,
|
|
||||||
uploaded_at INTEGER
|
|
||||||
);
|
|
||||||
|
|
||||||
CREATE TABLE blob_chunks (
|
|
||||||
blob_hash TEXT NOT NULL,
|
|
||||||
chunk_hash TEXT NOT NULL,
|
|
||||||
offset INTEGER NOT NULL,
|
|
||||||
length INTEGER NOT NULL,
|
|
||||||
PRIMARY KEY (blob_hash, chunk_hash)
|
|
||||||
);
|
|
||||||
|
|
||||||
CREATE TABLE chunk_files (
|
|
||||||
chunk_hash TEXT NOT NULL,
|
|
||||||
file_id TEXT NOT NULL,
|
|
||||||
file_offset INTEGER NOT NULL,
|
|
||||||
length INTEGER NOT NULL,
|
|
||||||
PRIMARY KEY (chunk_hash, file_id)
|
|
||||||
);
|
|
||||||
|
|
||||||
CREATE TABLE snapshots (
|
|
||||||
id TEXT PRIMARY KEY,
|
|
||||||
hostname TEXT NOT NULL,
|
|
||||||
vaultik_version TEXT NOT NULL,
|
|
||||||
started_at INTEGER NOT NULL,
|
|
||||||
completed_at INTEGER,
|
|
||||||
file_count INTEGER NOT NULL,
|
|
||||||
chunk_count INTEGER NOT NULL,
|
|
||||||
blob_count INTEGER NOT NULL,
|
|
||||||
total_size INTEGER NOT NULL,
|
|
||||||
blob_size INTEGER NOT NULL,
|
|
||||||
compression_ratio REAL NOT NULL
|
|
||||||
);
|
|
||||||
|
|
||||||
CREATE TABLE snapshot_files (
|
|
||||||
snapshot_id TEXT NOT NULL,
|
|
||||||
file_id TEXT NOT NULL,
|
|
||||||
PRIMARY KEY (snapshot_id, file_id)
|
|
||||||
);
|
|
||||||
|
|
||||||
CREATE TABLE snapshot_blobs (
|
|
||||||
snapshot_id TEXT NOT NULL,
|
|
||||||
blob_id TEXT NOT NULL,
|
|
||||||
blob_hash TEXT NOT NULL,
|
|
||||||
PRIMARY KEY (snapshot_id, blob_id)
|
|
||||||
);
|
|
||||||
```
|
|
||||||
|
|
||||||
### data flow
|
### data flow
|
||||||
|
|
||||||
#### backup
|
**backup:**
|
||||||
|
|
||||||
1. Load config, open local SQLite index
|
1. Open local SQLite index, load known files and chunks into memory
|
||||||
1. Walk source directories, check mtime/size against index
|
2. Walk source directories, compare mtime/size/mode against index
|
||||||
1. For changed/new files: chunk using content-defined chunking
|
3. For changed/new files: chunk using content-defined chunking (FastCDC)
|
||||||
1. For each chunk: hash, check if already uploaded, add to blob packer
|
4. For symlinks and directories: record metadata (no chunking)
|
||||||
1. When blob reaches threshold: compress, encrypt, upload to S3
|
5. For each chunk: hash, check dedup, add to blob packer
|
||||||
1. Build snapshot metadata, compress, encrypt, upload
|
6. When blob reaches size threshold: compress (zstd), encrypt (age), upload
|
||||||
1. Create blob manifest (unencrypted) for pruning support
|
7. Build snapshot metadata database, compress, encrypt, upload
|
||||||
|
8. Create unencrypted blob manifest for pruning support
|
||||||
|
|
||||||
#### restore
|
**restore:**
|
||||||
|
|
||||||
1. Download `metadata/<snapshot_id>/db.zst.age`
|
1. Download and decrypt `metadata/<snapshot_id>/db.zst.age`
|
||||||
1. Decrypt and decompress SQLite database
|
2. Open the binary SQLite database
|
||||||
1. Query files table (optionally filtered by paths)
|
3. Query files (optionally filtered by paths)
|
||||||
1. For each file, get ordered chunk list from file_chunks
|
4. Download and decrypt required blobs
|
||||||
1. Download required blobs, decrypt, decompress
|
5. Extract chunks, reconstruct files
|
||||||
1. Extract chunks and reconstruct files
|
6. Restore permissions, timestamps, ownership, symlinks
|
||||||
1. Restore permissions, mtime, uid/gid
|
|
||||||
|
|
||||||
#### prune
|
**prune:**
|
||||||
|
|
||||||
1. List all snapshot manifests
|
1. List all snapshot manifests
|
||||||
1. Build set of all referenced blob hashes
|
2. Build set of all referenced blob hashes
|
||||||
1. List all blobs in storage
|
3. List all blobs in storage
|
||||||
1. Delete any blob not in referenced set
|
4. Delete any blob not in the referenced set
|
||||||
|
|
||||||
### chunking
|
### chunking and deduplication
|
||||||
|
|
||||||
* Content-defined chunking using FastCDC algorithm
|
* Content-defined chunking using the FastCDC algorithm
|
||||||
* Average chunk size: configurable (default 10MB)
|
* Average chunk size: configurable (default 10MB)
|
||||||
* Deduplication at chunk level
|
* Deduplication at file level (unchanged files skipped) and chunk level
|
||||||
* Multiple chunks packed into blobs for efficiency
|
(identical chunks across files stored once)
|
||||||
|
* Multiple chunks packed into blobs to reduce object count
|
||||||
|
|
||||||
### encryption
|
### encryption
|
||||||
|
|
||||||
* Asymmetric encryption using age (X25519 + XChaCha20-Poly1305)
|
* Asymmetric encryption using age (X25519 + XChaCha20-Poly1305)
|
||||||
* Only public key needed on source host
|
* Only the public key is needed on the source host
|
||||||
* Each blob encrypted independently
|
* Each blob and each metadata database is encrypted independently
|
||||||
* Metadata databases also encrypted
|
* Multiple recipients supported (encrypt to multiple keys)
|
||||||
|
|
||||||
### compression
|
### compression
|
||||||
|
|
||||||
* zstd compression at configurable level
|
* zstd compression at configurable level (1-19, default 3)
|
||||||
* Applied before encryption
|
* Applied before encryption at the blob level
|
||||||
* Blob-level compression for efficiency
|
|
||||||
|
|
||||||
---
|
---
|
||||||
|
|
||||||
## does not
|
## configuration reference
|
||||||
|
|
||||||
* Store any secrets on the backed-up machine
|
See `config.example.yml` for a complete annotated example. Key fields:
|
||||||
* Require mutable remote metadata
|
|
||||||
* Use tarballs, restic, rsync, or ssh
|
|
||||||
* Require a symmetric passphrase or password
|
|
||||||
* Trust the source system with anything
|
|
||||||
|
|
||||||
## does
|
| Field | Default | Description |
|
||||||
|
|-------|---------|-------------|
|
||||||
|
| `age_recipients` | (required) | Age public keys for encryption |
|
||||||
|
| `snapshots` | (required) | Named snapshot definitions with paths and excludes |
|
||||||
|
| `storage_url` | | Storage backend URL (`s3://`, `file://`, `rclone://`) |
|
||||||
|
| `s3.*` | | Legacy S3 configuration (endpoint, bucket, credentials) |
|
||||||
|
| `exclude` | | Global exclude patterns (applied to all snapshots) |
|
||||||
|
| `chunk_size` | `10MB` | Average chunk size for content-defined chunking |
|
||||||
|
| `blob_size_limit` | `10GB` | Maximum blob size before splitting |
|
||||||
|
| `compression_level` | `3` | zstd compression level (1-19) |
|
||||||
|
| `hostname` | system hostname | Hostname used in snapshot IDs |
|
||||||
|
| `index_path` | `~/.local/share/.../index.sqlite` | Local SQLite index path |
|
||||||
|
|
||||||
* Incremental deduplicated backup
|
---
|
||||||
* Blob-packed chunk encryption
|
|
||||||
* Content-addressed immutable blobs
|
## limitations
|
||||||
* Public-key encryption only
|
|
||||||
* SQLite-based local and snapshot metadata
|
* **No extended attributes (xattrs).** ACLs, macOS Finder metadata,
|
||||||
* Fully stream-processed storage
|
quarantine flags, SELinux labels, and other extended attributes are not
|
||||||
|
backed up or restored.
|
||||||
|
* **No hard link detection.** Two hard links to the same inode are backed
|
||||||
|
up as independent files. Content deduplication means the data is stored
|
||||||
|
once, but the hard link relationship is lost on restore.
|
||||||
|
* **No sparse file support.** Sparse files are fully materialized during
|
||||||
|
backup. A 100 GB sparse VM disk that is mostly zeros will consume the
|
||||||
|
full (compressed) size in storage.
|
||||||
|
* **No bandwidth limiting.** Uploads and downloads use whatever bandwidth
|
||||||
|
is available. There is no `--bwlimit` flag yet.
|
||||||
|
* **No parallel blob downloads during restore.** Blobs are fetched
|
||||||
|
sequentially. Restore speed is bound by single-stream throughput.
|
||||||
|
* **Device nodes, named pipes, and sockets are silently skipped.** Only
|
||||||
|
regular files, directories, and symlinks are backed up.
|
||||||
|
* **No database migrations.** If the local SQLite schema changes between
|
||||||
|
versions, delete the local database (`vaultik database purge`) and run
|
||||||
|
a full backup. Remote storage is unaffected.
|
||||||
|
* **Files that change during backup may be inconsistent.** There is no
|
||||||
|
filesystem snapshot or freeze. If a file is modified between the scan
|
||||||
|
and chunk phases, the backed-up copy may reflect a partial write.
|
||||||
|
* **Ownership restoration requires root.** File uid/gid are recorded
|
||||||
|
and restored, but `chown` requires elevated privileges. Without root,
|
||||||
|
files are restored with the current user's ownership.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## roadmap
|
||||||
|
|
||||||
|
Items for future releases:
|
||||||
|
|
||||||
|
* Error-condition tests (network failures, disk full, corrupted/missing blobs)
|
||||||
|
* Parallel blob downloads during restore
|
||||||
|
* Bandwidth limiting (`--bwlimit`)
|
||||||
|
* Security audit of encryption implementation
|
||||||
|
* Man pages and richer `--help` examples
|
||||||
|
|
||||||
---
|
---
|
||||||
|
|
||||||
## requirements
|
## requirements
|
||||||
|
|
||||||
* Go 1.26 or later
|
* Go 1.26 or later
|
||||||
* S3-compatible object storage
|
* S3-compatible object storage (or local filesystem, or rclone remote)
|
||||||
* Sufficient disk space for local index (typically <1GB)
|
|
||||||
|
|
||||||
## development workflow
|
## development workflow
|
||||||
|
|
||||||
|
|||||||
44
TODO.md
44
TODO.md
@@ -1,44 +0,0 @@
|
|||||||
# Vaultik 1.0 TODO
|
|
||||||
|
|
||||||
Remaining tasks before 1.0 release.
|
|
||||||
|
|
||||||
## Must-fix
|
|
||||||
|
|
||||||
1. Scanner uses bare `fmt.Printf` (bypasses `--cron` silence)
|
|
||||||
- Route all user-facing output through a writer gated by progress/cron flags
|
|
||||||
- Affects `internal/snapshot/scanner.go` (~24 bare print calls)
|
|
||||||
|
|
||||||
1. S3 client error type checking
|
|
||||||
- `internal/s3/client.go:207` has a TODO for proper error type checking
|
|
||||||
|
|
||||||
1. Error message polish
|
|
||||||
- Add actionable suggestions for common failures (missing config, bad
|
|
||||||
storage URL, failed S3 auth, missing age key on restore/verify)
|
|
||||||
- Only `restore.go` currently has the "did you set VAULTIK_AGE_SECRET_KEY?" hint
|
|
||||||
|
|
||||||
## Done
|
|
||||||
|
|
||||||
- [x] Rclone storage backend
|
|
||||||
- [x] Release process (goreleaser, CGO-free cross-compile, checksums)
|
|
||||||
- [x] End-to-end integration test (backup → restore → verify → byte-compare)
|
|
||||||
- [x] Restore integration tests
|
|
||||||
- [x] `--prune` flag on `snapshot create` (per-name retention + orphan blob cleanup)
|
|
||||||
- [x] Per-name purge retention (`--keep-latest` per snapshot name, `--snapshot` filter)
|
|
||||||
- [x] CLI surface dedup (removed top-level `purge` and `verify` duplicates)
|
|
||||||
- [x] Exit codes (create/restore now exit non-zero on failure)
|
|
||||||
- [x] Deep verify implemented and wired up
|
|
||||||
- [x] Shallow verify timestamp parsing fixed
|
|
||||||
- [x] Daemon mode removed
|
|
||||||
- [x] Makefile targets separated (`lint`/`test`/`fmt`/`check`)
|
|
||||||
- [x] CGO eliminated (pure-Go SQLite via modernc.org/sqlite)
|
|
||||||
- [x] Version set correctly in releases via goreleaser ldflags
|
|
||||||
|
|
||||||
## Post-1.0
|
|
||||||
|
|
||||||
1. Edge-case tests (empty dirs, symlinks, special chars, multi-GB files, 100k+ small files)
|
|
||||||
1. Error-condition tests (network failures, disk full, corrupted/missing blobs)
|
|
||||||
1. Parallel blob downloads during restore
|
|
||||||
1. Bandwidth limiting (`--bwlimit`)
|
|
||||||
1. Security audit of encryption (verify no plaintext leaks, correct hash computation)
|
|
||||||
1. Man pages / richer `--help` examples
|
|
||||||
1. Tag and release v1.0.0
|
|
||||||
@@ -101,6 +101,7 @@ specifying a path using --config or by setting VAULTIK_CONFIG to a path.`,
|
|||||||
|
|
||||||
cmd.Flags().BoolVar(&opts.Cron, "cron", false, "Run in cron mode (silent unless error)")
|
cmd.Flags().BoolVar(&opts.Cron, "cron", false, "Run in cron mode (silent unless error)")
|
||||||
cmd.Flags().BoolVar(&opts.Prune, "prune", false, "After backup, drop older snapshots of the same name and remove orphaned blobs")
|
cmd.Flags().BoolVar(&opts.Prune, "prune", false, "After backup, drop older snapshots of the same name and remove orphaned blobs")
|
||||||
|
cmd.Flags().StringVar(&opts.KeepNewerThan, "keep-newer-than", "", "With --prune: keep snapshots newer than this duration (e.g. 4w, 30d, 6mo) instead of only the latest")
|
||||||
cmd.Flags().BoolVar(&opts.SkipErrors, "skip-errors", false, "Skip file read errors (log them loudly but continue)")
|
cmd.Flags().BoolVar(&opts.SkipErrors, "skip-errors", false, "Skip file read errors (log them loudly but continue)")
|
||||||
|
|
||||||
return cmd
|
return cmd
|
||||||
|
|||||||
@@ -649,7 +649,40 @@ func (s *Scanner) scanPhase(ctx context.Context, path string, result *ScanResult
|
|||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
// Skip non-regular files for processing (but still count them)
|
// Handle symlinks
|
||||||
|
if info.Mode()&os.ModeSymlink != 0 {
|
||||||
|
file := s.buildSymlinkEntry(filePath, info)
|
||||||
|
if file != nil {
|
||||||
|
existingFiles[filePath] = struct{}{}
|
||||||
|
mu.Lock()
|
||||||
|
filesToProcess = append(filesToProcess, &FileToProcess{
|
||||||
|
Path: filePath,
|
||||||
|
FileInfo: info,
|
||||||
|
File: file,
|
||||||
|
})
|
||||||
|
filesScanned++
|
||||||
|
mu.Unlock()
|
||||||
|
s.updateScanEntryStats(result, true, info)
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// Handle directories (record for permission/ownership preservation and empty-dir support)
|
||||||
|
if info.IsDir() {
|
||||||
|
file := s.buildDirectoryEntry(filePath, info)
|
||||||
|
existingFiles[filePath] = struct{}{}
|
||||||
|
mu.Lock()
|
||||||
|
filesToProcess = append(filesToProcess, &FileToProcess{
|
||||||
|
Path: filePath,
|
||||||
|
FileInfo: info,
|
||||||
|
File: file,
|
||||||
|
})
|
||||||
|
filesScanned++
|
||||||
|
mu.Unlock()
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// Skip other non-regular files (devices, sockets, etc.)
|
||||||
if !info.Mode().IsRegular() {
|
if !info.Mode().IsRegular() {
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
@@ -760,6 +793,71 @@ func (s *Scanner) printScanProgressLine(filesScanned int64, changedCount int, es
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// buildSymlinkEntry creates a File record for a symlink.
|
||||||
|
// Returns nil if the link target cannot be read.
|
||||||
|
func (s *Scanner) buildSymlinkEntry(path string, info os.FileInfo) *database.File {
|
||||||
|
target, err := os.Readlink(path)
|
||||||
|
if err != nil {
|
||||||
|
log.Debug("Cannot read symlink target", "path", path, "error", err)
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
var uid, gid uint32
|
||||||
|
if stat, ok := info.Sys().(interface {
|
||||||
|
Uid() uint32
|
||||||
|
Gid() uint32
|
||||||
|
}); ok {
|
||||||
|
uid = stat.Uid()
|
||||||
|
gid = stat.Gid()
|
||||||
|
}
|
||||||
|
|
||||||
|
return &database.File{
|
||||||
|
ID: types.NewFileID(),
|
||||||
|
Path: types.FilePath(path),
|
||||||
|
SourcePath: types.SourcePath(s.currentSourcePath),
|
||||||
|
MTime: info.ModTime(),
|
||||||
|
Size: 0,
|
||||||
|
Mode: uint32(info.Mode()),
|
||||||
|
UID: uid,
|
||||||
|
GID: gid,
|
||||||
|
LinkTarget: types.FilePath(target),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// buildDirectoryEntry creates a File record for a directory.
|
||||||
|
func (s *Scanner) buildDirectoryEntry(path string, info os.FileInfo) *database.File {
|
||||||
|
var uid, gid uint32
|
||||||
|
if stat, ok := info.Sys().(interface {
|
||||||
|
Uid() uint32
|
||||||
|
Gid() uint32
|
||||||
|
}); ok {
|
||||||
|
uid = stat.Uid()
|
||||||
|
gid = stat.Gid()
|
||||||
|
}
|
||||||
|
|
||||||
|
return &database.File{
|
||||||
|
ID: types.NewFileID(),
|
||||||
|
Path: types.FilePath(path),
|
||||||
|
SourcePath: types.SourcePath(s.currentSourcePath),
|
||||||
|
MTime: info.ModTime(),
|
||||||
|
Size: 0,
|
||||||
|
Mode: uint32(info.Mode()),
|
||||||
|
UID: uid,
|
||||||
|
GID: gid,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// recordNonRegularFile writes a symlink or directory entry to the database
|
||||||
|
// and associates it with the current snapshot. No chunking is performed.
|
||||||
|
func (s *Scanner) recordNonRegularFile(ctx context.Context, ftp *FileToProcess) error {
|
||||||
|
return s.repos.WithTx(ctx, func(txCtx context.Context, tx *sql.Tx) error {
|
||||||
|
if err := s.repos.Files.Create(txCtx, tx, ftp.File); err != nil {
|
||||||
|
return fmt.Errorf("creating non-regular file record: %w", err)
|
||||||
|
}
|
||||||
|
return s.repos.Snapshots.AddFileByID(txCtx, tx, s.snapshotID, ftp.File.ID)
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
// checkFileInMemory checks if a file needs processing using the in-memory map
|
// checkFileInMemory checks if a file needs processing using the in-memory map
|
||||||
// No database access is performed - this is purely CPU/memory work
|
// No database access is performed - this is purely CPU/memory work
|
||||||
func (s *Scanner) checkFileInMemory(path string, info os.FileInfo, knownFiles map[string]*database.File) (*database.File, bool) {
|
func (s *Scanner) checkFileInMemory(path string, info os.FileInfo, knownFiles map[string]*database.File) (*database.File, bool) {
|
||||||
@@ -1184,6 +1282,12 @@ type streamingChunkInfo struct {
|
|||||||
|
|
||||||
// processFileStreaming processes a file by streaming chunks directly to the packer
|
// processFileStreaming processes a file by streaming chunks directly to the packer
|
||||||
func (s *Scanner) processFileStreaming(ctx context.Context, fileToProcess *FileToProcess, result *ScanResult) error {
|
func (s *Scanner) processFileStreaming(ctx context.Context, fileToProcess *FileToProcess, result *ScanResult) error {
|
||||||
|
// Symlinks and directories have no data to chunk — just record them in the DB.
|
||||||
|
mode := os.FileMode(fileToProcess.File.Mode)
|
||||||
|
if mode&os.ModeSymlink != 0 || mode.IsDir() {
|
||||||
|
return s.recordNonRegularFile(ctx, fileToProcess)
|
||||||
|
}
|
||||||
|
|
||||||
file, err := s.fs.Open(fileToProcess.Path)
|
file, err := s.fs.Open(fileToProcess.Path)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return fmt.Errorf("opening file: %w", err)
|
return fmt.Errorf("opening file: %w", err)
|
||||||
|
|||||||
@@ -110,15 +110,15 @@ func TestScannerSimpleDirectory(t *testing.T) {
|
|||||||
t.Errorf("expected at least 97 bytes scanned, got %d", result.BytesScanned)
|
t.Errorf("expected at least 97 bytes scanned, got %d", result.BytesScanned)
|
||||||
}
|
}
|
||||||
|
|
||||||
// Verify files in database - only regular files are stored
|
// Verify files in database - includes regular files and directories
|
||||||
files, err := repos.Files.ListByPrefix(ctx, "/source")
|
files, err := repos.Files.ListByPrefix(ctx, "/source")
|
||||||
if err != nil {
|
if err != nil {
|
||||||
t.Fatalf("failed to list files: %v", err)
|
t.Fatalf("failed to list files: %v", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
// We should have 6 files (directories are not stored)
|
// 6 regular files + 3 directories (/source, /source/subdir, /source/subdir2)
|
||||||
if len(files) != 6 {
|
if len(files) != 9 {
|
||||||
t.Errorf("expected 6 files in database, got %d", len(files))
|
t.Errorf("expected 9 entries in database (6 files + 3 dirs), got %d", len(files))
|
||||||
}
|
}
|
||||||
|
|
||||||
// Verify specific file
|
// Verify specific file
|
||||||
|
|||||||
@@ -2,6 +2,7 @@ package vaultik
|
|||||||
|
|
||||||
import (
|
import (
|
||||||
"fmt"
|
"fmt"
|
||||||
|
"regexp"
|
||||||
"strconv"
|
"strconv"
|
||||||
"strings"
|
"strings"
|
||||||
"time"
|
"time"
|
||||||
@@ -95,18 +96,39 @@ func parseSnapshotName(snapshotID string) string {
|
|||||||
return strings.Join(parts[1:len(parts)-1], "_")
|
return strings.Join(parts[1:len(parts)-1], "_")
|
||||||
}
|
}
|
||||||
|
|
||||||
// durationComponentRe matches one "<integer><unit>" component of a
// human-friendly duration, e.g. "30d" or "6 mo". It is compiled once at
// package scope rather than on every parseDuration call.
var durationComponentRe = regexp.MustCompile(`(\d+)\s*([a-zA-Z]+)`)

// parseDuration parses a duration string with support for human-friendly units.
//
// Input that time.ParseDuration accepts (e.g. "1h30m", "45s") is returned
// unchanged. Otherwise the string must consist solely of "<integer><unit>"
// components, optionally separated by whitespace, using these units
// (case-insensitive):
//
//	h/hour/hours, m/min/minute/minutes, s/sec/second/seconds,
//	d/day/days, w/week/weeks,
//	mo/month/months (fixed 30 days), y/year/years (fixed 365 days)
//
// Components are summed, so "2w3d" is 17 days and "1d12h" is 36 hours.
//
// An error is returned when the string contains anything besides such
// components (fractions like "1.5d", signs like "-3d", stray punctuation),
// when a unit is unknown, or when the total does not fit in a time.Duration.
// Rejecting rather than skipping unrecognised text matters because this value
// is used as a retention window: a silently misread duration could cause the
// wrong snapshots to be pruned.
func parseDuration(s string) (time.Duration, error) {
	if d, err := time.ParseDuration(s); err == nil {
		return d, nil
	}

	const (
		day         = 24 * time.Hour
		maxDuration = time.Duration(1<<63 - 1)
	)

	locs := durationComponentRe.FindAllStringSubmatchIndex(s, -1)
	if len(locs) == 0 {
		return 0, fmt.Errorf("invalid duration: %q", s)
	}

	var total time.Duration
	consumed := 0 // byte offset just past the previous component
	for _, loc := range locs {
		// Only whitespace may sit between components; anything else means
		// part of the input would be ignored.
		if strings.TrimSpace(s[consumed:loc[0]]) != "" {
			return 0, fmt.Errorf("invalid duration: %q", s)
		}
		consumed = loc[1]

		numStr := s[loc[2]:loc[3]]
		n, err := strconv.Atoi(numStr)
		if err != nil {
			return 0, fmt.Errorf("invalid number %q: %w", numStr, err)
		}

		unit := strings.ToLower(s[loc[4]:loc[5]])
		var base time.Duration
		switch unit {
		case "s", "sec", "second", "seconds":
			base = time.Second
		case "m", "min", "minute", "minutes":
			base = time.Minute
		case "h", "hour", "hours":
			base = time.Hour
		case "d", "day", "days":
			base = day
		case "w", "week", "weeks":
			base = 7 * day
		case "mo", "month", "months":
			base = 30 * day
		case "y", "year", "years":
			base = 365 * day
		default:
			return 0, fmt.Errorf("unknown time unit %q", unit)
		}

		// Guard both the multiplication and the running sum against
		// wrapping around int64.
		if time.Duration(n) > maxDuration/base {
			return 0, fmt.Errorf("duration %q is too large", s)
		}
		step := time.Duration(n) * base
		if total > maxDuration-step {
			return 0, fmt.Errorf("duration %q is too large", s)
		}
		total += step
	}

	if strings.TrimSpace(s[consumed:]) != "" {
		return 0, fmt.Errorf("invalid duration: %q", s)
	}

	return total, nil
}
|
||||||
|
|||||||
@@ -2,6 +2,7 @@ package vaultik
|
|||||||
|
|
||||||
import (
|
import (
|
||||||
"testing"
|
"testing"
|
||||||
|
"time"
|
||||||
)
|
)
|
||||||
|
|
||||||
func TestParseSnapshotName(t *testing.T) {
|
func TestParseSnapshotName(t *testing.T) {
|
||||||
@@ -37,6 +38,41 @@ func TestParseSnapshotName(t *testing.T) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestParseDuration(t *testing.T) {
|
||||||
|
tests := []struct {
|
||||||
|
input string
|
||||||
|
want time.Duration
|
||||||
|
err bool
|
||||||
|
}{
|
||||||
|
{"30d", 30 * 24 * time.Hour, false},
|
||||||
|
{"4w", 4 * 7 * 24 * time.Hour, false},
|
||||||
|
{"6mo", 6 * 30 * 24 * time.Hour, false},
|
||||||
|
{"1y", 365 * 24 * time.Hour, false},
|
||||||
|
{"2w3d", 2*7*24*time.Hour + 3*24*time.Hour, false},
|
||||||
|
{"1h", time.Hour, false},
|
||||||
|
{"30s", 30 * time.Second, false},
|
||||||
|
{"garbage", 0, true},
|
||||||
|
}
|
||||||
|
|
||||||
|
for _, tt := range tests {
|
||||||
|
t.Run(tt.input, func(t *testing.T) {
|
||||||
|
got, err := parseDuration(tt.input)
|
||||||
|
if tt.err {
|
||||||
|
if err == nil {
|
||||||
|
t.Fatalf("expected error for %q, got %v", tt.input, got)
|
||||||
|
}
|
||||||
|
return
|
||||||
|
}
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("unexpected error for %q: %v", tt.input, err)
|
||||||
|
}
|
||||||
|
if got != tt.want {
|
||||||
|
t.Errorf("parseDuration(%q) = %v, want %v", tt.input, got, tt.want)
|
||||||
|
}
|
||||||
|
})
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
func TestParseSnapshotTimestamp(t *testing.T) {
|
func TestParseSnapshotTimestamp(t *testing.T) {
|
||||||
tests := []struct {
|
tests := []struct {
|
||||||
name string
|
name string
|
||||||
|
|||||||
@@ -585,6 +585,19 @@ func TestEndToEndFileStorage(t *testing.T) {
|
|||||||
require.NoError(t, afero.WriteFile(fs, path, content, 0o644))
|
require.NoError(t, afero.WriteFile(fs, path, content, 0o644))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Create a file with non-default permissions.
|
||||||
|
restrictedPath := filepath.Join(dataDir, "restricted.txt")
|
||||||
|
require.NoError(t, afero.WriteFile(fs, restrictedPath, []byte("secret"), 0o600))
|
||||||
|
testFiles[restrictedPath] = []byte("secret")
|
||||||
|
|
||||||
|
// Create an empty directory (should survive round-trip).
|
||||||
|
emptyDir := filepath.Join(dataDir, "emptydir")
|
||||||
|
require.NoError(t, fs.MkdirAll(emptyDir, 0o755))
|
||||||
|
|
||||||
|
// Create a symlink.
|
||||||
|
symlinkPath := filepath.Join(dataDir, "link-to-small")
|
||||||
|
require.NoError(t, os.Symlink("small.txt", symlinkPath))
|
||||||
|
|
||||||
// FileStorer is the real-world local-disk backend.
|
// FileStorer is the real-world local-disk backend.
|
||||||
storer, err := storage.NewFileStorer(storeDir)
|
storer, err := storage.NewFileStorer(storeDir)
|
||||||
require.NoError(t, err)
|
require.NoError(t, err)
|
||||||
@@ -669,6 +682,25 @@ func TestEndToEndFileStorage(t *testing.T) {
|
|||||||
require.NoError(t, err, "restored file missing: %s", restoredPath)
|
require.NoError(t, err, "restored file missing: %s", restoredPath)
|
||||||
require.Equalf(t, expected, got, "byte-equality failed for %s", origPath)
|
require.Equalf(t, expected, got, "byte-equality failed for %s", origPath)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Verify the restricted file kept its permissions.
|
||||||
|
restoredRestricted := filepath.Join(restoreDir, restrictedPath)
|
||||||
|
rInfo, err := os.Stat(restoredRestricted)
|
||||||
|
require.NoError(t, err)
|
||||||
|
assert.Equal(t, os.FileMode(0o600), rInfo.Mode().Perm(),
|
||||||
|
"restricted file should preserve 0600 permissions")
|
||||||
|
|
||||||
|
// Verify the empty directory was restored.
|
||||||
|
restoredEmptyDir := filepath.Join(restoreDir, emptyDir)
|
||||||
|
dInfo, err := os.Stat(restoredEmptyDir)
|
||||||
|
require.NoError(t, err, "empty directory should be restored")
|
||||||
|
assert.True(t, dInfo.IsDir(), "emptydir should be a directory")
|
||||||
|
|
||||||
|
// Verify the symlink was restored with the correct target.
|
||||||
|
restoredSymlink := filepath.Join(restoreDir, symlinkPath)
|
||||||
|
target, err := os.Readlink(restoredSymlink)
|
||||||
|
require.NoError(t, err, "symlink should be restored")
|
||||||
|
assert.Equal(t, "small.txt", target, "symlink target should be preserved")
|
||||||
}
|
}
|
||||||
|
|
||||||
// bytesPattern returns a deterministic byte slice of length n with a tag prefix,
|
// bytesPattern returns a deterministic byte slice of length n with a tag prefix,
|
||||||
|
|||||||
@@ -22,10 +22,11 @@ import (
|
|||||||
|
|
||||||
// SnapshotCreateOptions contains options for the snapshot create command
type SnapshotCreateOptions struct {
	// Cron selects cron mode (silent unless an error occurs).
	Cron bool

	// Prune requests a post-backup prune once the snapshot has been created.
	Prune bool

	// KeepNewerThan applies with --prune: keep snapshots newer than this
	// duration (e.g. "4w"); default: keep only latest.
	KeepNewerThan string

	// SkipErrors skips file read errors (log them loudly but continue).
	SkipErrors bool

	// Snapshots is an optional list of snapshot names to process
	// (empty = all).
	Snapshots []string
}
|
||||||
|
|
||||||
// CreateSnapshot executes the snapshot creation operation
|
// CreateSnapshot executes the snapshot creation operation
|
||||||
@@ -86,7 +87,7 @@ func (v *Vaultik) CreateSnapshot(opts *SnapshotCreateOptions) error {
|
|||||||
}
|
}
|
||||||
|
|
||||||
if opts.Prune {
|
if opts.Prune {
|
||||||
if err := v.runPostBackupPrune(snapshotNames); err != nil {
|
if err := v.runPostBackupPrune(snapshotNames, opts.KeepNewerThan); err != nil {
|
||||||
return fmt.Errorf("post-backup prune: %w", err)
|
return fmt.Errorf("post-backup prune: %w", err)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -94,19 +95,26 @@ func (v *Vaultik) CreateSnapshot(opts *SnapshotCreateOptions) error {
|
|||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
// runPostBackupPrune drops older snapshots of the given names (keeping only
|
// runPostBackupPrune drops older snapshots of the given names and removes
|
||||||
// the latest of each) and removes orphan blobs from remote storage. Invoked
|
// orphan blobs from remote storage. If keepNewerThan is set (e.g. "4w"),
|
||||||
// when `snapshot create --prune` is used.
|
// snapshots newer than that duration are kept. Otherwise only the latest
|
||||||
func (v *Vaultik) runPostBackupPrune(snapshotNames []string) error {
|
// snapshot of each name is kept.
|
||||||
log.Info("Running post-backup prune", "snapshots", snapshotNames)
|
func (v *Vaultik) runPostBackupPrune(snapshotNames []string, keepNewerThan string) error {
|
||||||
|
log.Info("Running post-backup prune", "snapshots", snapshotNames, "keep_newer_than", keepNewerThan)
|
||||||
v.printlnStdout("\n=== Post-backup prune ===")
|
v.printlnStdout("\n=== Post-backup prune ===")
|
||||||
|
|
||||||
purgeOpts := &SnapshotPurgeOptions{
|
purgeOpts := &SnapshotPurgeOptions{
|
||||||
KeepLatest: true,
|
Force: true,
|
||||||
Force: true,
|
Names: snapshotNames,
|
||||||
Names: snapshotNames,
|
Quiet: true,
|
||||||
Quiet: true,
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if keepNewerThan != "" {
|
||||||
|
purgeOpts.OlderThan = keepNewerThan
|
||||||
|
} else {
|
||||||
|
purgeOpts.KeepLatest = true
|
||||||
|
}
|
||||||
|
|
||||||
if err := v.PurgeSnapshotsWithOptions(purgeOpts); err != nil {
|
if err := v.PurgeSnapshotsWithOptions(purgeOpts); err != nil {
|
||||||
return fmt.Errorf("purging old snapshots: %w", err)
|
return fmt.Errorf("purging old snapshots: %w", err)
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user