Compare commits
84 Commits
feature/pl
...
e534746cf3
| Author | SHA1 | Date | |
|---|---|---|---|
| e534746cf3 | |||
| 5397b37c13 | |||
| 2df2792a75 | |||
| 4fe568f803 | |||
| 27e85f01f2 | |||
| d479bfcd52 | |||
| cb16d6869f | |||
| ff85f1e4f8 | |||
| b2e160944f | |||
| 307867f59e | |||
| 9d12d500fa | |||
| 2e2bf01130 | |||
| e9687c68b7 | |||
| a8970a87fc | |||
| e6ee488d9d | |||
| 2e2b02a056 | |||
| 0b95cb4308 | |||
| 4a3e61f8e1 | |||
| 6fbcac0cd8 | |||
| 34f73f72d8 | |||
| ee240faa32 | |||
| f719ab3adc | |||
| 1a8baf7491 | |||
| 7d5d3fa598 | |||
| ac5d2f4a0d | |||
| b250ddfa94 | |||
| fe3ad13a91 | |||
| ebd6619638 | |||
| 20d3a9ac8c | |||
| 0889cf2804 | |||
| f9ebb4bf25 | |||
| 9f2d722734 | |||
| 6821215b0e | |||
| f97a1dc2eb | |||
| 18c14d1507 | |||
| 65da291ddf | |||
| dcf3ec399a | |||
| 495dede1bc | |||
| 1c72a37bc8 | |||
| 60b6746db9 | |||
| f28c8a73b7 | |||
| 1c0f5b8eb2 | |||
| 689109a2b8 | |||
| ac2f21a89d | |||
| 8c59f55096 | |||
| c24e7e6360 | |||
| 7a5943958d | |||
| d8a51804d2 | |||
| 76f4421eb3 | |||
| 53ac868c5d | |||
| 8c4ea2b870 | |||
| 597b560398 | |||
| 1e2eced092 | |||
| 815b35c7ae | |||
| 9c66674683 | |||
| 49de277648 | |||
| ed5d777d05 | |||
| 76e047bbb2 | |||
| 2e7356dd85 | |||
| 70d4fe2aa0 | |||
|
|
2f249e3ddd | ||
|
|
3f834f1c9c | ||
|
|
9879668c31 | ||
|
|
0a0d9f33b0 | ||
| df0e8c275b | |||
|
|
ddc23f8057 | ||
| cafb3d45b8 | |||
|
|
d77ac18aaa | ||
| 825f25da58 | |||
| 162d76bb38 | |||
|
|
bfd7334221 | ||
|
|
9b32bf0846 | ||
| 8adc668fa6 | |||
|
|
441c441eca | ||
|
|
4d9f912a5f | ||
| 46c2ea3079 | |||
| 470bf648c4 | |||
| bdaaadf990 | |||
| 417b25a5f5 | |||
| 2afd54d693 | |||
| 05286bed01 | |||
| f2c120f026 | |||
| bbe09ec5b5 | |||
| 43a69c2cfb |
8
.dockerignore
Normal file
8
.dockerignore
Normal file
@@ -0,0 +1,8 @@
|
||||
.git
|
||||
.gitea
|
||||
*.md
|
||||
LICENSE
|
||||
vaultik
|
||||
coverage.out
|
||||
coverage.html
|
||||
.DS_Store
|
||||
14
.gitea/workflows/check.yml
Normal file
14
.gitea/workflows/check.yml
Normal file
@@ -0,0 +1,14 @@
|
||||
name: check
|
||||
on:
|
||||
push:
|
||||
branches: [main]
|
||||
pull_request:
|
||||
branches: [main]
|
||||
jobs:
|
||||
check:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
# actions/checkout v4, 2024-09-16
|
||||
- uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5
|
||||
- name: Build and check
|
||||
run: docker build .
|
||||
2
.gitignore
vendored
2
.gitignore
vendored
@@ -1,5 +1,5 @@
|
||||
# Binary
|
||||
vaultik
|
||||
/vaultik
|
||||
|
||||
# Test artifacts
|
||||
*.out
|
||||
|
||||
55
.goreleaser.yaml
Normal file
55
.goreleaser.yaml
Normal file
@@ -0,0 +1,55 @@
|
||||
version: 2
|
||||
|
||||
project_name: vaultik
|
||||
|
||||
before:
|
||||
hooks:
|
||||
- go mod tidy
|
||||
|
||||
builds:
|
||||
- id: vaultik
|
||||
main: ./cmd/vaultik
|
||||
binary: vaultik
|
||||
env:
|
||||
- CGO_ENABLED=0
|
||||
goos:
|
||||
- linux
|
||||
- darwin
|
||||
goarch:
|
||||
- amd64
|
||||
- arm64
|
||||
ldflags:
|
||||
- -s -w
|
||||
- -X 'sneak.berlin/go/vaultik/internal/globals.Version={{ .Version }}'
|
||||
- -X 'sneak.berlin/go/vaultik/internal/globals.Commit={{ .Commit }}'
|
||||
|
||||
archives:
|
||||
- id: default
|
||||
name_template: "{{ .ProjectName }}_{{ .Version }}_{{ .Os }}_{{ .Arch }}"
|
||||
formats:
|
||||
- tar.gz
|
||||
files:
|
||||
- LICENSE
|
||||
- README.md
|
||||
|
||||
checksum:
|
||||
name_template: "checksums.txt"
|
||||
algorithm: sha256
|
||||
|
||||
snapshot:
|
||||
version_template: "{{ incpatch .Version }}-next"
|
||||
|
||||
changelog:
|
||||
sort: asc
|
||||
use: git
|
||||
filters:
|
||||
exclude:
|
||||
- "^docs:"
|
||||
- "^test:"
|
||||
- "^chore:"
|
||||
- "Merge pull request"
|
||||
- "Merge branch"
|
||||
|
||||
release:
|
||||
draft: true
|
||||
prerelease: auto
|
||||
13
AGENTS.md
13
AGENTS.md
@@ -38,10 +38,9 @@ Version: 2025-06-08
|
||||
1. Before committing, tests must pass (`make test`), linting must pass
|
||||
(`make lint`), and code must be formatted (`make fmt`). For go, those
|
||||
makefile targets should use `go fmt` and `go test -v ./...` and
|
||||
`golangci-lint run`. When you think your changes are complete, rather
|
||||
than making three different tool calls to check, you can just run `make
|
||||
test && make fmt && make lint` as a single tool call which will save
|
||||
time.
|
||||
`golangci-lint run`. Each Makefile target does exactly one thing — to
|
||||
run lint + fmt-check + test together (the standard pre-commit gate),
|
||||
use `make check`.
|
||||
|
||||
2. Always write a `Makefile` with the default target being `test`, and with
|
||||
a `fmt` target that formats the code. The `test` target should run all
|
||||
@@ -103,3 +102,9 @@ Version: 2025-06-08
|
||||
build files are acceptable in the root, but source code and other files
|
||||
should be organized in appropriate subdirectories.
|
||||
|
||||
13. Pre-1.0: NEVER write database migrations. There are no live databases
|
||||
anywhere — every user's local index can be rebuilt from a fresh full
|
||||
backup. When the schema changes, just change `schema.sql` (and any code
|
||||
that touches the affected tables). The local index is disposable until
|
||||
1.0 ships and is tagged.
|
||||
|
||||
|
||||
@@ -53,8 +53,8 @@ The database tracks five primary entities and their relationships:
|
||||
### Entity Descriptions
|
||||
|
||||
#### File (`database.File`)
|
||||
Represents a file or directory in the backup system. Stores metadata needed for restoration:
|
||||
- Path, timestamps (mtime, ctime)
|
||||
Represents a file, directory, or symlink in the backup system. Stores metadata needed for restoration:
|
||||
- Path, source_path (for restore path stripping), mtime
|
||||
- Size, mode, ownership (uid, gid)
|
||||
- Symlink target (if applicable)
|
||||
|
||||
@@ -95,7 +95,7 @@ Maps chunks to their position within blobs:
|
||||
|
||||
#### Snapshot (`database.Snapshot`)
|
||||
Represents a point-in-time backup:
|
||||
- `ID`: Format is `{hostname}-{YYYYMMDD}-{HHMMSS}Z`
|
||||
- `ID`: Format is `{hostname}_{snapshot-name}_{RFC3339}` (e.g. `server1_home_2025-06-01T12:00:00Z`)
|
||||
- Tracks file count, chunk count, blob count, sizes, compression ratio
|
||||
- `CompletedAt`: Null until snapshot finishes successfully
|
||||
|
||||
@@ -127,7 +127,7 @@ fx.New(
|
||||
config.Module, // 5. Config
|
||||
database.Module, // 6. Database + Repositories
|
||||
log.Module, // 7. Logger initialization
|
||||
s3.Module, // 8. S3 client
|
||||
storage.Module, // 8. Storage backend (S3/file/rclone)
|
||||
snapshot.Module, // 9. SnapshotManager + ScannerFactory
|
||||
fx.Provide(vaultik.New), // 10. Vaultik orchestrator
|
||||
)
|
||||
@@ -161,7 +161,7 @@ type Vaultik struct {
|
||||
Config *config.Config
|
||||
DB *database.DB
|
||||
Repositories *database.Repositories
|
||||
S3Client *s3.Client
|
||||
Storage storage.Storer
|
||||
ScannerFactory snapshot.ScannerFactory
|
||||
SnapshotManager *snapshot.SnapshotManager
|
||||
Shutdowner fx.Shutdowner
|
||||
@@ -341,12 +341,11 @@ CreateSnapshot(opts)
|
||||
└─► SnapshotManager.ExportSnapshotMetadata()
|
||||
│
|
||||
├─► Copy database to temp file
|
||||
├─► Clean to only current snapshot data
|
||||
├─► Dump to SQL
|
||||
├─► Compress with zstd
|
||||
├─► Clean to only current snapshot data (VACUUM)
|
||||
├─► Compress binary SQLite with zstd
|
||||
├─► Encrypt with age
|
||||
├─► Upload db.zst.age to S3
|
||||
└─► Upload manifest.json.zst to S3
|
||||
├─► Upload db.zst.age to storage
|
||||
└─► Upload manifest.json.zst to storage
|
||||
```
|
||||
|
||||
## Deduplication Strategy
|
||||
@@ -368,8 +367,8 @@ bucket/
|
||||
│
|
||||
└── metadata/
|
||||
└── {snapshot-id}/
|
||||
├── db.zst.age # Encrypted database dump
|
||||
└── manifest.json.zst # Blob list (for verification)
|
||||
├── db.zst.age # Encrypted binary SQLite database
|
||||
└── manifest.json.zst # Blob list (for pruning/verification)
|
||||
```
|
||||
|
||||
## Thread Safety
|
||||
|
||||
10
CLAUDE.md
10
CLAUDE.md
@@ -10,6 +10,9 @@ Read the rules in AGENTS.md and follow them.
|
||||
corporate advertising for Anthropic and is therefore completely
|
||||
unacceptable in commit messages.
|
||||
|
||||
* NEVER use `git add -A`. Always add only the files you intentionally
|
||||
changed.
|
||||
|
||||
* Tests should always be run before committing code. No commits should be
|
||||
made that do not pass tests.
|
||||
|
||||
@@ -33,6 +36,9 @@ Read the rules in AGENTS.md and follow them.
|
||||
* When testing on a 2.5Gbit/s ethernet to an s3 server backed by 2000MB/sec SSD,
|
||||
estimate about 4 seconds per gigabyte of backup time.
|
||||
|
||||
* When running tests, don't run individual tests, or grep the output. run the entire test suite every time and read the full output.
|
||||
* When running tests, don't run individual tests, or grep the output. run
|
||||
the entire test suite every time and read the full output.
|
||||
|
||||
* When running tests, don't run individual tests, or try to grep the output. never run "go test". only ever run "make test" to run the full test suite, and examine the full output.
|
||||
* When running tests, don't run individual tests, or try to grep the output.
|
||||
never run "go test". only ever run "make test" to run the full test
|
||||
suite, and examine the full output.
|
||||
|
||||
387
DESIGN.md
387
DESIGN.md
@@ -1,387 +0,0 @@
|
||||
# vaultik: Design Document
|
||||
|
||||
`vaultik` is a secure backup tool written in Go. It performs
|
||||
streaming backups using content-defined chunking, blob grouping, asymmetric
|
||||
encryption, and object storage. The system is designed for environments
|
||||
where the backup source host cannot store secrets and cannot retrieve or
|
||||
decrypt any data from the destination.
|
||||
|
||||
The source host is **stateful**: it maintains a local SQLite index to detect
|
||||
changes, deduplicate content, and track uploads across backup runs. All
|
||||
remote storage is encrypted and append-only. Pruning of unreferenced data is
|
||||
done from a trusted host with access to decryption keys, as even the
|
||||
metadata indices are encrypted in the blob store.
|
||||
|
||||
---
|
||||
|
||||
## Why
|
||||
|
||||
ANOTHER backup tool??
|
||||
|
||||
Other backup tools like `restic`, `borg`, and `duplicity` are designed for
|
||||
environments where the source host can store secrets and has access to
|
||||
decryption keys. I don't want to store backup decryption keys on my hosts,
|
||||
only public keys for encryption.
|
||||
|
||||
My requirements are:
|
||||
|
||||
* open source
|
||||
* no passphrases or private keys on the source host
|
||||
* incremental
|
||||
* compressed
|
||||
* encrypted
|
||||
* s3 compatible without an intermediate step or tool
|
||||
|
||||
Surprisingly, no existing tool meets these requirements, so I wrote `vaultik`.
|
||||
|
||||
## Design Goals
|
||||
|
||||
1. Backups must require only a public key on the source host.
|
||||
2. No secrets or private keys may exist on the source system.
|
||||
3. Obviously, restore must be possible using **only** the backup bucket and
|
||||
a private key.
|
||||
4. Prune must be possible, although this requires a private key so must be
|
||||
done on different hosts.
|
||||
5. All encryption is done using [`age`](https://github.com/FiloSottile/age)
|
||||
(X25519, XChaCha20-Poly1305).
|
||||
6. Compression uses `zstd` at a configurable level.
|
||||
7. Files are chunked, and multiple chunks are packed into encrypted blobs.
|
||||
This reduces the number of objects in the blob store for filesystems with
|
||||
many small files.
|
||||
9. All metadata (snapshots) is stored remotely as encrypted SQLite DBs.
|
||||
10. If a snapshot metadata file exceeds a configured size threshold, it is
|
||||
chunked into multiple encrypted `.age` parts, to support large
|
||||
filesystems.
|
||||
11. CLI interface is structured using `cobra`.
|
||||
|
||||
---
|
||||
|
||||
## S3 Bucket Layout
|
||||
|
||||
S3 stores only four things:
|
||||
|
||||
1) Blobs: encrypted, compressed packs of file chunks.
|
||||
2) Metadata: encrypted SQLite databases containing the current state of the
|
||||
filesystem at the time of the snapshot.
|
||||
3) Metadata hashes: encrypted hashes of the metadata SQLite databases.
|
||||
4) Blob manifests: unencrypted compressed JSON files listing all blob hashes
|
||||
referenced in the snapshot, enabling pruning without decryption.
|
||||
|
||||
```
|
||||
s3://<bucket>/<prefix>/
|
||||
├── blobs/
|
||||
│ ├── <aa>/<bb>/<full_blob_hash>.zst.age
|
||||
├── metadata/
|
||||
│ ├── <snapshot_id>.sqlite.age
|
||||
│ ├── <snapshot_id>.sqlite.00.age
|
||||
│ ├── <snapshot_id>.sqlite.01.age
|
||||
│ ├── <snapshot_id>.manifest.json.zst
|
||||
```
|
||||
|
||||
To retrieve a given file, you would:
|
||||
|
||||
* fetch `metadata/<snapshot_id>.sqlite.age` or `metadata/<snapshot_id>.sqlite.{seq}.age`
|
||||
* fetch `metadata/<snapshot_id>.hash.age`
|
||||
* decrypt the metadata SQLite database using the private key and reconstruct
|
||||
the full database file
|
||||
* verify the hash of the decrypted database matches the decrypted hash
|
||||
* query the database for the file in question
|
||||
* determine all chunks for the file
|
||||
* for each chunk, look up the metadata for all blobs in the db
|
||||
* fetch each blob from `blobs/<aa>/<bb>/<blob_hash>.zst.age`
|
||||
* decrypt each blob using the private key
|
||||
* decompress each blob using `zstd`
|
||||
* reconstruct the file from the set of file chunks stored in the blobs
|
||||
|
||||
If clever, it may be possible to do this chunk by chunk without touching
|
||||
disk (except for the output file) as each uncompressed blob should fit in
|
||||
memory (<10GB).
|
||||
|
||||
### Path Rules
|
||||
|
||||
* `<snapshot_id>`: UTC timestamp in ISO 8601 format, e.g. `2023-10-01T12:00:00Z`. These are lexicographically sortable.
|
||||
* `blobs/<aa>/<bb>/...`: where `aa` and `bb` are the first 2 hex bytes of the blob hash.
|
||||
|
||||
### Blob Manifest Format
|
||||
|
||||
The `<snapshot_id>.manifest.json.zst` file is an unencrypted, compressed JSON file containing:
|
||||
|
||||
```json
|
||||
{
|
||||
"snapshot_id": "2023-10-01T12:00:00Z",
|
||||
"blob_hashes": [
|
||||
"aa1234567890abcdef...",
|
||||
"bb2345678901bcdef0...",
|
||||
...
|
||||
]
|
||||
}
|
||||
```
|
||||
|
||||
This allows pruning operations to determine which blobs are referenced without requiring decryption keys.
|
||||
|
||||
---
|
||||
|
||||
## 3. Local SQLite Index Schema (source host)
|
||||
|
||||
```sql
|
||||
CREATE TABLE files (
|
||||
id TEXT PRIMARY KEY, -- UUID
|
||||
path TEXT NOT NULL UNIQUE,
|
||||
mtime INTEGER NOT NULL,
|
||||
size INTEGER NOT NULL
|
||||
);
|
||||
|
||||
-- Maps files to their constituent chunks in sequence order
|
||||
-- Used for reconstructing files from chunks during restore
|
||||
CREATE TABLE file_chunks (
|
||||
file_id TEXT NOT NULL,
|
||||
idx INTEGER NOT NULL,
|
||||
chunk_hash TEXT NOT NULL,
|
||||
PRIMARY KEY (file_id, idx)
|
||||
);
|
||||
|
||||
CREATE TABLE chunks (
|
||||
chunk_hash TEXT PRIMARY KEY,
|
||||
sha256 TEXT NOT NULL,
|
||||
size INTEGER NOT NULL
|
||||
);
|
||||
|
||||
CREATE TABLE blobs (
|
||||
blob_hash TEXT PRIMARY KEY,
|
||||
final_hash TEXT NOT NULL,
|
||||
created_ts INTEGER NOT NULL
|
||||
);
|
||||
|
||||
CREATE TABLE blob_chunks (
|
||||
blob_hash TEXT NOT NULL,
|
||||
chunk_hash TEXT NOT NULL,
|
||||
offset INTEGER NOT NULL,
|
||||
length INTEGER NOT NULL,
|
||||
PRIMARY KEY (blob_hash, chunk_hash)
|
||||
);
|
||||
|
||||
-- Reverse mapping: tracks which files contain a given chunk
|
||||
-- Used for deduplication and tracking chunk usage across files
|
||||
CREATE TABLE chunk_files (
|
||||
chunk_hash TEXT NOT NULL,
|
||||
file_id TEXT NOT NULL,
|
||||
file_offset INTEGER NOT NULL,
|
||||
length INTEGER NOT NULL,
|
||||
PRIMARY KEY (chunk_hash, file_id)
|
||||
);
|
||||
|
||||
CREATE TABLE snapshots (
|
||||
id TEXT PRIMARY KEY,
|
||||
hostname TEXT NOT NULL,
|
||||
vaultik_version TEXT NOT NULL,
|
||||
vaultik_git_revision TEXT NOT NULL,
|
||||
created_ts INTEGER NOT NULL,
|
||||
file_count INTEGER NOT NULL,
|
||||
chunk_count INTEGER NOT NULL,
|
||||
blob_count INTEGER NOT NULL
|
||||
);
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 4. Snapshot Metadata Schema (stored in S3)
|
||||
|
||||
Identical schema to the local index, filtered to live snapshot state. Stored
|
||||
as a SQLite DB, compressed with `zstd`, encrypted with `age`. If larger than
|
||||
a configured `chunk_size`, it is split and uploaded as:
|
||||
|
||||
```
|
||||
metadata/<snapshot_id>.sqlite.00.age
|
||||
metadata/<snapshot_id>.sqlite.01.age
|
||||
...
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## 5. Data Flow
|
||||
|
||||
### 5.1 Backup
|
||||
|
||||
1. Load config
|
||||
2. Open local SQLite index
|
||||
3. Walk source directories:
|
||||
|
||||
* For each file:
|
||||
|
||||
* Check mtime and size in index
|
||||
* If changed or new:
|
||||
|
||||
* Chunk file
|
||||
* For each chunk:
|
||||
|
||||
* Hash with SHA256
|
||||
* Check if already uploaded
|
||||
* If not:
|
||||
|
||||
* Add chunk to blob packer
|
||||
* Record file-chunk mapping in index
|
||||
4. When blob reaches threshold size (e.g. 1GB):
|
||||
|
||||
* Compress with `zstd`
|
||||
* Encrypt with `age`
|
||||
* Upload to: `s3://<bucket>/<prefix>/blobs/<aa>/<bb>/<hash>.zst.age`
|
||||
* Record blob-chunk layout in local index
|
||||
5. Once all files are processed:
|
||||
* Build snapshot SQLite DB from index delta
|
||||
* Compress + encrypt
|
||||
* If larger than `chunk_size`, split into parts
|
||||
* Upload to:
|
||||
`s3://<bucket>/<prefix>/metadata/<snapshot_id>.sqlite(.xx).age`
|
||||
6. Create snapshot record in local index that lists:
|
||||
* snapshot ID
|
||||
* hostname
|
||||
* vaultik version
|
||||
* timestamp
|
||||
* counts of files, chunks, and blobs
|
||||
* list of all blobs referenced in the snapshot (some new, some old) for
|
||||
efficient pruning later
|
||||
7. Create snapshot database for upload
|
||||
8. Calculate checksum of snapshot database
|
||||
9. Compress, encrypt, split, and upload to S3
|
||||
10. Encrypt the hash of the snapshot database to the backup age key
|
||||
11. Upload the encrypted hash to S3 as `metadata/<snapshot_id>.hash.age`
|
||||
12. Create blob manifest JSON listing all blob hashes referenced in snapshot
|
||||
13. Compress manifest with zstd and upload as `metadata/<snapshot_id>.manifest.json.zst`
|
||||
14. Optionally prune remote blobs that are no longer referenced in the
|
||||
snapshot, based on local state db
|
||||
|
||||
### 5.2 Manual Prune
|
||||
|
||||
1. List all objects under `metadata/`
|
||||
2. Determine the latest valid `snapshot_id` by timestamp
|
||||
3. Download and decompress the latest `<snapshot_id>.manifest.json.zst`
|
||||
4. Extract set of referenced blob hashes from manifest (no decryption needed)
|
||||
5. List all blob objects under `blobs/`
|
||||
6. For each blob:
|
||||
* If the hash is not in the manifest:
|
||||
* Issue `DeleteObject` to remove it
|
||||
|
||||
### 5.3 Verify
|
||||
|
||||
Verify runs on a host that has no state, but access to the bucket.
|
||||
|
||||
1. Fetch latest metadata snapshot files from S3
|
||||
2. Fetch latest metadata db hash from S3
|
||||
3. Decrypt the hash using the private key
|
||||
4. Decrypt the metadata SQLite database chunks using the private key and
|
||||
reassemble the snapshot db file
|
||||
5. Calculate the SHA256 hash of the decrypted snapshot database
|
||||
6. Verify the db file hash matches the decrypted hash
|
||||
7. For each blob in the snapshot:
|
||||
* Fetch the blob metadata from the snapshot db
|
||||
* Ensure the blob exists in S3
|
||||
* Check the S3 content hash matches the expected blob hash
|
||||
* If not using --quick mode:
|
||||
* Download and decrypt the blob
|
||||
* Decompress and verify chunk hashes match metadata
|
||||
|
||||
---
|
||||
|
||||
## 6. CLI Commands
|
||||
|
||||
```
|
||||
vaultik backup [--config <path>] [--cron] [--daemon] [--prune]
|
||||
vaultik restore --bucket <bucket> --prefix <prefix> --snapshot <id> --target <dir>
|
||||
vaultik prune --bucket <bucket> --prefix <prefix> [--dry-run]
|
||||
vaultik verify --bucket <bucket> --prefix <prefix> [--snapshot <id>] [--quick]
|
||||
vaultik fetch --bucket <bucket> --prefix <prefix> --snapshot <id> --file <path> --target <path>
|
||||
vaultik snapshot list --bucket <bucket> --prefix <prefix> [--limit <n>]
|
||||
vaultik snapshot rm --bucket <bucket> --prefix <prefix> --snapshot <id>
|
||||
vaultik snapshot latest --bucket <bucket> --prefix <prefix>
|
||||
```
|
||||
|
||||
* `VAULTIK_PRIVATE_KEY` is required for `restore`, `prune`, `verify`, and
|
||||
`fetch` commands.
|
||||
* It is passed via environment variable containing the age private key.
|
||||
|
||||
---
|
||||
|
||||
## 7. Function and Method Signatures
|
||||
|
||||
### 7.1 CLI
|
||||
|
||||
```go
|
||||
func RootCmd() *cobra.Command
|
||||
func backupCmd() *cobra.Command
|
||||
func restoreCmd() *cobra.Command
|
||||
func pruneCmd() *cobra.Command
|
||||
func verifyCmd() *cobra.Command
|
||||
```
|
||||
|
||||
### 7.2 Configuration
|
||||
|
||||
```go
|
||||
type Config struct {
|
||||
BackupPubKey string // age recipient
|
||||
BackupInterval time.Duration // used in daemon mode, irrelevant for cron mode
|
||||
BlobSizeLimit int64 // default 10GB
|
||||
ChunkSize int64 // default 10MB
|
||||
Exclude []string // list of regex of files to exclude from backup, absolute path
|
||||
Hostname string
|
||||
IndexPath string // path to local SQLite index db, default /var/lib/vaultik/index.db
|
||||
MetadataPrefix string // S3 prefix for metadata, default "metadata/"
|
||||
MinTimeBetweenRun time.Duration // minimum time between backup runs, default 1 hour - for daemon mode
|
||||
S3 S3Config // S3 configuration
|
||||
ScanInterval time.Duration // interval to full stat() scan source dirs, default 24h
|
||||
SourceDirs []string // list of source directories to back up, absolute paths
|
||||
}
|
||||
|
||||
type S3Config struct {
|
||||
Endpoint string
|
||||
Bucket string
|
||||
Prefix string
|
||||
AccessKeyID string
|
||||
SecretAccessKey string
|
||||
Region string
|
||||
}
|
||||
|
||||
func Load(path string) (*Config, error)
|
||||
```
|
||||
|
||||
### 7.3 Index
|
||||
|
||||
```go
|
||||
type Index struct {
|
||||
db *sql.DB
|
||||
}
|
||||
|
||||
func OpenIndex(path string) (*Index, error)
|
||||
|
||||
func (ix *Index) LookupFile(path string, mtime int64, size int64) ([]string, bool, error)
|
||||
func (ix *Index) SaveFile(path string, mtime int64, size int64, chunkHashes []string) error
|
||||
func (ix *Index) AddChunk(chunkHash string, size int64) error
|
||||
func (ix *Index) MarkBlob(blobHash, finalHash string, created time.Time) error
|
||||
func (ix *Index) MapChunkToBlob(blobHash, chunkHash string, offset, length int64) error
|
||||
func (ix *Index) MapChunkToFile(chunkHash, filePath string, offset, length int64) error
|
||||
```
|
||||
|
||||
### 7.4 Blob Packing
|
||||
|
||||
```go
|
||||
type BlobWriter struct {
|
||||
// internal buffer, current size, encrypted writer, etc
|
||||
}
|
||||
|
||||
func NewBlobWriter(...) *BlobWriter
|
||||
func (bw *BlobWriter) AddChunk(chunk []byte, chunkHash string) error
|
||||
func (bw *BlobWriter) Flush() (finalBlobHash string, err error)
|
||||
```
|
||||
|
||||
### 7.5 Metadata
|
||||
|
||||
```go
|
||||
func BuildSnapshotMetadata(ix *Index, snapshotID string) (sqlitePath string, err error)
|
||||
func EncryptAndUploadMetadata(path string, cfg *Config, snapshotID string) error
|
||||
```
|
||||
|
||||
### 7.6 Prune
|
||||
|
||||
```go
|
||||
func RunPrune(bucket, prefix, privateKey string) error
|
||||
```
|
||||
|
||||
61
Dockerfile
Normal file
61
Dockerfile
Normal file
@@ -0,0 +1,61 @@
|
||||
# Lint stage
|
||||
# golangci/golangci-lint:v2.11.3-alpine, 2026-03-17
|
||||
FROM golangci/golangci-lint:v2.11.3-alpine@sha256:b1c3de5862ad0a95b4e45a993b0f00415835d687e4f12c845c7493b86c13414e AS lint
|
||||
|
||||
RUN apk add --no-cache make build-base
|
||||
|
||||
WORKDIR /src
|
||||
|
||||
# Copy go mod files first for better layer caching
|
||||
COPY go.mod go.sum ./
|
||||
RUN go mod download
|
||||
|
||||
# Copy source code
|
||||
COPY . .
|
||||
|
||||
# Run formatting check and linter
|
||||
RUN make fmt-check
|
||||
RUN make lint
|
||||
|
||||
# Build stage
|
||||
# golang:1.26.1-alpine, 2026-03-17
|
||||
FROM golang:1.26.1-alpine@sha256:2389ebfa5b7f43eeafbd6be0c3700cc46690ef842ad962f6c5bd6be49ed82039 AS builder
|
||||
|
||||
# Depend on lint stage passing
|
||||
COPY --from=lint /src/go.sum /dev/null
|
||||
|
||||
ARG VERSION=dev
|
||||
|
||||
# Install build dependencies for CGO (mattn/go-sqlite3) and sqlite3 CLI (tests)
|
||||
RUN apk add --no-cache make build-base sqlite
|
||||
|
||||
WORKDIR /src
|
||||
|
||||
# Copy go mod files first for better layer caching
|
||||
COPY go.mod go.sum ./
|
||||
RUN go mod download
|
||||
|
||||
# Copy source code
|
||||
COPY . .
|
||||
|
||||
# Run tests
|
||||
RUN make test
|
||||
|
||||
# Build the binary. NOTE(review): the command below sets CGO_ENABLED=0 (CGO disabled), but mattn/go-sqlite3 requires cgo — confirm which is intended.
|
||||
RUN CGO_ENABLED=0 go build -ldflags "-X 'sneak.berlin/go/vaultik/internal/globals.Version=${VERSION}' -X 'sneak.berlin/go/vaultik/internal/globals.Commit=$(git rev-parse HEAD 2>/dev/null || echo unknown)'" -o /vaultik ./cmd/vaultik
|
||||
|
||||
# Runtime stage
|
||||
# alpine:3.21, 2026-02-25
|
||||
FROM alpine:3.21@sha256:c3f8e73fdb79deaebaa2037150150191b9dcbfba68b4a46d70103204c53f4709
|
||||
|
||||
RUN apk add --no-cache ca-certificates sqlite
|
||||
|
||||
# Copy binary from builder
|
||||
COPY --from=builder /vaultik /usr/local/bin/vaultik
|
||||
|
||||
# Create non-root user
|
||||
RUN adduser -D -H -s /sbin/nologin vaultik
|
||||
|
||||
USER vaultik
|
||||
|
||||
ENTRYPOINT ["/usr/local/bin/vaultik"]
|
||||
80
Makefile
80
Makefile
@@ -1,62 +1,82 @@
|
||||
.PHONY: test fmt lint build clean all
|
||||
.PHONY: all check test lint fmt fmt-check build clean deps test-coverage test-integration local install release release-snapshot docker hooks
|
||||
|
||||
# Version number
|
||||
VERSION := 0.0.1
|
||||
VERSION := 1.0.0-rc.1
|
||||
|
||||
# Build variables
|
||||
GIT_REVISION := $(shell git rev-parse HEAD 2>/dev/null || echo "unknown")
|
||||
|
||||
# Linker flags
|
||||
LDFLAGS := -X 'git.eeqj.de/sneak/vaultik/internal/globals.Version=$(VERSION)' \
|
||||
-X 'git.eeqj.de/sneak/vaultik/internal/globals.Commit=$(GIT_REVISION)'
|
||||
LDFLAGS := -X 'sneak.berlin/go/vaultik/internal/globals.Version=$(VERSION)' \
|
||||
-X 'sneak.berlin/go/vaultik/internal/globals.Commit=$(GIT_REVISION)'
|
||||
|
||||
# Default target
|
||||
all: test
|
||||
all: vaultik
|
||||
|
||||
# Run tests
|
||||
test: lint fmt-check
|
||||
@echo "Running tests..."
|
||||
@if ! go test -v -timeout 10s ./... 2>&1; then \
|
||||
echo ""; \
|
||||
echo "TEST FAILURES DETECTED"; \
|
||||
echo "Run 'go test -v ./internal/database' to see database test details"; \
|
||||
exit 1; \
|
||||
fi
|
||||
# Combined pre-commit/CI gate: lint, format check, then tests.
|
||||
check: lint fmt-check test
|
||||
|
||||
# Check if code is formatted
|
||||
# Run tests only.
|
||||
test:
|
||||
go test -race -timeout 30s ./...
|
||||
|
||||
# Check if code is formatted (read-only).
|
||||
fmt-check:
|
||||
@if [ -n "$$(go fmt ./...)" ]; then \
|
||||
echo "Error: Code is not formatted. Run 'make fmt' to fix."; \
|
||||
exit 1; \
|
||||
fi
|
||||
@test -z "$$(gofmt -l .)" || (echo "Files not formatted:" && gofmt -l . && exit 1)
|
||||
|
||||
# Format code
|
||||
# Format code.
|
||||
fmt:
|
||||
go fmt ./...
|
||||
|
||||
# Run linter
|
||||
# Run linter only.
|
||||
lint:
|
||||
golangci-lint run
|
||||
golangci-lint run ./...
|
||||
|
||||
# Build binary
|
||||
build:
|
||||
go build -ldflags "$(LDFLAGS)" -o vaultik ./cmd/vaultik
|
||||
# Build binary.
|
||||
vaultik: internal/*/*.go cmd/vaultik/*.go
|
||||
go build -ldflags "$(LDFLAGS)" -o $@ ./cmd/vaultik
|
||||
|
||||
# Clean build artifacts
|
||||
# Clean build artifacts.
|
||||
clean:
|
||||
rm -f vaultik
|
||||
go clean
|
||||
|
||||
# Install dependencies
|
||||
# Install dependencies.
|
||||
deps:
|
||||
go mod download
|
||||
go install github.com/golangci/golangci-lint/cmd/golangci-lint@latest
|
||||
|
||||
# Run tests with coverage
|
||||
# Run tests with coverage.
|
||||
test-coverage:
|
||||
go test -v -coverprofile=coverage.out ./...
|
||||
go tool cover -html=coverage.out -o coverage.html
|
||||
|
||||
# Run integration tests
|
||||
# Run integration tests.
|
||||
test-integration:
|
||||
go test -v -tags=integration ./...
|
||||
go test -v -tags=integration ./...
|
||||
|
||||
local:
|
||||
VAULTIK_CONFIG=$(HOME)/etc/vaultik/config.yml ./vaultik snapshot --debug list 2>&1
|
||||
VAULTIK_CONFIG=$(HOME)/etc/vaultik/config.yml ./vaultik snapshot --debug create 2>&1
|
||||
|
||||
install: vaultik
|
||||
cp ./vaultik $(HOME)/bin/
|
||||
|
||||
# Build and publish release artifacts (linux/darwin × amd64/arm64) via goreleaser.
|
||||
release:
|
||||
goreleaser release --clean
|
||||
|
||||
# Dry-run a release build without publishing or tagging.
|
||||
release-snapshot:
|
||||
goreleaser release --clean --snapshot
|
||||
|
||||
# Build Docker image.
|
||||
docker:
|
||||
docker build -t vaultik .
|
||||
|
||||
# Install pre-commit hook.
|
||||
hooks:
|
||||
@printf '#!/bin/sh\nset -e\n' > .git/hooks/pre-commit
|
||||
@printf 'go mod tidy\ngo fmt ./...\ngit diff --exit-code -- go.mod go.sum || { echo "go mod tidy changed files; please stage and retry"; exit 1; }\n' >> .git/hooks/pre-commit
|
||||
@printf 'make check\n' >> .git/hooks/pre-commit
|
||||
@chmod +x .git/hooks/pre-commit
|
||||
|
||||
570
README.md
570
README.md
@@ -1,120 +1,84 @@
|
||||
# vaultik (ваултик)
|
||||
|
||||
`vaultik` is a incremental backup daemon written in Go. It
|
||||
encrypts data using an `age` public key and uploads each encrypted blob
|
||||
directly to a remote S3-compatible object store. It requires no private
|
||||
keys, secrets, or credentials stored on the backed-up system.
|
||||
`vaultik` is an incremental backup tool written in Go. It encrypts data
|
||||
using an `age` public key and uploads each encrypted blob directly to a
|
||||
remote S3-compatible object store. It requires no private keys, secrets, or
|
||||
credentials (other than those required to PUT to encrypted object storage,
|
||||
such as S3 API keys) stored on the backed-up system.
|
||||
|
||||
It includes table-stakes features such as:
|
||||
## quickstart
|
||||
|
||||
* modern authenticated encryption
|
||||
* deduplication
|
||||
* incremental backups
|
||||
* modern multithreaded zstd compression with configurable levels
|
||||
```sh
|
||||
# install
|
||||
go install sneak.berlin/go/vaultik/cmd/vaultik@latest
|
||||
|
||||
# create a default config file (prints the path it wrote to)
|
||||
vaultik config init
|
||||
|
||||
# generate an age keypair; keep the private key file somewhere safe and
|
||||
# offline — you need it to restore, and the backed-up machine does not need it
|
||||
age-keygen -o vaultik_backup_private_key.txt
|
||||
grep 'public key' vaultik_backup_private_key.txt
|
||||
|
||||
# configure the encryption key and backup destination
|
||||
vaultik config set age_recipients.0 age1YOUR_PUBLIC_KEY_HERE
|
||||
vaultik config set storage_url "file:///Volumes/usbstick/mybackup"
|
||||
|
||||
# back up your home directory (the default config includes a "home"
|
||||
# snapshot of ~ with sensible excludes)
|
||||
vaultik snapshot create
|
||||
|
||||
# see what you have
|
||||
vaultik snapshot list
|
||||
```
|
||||
|
||||
Features:
|
||||
|
||||
* modern encryption ([age](https://age-encryption.org/), X25519 + XChaCha20-Poly1305)
|
||||
* content-defined chunking with deduplication (FastCDC)
|
||||
* incremental backups (only changed files are re-chunked)
|
||||
* multithreaded zstd compression at configurable levels
|
||||
* content-addressed immutable storage
|
||||
* local state tracking in standard SQLite database
|
||||
* inotify-based change detection
|
||||
* streaming processing of all data to not require lots of ram or temp file
|
||||
storage
|
||||
* local state tracking in SQLite (enables write-only incremental backups)
|
||||
* no mutable remote metadata
|
||||
* no plaintext file paths or metadata stored in remote
|
||||
* does not create huge numbers of small files (to keep S3 operation counts
|
||||
down) even if the source system has many small files
|
||||
|
||||
## what
|
||||
|
||||
`vaultik` walks a set of configured directories and builds a
|
||||
content-addressable chunk map of changed files using deterministic chunking.
|
||||
Each chunk is streamed into a blob packer. Blobs are compressed with `zstd`,
|
||||
encrypted with `age`, and uploaded directly to remote storage under a
|
||||
content-addressed S3 path.
|
||||
|
||||
No plaintext file contents ever hit disk. No private key or secret
|
||||
passphrase is needed or stored locally. All encrypted data is
|
||||
streaming-processed and immediately discarded once uploaded. Metadata is
|
||||
encrypted and pushed with the same mechanism.
|
||||
* no plaintext file paths or metadata in remote storage
|
||||
* packs small files into large blobs (keeps S3 operation counts down)
|
||||
* backs up regular files, symlinks, empty directories, and file permissions
|
||||
* pluggable storage backends: S3, local filesystem, rclone (70+ providers)
|
||||
* pure Go (no CGO), cross-compiles to linux/darwin × amd64/arm64
|
||||
|
||||
## why
|
||||
|
||||
Existing backup software fails under one or more of these conditions:
|
||||
Other backup tools like `restic`, `borg`, and `duplicity` are designed for
|
||||
environments where the source host can store secrets and has access to
|
||||
decryption keys. `vaultik` is for environments where you don't want to
|
||||
store backup decryption keys on your hosts — only public keys for
|
||||
encryption.
|
||||
|
||||
* Requires secrets (passwords, private keys) on the source system, which
|
||||
compromises encrypted backups in the case of host system compromise
|
||||
* Depends on symmetric encryption unsuitable for zero-trust environments
|
||||
* Creates one-blob-per-file, which results in excessive S3 operation counts
|
||||
Requirements that no existing tool meets:
|
||||
|
||||
`vaultik` addresses these by using:
|
||||
* open source
|
||||
* no passphrases or private keys on the source host
|
||||
* incremental
|
||||
* compressed
|
||||
* encrypted
|
||||
* s3 compatible without an intermediate step or tool
|
||||
|
||||
* Public-key-only encryption (via `age`) requires no secrets (other than
|
||||
remote storage api key) on the source system
|
||||
* Local state cache for incremental detection does not require reading from
|
||||
or decrypting remote storage
|
||||
* Content-addressed immutable storage allows efficient deduplication
|
||||
* Storage only of large encrypted blobs of configurable size (1G by default)
|
||||
reduces S3 operation counts and improves performance
|
||||
## daily use
|
||||
|
||||
## how
|
||||
```sh
|
||||
# verify a snapshot (shallow: checks all blobs exist)
|
||||
vaultik snapshot verify <snapshot-id>
|
||||
|
||||
1. **install**
|
||||
# deep verify (downloads and cryptographically verifies every blob)
|
||||
VAULTIK_AGE_SECRET_KEY='AGE-SECRET-KEY-...' vaultik snapshot verify --deep <snapshot-id>
|
||||
|
||||
```sh
|
||||
go install git.eeqj.de/sneak/vaultik@latest
|
||||
```
|
||||
# restore (requires the private key)
|
||||
VAULTIK_AGE_SECRET_KEY='AGE-SECRET-KEY-...' vaultik restore <snapshot-id> /tmp/restored
|
||||
|
||||
2. **generate keypair**
|
||||
|
||||
```sh
|
||||
age-keygen -o agekey.txt
|
||||
grep 'public key:' agekey.txt
|
||||
```
|
||||
|
||||
3. **write config**
|
||||
|
||||
```yaml
|
||||
source_dirs:
|
||||
- /etc
|
||||
- /home/user/data
|
||||
exclude:
|
||||
- '*.log'
|
||||
- '*.tmp'
|
||||
age_recipient: age1278m9q7dp3chsh2dcy82qk27v047zywyvtxwnj4cvt0z65jw6a7q5dqhfj
|
||||
s3:
|
||||
# endpoint is optional if using AWS S3, but who even does that?
|
||||
endpoint: https://s3.example.com
|
||||
bucket: vaultik-data
|
||||
prefix: host1/
|
||||
access_key_id: ...
|
||||
secret_access_key: ...
|
||||
region: us-east-1
|
||||
backup_interval: 1h # only used in daemon mode, not for --cron mode
|
||||
full_scan_interval: 24h # normally we use inotify to mark dirty, but
|
||||
# every 24h we do a full stat() scan
|
||||
min_time_between_run: 15m # again, only for daemon mode
|
||||
#index_path: /var/lib/vaultik/index.sqlite
|
||||
chunk_size: 10MB
|
||||
blob_size_limit: 10GB
|
||||
```
|
||||
|
||||
4. **run**
|
||||
|
||||
```sh
|
||||
vaultik --config /etc/vaultik.yaml snapshot create
|
||||
```
|
||||
|
||||
```sh
|
||||
vaultik --config /etc/vaultik.yaml snapshot create --cron # silent unless error
|
||||
```
|
||||
|
||||
```sh
|
||||
vaultik --config /etc/vaultik.yaml snapshot daemon # runs continuously in foreground, uses inotify to detect changes
|
||||
|
||||
# TODO
|
||||
* make sure daemon mode does not make a snapshot if no files have
|
||||
changed, even if the backup_interval has passed
|
||||
* in daemon mode, if we are long enough since the last snapshot event, and we get
|
||||
an inotify event, we should schedule the next snapshot creation for 10 minutes from the
|
||||
time of the mark-dirty event.
|
||||
```
|
||||
# daily cron job: back up, keep a 4-week rolling window of snapshots
|
||||
# 0 3 * * * vaultik snapshot create --cron --prune --keep-newer-than 4w
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
@@ -123,184 +87,328 @@ Existing backup software fails under one or more of these conditions:
|
||||
### commands
|
||||
|
||||
```sh
|
||||
vaultik [--config <path>] snapshot create [--cron] [--daemon]
|
||||
vaultik [--config <path>] config init
|
||||
vaultik [--config <path>] config edit
|
||||
vaultik [--config <path>] config get <key>
|
||||
vaultik [--config <path>] config set <key> <value>
|
||||
vaultik [--config <path>] snapshot create [snapshot-names...] [--cron] [--prune] [--keep-newer-than <duration>] [--skip-errors]
|
||||
vaultik [--config <path>] snapshot list [--json]
|
||||
vaultik [--config <path>] snapshot purge [--keep-latest | --older-than <duration>] [--force]
|
||||
vaultik [--config <path>] snapshot verify <snapshot-id> [--deep]
|
||||
vaultik [--config <path>] snapshot verify <snapshot-id> [--deep] [--json]
|
||||
vaultik [--config <path>] snapshot purge [--keep-latest | --older-than <duration>] [--snapshot <name>...] [--force]
|
||||
vaultik [--config <path>] snapshot remove <snapshot-id|--all> [--dry-run] [--force] [--remote] [--json]
|
||||
vaultik [--config <path>] snapshot prune
|
||||
vaultik [--config <path>] snapshot cleanup
|
||||
vaultik [--config <path>] restore <snapshot-id> <target-dir> [paths...] [--verify]
|
||||
vaultik [--config <path>] prune [--force] [--json]
|
||||
vaultik [--config <path>] info
|
||||
vaultik [--config <path>] remote info [--json]
|
||||
vaultik [--config <path>] store info
|
||||
# FIXME: remove 'bucket' and 'prefix' and 'snapshot' flags. it should be
|
||||
# 'vaultik restore snapshot <snapshot> --target <dir>'. bucket and prefix are always
|
||||
# from config file.
|
||||
vaultik restore --bucket <bucket> --prefix <prefix> --snapshot <id> --target <dir>
|
||||
# FIXME: remove prune, it's the old version of "snapshot purge"
|
||||
vaultik prune --bucket <bucket> --prefix <prefix> [--dry-run]
|
||||
# FIXME: change fetch to 'vaultik restore path <snapshot> <path> --target <path>'
|
||||
vaultik fetch --bucket <bucket> --prefix <prefix> --snapshot <id> --file <path> --target <path>
|
||||
# FIXME: remove this, it's redundant with 'snapshot verify'
|
||||
vaultik verify --bucket <bucket> --prefix <prefix> [--snapshot <id>] [--quick]
|
||||
vaultik [--config <path>] database purge [--force]
|
||||
vaultik completion <bash|zsh|fish|powershell>
|
||||
vaultik version
|
||||
```
|
||||
|
||||
### environment
|
||||
### global flags
|
||||
|
||||
* `VAULTIK_PRIVATE_KEY`: Required for `restore`, `prune`, `fetch`, and `verify` commands. Contains the age private key for decryption.
|
||||
* `VAULTIK_CONFIG`: Optional path to config file. If set, config file path doesn't need to be specified on the command line.
|
||||
* `--config <path>`: Path to config file (default: `$VAULTIK_CONFIG`, then platform config dir, then `/etc/vaultik/config.yml`)
|
||||
* `--verbose`, `-v`: Enable verbose output
|
||||
* `--debug`: Enable debug output
|
||||
* `--quiet`, `-q`: Suppress non-error output
|
||||
|
||||
### environment variables
|
||||
|
||||
* `VAULTIK_AGE_SECRET_KEY`: Age private key for decryption (required for `restore` and `verify --deep`)
|
||||
* `VAULTIK_CONFIG`: Path to config file (overridden by `--config`)
|
||||
* `VAULTIK_INDEX_PATH`: Override local SQLite index path
|
||||
|
||||
### shell completion
|
||||
|
||||
```sh
|
||||
# zsh: load for the current session
|
||||
source <(vaultik completion zsh)
|
||||
|
||||
# zsh: install permanently
|
||||
vaultik completion zsh > "${fpath[1]}/_vaultik"
|
||||
|
||||
# bash: load for the current session
|
||||
source <(vaultik completion bash)
|
||||
|
||||
# bash: install permanently (Linux)
|
||||
vaultik completion bash > /etc/bash_completion.d/vaultik
|
||||
|
||||
# fish
|
||||
vaultik completion fish > ~/.config/fish/completions/vaultik.fish
|
||||
```
|
||||
|
||||
### command details
|
||||
|
||||
**snapshot create**: Perform incremental backup of configured directories
|
||||
* Config is located at `/etc/vaultik/config.yml` by default
|
||||
* `--cron`: Silent unless error (for crontab)
|
||||
* `--daemon`: Run continuously with inotify monitoring and periodic scans
|
||||
**`config init`**: Write a default config file with commented explanations for
|
||||
every setting. Writes to the path from `--config`, `$VAULTIK_CONFIG`, or the
|
||||
platform config directory (`~/Library/Application Support/vaultik/` on macOS,
|
||||
`~/.config/vaultik/` on Linux, `/etc/vaultik/` as root). Refuses to overwrite an
|
||||
existing file. Created with mode `0600` since it will contain credentials.
|
||||
|
||||
**snapshot list**: List all snapshots with their timestamps and sizes
|
||||
**`config edit`**: Open the config file in `$EDITOR` (falls back to `vi`).
|
||||
|
||||
**`config get`**: Print a config value addressed by dotted YAML path
|
||||
(e.g. `vaultik config get s3.bucket`). Non-scalar values print as YAML.
|
||||
|
||||
**`config set`**: Set a scalar config value by dotted YAML path
|
||||
(e.g. `vaultik config set compression_level 9`). Comments and formatting
|
||||
in the file are preserved; intermediate maps are created as needed.
|
||||
|
||||
**`snapshot create`**: Perform incremental backup of configured snapshots.
|
||||
* Optional snapshot names argument to create specific snapshots (default: all)
|
||||
* `--cron`: Silent unless error (for crontab)
|
||||
* `--prune`: After backup, drop older snapshots of each backed-up name and
|
||||
remove orphaned blobs from remote storage. By default keeps only the latest
|
||||
snapshot per name; use `--keep-newer-than` for a rolling window.
|
||||
* `--keep-newer-than <duration>`: With `--prune`, keep snapshots newer than
|
||||
this duration instead of only the latest (e.g. `4w`, `30d`, `6mo`, `1y`)
|
||||
* `--skip-errors`: Skip file read errors (log them loudly but continue)
|
||||
|
||||
**`snapshot list`**: List all snapshots with their timestamps and sizes.
|
||||
* `--json`: Output in JSON format
|
||||
|
||||
**snapshot purge**: Remove old snapshots based on criteria
|
||||
* `--keep-latest`: Keep only the most recent snapshot
|
||||
* `--older-than`: Remove snapshots older than duration (e.g., 30d, 6mo, 1y)
|
||||
**`snapshot verify`**: Verify snapshot integrity.
|
||||
* Default (shallow): checks that all blobs referenced in the manifest exist in storage
|
||||
* `--deep`: Downloads and decrypts each blob, verifies chunk hashes against the
|
||||
encrypted metadata database
|
||||
* `--json`: Output results as JSON
|
||||
|
||||
**`snapshot purge`**: Remove old snapshots based on criteria. Retention is
|
||||
per-snapshot-name (`--keep-latest` keeps the latest of each name, not the
|
||||
latest globally).
|
||||
* `--keep-latest`: Keep only the most recent snapshot of each name
|
||||
* `--older-than <duration>`: Remove snapshots older than duration (e.g. `30d`, `6mo`, `1y`)
|
||||
* `--snapshot <name>`: Restrict to specific snapshot names (repeat for multiple)
|
||||
* `--force`: Skip confirmation prompt
|
||||
|
||||
**snapshot verify**: Verify snapshot integrity
|
||||
* `--deep`: Download and verify blob hashes (not just existence)
|
||||
**`snapshot remove`**: Remove a specific snapshot from the local database.
|
||||
* `--remote`: Also remove snapshot metadata from remote storage
|
||||
* `--all`: Remove all snapshots (requires `--force`)
|
||||
* `--dry-run`: Show what would be deleted without deleting
|
||||
* `--force`: Skip confirmation prompt
|
||||
* `--json`: Output result as JSON
|
||||
|
||||
**store info**: Display S3 bucket configuration and storage statistics
|
||||
**`snapshot prune`**: Clean orphaned data from the local database (files,
|
||||
chunks, blobs not referenced by any snapshot).
|
||||
|
||||
**restore**: Restore entire snapshot to target directory
|
||||
* Downloads and decrypts metadata
|
||||
* Fetches only required blobs
|
||||
* Reconstructs directory structure
|
||||
**`snapshot cleanup`**: Remove stale local snapshot records that have no
|
||||
corresponding metadata in remote storage. These are typically left behind
|
||||
by incomplete or interrupted backups. Does not touch remote storage.
|
||||
|
||||
**prune**: Remove unreferenced blobs from storage
|
||||
* Requires private key
|
||||
* Downloads latest snapshot metadata
|
||||
* Deletes orphaned blobs
|
||||
**`restore`**: Restore files from a backup snapshot.
|
||||
* Requires `VAULTIK_AGE_SECRET_KEY` environment variable
|
||||
* Optional path arguments to restore specific files/directories (default: all)
|
||||
* Preserves file permissions, timestamps, ownership (ownership requires root),
|
||||
symlinks, and empty directories
|
||||
* `--verify`: After restoring, verify every file's chunk hashes match
|
||||
|
||||
**fetch**: Extract single file from backup
|
||||
* Retrieves specific file without full restore
|
||||
* Supports extracting to different filename
|
||||
**`prune`**: Remove unreferenced blobs from remote storage.
|
||||
* Scans all snapshot manifests for referenced blobs, deletes any blob not referenced
|
||||
* `--force`: Skip confirmation prompt
|
||||
* `--json`: Output stats as JSON
|
||||
|
||||
**verify**: Validate backup integrity
|
||||
* Checks metadata hash
|
||||
* Verifies all referenced blobs exist
|
||||
* Default: Downloads blobs and validates chunk integrity
|
||||
* `--quick`: Only checks blob existence and S3 content hashes
|
||||
**`info`**: Display system configuration, storage settings, encryption
|
||||
recipients, and local database statistics.
|
||||
|
||||
**`remote info`**: Show detailed remote storage information including per-snapshot
|
||||
metadata sizes, blob counts, and orphaned blob detection.
|
||||
* `--json`: Output as JSON
|
||||
|
||||
**`store info`**: Display storage backend type and statistics.
|
||||
|
||||
**`database purge`**: Delete the local SQLite state database entirely. Remote
|
||||
storage is unaffected; the next backup will do a full scan and re-deduplicate
|
||||
against existing remote blobs.
|
||||
* `--force`: Skip confirmation prompt
|
||||
|
||||
---
|
||||
|
||||
## storage backends
|
||||
|
||||
vaultik supports three storage backends, selected via the `storage_url` config field:
|
||||
|
||||
**S3** (`s3://bucket/prefix?endpoint=host&region=us-east-1`): Any S3-compatible
|
||||
object store. Credentials are read from `s3.access_key_id` and
|
||||
`s3.secret_access_key` in the config file.
|
||||
|
||||
**Local filesystem** (`file:///path/to/backup`): Stores blobs and metadata on
|
||||
a local or mounted filesystem. Useful for testing or backing up to a NAS.
|
||||
|
||||
**Rclone** (`rclone://remote/path`): Uses rclone's 70+ supported cloud
|
||||
providers. Requires rclone to be configured separately (`rclone config`).
|
||||
|
||||
Legacy S3 configuration via `s3.*` fields (endpoint, bucket, prefix, etc.) is
|
||||
still supported for backward compatibility. `storage_url` takes precedence if
|
||||
both are set.
|
||||
|
||||
---
|
||||
|
||||
## architecture
|
||||
|
||||
### chunking
|
||||
### remote storage layout
|
||||
|
||||
* Content-defined chunking using rolling hash (Rabin fingerprint)
|
||||
* Average chunk size: 10MB (configurable)
|
||||
* Deduplication at chunk level
|
||||
* Multiple chunks packed into blobs for efficiency
|
||||
```
|
||||
<bucket>/<prefix>/
|
||||
├── blobs/
|
||||
│ └── <aa>/<bb>/<full_blob_hash>
|
||||
└── metadata/
|
||||
└── <snapshot_id>/
|
||||
├── db.zst.age # Encrypted binary SQLite database
|
||||
└── manifest.json.zst # Unencrypted blob list (for pruning)
|
||||
```
|
||||
|
||||
* Blobs are sharded into a two-level directory tree using the first 4 hex chars of the blob hash
|
||||
* `db.zst.age` is a binary SQLite database (zstd compressed, age encrypted)
|
||||
containing all file metadata, chunk mappings, and relationships for the snapshot
|
||||
* `manifest.json.zst` is an unencrypted compressed JSON blob list, enabling
|
||||
pruning without the private key
|
||||
|
||||
Snapshot IDs follow the format `<hostname>_<snapshot-name>_<RFC3339-timestamp>`
|
||||
(e.g. `server1_home_2025-06-01T12:00:00Z`).
|
||||
|
||||
### data flow
|
||||
|
||||
**backup:**
|
||||
|
||||
1. Open local SQLite index, load known files and chunks into memory
|
||||
2. Walk source directories, compare mtime/size/mode against index
|
||||
3. For changed/new files: chunk using content-defined chunking (FastCDC)
|
||||
4. For symlinks and directories: record metadata (no chunking)
|
||||
5. For each chunk: hash, check dedup, add to blob packer
|
||||
6. When blob reaches size threshold: compress (zstd), encrypt (age), upload
|
||||
7. Build snapshot metadata database, compress, encrypt, upload
|
||||
8. Create unencrypted blob manifest for pruning support
|
||||
|
||||
**restore:**
|
||||
|
||||
1. Download and decrypt `metadata/<snapshot_id>/db.zst.age`
|
||||
2. Open the binary SQLite database
|
||||
3. Query files (optionally filtered by paths)
|
||||
4. Download and decrypt required blobs
|
||||
5. Extract chunks, reconstruct files
|
||||
6. Restore permissions, timestamps, ownership, symlinks
|
||||
|
||||
**prune:**
|
||||
|
||||
1. List all snapshot manifests
|
||||
2. Build set of all referenced blob hashes
|
||||
3. List all blobs in storage
|
||||
4. Delete any blob not in the referenced set
|
||||
|
||||
### chunking and deduplication
|
||||
|
||||
* Content-defined chunking using the FastCDC algorithm
|
||||
* Average chunk size: configurable (default 10MB)
|
||||
* Deduplication at file level (unchanged files skipped) and chunk level
|
||||
(identical chunks across files stored once)
|
||||
* Multiple chunks packed into blobs to reduce object count
|
||||
|
||||
### encryption
|
||||
|
||||
* Asymmetric encryption using age (X25519 + XChaCha20-Poly1305)
|
||||
* Only public key needed on source host
|
||||
* Each blob encrypted independently
|
||||
* Metadata databases also encrypted
|
||||
* Only the public key is needed on the source host
|
||||
* Each blob and each metadata database is encrypted independently
|
||||
* Multiple recipients supported (encrypt to multiple keys)
|
||||
|
||||
### storage
|
||||
### compression
|
||||
|
||||
* Content-addressed blob storage
|
||||
* Immutable append-only design
|
||||
* Two-level directory sharding for blobs (aa/bb/hash)
|
||||
* Compressed with zstd before encryption
|
||||
|
||||
### state tracking
|
||||
|
||||
* Local SQLite database for incremental state
|
||||
* Tracks file mtimes and chunk mappings
|
||||
* Enables efficient change detection
|
||||
* Supports inotify monitoring in daemon mode
|
||||
|
||||
## does not
|
||||
|
||||
* Store any secrets on the backed-up machine
|
||||
* Require mutable remote metadata
|
||||
* Use tarballs, restic, rsync, or ssh
|
||||
* Require a symmetric passphrase or password
|
||||
* Trust the source system with anything
|
||||
* zstd compression at configurable level (1-19, default 3)
|
||||
* Applied before encryption at the blob level
|
||||
|
||||
---
|
||||
|
||||
## does
|
||||
## configuration reference
|
||||
|
||||
* Incremental deduplicated backup
|
||||
* Blob-packed chunk encryption
|
||||
* Content-addressed immutable blobs
|
||||
* Public-key encryption only
|
||||
* SQLite-based local and snapshot metadata
|
||||
* Fully stream-processed storage
|
||||
Run `vaultik config init` to generate a fully commented config file.
|
||||
Key fields:
|
||||
|
||||
| Field | Default | Description |
|
||||
|-------|---------|-------------|
|
||||
| `age_recipients` | (required) | Age public keys for encryption |
|
||||
| `snapshots` | (required) | Named snapshot definitions with paths and excludes |
|
||||
| `storage_url` | | Storage backend URL (`s3://`, `file://`, `rclone://`) |
|
||||
| `s3.*` | | Legacy S3 configuration (endpoint, bucket, credentials) |
|
||||
| `exclude` | | Global exclude patterns (applied to all snapshots) |
|
||||
| `chunk_size` | `10MB` | Average chunk size for content-defined chunking |
|
||||
| `blob_size_limit` | `10GB` | Maximum blob size before splitting |
|
||||
| `compression_level` | `3` | zstd compression level (1-19) |
|
||||
| `hostname` | system hostname | Hostname used in snapshot IDs |
|
||||
| `index_path` | platform data dir | Local SQLite index path |
|
||||
|
||||
---
|
||||
|
||||
## restore
|
||||
## limitations
|
||||
|
||||
`vaultik restore` downloads only the snapshot metadata and required blobs. It
|
||||
never contacts the source system. All restore operations depend only on:
|
||||
|
||||
* `VAULTIK_PRIVATE_KEY`
|
||||
* The bucket
|
||||
|
||||
The entire system is restore-only from object storage.
|
||||
* **No extended attributes (xattrs).** ACLs, macOS Finder metadata,
|
||||
quarantine flags, SELinux labels, and other extended attributes are not
|
||||
backed up or restored.
|
||||
* **No hard link detection.** Two hard links to the same inode are backed
|
||||
up as independent files. Content deduplication means the data is stored
|
||||
once, but the hard link relationship is lost on restore.
|
||||
* **No sparse file support.** Sparse files are fully materialized during
|
||||
backup. A 100 GB sparse VM disk that is mostly zeros will consume the
|
||||
full (compressed) size in storage.
|
||||
* **No bandwidth limiting.** Uploads and downloads use whatever bandwidth
|
||||
is available. There is no `--bwlimit` flag yet.
|
||||
* **No parallel blob downloads during restore.** Blobs are fetched
|
||||
sequentially. Restore speed is bound by single-stream throughput.
|
||||
* **Device nodes, named pipes, and sockets are silently skipped.** Only
|
||||
regular files, directories, and symlinks are backed up.
|
||||
* **No database migrations.** If the local SQLite schema changes between
|
||||
versions, delete the local database (`vaultik database purge`) and run
|
||||
a full backup. Remote storage is unaffected.
|
||||
* **Files that change during backup may be inconsistent.** There is no
|
||||
filesystem snapshot or freeze. If a file is modified between the scan
|
||||
and chunk phases, the backed-up copy may reflect a partial write.
|
||||
* **Ownership restoration requires root.** File uid/gid are recorded
|
||||
and restored, but `chown` requires elevated privileges. Without root,
|
||||
files are restored with the current user's ownership.
|
||||
|
||||
---
|
||||
|
||||
## features
|
||||
## roadmap
|
||||
|
||||
### daemon mode
|
||||
Items for future releases:
|
||||
|
||||
* Continuous background operation
|
||||
* inotify-based change detection
|
||||
* Respects `backup_interval` and `min_time_between_run`
|
||||
* Full scan every `full_scan_interval` (default 24h)
|
||||
|
||||
### cron mode
|
||||
|
||||
* Single backup run
|
||||
* Silent output unless errors
|
||||
* Ideal for scheduled backups
|
||||
|
||||
### metadata integrity
|
||||
|
||||
* SHA256 hash of metadata stored separately
|
||||
* Encrypted hash file for verification
|
||||
* Chunked metadata support for large filesystems
|
||||
|
||||
### exclusion patterns
|
||||
|
||||
* Glob-based file exclusion
|
||||
* Configured in YAML
|
||||
* Applied during directory walk
|
||||
|
||||
## prune
|
||||
|
||||
Run `vaultik prune` on a machine with the private key. It:
|
||||
|
||||
* Downloads the most recent snapshot
|
||||
* Decrypts metadata
|
||||
* Lists referenced blobs
|
||||
* Deletes any blob in the bucket not referenced
|
||||
|
||||
This enables garbage collection from immutable storage.
|
||||
|
||||
---
|
||||
|
||||
## LICENSE
|
||||
|
||||
[MIT](https://opensource.org/license/mit/)
|
||||
* Error-condition tests (network failures, disk full, corrupted/missing blobs)
|
||||
* Parallel blob downloads during restore
|
||||
* Bandwidth limiting (`--bwlimit`)
|
||||
* Security audit of encryption implementation
|
||||
* Man pages and richer `--help` examples
|
||||
|
||||
---
|
||||
|
||||
## requirements
|
||||
|
||||
* Go 1.24.4 or later
|
||||
* S3-compatible object storage
|
||||
* Sufficient disk space for local index (typically <1GB)
|
||||
* Go 1.26 or later
|
||||
* S3-compatible object storage (or local filesystem, or rclone remote)
|
||||
|
||||
## development workflow
|
||||
|
||||
All changes follow this workflow. No exceptions.
|
||||
|
||||
1. Create a feature branch off `main`.
|
||||
2. Write tests.
|
||||
3. Write the implementation.
|
||||
4. Fix implementation errors until it compiles and tests pass.
|
||||
5. Fix linting errors (`make lint`).
|
||||
6. Update documentation and README as required by the change.
|
||||
7. Format code (`make fmt`).
|
||||
8. Run `make check` (lint + fmt-check + test). Fix any issues. Repeat until clean.
|
||||
9. Commit on the branch.
|
||||
10. Merge to `main`.
|
||||
11. Push.
|
||||
|
||||
Do not commit directly to `main`. Do not skip steps.
|
||||
|
||||
Repository policies for AI agents are in [`AGENTS.md`](AGENTS.md).
|
||||
|
||||
## license
|
||||
|
||||
[MIT](https://opensource.org/license/mit/)
|
||||
|
||||
## author
|
||||
|
||||
|
||||
@@ -1,86 +0,0 @@
|
||||
# TODO: Implement Verify Command
|
||||
|
||||
## Overview
|
||||
Implement the `verify` command to check snapshot integrity. Both shallow and deep verification require the age_secret_key from config to decrypt the database index.
|
||||
|
||||
## Implementation Steps
|
||||
|
||||
### 1. Update Config Structure
|
||||
- Add `AgeSecretKey string` field to the Config struct in `internal/config/config.go`
|
||||
- Add corresponding `age_secret_key` YAML tag
|
||||
- Ensure the field is properly loaded from config file
|
||||
|
||||
### 2. Remove Command Line Flags
|
||||
- Remove --bucket, --prefix, and --snapshot flags from:
|
||||
- `internal/cli/verify.go`
|
||||
- `internal/cli/restore.go`
|
||||
- `internal/cli/fetch.go`
|
||||
- Update all commands to use bucket/prefix from config instead of flags
|
||||
- Update verify command to take snapshot ID as first positional argument
|
||||
|
||||
### 3. Implement Shallow Verification
|
||||
**Requires age_secret_key from config**
|
||||
|
||||
1. Download from S3:
|
||||
- `metadata/{snapshot-id}/manifest.json.zst`
|
||||
- `metadata/{snapshot-id}/db.zst.age`
|
||||
|
||||
2. Process files:
|
||||
- Decompress manifest (not encrypted)
|
||||
- Decrypt db.zst.age using age_secret_key
|
||||
- Decompress decrypted database
|
||||
- Load SQLite database from dump
|
||||
|
||||
3. Verify integrity:
|
||||
- Query snapshot_blobs table for all blobs in this snapshot
|
||||
- Compare DB blob list against manifest blob list
|
||||
- **FAIL IMMEDIATELY** if lists don't match exactly
|
||||
|
||||
4. For each blob in manifest:
|
||||
- Use S3 HeadObject to check existence
|
||||
- **FAIL IMMEDIATELY** if blob is missing
|
||||
- Verify blob hash matches filename
|
||||
- **FAIL IMMEDIATELY** if hash mismatch
|
||||
|
||||
5. Only report success if ALL checks pass
|
||||
|
||||
### 4. Implement Deep Verification
|
||||
**Requires age_secret_key from config**
|
||||
|
||||
1. Run all shallow verification first (fail on any error)
|
||||
|
||||
2. For each blob referenced in snapshot:
|
||||
- Download blob from S3
|
||||
- Decrypt using age_secret_key (streaming)
|
||||
- Decompress (streaming)
|
||||
- Parse blob structure to extract chunks
|
||||
|
||||
3. For each chunk in blob:
|
||||
- Calculate SHA256 of chunk data
|
||||
- Query database for expected chunk hash
|
||||
- **FAIL IMMEDIATELY** if calculated != expected
|
||||
- Verify chunks are ordered correctly by offset
|
||||
- **FAIL IMMEDIATELY** if chunks out of order
|
||||
|
||||
4. Progress reporting:
|
||||
- Show blob-by-blob progress
|
||||
- Show chunk verification within each blob
|
||||
- But continue only if no errors
|
||||
|
||||
5. Only report success if ALL blobs and ALL chunks verify
|
||||
|
||||
### 5. Error Handling
|
||||
|
||||
- **FAIL IMMEDIATELY** if age_secret_key missing from config
|
||||
- **FAIL IMMEDIATELY** on decryption failure
|
||||
- **FAIL IMMEDIATELY** on any verification mismatch
|
||||
- Use log.Fatal() or return error to ensure non-zero exit code
|
||||
- Provide clear error messages indicating exactly what failed
|
||||
|
||||
## Success Criteria
|
||||
|
||||
- Verify command exits with code 0 only if ALL checks pass
|
||||
- Any failure results in non-zero exit code
|
||||
- Clear error messages for each failure type
|
||||
- Progress reporting during verification
|
||||
- Works with remote-only snapshots (not in local DB)
|
||||
155
TODO.md
155
TODO.md
@@ -1,155 +0,0 @@
|
||||
# Implementation TODO
|
||||
|
||||
## Proposed: Store and Snapshot Commands
|
||||
|
||||
### Overview
|
||||
Reorganize commands to provide better visibility into stored data and snapshots.
|
||||
|
||||
### Command Structure
|
||||
|
||||
#### `vaultik store` - Storage information commands
|
||||
- `vaultik store info`
|
||||
- Lists S3 bucket configuration
|
||||
- Shows total number of snapshots (from metadata/ listing)
|
||||
- Shows total number of blobs (from blobs/ listing)
|
||||
- Shows total size of all blobs
|
||||
- **No decryption required** - uses S3 listing only
|
||||
|
||||
#### `vaultik snapshot` - Snapshot management commands
|
||||
- `vaultik snapshot create [path]`
|
||||
- Renamed from `vaultik backup`
|
||||
- Same functionality as current backup command
|
||||
|
||||
- `vaultik snapshot list [--json]`
|
||||
- Lists all snapshots with:
|
||||
- Snapshot ID
|
||||
- Creation timestamp (parsed from snapshot ID)
|
||||
- Compressed size (sum of referenced blob sizes from manifest)
|
||||
- **No decryption required** - uses blob manifests only
|
||||
- `--json` flag outputs in JSON format instead of table
|
||||
|
||||
- `vaultik snapshot purge`
|
||||
- Requires one of:
|
||||
- `--keep-latest` - keeps only the most recent snapshot
|
||||
- `--older-than <duration>` - removes snapshots older than duration (e.g., "30d", "6m", "1y")
|
||||
- Removes snapshot metadata and runs pruning to clean up unreferenced blobs
|
||||
- Shows what would be deleted and requires confirmation
|
||||
|
||||
- `vaultik snapshot verify [--deep] <snapshot-id>`
|
||||
- Basic mode: Verifies all blobs referenced in manifest exist in S3
|
||||
- `--deep` mode: Downloads each blob and verifies its hash matches the stored hash
|
||||
- **Stub implementation for now**
|
||||
|
||||
### Implementation Notes
|
||||
|
||||
1. **No Decryption Required**: All commands work with unencrypted blob manifests
|
||||
2. **Blob Manifests**: Located at `metadata/{snapshot-id}/manifest.json.zst`
|
||||
3. **S3 Operations**: Use S3 ListObjects to enumerate snapshots and blobs
|
||||
4. **Size Calculations**: Sum blob sizes from S3 object metadata
|
||||
5. **Timestamp Parsing**: Extract from snapshot ID format (e.g., `2024-01-15-143052-hostname`)
|
||||
6. **S3 Metadata**: Only used for `snapshot verify` command
|
||||
|
||||
### Benefits
|
||||
- Users can see storage usage without decryption keys
|
||||
- Snapshot management doesn't require access to encrypted metadata
|
||||
- Clean separation between storage info and snapshot operations
|
||||
|
||||
## Chunking and Hashing
|
||||
1. ~~Implement content-defined chunking~~ (done with FastCDC)
|
||||
1. ~~Create streaming chunk processor~~ (done in chunker)
|
||||
1. ~~Implement SHA256 hashing for chunks~~ (done in scanner)
|
||||
1. ~~Add configurable chunk size parameters~~ (done in scanner)
|
||||
1. ~~Write tests for chunking consistency~~ (done)
|
||||
|
||||
## Compression and Encryption
|
||||
1. ~~Implement compression~~ (done with zlib in blob packer)
|
||||
1. ~~Integrate age encryption library~~ (done in crypto package)
|
||||
1. ~~Create Encryptor type for public key encryption~~ (done)
|
||||
1. ~~Implement streaming encrypt/decrypt pipelines~~ (done in packer)
|
||||
1. ~~Write tests for compression and encryption~~ (done)
|
||||
|
||||
## Blob Packing
|
||||
1. ~~Implement BlobWriter with size limits~~ (done in packer)
|
||||
1. ~~Add chunk accumulation and flushing~~ (done)
|
||||
1. ~~Create blob hash calculation~~ (done)
|
||||
1. ~~Implement proper error handling and rollback~~ (done with transactions)
|
||||
1. ~~Write tests for blob packing scenarios~~ (done)
|
||||
|
||||
## S3 Operations
|
||||
1. ~~Integrate MinIO client library~~ (done in s3 package)
|
||||
1. ~~Implement S3Client wrapper type~~ (done)
|
||||
1. ~~Add multipart upload support for large blobs~~ (done - using standard upload)
|
||||
1. ~~Implement retry logic~~ (handled by MinIO client)
|
||||
1. ~~Write tests using MinIO container~~ (done with testcontainers)
|
||||
|
||||
## Backup Command - Basic
|
||||
1. ~~Implement directory walking with exclusion patterns~~ (done with afero)
|
||||
1. Add file change detection using index
|
||||
1. ~~Integrate chunking pipeline for changed files~~ (done in scanner)
|
||||
1. Implement blob upload coordination to S3
|
||||
1. Add progress reporting to stderr
|
||||
1. Write integration tests for backup
|
||||
|
||||
## Snapshot Metadata
|
||||
1. Implement snapshot metadata extraction from index
|
||||
1. Create SQLite snapshot database builder
|
||||
1. Add metadata compression and encryption
|
||||
1. Implement metadata chunking for large snapshots
|
||||
1. Add hash calculation and verification
|
||||
1. Implement metadata upload to S3
|
||||
1. Write tests for metadata operations
|
||||
|
||||
## Restore Command
|
||||
1. Implement snapshot listing and selection
|
||||
1. Add metadata download and reconstruction
|
||||
1. Implement hash verification for metadata
|
||||
1. Create file restoration logic with chunk retrieval
|
||||
1. Add blob caching for efficiency
|
||||
1. Implement proper file permissions and mtime restoration
|
||||
1. Write integration tests for restore
|
||||
|
||||
## Prune Command
|
||||
1. Implement latest snapshot detection
|
||||
1. Add referenced blob extraction from metadata
|
||||
1. Create S3 blob listing and comparison
|
||||
1. Implement safe deletion of unreferenced blobs
|
||||
1. Add dry-run mode for safety
|
||||
1. Write tests for prune scenarios
|
||||
|
||||
## Verify Command
|
||||
1. Implement metadata integrity checking
|
||||
1. Add blob existence verification
|
||||
1. Implement quick mode (S3 hash checking)
|
||||
1. Implement deep mode (download and verify chunks)
|
||||
1. Add detailed error reporting
|
||||
1. Write tests for verification
|
||||
|
||||
## Fetch Command
|
||||
1. Implement single-file metadata query
|
||||
1. Add minimal blob downloading for file
|
||||
1. Create streaming file reconstruction
|
||||
1. Add support for output redirection
|
||||
1. Write tests for fetch command
|
||||
|
||||
## Daemon Mode
|
||||
1. Implement inotify watcher for Linux
|
||||
1. Add dirty path tracking in index
|
||||
1. Create periodic full scan scheduler
|
||||
1. Implement backup interval enforcement
|
||||
1. Add proper signal handling and shutdown
|
||||
1. Write tests for daemon behavior
|
||||
|
||||
## Cron Mode
|
||||
1. Implement silent operation mode
|
||||
1. Add proper exit codes for cron
|
||||
1. Implement lock file to prevent concurrent runs
|
||||
1. Add error summary reporting
|
||||
1. Write tests for cron mode
|
||||
|
||||
## Finalization
|
||||
1. Add comprehensive logging throughout
|
||||
1. Implement proper error wrapping and context
|
||||
1. Add performance metrics collection
|
||||
1. Create end-to-end integration tests
|
||||
1. Write documentation and examples
|
||||
1. Set up CI/CD pipeline
|
||||
@@ -1,9 +1,41 @@
|
||||
package main
|
||||
|
||||
import (
|
||||
"git.eeqj.de/sneak/vaultik/internal/cli"
|
||||
"os"
|
||||
"runtime"
|
||||
"runtime/pprof"
|
||||
|
||||
"sneak.berlin/go/vaultik/internal/cli"
|
||||
)
|
||||
|
||||
func main() {
|
||||
// CPU profiling: set VAULTIK_CPUPROFILE=/path/to/cpu.prof
|
||||
if cpuProfile := os.Getenv("VAULTIK_CPUPROFILE"); cpuProfile != "" {
|
||||
f, err := os.Create(cpuProfile)
|
||||
if err != nil {
|
||||
panic("could not create CPU profile: " + err.Error())
|
||||
}
|
||||
defer func() { _ = f.Close() }()
|
||||
if err := pprof.StartCPUProfile(f); err != nil {
|
||||
panic("could not start CPU profile: " + err.Error())
|
||||
}
|
||||
defer pprof.StopCPUProfile()
|
||||
}
|
||||
|
||||
// Memory profiling: set VAULTIK_MEMPROFILE=/path/to/mem.prof
|
||||
if memProfile := os.Getenv("VAULTIK_MEMPROFILE"); memProfile != "" {
|
||||
defer func() {
|
||||
f, err := os.Create(memProfile)
|
||||
if err != nil {
|
||||
panic("could not create memory profile: " + err.Error())
|
||||
}
|
||||
defer func() { _ = f.Close() }()
|
||||
runtime.GC() // get up-to-date statistics
|
||||
if err := pprof.WriteHeapProfile(f); err != nil {
|
||||
panic("could not write memory profile: " + err.Error())
|
||||
}
|
||||
}()
|
||||
}
|
||||
|
||||
cli.CLIEntry()
|
||||
}
|
||||
|
||||
@@ -2,116 +2,294 @@
|
||||
# This file shows all available configuration options with their default values
|
||||
# Copy this file and uncomment/modify the values you need
|
||||
|
||||
# Age recipient public key for encryption
|
||||
# This is REQUIRED - backups are encrypted to this public key
|
||||
# Age recipient public keys for encryption
|
||||
# This is REQUIRED - backups are encrypted to these public keys
|
||||
# Generate with: age-keygen | grep "public key"
|
||||
age_recipient: age1xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
|
||||
age_recipients:
|
||||
- age1cj2k2addawy294f6k2gr2mf9gps9r3syplryxca3nvxj3daqm96qfp84tz
|
||||
|
||||
# List of directories to backup
|
||||
# These paths will be scanned recursively for files to backup
|
||||
# Use absolute paths
|
||||
source_dirs:
|
||||
- /
|
||||
# - /home
|
||||
# - /etc
|
||||
# - /var
|
||||
# Named snapshots - each snapshot can contain multiple paths
|
||||
# Each snapshot gets its own ID and can have snapshot-specific excludes
|
||||
snapshots:
|
||||
testing:
|
||||
paths:
|
||||
- ~/dev/vaultik
|
||||
apps:
|
||||
paths:
|
||||
- /Applications
|
||||
exclude:
|
||||
- "/App Store.app"
|
||||
- "/Apps.app"
|
||||
- "/Automator.app"
|
||||
- "/Books.app"
|
||||
- "/Calculator.app"
|
||||
- "/Calendar.app"
|
||||
- "/Chess.app"
|
||||
- "/Clock.app"
|
||||
- "/Contacts.app"
|
||||
- "/Dictionary.app"
|
||||
- "/FaceTime.app"
|
||||
- "/FindMy.app"
|
||||
- "/Font Book.app"
|
||||
- "/Freeform.app"
|
||||
- "/Games.app"
|
||||
- "/GarageBand.app"
|
||||
- "/Home.app"
|
||||
- "/Image Capture.app"
|
||||
- "/Image Playground.app"
|
||||
- "/Journal.app"
|
||||
- "/Keynote.app"
|
||||
- "/Mail.app"
|
||||
- "/Maps.app"
|
||||
- "/Messages.app"
|
||||
- "/Mission Control.app"
|
||||
- "/Music.app"
|
||||
- "/News.app"
|
||||
- "/Notes.app"
|
||||
- "/Numbers.app"
|
||||
- "/Pages.app"
|
||||
- "/Passwords.app"
|
||||
- "/Phone.app"
|
||||
- "/Photo Booth.app"
|
||||
- "/Photos.app"
|
||||
- "/Podcasts.app"
|
||||
- "/Preview.app"
|
||||
- "/QuickTime Player.app"
|
||||
- "/Reminders.app"
|
||||
- "/Safari.app"
|
||||
- "/Shortcuts.app"
|
||||
- "/Siri.app"
|
||||
- "/Stickies.app"
|
||||
- "/Stocks.app"
|
||||
- "/System Settings.app"
|
||||
- "/TV.app"
|
||||
- "/TextEdit.app"
|
||||
- "/Time Machine.app"
|
||||
- "/Tips.app"
|
||||
- "/Utilities/Activity Monitor.app"
|
||||
- "/Utilities/AirPort Utility.app"
|
||||
- "/Utilities/Audio MIDI Setup.app"
|
||||
- "/Utilities/Bluetooth File Exchange.app"
|
||||
- "/Utilities/Boot Camp Assistant.app"
|
||||
- "/Utilities/ColorSync Utility.app"
|
||||
- "/Utilities/Console.app"
|
||||
- "/Utilities/Digital Color Meter.app"
|
||||
- "/Utilities/Disk Utility.app"
|
||||
- "/Utilities/Grapher.app"
|
||||
- "/Utilities/Magnifier.app"
|
||||
- "/Utilities/Migration Assistant.app"
|
||||
- "/Utilities/Print Center.app"
|
||||
- "/Utilities/Screen Sharing.app"
|
||||
- "/Utilities/Screenshot.app"
|
||||
- "/Utilities/Script Editor.app"
|
||||
- "/Utilities/System Information.app"
|
||||
- "/Utilities/Terminal.app"
|
||||
- "/Utilities/VoiceOver Utility.app"
|
||||
- "/VoiceMemos.app"
|
||||
- "/Weather.app"
|
||||
- "/iMovie.app"
|
||||
- "/iPhone Mirroring.app"
|
||||
home:
|
||||
paths:
|
||||
- "~"
|
||||
exclude:
|
||||
- "/.Trash"
|
||||
- "/tmp"
|
||||
- "/Library/Caches"
|
||||
- "/Library/Accounts"
|
||||
- "/Library/AppleMediaServices"
|
||||
- "/Library/Application Support/AddressBook"
|
||||
- "/Library/Application Support/CallHistoryDB"
|
||||
- "/Library/Application Support/CallHistoryTransactions"
|
||||
- "/Library/Application Support/DifferentialPrivacy"
|
||||
- "/Library/Application Support/FaceTime"
|
||||
- "/Library/Application Support/FileProvider"
|
||||
- "/Library/Application Support/Knowledge"
|
||||
- "/Library/Application Support/com.apple.TCC"
|
||||
- "/Library/Application Support/com.apple.avfoundation/Frecents"
|
||||
- "/Library/Application Support/com.apple.sharedfilelist"
|
||||
- "/Library/Assistant/SiriVocabulary"
|
||||
- "/Library/Autosave Information"
|
||||
- "/Library/Biome"
|
||||
- "/Library/ContainerManager"
|
||||
- "/Library/Containers/com.apple.Home"
|
||||
- "/Library/Containers/com.apple.Maps/Data/Maps"
|
||||
- "/Library/Containers/com.apple.MobileSMS"
|
||||
- "/Library/Containers/com.apple.Notes"
|
||||
- "/Library/Containers/com.apple.Safari"
|
||||
- "/Library/Containers/com.apple.Safari.WebApp"
|
||||
- "/Library/Containers/com.apple.VoiceMemos"
|
||||
- "/Library/Containers/com.apple.archiveutility"
|
||||
- "/Library/Containers/com.apple.corerecents.recentsd/Data/Library/Recents"
|
||||
- "/Library/Containers/com.apple.mail"
|
||||
- "/Library/Containers/com.apple.news"
|
||||
- "/Library/Containers/com.apple.stocks"
|
||||
- "/Library/Cookies"
|
||||
- "/Library/CoreFollowUp"
|
||||
- "/Library/Daemon Containers"
|
||||
- "/Library/DoNotDisturb"
|
||||
- "/Library/DuetExpertCenter"
|
||||
- "/Library/Group Containers/com.apple.Home.group"
|
||||
- "/Library/Group Containers/com.apple.MailPersonaStorage"
|
||||
- "/Library/Group Containers/com.apple.PreviewLegacySignaturesConversion"
|
||||
- "/Library/Group Containers/com.apple.bird"
|
||||
- "/Library/Group Containers/com.apple.stickersd.group"
|
||||
- "/Library/Group Containers/com.apple.systempreferences.cache"
|
||||
- "/Library/Group Containers/group.com.apple.AppleSpell"
|
||||
- "/Library/Group Containers/group.com.apple.ArchiveUtility.PKSignedContainer"
|
||||
- "/Library/Group Containers/group.com.apple.DeviceActivity"
|
||||
- "/Library/Group Containers/group.com.apple.Journal"
|
||||
- "/Library/Group Containers/group.com.apple.ManagedSettings"
|
||||
- "/Library/Group Containers/group.com.apple.PegasusConfiguration"
|
||||
- "/Library/Group Containers/group.com.apple.Safari.SandboxBroker"
|
||||
- "/Library/Group Containers/group.com.apple.SiriTTS"
|
||||
- "/Library/Group Containers/group.com.apple.UserNotifications"
|
||||
- "/Library/Group Containers/group.com.apple.VoiceMemos.shared"
|
||||
- "/Library/Group Containers/group.com.apple.accessibility.voicebanking"
|
||||
- "/Library/Group Containers/group.com.apple.amsondevicestoraged"
|
||||
- "/Library/Group Containers/group.com.apple.appstoreagent"
|
||||
- "/Library/Group Containers/group.com.apple.calendar"
|
||||
- "/Library/Group Containers/group.com.apple.chronod"
|
||||
- "/Library/Group Containers/group.com.apple.contacts"
|
||||
- "/Library/Group Containers/group.com.apple.controlcenter"
|
||||
- "/Library/Group Containers/group.com.apple.corerepair"
|
||||
- "/Library/Group Containers/group.com.apple.coreservices.useractivityd"
|
||||
- "/Library/Group Containers/group.com.apple.energykit"
|
||||
- "/Library/Group Containers/group.com.apple.feedback"
|
||||
- "/Library/Group Containers/group.com.apple.feedbacklogger"
|
||||
- "/Library/Group Containers/group.com.apple.findmy.findmylocateagent"
|
||||
- "/Library/Group Containers/group.com.apple.iCloudDrive"
|
||||
- "/Library/Group Containers/group.com.apple.icloud.fmfcore"
|
||||
- "/Library/Group Containers/group.com.apple.icloud.fmipcore"
|
||||
- "/Library/Group Containers/group.com.apple.icloud.searchpartyuseragent"
|
||||
- "/Library/Group Containers/group.com.apple.liveactivitiesd"
|
||||
- "/Library/Group Containers/group.com.apple.loginwindow.persistent-apps"
|
||||
- "/Library/Group Containers/group.com.apple.mail"
|
||||
- "/Library/Group Containers/group.com.apple.mlhost"
|
||||
- "/Library/Group Containers/group.com.apple.moments"
|
||||
- "/Library/Group Containers/group.com.apple.news"
|
||||
- "/Library/Group Containers/group.com.apple.newsd"
|
||||
- "/Library/Group Containers/group.com.apple.notes"
|
||||
- "/Library/Group Containers/group.com.apple.notes.import"
|
||||
- "/Library/Group Containers/group.com.apple.photolibraryd.private"
|
||||
- "/Library/Group Containers/group.com.apple.portrait.BackgroundReplacement"
|
||||
- "/Library/Group Containers/group.com.apple.printtool"
|
||||
- "/Library/Group Containers/group.com.apple.private.translation"
|
||||
- "/Library/Group Containers/group.com.apple.reminders"
|
||||
- "/Library/Group Containers/group.com.apple.replicatord"
|
||||
- "/Library/Group Containers/group.com.apple.scopedbookmarkagent"
|
||||
- "/Library/Group Containers/group.com.apple.secure-control-center-preferences"
|
||||
- "/Library/Group Containers/group.com.apple.sharingd"
|
||||
- "/Library/Group Containers/group.com.apple.shortcuts"
|
||||
- "/Library/Group Containers/group.com.apple.siri.inference"
|
||||
- "/Library/Group Containers/group.com.apple.siri.referenceResolution"
|
||||
- "/Library/Group Containers/group.com.apple.siri.remembers"
|
||||
- "/Library/Group Containers/group.com.apple.siri.userfeedbacklearning"
|
||||
- "/Library/Group Containers/group.com.apple.spotlight"
|
||||
- "/Library/Group Containers/group.com.apple.stocks"
|
||||
- "/Library/Group Containers/group.com.apple.stocks-news"
|
||||
- "/Library/Group Containers/group.com.apple.studentd"
|
||||
- "/Library/Group Containers/group.com.apple.swtransparency"
|
||||
- "/Library/Group Containers/group.com.apple.telephonyutilities.callservicesd"
|
||||
- "/Library/Group Containers/group.com.apple.tips"
|
||||
- "/Library/Group Containers/group.com.apple.tipsnext"
|
||||
- "/Library/Group Containers/group.com.apple.transparency"
|
||||
- "/Library/Group Containers/group.com.apple.usernoted"
|
||||
- "/Library/Group Containers/group.com.apple.weather"
|
||||
- "/Library/HomeKit"
|
||||
- "/Library/IdentityServices"
|
||||
- "/Library/IntelligencePlatform"
|
||||
- "/Library/Mail"
|
||||
- "/Library/Messages"
|
||||
- "/Library/Metadata/CoreSpotlight"
|
||||
- "/Library/Metadata/com.apple.IntelligentSuggestions"
|
||||
- "/Library/PersonalizationPortrait"
|
||||
- "/Library/Safari"
|
||||
- "/Library/Sharing"
|
||||
- "/Library/Shortcuts"
|
||||
- "/Library/StatusKit"
|
||||
- "/Library/Suggestions"
|
||||
- "/Library/Trial"
|
||||
- "/Library/Weather"
|
||||
- "/Library/com.apple.aiml.instrumentation"
|
||||
- "/Movies/TV"
|
||||
system:
|
||||
paths:
|
||||
- /
|
||||
exclude:
|
||||
# Virtual/transient filesystems
|
||||
- /proc
|
||||
- /sys
|
||||
- /dev
|
||||
- /run
|
||||
- /tmp
|
||||
- /var/tmp
|
||||
- /var/run
|
||||
- /var/lock
|
||||
- /var/cache
|
||||
- /media
|
||||
- /mnt
|
||||
# Swap
|
||||
- /swapfile
|
||||
- /swap.img
|
||||
# Package manager caches
|
||||
- /var/cache/apt
|
||||
- /var/cache/yum
|
||||
- /var/cache/dnf
|
||||
- /var/cache/pacman
|
||||
# Trash
|
||||
- "*/.local/share/Trash"
|
||||
dev:
|
||||
paths:
|
||||
- /Users/user/dev
|
||||
exclude:
|
||||
- "**/node_modules"
|
||||
- "**/target"
|
||||
- "**/build"
|
||||
- "**/__pycache__"
|
||||
- "**/*.pyc"
|
||||
- "**/.venv"
|
||||
- "**/vendor"
|
||||
|
||||
# Patterns to exclude from backup
|
||||
# Uses glob patterns to match file paths
|
||||
# Paths are matched as absolute paths
|
||||
# Global patterns to exclude from all backups
|
||||
exclude:
|
||||
# System directories that should not be backed up
|
||||
- /proc
|
||||
- /sys
|
||||
- /dev
|
||||
- /run
|
||||
- /tmp
|
||||
- /var/tmp
|
||||
- /var/run
|
||||
- /var/lock
|
||||
- /var/cache
|
||||
- /lost+found
|
||||
- /media
|
||||
- /mnt
|
||||
# Swap files
|
||||
- /swapfile
|
||||
- /swap.img
|
||||
- "*.swap"
|
||||
- "*.swp"
|
||||
# Log files (optional - you may want to keep some logs)
|
||||
- "*.log"
|
||||
- "*.log.*"
|
||||
- /var/log
|
||||
# Package manager caches
|
||||
- /var/cache/apt
|
||||
- /var/cache/yum
|
||||
- /var/cache/dnf
|
||||
- /var/cache/pacman
|
||||
# User caches and temporary files
|
||||
- "*/.cache"
|
||||
- "*/.local/share/Trash"
|
||||
- "*/Downloads"
|
||||
- "*/.thumbnails"
|
||||
# Development artifacts
|
||||
- "**/node_modules"
|
||||
- "**/.git/objects"
|
||||
- "**/target"
|
||||
- "**/build"
|
||||
- "**/__pycache__"
|
||||
- "**/*.pyc"
|
||||
# Large files you might not want to backup
|
||||
- "*.iso"
|
||||
- "*.img"
|
||||
- "*.vmdk"
|
||||
- "*.vdi"
|
||||
- "*.qcow2"
|
||||
- "*.tmp"
|
||||
|
||||
# Storage URL - use either this OR the s3 section below
|
||||
# Supports: s3://bucket/prefix, file:///path, rclone://remote/path
|
||||
storage_url: "rclone://las1stor1//srv/pool.2024.04/backups/heraklion"
|
||||
|
||||
# S3-compatible storage configuration
|
||||
s3:
|
||||
# S3-compatible endpoint URL
|
||||
# Examples: https://s3.amazonaws.com, https://storage.googleapis.com
|
||||
endpoint: https://s3.example.com
|
||||
|
||||
# Bucket name where backups will be stored
|
||||
bucket: my-backup-bucket
|
||||
|
||||
# Prefix (folder) within the bucket for this host's backups
|
||||
# Useful for organizing backups from multiple hosts
|
||||
# Default: empty (root of bucket)
|
||||
#prefix: "hosts/myserver/"
|
||||
|
||||
# S3 access credentials
|
||||
access_key_id: your-access-key
|
||||
secret_access_key: your-secret-key
|
||||
|
||||
# S3 region
|
||||
# Default: us-east-1
|
||||
#region: us-east-1
|
||||
|
||||
# Use SSL/TLS for S3 connections
|
||||
# Default: true
|
||||
#use_ssl: true
|
||||
|
||||
# Part size for multipart uploads
|
||||
# Minimum 5MB, affects memory usage during upload
|
||||
# Supports: 5MB, 10M, 100MiB, etc.
|
||||
# Default: 5MB
|
||||
#part_size: 5MB
|
||||
|
||||
# How often to run backups in daemon mode
|
||||
# Format: 1h, 30m, 24h, etc
|
||||
# Default: 1h
|
||||
#backup_interval: 1h
|
||||
|
||||
# How often to do a full filesystem scan in daemon mode
|
||||
# Between full scans, inotify is used to detect changes
|
||||
# Default: 24h
|
||||
#full_scan_interval: 24h
|
||||
|
||||
# Minimum time between backup runs in daemon mode
|
||||
# Prevents backups from running too frequently
|
||||
# Default: 15m
|
||||
#min_time_between_run: 15m
|
||||
#s3:
|
||||
# # S3-compatible endpoint URL
|
||||
# # Examples: https://s3.amazonaws.com, https://storage.googleapis.com
|
||||
# endpoint: http://10.100.205.122:8333
|
||||
#
|
||||
# # Bucket name where backups will be stored
|
||||
# bucket: testbucket
|
||||
#
|
||||
# # Prefix (folder) within the bucket for this host's backups
|
||||
# # Useful for organizing backups from multiple hosts
|
||||
# # Default: empty (root of bucket)
|
||||
# #prefix: "hosts/myserver/"
|
||||
#
|
||||
# # S3 access credentials
|
||||
# access_key_id: your-access-key
|
||||
# secret_access_key: your-secret-key
|
||||
#
|
||||
# # S3 region
|
||||
# # Default: us-east-1
|
||||
# #region: us-east-1
|
||||
#
|
||||
# # Use SSL/TLS for S3 connections
|
||||
# # Default: true
|
||||
# #use_ssl: true
|
||||
#
|
||||
# # Part size for multipart uploads
|
||||
# # Minimum 5MB, affects memory usage during upload
|
||||
# # Supports: 5MB, 10M, 100MiB, etc.
|
||||
# # Default: 5MB
|
||||
# #part_size: 5MB
|
||||
|
||||
# Path to local SQLite index database
|
||||
# This database tracks file state for incremental backups
|
||||
@@ -133,8 +311,7 @@ s3:
|
||||
# Compression level (1-19)
|
||||
# Higher = better compression but slower
|
||||
# Default: 3
|
||||
#compression_level: 3
|
||||
|
||||
compression_level: 5
|
||||
# Hostname to use in backup metadata
|
||||
# Default: system hostname
|
||||
#hostname: myserver
|
||||
#hostname: myserver
|
||||
|
||||
@@ -5,8 +5,14 @@
|
||||
Vaultik uses a local SQLite database to track file metadata, chunk mappings, and blob associations during the backup process. This database serves as an index for incremental backups and enables efficient deduplication.
|
||||
|
||||
**Important Notes:**
|
||||
- **No Migration Support**: Vaultik does not support database schema migrations. If the schema changes, the local database must be deleted and recreated by performing a full backup.
|
||||
- **Version Compatibility**: In rare cases, you may need to use the same version of Vaultik to restore a backup as was used to create it. This ensures compatibility with the metadata format stored in S3.
|
||||
- **No Migration Support (pre-1.0)**: Vaultik does not support database schema
|
||||
migrations. The local index is treated as disposable — if the schema changes,
|
||||
delete the local SQLite database (`vaultik database purge`) and run a full
|
||||
backup. The remote storage is unaffected; the new index will re-deduplicate
|
||||
against existing remote blobs.
|
||||
- **Version Compatibility**: In rare cases, you may need to use the same version
|
||||
of Vaultik to restore a backup as was used to create it. This ensures
|
||||
compatibility with the metadata format stored in S3.
|
||||
|
||||
## Database Tables
|
||||
|
||||
@@ -17,7 +23,6 @@ Stores metadata about files in the filesystem being backed up.
|
||||
- `id` (TEXT PRIMARY KEY) - UUID for the file record
|
||||
- `path` (TEXT NOT NULL UNIQUE) - Absolute file path
|
||||
- `mtime` (INTEGER NOT NULL) - Modification time as Unix timestamp
|
||||
- `ctime` (INTEGER NOT NULL) - Change time as Unix timestamp
|
||||
- `size` (INTEGER NOT NULL) - File size in bytes
|
||||
- `mode` (INTEGER NOT NULL) - Unix file permissions and type
|
||||
- `uid` (INTEGER NOT NULL) - User ID of file owner
|
||||
|
||||
@@ -43,18 +43,19 @@ Blobs contain the actual file data from backups and must be encrypted for securi
|
||||
Each snapshot has its own subdirectory named with the snapshot ID.
|
||||
|
||||
### Snapshot ID Format
|
||||
- **Format**: `<hostname>-<YYYYMMDD>-<HHMMSSZ>`
|
||||
- **Example**: `laptop-20240115-143052Z`
|
||||
- **Format**: `<hostname>_<snapshot-name>_<RFC3339>` (or `<hostname>_<RFC3339>` if no
|
||||
name was specified)
|
||||
- **Example**: `laptop_home_2024-01-15T14:30:52Z`
|
||||
- **Components**:
|
||||
- Hostname (may contain hyphens)
|
||||
- Date in YYYYMMDD format
|
||||
- Time in HHMMSSZ format (Z indicates UTC)
|
||||
- Short hostname (the FQDN truncated at the first dot, e.g. `laptop.example.com` becomes `laptop`)
|
||||
- Snapshot name from the configured `snapshots:` map (optional)
|
||||
- RFC3339 UTC timestamp
|
||||
|
||||
### Files in Each Snapshot Directory
|
||||
|
||||
#### `db.zst.age` - Encrypted Database Dump
|
||||
- **What it contains**: Complete SQLite database dump for this snapshot
|
||||
- **Format**: SQL dump → Zstandard compressed → Age encrypted
|
||||
#### `db.zst.age` - Encrypted Database
|
||||
- **What it contains**: Pruned binary SQLite database for this snapshot
|
||||
- **Format**: Binary SQLite → Zstandard compressed → Age encrypted
|
||||
- **Encryption**: Encrypted with Age
|
||||
- **Purpose**: Contains full file metadata, chunk mappings, and all relationships
|
||||
- **Why encrypted**: Contains sensitive metadata like file paths, permissions, and ownership
|
||||
@@ -67,7 +68,7 @@ Each snapshot has its own subdirectory named with the snapshot ID.
|
||||
- **Structure**:
|
||||
```json
|
||||
{
|
||||
"snapshot_id": "laptop-20240115-143052Z",
|
||||
"snapshot_id": "laptop_home_2024-01-15T14:30:52Z",
|
||||
"timestamp": "2024-01-15T14:30:52Z",
|
||||
"blob_count": 42,
|
||||
"blobs": [
|
||||
|
||||
273
go.mod
273
go.mod
@@ -1,81 +1,146 @@
|
||||
module git.eeqj.de/sneak/vaultik
|
||||
module sneak.berlin/go/vaultik
|
||||
|
||||
go 1.24.4
|
||||
go 1.26.1
|
||||
|
||||
require (
|
||||
filippo.io/age v1.2.1
|
||||
git.eeqj.de/sneak/smartconfig v1.0.0
|
||||
github.com/aws/aws-sdk-go-v2 v1.36.6
|
||||
github.com/aws/aws-sdk-go-v2/config v1.29.18
|
||||
github.com/aws/aws-sdk-go-v2/credentials v1.17.71
|
||||
github.com/aws/aws-sdk-go-v2/feature/s3/manager v1.17.85
|
||||
github.com/aws/aws-sdk-go-v2/service/s3 v1.84.1
|
||||
github.com/aws/smithy-go v1.22.4
|
||||
github.com/adrg/xdg v0.5.3
|
||||
github.com/aws/aws-sdk-go-v2 v1.39.6
|
||||
github.com/aws/aws-sdk-go-v2/config v1.31.17
|
||||
github.com/aws/aws-sdk-go-v2/credentials v1.18.21
|
||||
github.com/aws/aws-sdk-go-v2/feature/s3/manager v1.20.4
|
||||
github.com/aws/aws-sdk-go-v2/service/s3 v1.90.0
|
||||
github.com/aws/smithy-go v1.23.2
|
||||
github.com/dustin/go-humanize v1.0.1
|
||||
github.com/gobwas/glob v0.2.3
|
||||
github.com/google/uuid v1.6.0
|
||||
github.com/johannesboyne/gofakes3 v0.0.0-20250603205740-ed9094be7668
|
||||
github.com/jotfs/fastcdc-go v0.2.0
|
||||
github.com/klauspost/compress v1.18.0
|
||||
github.com/spf13/afero v1.14.0
|
||||
github.com/spf13/cobra v1.9.1
|
||||
github.com/stretchr/testify v1.10.0
|
||||
github.com/klauspost/compress v1.18.1
|
||||
github.com/rclone/rclone v1.72.1
|
||||
github.com/schollz/progressbar/v3 v3.19.0
|
||||
github.com/spf13/afero v1.15.0
|
||||
github.com/spf13/cobra v1.10.1
|
||||
github.com/stretchr/testify v1.11.1
|
||||
go.uber.org/fx v1.24.0
|
||||
golang.org/x/term v0.33.0
|
||||
golang.org/x/sync v0.18.0
|
||||
golang.org/x/term v0.37.0
|
||||
gopkg.in/yaml.v3 v3.0.1
|
||||
modernc.org/sqlite v1.38.0
|
||||
)
|
||||
|
||||
require (
|
||||
cloud.google.com/go/auth v0.16.2 // indirect
|
||||
cloud.google.com/go/auth v0.17.0 // indirect
|
||||
cloud.google.com/go/auth/oauth2adapt v0.2.8 // indirect
|
||||
cloud.google.com/go/compute/metadata v0.7.0 // indirect
|
||||
cloud.google.com/go/compute/metadata v0.9.0 // indirect
|
||||
cloud.google.com/go/iam v1.5.2 // indirect
|
||||
cloud.google.com/go/secretmanager v1.15.0 // indirect
|
||||
github.com/Azure/azure-sdk-for-go/sdk/azcore v1.18.0 // indirect
|
||||
github.com/Azure/azure-sdk-for-go/sdk/azidentity v1.10.1 // indirect
|
||||
github.com/Azure/azure-sdk-for-go/sdk/internal v1.11.1 // indirect
|
||||
github.com/Azure/azure-sdk-for-go/sdk/azcore v1.20.0 // indirect
|
||||
github.com/Azure/azure-sdk-for-go/sdk/azidentity v1.13.0 // indirect
|
||||
github.com/Azure/azure-sdk-for-go/sdk/internal v1.11.2 // indirect
|
||||
github.com/Azure/azure-sdk-for-go/sdk/keyvault/azsecrets v0.12.0 // indirect
|
||||
github.com/Azure/azure-sdk-for-go/sdk/keyvault/internal v0.7.1 // indirect
|
||||
github.com/AzureAD/microsoft-authentication-library-for-go v1.4.2 // indirect
|
||||
github.com/adrg/xdg v0.5.3 // indirect
|
||||
github.com/Azure/azure-sdk-for-go/sdk/storage/azblob v1.6.3 // indirect
|
||||
github.com/Azure/azure-sdk-for-go/sdk/storage/azfile v1.5.3 // indirect
|
||||
github.com/Azure/go-ntlmssp v0.0.2-0.20251110135918-10b7b7e7cd26 // indirect
|
||||
github.com/AzureAD/microsoft-authentication-library-for-go v1.6.0 // indirect
|
||||
github.com/Files-com/files-sdk-go/v3 v3.2.264 // indirect
|
||||
github.com/IBM/go-sdk-core/v5 v5.21.0 // indirect
|
||||
github.com/Max-Sum/base32768 v0.0.0-20230304063302-18e6ce5945fd // indirect
|
||||
github.com/Microsoft/go-winio v0.6.2 // indirect
|
||||
github.com/ProtonMail/bcrypt v0.0.0-20211005172633-e235017c1baf // indirect
|
||||
github.com/ProtonMail/gluon v0.17.1-0.20230724134000-308be39be96e // indirect
|
||||
github.com/ProtonMail/go-crypto v1.3.0 // indirect
|
||||
github.com/ProtonMail/go-mime v0.0.0-20230322103455-7d82a3887f2f // indirect
|
||||
github.com/ProtonMail/go-srp v0.0.7 // indirect
|
||||
github.com/ProtonMail/gopenpgp/v2 v2.9.0 // indirect
|
||||
github.com/PuerkitoBio/goquery v1.10.3 // indirect
|
||||
github.com/a1ex3/zstd-seekable-format-go/pkg v0.10.0 // indirect
|
||||
github.com/abbot/go-http-auth v0.4.0 // indirect
|
||||
github.com/anchore/go-lzo v0.1.0 // indirect
|
||||
github.com/andybalholm/cascadia v1.3.3 // indirect
|
||||
github.com/appscode/go-querystring v0.0.0-20170504095604-0126cfb3f1dc // indirect
|
||||
github.com/armon/go-metrics v0.4.1 // indirect
|
||||
github.com/aws/aws-sdk-go v1.44.256 // indirect
|
||||
github.com/aws/aws-sdk-go-v2/aws/protocol/eventstream v1.6.11 // indirect
|
||||
github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.16.33 // indirect
|
||||
github.com/aws/aws-sdk-go-v2/internal/configsources v1.3.37 // indirect
|
||||
github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.6.37 // indirect
|
||||
github.com/aws/aws-sdk-go-v2/internal/ini v1.8.3 // indirect
|
||||
github.com/aws/aws-sdk-go-v2/internal/v4a v1.3.37 // indirect
|
||||
github.com/aws/aws-sdk-go-v2/service/internal/accept-encoding v1.12.4 // indirect
|
||||
github.com/aws/aws-sdk-go-v2/service/internal/checksum v1.7.5 // indirect
|
||||
github.com/aws/aws-sdk-go-v2/service/internal/presigned-url v1.12.18 // indirect
|
||||
github.com/aws/aws-sdk-go-v2/service/internal/s3shared v1.18.18 // indirect
|
||||
github.com/aws/aws-sdk-go-v2/aws/protocol/eventstream v1.7.3 // indirect
|
||||
github.com/aws/aws-sdk-go-v2/feature/ec2/imds v1.18.13 // indirect
|
||||
github.com/aws/aws-sdk-go-v2/internal/configsources v1.4.13 // indirect
|
||||
github.com/aws/aws-sdk-go-v2/internal/endpoints/v2 v2.7.13 // indirect
|
||||
github.com/aws/aws-sdk-go-v2/internal/ini v1.8.4 // indirect
|
||||
github.com/aws/aws-sdk-go-v2/internal/v4a v1.4.13 // indirect
|
||||
github.com/aws/aws-sdk-go-v2/service/internal/accept-encoding v1.13.3 // indirect
|
||||
github.com/aws/aws-sdk-go-v2/service/internal/checksum v1.9.4 // indirect
|
||||
github.com/aws/aws-sdk-go-v2/service/internal/presigned-url v1.13.13 // indirect
|
||||
github.com/aws/aws-sdk-go-v2/service/internal/s3shared v1.19.13 // indirect
|
||||
github.com/aws/aws-sdk-go-v2/service/secretsmanager v1.35.8 // indirect
|
||||
github.com/aws/aws-sdk-go-v2/service/sso v1.25.6 // indirect
|
||||
github.com/aws/aws-sdk-go-v2/service/ssooidc v1.30.4 // indirect
|
||||
github.com/aws/aws-sdk-go-v2/service/sts v1.34.1 // indirect
|
||||
github.com/aws/aws-sdk-go-v2/service/sso v1.30.1 // indirect
|
||||
github.com/aws/aws-sdk-go-v2/service/ssooidc v1.35.5 // indirect
|
||||
github.com/aws/aws-sdk-go-v2/service/sts v1.39.1 // indirect
|
||||
github.com/bahlo/generic-list-go v0.2.0 // indirect
|
||||
github.com/beorn7/perks v1.0.1 // indirect
|
||||
github.com/boombuler/barcode v1.1.0 // indirect
|
||||
github.com/bradenaw/juniper v0.15.3 // indirect
|
||||
github.com/bradfitz/iter v0.0.0-20191230175014-e8f45d346db8 // indirect
|
||||
github.com/buengese/sgzip v0.1.1 // indirect
|
||||
github.com/buger/jsonparser v1.1.1 // indirect
|
||||
github.com/calebcase/tmpfile v1.0.3 // indirect
|
||||
github.com/cenkalti/backoff/v4 v4.3.0 // indirect
|
||||
github.com/cespare/xxhash/v2 v2.3.0 // indirect
|
||||
github.com/chilts/sid v0.0.0-20190607042430-660e94789ec9 // indirect
|
||||
github.com/clipperhouse/stringish v0.1.1 // indirect
|
||||
github.com/clipperhouse/uax29/v2 v2.3.0 // indirect
|
||||
github.com/cloudflare/circl v1.6.1 // indirect
|
||||
github.com/cloudinary/cloudinary-go/v2 v2.13.0 // indirect
|
||||
github.com/cloudsoda/go-smb2 v0.0.0-20250228001242-d4c70e6251cc // indirect
|
||||
github.com/cloudsoda/sddl v0.0.0-20250224235906-926454e91efc // indirect
|
||||
github.com/colinmarc/hdfs/v2 v2.4.0 // indirect
|
||||
github.com/coreos/go-semver v0.3.1 // indirect
|
||||
github.com/coreos/go-systemd/v22 v22.5.0 // indirect
|
||||
github.com/coreos/go-systemd/v22 v22.6.0 // indirect
|
||||
github.com/creasty/defaults v1.8.0 // indirect
|
||||
github.com/cronokirby/saferith v0.33.0 // indirect
|
||||
github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc // indirect
|
||||
github.com/diskfs/go-diskfs v1.7.0 // indirect
|
||||
github.com/dropbox/dropbox-sdk-go-unofficial/v6 v6.0.5 // indirect
|
||||
github.com/ebitengine/purego v0.9.1 // indirect
|
||||
github.com/emersion/go-message v0.18.2 // indirect
|
||||
github.com/emersion/go-vcard v0.0.0-20241024213814-c9703dde27ff // indirect
|
||||
github.com/emicklei/go-restful/v3 v3.11.0 // indirect
|
||||
github.com/fatih/color v1.16.0 // indirect
|
||||
github.com/felixge/httpsnoop v1.0.4 // indirect
|
||||
github.com/flynn/noise v1.1.0 // indirect
|
||||
github.com/fxamacker/cbor/v2 v2.7.0 // indirect
|
||||
github.com/go-jose/go-jose/v4 v4.0.5 // indirect
|
||||
github.com/go-logr/logr v1.4.2 // indirect
|
||||
github.com/gabriel-vasile/mimetype v1.4.11 // indirect
|
||||
github.com/geoffgarside/ber v1.2.0 // indirect
|
||||
github.com/go-chi/chi/v5 v5.2.3 // indirect
|
||||
github.com/go-darwin/apfs v0.0.0-20211011131704-f84b94dbf348 // indirect
|
||||
github.com/go-git/go-billy/v5 v5.6.2 // indirect
|
||||
github.com/go-jose/go-jose/v4 v4.1.2 // indirect
|
||||
github.com/go-logr/logr v1.4.3 // indirect
|
||||
github.com/go-logr/stdr v1.2.2 // indirect
|
||||
github.com/go-ole/go-ole v1.3.0 // indirect
|
||||
github.com/go-openapi/errors v0.22.4 // indirect
|
||||
github.com/go-openapi/jsonpointer v0.21.0 // indirect
|
||||
github.com/go-openapi/jsonreference v0.20.2 // indirect
|
||||
github.com/go-openapi/strfmt v0.25.0 // indirect
|
||||
github.com/go-openapi/swag v0.23.0 // indirect
|
||||
github.com/go-playground/locales v0.14.1 // indirect
|
||||
github.com/go-playground/universal-translator v0.18.1 // indirect
|
||||
github.com/go-playground/validator/v10 v10.28.0 // indirect
|
||||
github.com/go-resty/resty/v2 v2.16.5 // indirect
|
||||
github.com/go-viper/mapstructure/v2 v2.4.0 // indirect
|
||||
github.com/gofrs/flock v0.13.0 // indirect
|
||||
github.com/gogo/protobuf v1.3.2 // indirect
|
||||
github.com/golang-jwt/jwt/v5 v5.2.2 // indirect
|
||||
github.com/golang-jwt/jwt/v4 v4.5.2 // indirect
|
||||
github.com/golang-jwt/jwt/v5 v5.3.0 // indirect
|
||||
github.com/golang/protobuf v1.5.4 // indirect
|
||||
github.com/google/btree v1.1.3 // indirect
|
||||
github.com/google/gnostic-models v0.6.9 // indirect
|
||||
github.com/google/go-cmp v0.7.0 // indirect
|
||||
github.com/google/s2a-go v0.1.9 // indirect
|
||||
github.com/googleapis/enterprise-certificate-proxy v0.3.6 // indirect
|
||||
github.com/googleapis/gax-go/v2 v2.14.2 // indirect
|
||||
github.com/googleapis/enterprise-certificate-proxy v0.3.7 // indirect
|
||||
github.com/googleapis/gax-go/v2 v2.15.0 // indirect
|
||||
github.com/gopherjs/gopherjs v1.17.2 // indirect
|
||||
github.com/gorilla/schema v1.4.1 // indirect
|
||||
github.com/grpc-ecosystem/grpc-gateway/v2 v2.26.3 // indirect
|
||||
github.com/hashicorp/consul/api v1.32.1 // indirect
|
||||
github.com/hashicorp/errwrap v1.1.0 // indirect
|
||||
@@ -83,70 +148,139 @@ require (
|
||||
github.com/hashicorp/go-hclog v1.6.3 // indirect
|
||||
github.com/hashicorp/go-immutable-radix v1.3.1 // indirect
|
||||
github.com/hashicorp/go-multierror v1.1.1 // indirect
|
||||
github.com/hashicorp/go-retryablehttp v0.7.7 // indirect
|
||||
github.com/hashicorp/go-retryablehttp v0.7.8 // indirect
|
||||
github.com/hashicorp/go-rootcerts v1.0.2 // indirect
|
||||
github.com/hashicorp/go-secure-stdlib/parseutil v0.1.6 // indirect
|
||||
github.com/hashicorp/go-secure-stdlib/strutil v0.1.2 // indirect
|
||||
github.com/hashicorp/go-sockaddr v1.0.2 // indirect
|
||||
github.com/hashicorp/go-uuid v1.0.3 // indirect
|
||||
github.com/hashicorp/golang-lru v0.5.4 // indirect
|
||||
github.com/hashicorp/hcl v1.0.1-vault-7 // indirect
|
||||
github.com/hashicorp/serf v0.10.1 // indirect
|
||||
github.com/hashicorp/vault/api v1.20.0 // indirect
|
||||
github.com/henrybear327/Proton-API-Bridge v1.0.0 // indirect
|
||||
github.com/henrybear327/go-proton-api v1.0.0 // indirect
|
||||
github.com/inconshreveable/mousetrap v1.1.0 // indirect
|
||||
github.com/jcmturner/aescts/v2 v2.0.0 // indirect
|
||||
github.com/jcmturner/dnsutils/v2 v2.0.0 // indirect
|
||||
github.com/jcmturner/gofork v1.7.6 // indirect
|
||||
github.com/jcmturner/goidentity/v6 v6.0.1 // indirect
|
||||
github.com/jcmturner/gokrb5/v8 v8.4.4 // indirect
|
||||
github.com/jcmturner/rpc/v2 v2.0.3 // indirect
|
||||
github.com/jlaffaye/ftp v0.2.1-0.20240918233326-1b970516f5d3 // indirect
|
||||
github.com/josharian/intern v1.0.0 // indirect
|
||||
github.com/json-iterator/go v1.1.12 // indirect
|
||||
github.com/jtolds/gls v4.20.0+incompatible // indirect
|
||||
github.com/jtolio/noiseconn v0.0.0-20231127013910-f6d9ecbf1de7 // indirect
|
||||
github.com/jzelinskie/whirlpool v0.0.0-20201016144138-0675e54bb004 // indirect
|
||||
github.com/klauspost/cpuid/v2 v2.3.0 // indirect
|
||||
github.com/koofr/go-httpclient v0.0.0-20240520111329-e20f8f203988 // indirect
|
||||
github.com/koofr/go-koofrclient v0.0.0-20221207135200-cbd7fc9ad6a6 // indirect
|
||||
github.com/kr/fs v0.1.0 // indirect
|
||||
github.com/kylelemons/godebug v1.1.0 // indirect
|
||||
github.com/mailru/easyjson v0.7.7 // indirect
|
||||
github.com/mattn/go-colorable v0.1.13 // indirect
|
||||
github.com/lanrat/extsort v1.4.2 // indirect
|
||||
github.com/leodido/go-urn v1.4.0 // indirect
|
||||
github.com/lpar/date v1.0.0 // indirect
|
||||
github.com/lufia/plan9stats v0.0.0-20251013123823-9fd1530e3ec3 // indirect
|
||||
github.com/mailru/easyjson v0.9.1 // indirect
|
||||
github.com/mattn/go-colorable v0.1.14 // indirect
|
||||
github.com/mattn/go-isatty v0.0.20 // indirect
|
||||
github.com/mattn/go-sqlite3 v1.14.29 // indirect
|
||||
github.com/mattn/go-runewidth v0.0.19 // indirect
|
||||
github.com/mitchellh/colorstring v0.0.0-20190213212951-d06e56a500db // indirect
|
||||
github.com/mitchellh/go-homedir v1.1.0 // indirect
|
||||
github.com/mitchellh/mapstructure v1.5.0 // indirect
|
||||
github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect
|
||||
github.com/modern-go/reflect2 v1.0.2 // indirect
|
||||
github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect
|
||||
github.com/ncruces/go-strftime v0.1.9 // indirect
|
||||
github.com/ncw/swift/v2 v2.0.5 // indirect
|
||||
github.com/oklog/ulid v1.3.1 // indirect
|
||||
github.com/onsi/ginkgo/v2 v2.23.3 // indirect
|
||||
github.com/oracle/oci-go-sdk/v65 v65.104.0 // indirect
|
||||
github.com/panjf2000/ants/v2 v2.11.3 // indirect
|
||||
github.com/patrickmn/go-cache v2.1.0+incompatible // indirect
|
||||
github.com/pengsrc/go-shared v0.2.1-0.20190131101655-1999055a4a14 // indirect
|
||||
github.com/peterh/liner v1.2.2 // indirect
|
||||
github.com/pierrec/lz4/v4 v4.1.22 // indirect
|
||||
github.com/pkg/browser v0.0.0-20240102092130-5ac0b6a4141c // indirect
|
||||
github.com/pkg/errors v0.9.1 // indirect
|
||||
github.com/pkg/sftp v1.13.10 // indirect
|
||||
github.com/pkg/xattr v0.4.12 // indirect
|
||||
github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 // indirect
|
||||
github.com/power-devops/perfstat v0.0.0-20240221224432-82ca36839d55 // indirect
|
||||
github.com/pquerna/otp v1.5.0 // indirect
|
||||
github.com/prometheus/client_golang v1.23.2 // indirect
|
||||
github.com/prometheus/client_model v0.6.2 // indirect
|
||||
github.com/prometheus/common v0.67.2 // indirect
|
||||
github.com/prometheus/procfs v0.19.2 // indirect
|
||||
github.com/putdotio/go-putio/putio v0.0.0-20200123120452-16d982cac2b8 // indirect
|
||||
github.com/relvacode/iso8601 v1.7.0 // indirect
|
||||
github.com/remyoudompheng/bigfft v0.0.0-20230129092748-24d4a6f8daec // indirect
|
||||
github.com/rfjakob/eme v1.1.2 // indirect
|
||||
github.com/rivo/uniseg v0.4.7 // indirect
|
||||
github.com/ryanuber/go-glob v1.0.0 // indirect
|
||||
github.com/ryszard/goskiplist v0.0.0-20150312221310-2dfbae5fcf46 // indirect
|
||||
github.com/spf13/pflag v1.0.6 // indirect
|
||||
github.com/sabhiram/go-gitignore v0.0.0-20210923224102-525f6e181f06 // indirect
|
||||
github.com/samber/lo v1.52.0 // indirect
|
||||
github.com/shirou/gopsutil/v4 v4.25.10 // indirect
|
||||
github.com/sirupsen/logrus v1.9.4-0.20230606125235-dd1b4c2e81af // indirect
|
||||
github.com/skratchdot/open-golang v0.0.0-20200116055534-eef842397966 // indirect
|
||||
github.com/smarty/assertions v1.16.0 // indirect
|
||||
github.com/sony/gobreaker v1.0.0 // indirect
|
||||
github.com/spacemonkeygo/monkit/v3 v3.0.25-0.20251022131615-eb24eb109368 // indirect
|
||||
github.com/spf13/pflag v1.0.10 // indirect
|
||||
github.com/t3rm1n4l/go-mega v0.0.0-20251031123324-a804aaa87491 // indirect
|
||||
github.com/tidwall/gjson v1.18.0 // indirect
|
||||
github.com/tidwall/match v1.1.1 // indirect
|
||||
github.com/tidwall/pretty v1.2.0 // indirect
|
||||
github.com/tklauser/go-sysconf v0.3.15 // indirect
|
||||
github.com/tklauser/numcpus v0.10.0 // indirect
|
||||
github.com/ulikunitz/xz v0.5.15 // indirect
|
||||
github.com/unknwon/goconfig v1.0.0 // indirect
|
||||
github.com/wk8/go-ordered-map/v2 v2.1.8 // indirect
|
||||
github.com/x448/float16 v0.8.4 // indirect
|
||||
github.com/xanzy/ssh-agent v0.3.3 // indirect
|
||||
github.com/youmark/pkcs8 v0.0.0-20240726163527-a2c0da244d78 // indirect
|
||||
github.com/yunify/qingstor-sdk-go/v3 v3.2.0 // indirect
|
||||
github.com/yusufpapurcu/wmi v1.2.4 // indirect
|
||||
github.com/zeebo/blake3 v0.2.4 // indirect
|
||||
github.com/zeebo/errs v1.4.0 // indirect
|
||||
github.com/zeebo/xxh3 v1.0.2 // indirect
|
||||
go.etcd.io/bbolt v1.4.3 // indirect
|
||||
go.etcd.io/etcd/api/v3 v3.6.2 // indirect
|
||||
go.etcd.io/etcd/client/pkg/v3 v3.6.2 // indirect
|
||||
go.etcd.io/etcd/client/v3 v3.6.2 // indirect
|
||||
go.opentelemetry.io/auto/sdk v1.1.0 // indirect
|
||||
go.mongodb.org/mongo-driver v1.17.6 // indirect
|
||||
go.opentelemetry.io/auto/sdk v1.2.1 // indirect
|
||||
go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.61.0 // indirect
|
||||
go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.61.0 // indirect
|
||||
go.opentelemetry.io/otel v1.36.0 // indirect
|
||||
go.opentelemetry.io/otel/metric v1.36.0 // indirect
|
||||
go.opentelemetry.io/otel/trace v1.36.0 // indirect
|
||||
go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.63.0 // indirect
|
||||
go.opentelemetry.io/otel v1.38.0 // indirect
|
||||
go.opentelemetry.io/otel/metric v1.38.0 // indirect
|
||||
go.opentelemetry.io/otel/trace v1.38.0 // indirect
|
||||
go.shabbyrobe.org/gocovmerge v0.0.0-20230507111327-fa4f82cfbf4d // indirect
|
||||
go.uber.org/dig v1.19.0 // indirect
|
||||
go.uber.org/multierr v1.11.0 // indirect
|
||||
go.uber.org/zap v1.27.0 // indirect
|
||||
golang.org/x/crypto v0.39.0 // indirect
|
||||
golang.org/x/exp v0.0.0-20250408133849-7e4ce0ab07d0 // indirect
|
||||
golang.org/x/net v0.41.0 // indirect
|
||||
golang.org/x/oauth2 v0.30.0 // indirect
|
||||
golang.org/x/sync v0.15.0 // indirect
|
||||
golang.org/x/sys v0.34.0 // indirect
|
||||
golang.org/x/text v0.26.0 // indirect
|
||||
golang.org/x/time v0.12.0 // indirect
|
||||
golang.org/x/tools v0.33.0 // indirect
|
||||
google.golang.org/api v0.237.0 // indirect
|
||||
google.golang.org/genproto v0.0.0-20250505200425-f936aa4a68b2 // indirect
|
||||
google.golang.org/genproto/googleapis/api v0.0.0-20250603155806-513f23925822 // indirect
|
||||
google.golang.org/genproto/googleapis/rpc v0.0.0-20250603155806-513f23925822 // indirect
|
||||
google.golang.org/grpc v1.73.0 // indirect
|
||||
google.golang.org/protobuf v1.36.6 // indirect
|
||||
go.yaml.in/yaml/v2 v2.4.3 // indirect
|
||||
golang.org/x/crypto v0.45.0 // indirect
|
||||
golang.org/x/exp v0.0.0-20251023183803-a4bb9ffd2546 // indirect
|
||||
golang.org/x/net v0.47.0 // indirect
|
||||
golang.org/x/oauth2 v0.33.0 // indirect
|
||||
golang.org/x/sys v0.38.0 // indirect
|
||||
golang.org/x/text v0.31.0 // indirect
|
||||
golang.org/x/time v0.14.0 // indirect
|
||||
golang.org/x/tools v0.38.0 // indirect
|
||||
google.golang.org/api v0.255.0 // indirect
|
||||
google.golang.org/genproto v0.0.0-20250603155806-513f23925822 // indirect
|
||||
google.golang.org/genproto/googleapis/api v0.0.0-20250804133106-a7a43d27e69b // indirect
|
||||
google.golang.org/genproto/googleapis/rpc v0.0.0-20251103181224-f26f9409b101 // indirect
|
||||
google.golang.org/grpc v1.76.0 // indirect
|
||||
google.golang.org/protobuf v1.36.10 // indirect
|
||||
gopkg.in/evanphx/json-patch.v4 v4.12.0 // indirect
|
||||
gopkg.in/inf.v0 v0.9.1 // indirect
|
||||
gopkg.in/natefinch/lumberjack.v2 v2.2.1 // indirect
|
||||
gopkg.in/validator.v2 v2.0.1 // indirect
|
||||
gopkg.in/yaml.v2 v2.4.0 // indirect
|
||||
k8s.io/api v0.33.3 // indirect
|
||||
k8s.io/apimachinery v0.33.3 // indirect
|
||||
k8s.io/client-go v0.33.3 // indirect
|
||||
@@ -156,8 +290,15 @@ require (
|
||||
modernc.org/libc v1.65.10 // indirect
|
||||
modernc.org/mathutil v1.7.1 // indirect
|
||||
modernc.org/memory v1.11.0 // indirect
|
||||
moul.io/http2curl/v2 v2.3.0 // indirect
|
||||
sigs.k8s.io/json v0.0.0-20241010143419-9aa6b5e7a4b3 // indirect
|
||||
sigs.k8s.io/randfill v1.0.0 // indirect
|
||||
sigs.k8s.io/structured-merge-diff/v4 v4.6.0 // indirect
|
||||
sigs.k8s.io/yaml v1.4.0 // indirect
|
||||
sigs.k8s.io/yaml v1.6.0 // indirect
|
||||
storj.io/common v0.0.0-20251107171817-6221ae45072c // indirect
|
||||
storj.io/drpc v0.0.35-0.20250513201419-f7819ea69b55 // indirect
|
||||
storj.io/eventkit v0.0.0-20250410172343-61f26d3de156 // indirect
|
||||
storj.io/infectious v0.0.2 // indirect
|
||||
storj.io/picobuf v0.0.4 // indirect
|
||||
storj.io/uplink v1.13.1 // indirect
|
||||
)
|
||||
|
||||
@@ -23,11 +23,12 @@ import (
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"git.eeqj.de/sneak/vaultik/internal/blobgen"
|
||||
"git.eeqj.de/sneak/vaultik/internal/database"
|
||||
"git.eeqj.de/sneak/vaultik/internal/log"
|
||||
"github.com/google/uuid"
|
||||
"github.com/spf13/afero"
|
||||
"sneak.berlin/go/vaultik/internal/blobgen"
|
||||
"sneak.berlin/go/vaultik/internal/database"
|
||||
"sneak.berlin/go/vaultik/internal/log"
|
||||
"sneak.berlin/go/vaultik/internal/types"
|
||||
)
|
||||
|
||||
// BlobHandler is a callback function invoked when a blob is finalized and ready for upload.
|
||||
@@ -47,6 +48,12 @@ type PackerConfig struct {
|
||||
Fs afero.Fs // Filesystem for temporary files
|
||||
}
|
||||
|
||||
// PendingChunk describes a chunk whose database row has not been written yet.
// Entries are queued on the Packer and inserted when a blob is finalized.
type PendingChunk struct {
	// Hash is the chunk's hash string; it is converted to types.ChunkHash
	// when the chunk row is created.
	Hash string
	// Size is the chunk size recorded in the chunk row.
	Size int64
}
|
||||
|
||||
// Packer accumulates chunks and packs them into blobs.
|
||||
// It handles compression, encryption, and coordination with the database
|
||||
// to track blob metadata. Packer is thread-safe.
|
||||
@@ -64,6 +71,9 @@ type Packer struct {
|
||||
// Current blob being packed
|
||||
currentBlob *blobInProgress
|
||||
finishedBlobs []*FinishedBlob // Only used if no handler provided
|
||||
|
||||
// Pending chunks to be inserted when blob finalizes
|
||||
pendingChunks []PendingChunk
|
||||
}
|
||||
|
||||
// blobInProgress represents a blob being assembled
|
||||
@@ -114,8 +124,9 @@ type BlobChunkRef struct {
|
||||
// BlobWithReader wraps a FinishedBlob with its data reader
|
||||
type BlobWithReader struct {
|
||||
*FinishedBlob
|
||||
Reader io.ReadSeeker
|
||||
TempFile afero.File // Optional, only set for disk-based blobs
|
||||
Reader io.ReadSeeker
|
||||
TempFile afero.File // Optional, only set for disk-based blobs
|
||||
InsertedChunkHashes []string // Chunk hashes that were inserted to DB with this blob
|
||||
}
|
||||
|
||||
// NewPacker creates a new blob packer that accumulates chunks into blobs.
|
||||
@@ -152,6 +163,15 @@ func (p *Packer) SetBlobHandler(handler BlobHandler) {
|
||||
p.blobHandler = handler
|
||||
}
|
||||
|
||||
// AddPendingChunk queues a chunk to be inserted into the database when the
|
||||
// current blob is finalized. This batches chunk inserts to reduce transaction
|
||||
// overhead. Thread-safe.
|
||||
func (p *Packer) AddPendingChunk(hash string, size int64) {
|
||||
p.mu.Lock()
|
||||
defer p.mu.Unlock()
|
||||
p.pendingChunks = append(p.pendingChunks, PendingChunk{Hash: hash, Size: size})
|
||||
}
|
||||
|
||||
// AddChunk adds a chunk to the current blob being packed.
|
||||
// If adding the chunk would exceed MaxBlobSize, returns ErrBlobSizeLimitExceeded.
|
||||
// In this case, the caller should finalize the current blob and retry.
|
||||
@@ -243,19 +263,22 @@ func (p *Packer) startNewBlob() error {
|
||||
|
||||
// Create blob record in database
|
||||
if p.repos != nil {
|
||||
blobIDTyped, err := types.ParseBlobID(blobID)
|
||||
if err != nil {
|
||||
return fmt.Errorf("parsing blob ID: %w", err)
|
||||
}
|
||||
blob := &database.Blob{
|
||||
ID: blobID,
|
||||
Hash: "temp-placeholder-" + blobID, // Temporary placeholder until finalized
|
||||
ID: blobIDTyped,
|
||||
Hash: types.BlobHash("temp-placeholder-" + blobID), // Temporary placeholder until finalized
|
||||
CreatedTS: time.Now().UTC(),
|
||||
FinishedTS: nil,
|
||||
UncompressedSize: 0,
|
||||
CompressedSize: 0,
|
||||
UploadedTS: nil,
|
||||
}
|
||||
err := p.repos.WithTx(context.Background(), func(ctx context.Context, tx *sql.Tx) error {
|
||||
if err := p.repos.WithTx(context.Background(), func(ctx context.Context, tx *sql.Tx) error {
|
||||
return p.repos.Blobs.Create(ctx, tx, blob)
|
||||
})
|
||||
if err != nil {
|
||||
}); err != nil {
|
||||
return fmt.Errorf("creating blob record: %w", err)
|
||||
}
|
||||
}
|
||||
@@ -314,23 +337,9 @@ func (p *Packer) addChunkToCurrentBlob(chunk *ChunkRef) error {
|
||||
p.currentBlob.chunks = append(p.currentBlob.chunks, chunkInfo)
|
||||
p.currentBlob.chunkSet[chunk.Hash] = true
|
||||
|
||||
// Store blob-chunk association in database immediately
|
||||
if p.repos != nil {
|
||||
blobChunk := &database.BlobChunk{
|
||||
BlobID: p.currentBlob.id,
|
||||
ChunkHash: chunk.Hash,
|
||||
Offset: offset,
|
||||
Length: chunkSize,
|
||||
}
|
||||
err := p.repos.WithTx(context.Background(), func(ctx context.Context, tx *sql.Tx) error {
|
||||
return p.repos.BlobChunks.Create(ctx, tx, blobChunk)
|
||||
})
|
||||
if err != nil {
|
||||
log.Error("Failed to store blob-chunk association in database", "error", err,
|
||||
"blob_id", p.currentBlob.id, "chunk_hash", chunk.Hash)
|
||||
// Continue anyway - we can reconstruct this later if needed
|
||||
}
|
||||
}
|
||||
// Note: blob_chunk records are inserted in batch when blob is finalized
|
||||
// to reduce transaction overhead. The chunk info is already stored in
|
||||
// p.currentBlob.chunks for later insertion.
|
||||
|
||||
// Update total size
|
||||
p.currentBlob.size += chunkSize
|
||||
@@ -352,63 +361,23 @@ func (p *Packer) finalizeCurrentBlob() error {
|
||||
return nil
|
||||
}
|
||||
|
||||
// Close blobgen writer to flush all data
|
||||
if err := p.currentBlob.writer.Close(); err != nil {
|
||||
p.cleanupTempFile()
|
||||
return fmt.Errorf("closing blobgen writer: %w", err)
|
||||
}
|
||||
|
||||
// Sync file to ensure all data is written
|
||||
if err := p.currentBlob.tempFile.Sync(); err != nil {
|
||||
p.cleanupTempFile()
|
||||
return fmt.Errorf("syncing temp file: %w", err)
|
||||
}
|
||||
|
||||
// Get the final size (encrypted if applicable)
|
||||
finalSize, err := p.currentBlob.tempFile.Seek(0, io.SeekCurrent)
|
||||
blobHash, finalSize, err := p.closeBlobWriter()
|
||||
if err != nil {
|
||||
p.cleanupTempFile()
|
||||
return fmt.Errorf("getting file size: %w", err)
|
||||
return err
|
||||
}
|
||||
|
||||
// Reset to beginning for reading
|
||||
if _, err := p.currentBlob.tempFile.Seek(0, io.SeekStart); err != nil {
|
||||
p.cleanupTempFile()
|
||||
return fmt.Errorf("seeking to start: %w", err)
|
||||
chunkRefs := p.buildChunkRefs()
|
||||
|
||||
chunksToInsert := p.pendingChunks
|
||||
p.pendingChunks = nil
|
||||
|
||||
if err := p.commitBlobToDatabase(blobHash, finalSize, chunksToInsert); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// Get hash from blobgen writer (of final encrypted data)
|
||||
finalHash := p.currentBlob.writer.Sum256()
|
||||
blobHash := hex.EncodeToString(finalHash)
|
||||
|
||||
// Create chunk references with offsets
|
||||
chunkRefs := make([]*BlobChunkRef, 0, len(p.currentBlob.chunks))
|
||||
|
||||
for _, chunk := range p.currentBlob.chunks {
|
||||
chunkRefs = append(chunkRefs, &BlobChunkRef{
|
||||
ChunkHash: chunk.Hash,
|
||||
Offset: chunk.Offset,
|
||||
Length: chunk.Size,
|
||||
})
|
||||
}
|
||||
|
||||
// Update blob record in database with hash and sizes
|
||||
if p.repos != nil {
|
||||
err := p.repos.WithTx(context.Background(), func(ctx context.Context, tx *sql.Tx) error {
|
||||
return p.repos.Blobs.UpdateFinished(ctx, tx, p.currentBlob.id, blobHash,
|
||||
p.currentBlob.size, finalSize)
|
||||
})
|
||||
if err != nil {
|
||||
p.cleanupTempFile()
|
||||
return fmt.Errorf("updating blob record: %w", err)
|
||||
}
|
||||
}
|
||||
|
||||
// Create finished blob
|
||||
finished := &FinishedBlob{
|
||||
ID: p.currentBlob.id,
|
||||
Hash: blobHash,
|
||||
Data: nil, // We don't load data into memory anymore
|
||||
Chunks: chunkRefs,
|
||||
CreatedTS: p.currentBlob.startTime,
|
||||
Uncompressed: p.currentBlob.size,
|
||||
@@ -417,56 +386,136 @@ func (p *Packer) finalizeCurrentBlob() error {
|
||||
|
||||
compressionRatio := float64(finished.Compressed) / float64(finished.Uncompressed)
|
||||
log.Info("Finalized blob (compressed and encrypted)",
|
||||
"hash", blobHash,
|
||||
"chunks", len(chunkRefs),
|
||||
"uncompressed", finished.Uncompressed,
|
||||
"compressed", finished.Compressed,
|
||||
"hash", blobHash, "chunks", len(chunkRefs),
|
||||
"uncompressed", finished.Uncompressed, "compressed", finished.Compressed,
|
||||
"ratio", fmt.Sprintf("%.2f", compressionRatio),
|
||||
"duration", time.Since(p.currentBlob.startTime))
|
||||
|
||||
// Call blob handler if set
|
||||
var insertedChunkHashes []string
|
||||
for _, chunk := range chunksToInsert {
|
||||
insertedChunkHashes = append(insertedChunkHashes, chunk.Hash)
|
||||
}
|
||||
|
||||
return p.deliverFinishedBlob(finished, insertedChunkHashes)
|
||||
}
|
||||
|
||||
// closeBlobWriter closes the writer, syncs to disk, and returns the blob hash and final size
|
||||
func (p *Packer) closeBlobWriter() (string, int64, error) {
|
||||
if err := p.currentBlob.writer.Close(); err != nil {
|
||||
p.cleanupTempFile()
|
||||
return "", 0, fmt.Errorf("closing blobgen writer: %w", err)
|
||||
}
|
||||
if err := p.currentBlob.tempFile.Sync(); err != nil {
|
||||
p.cleanupTempFile()
|
||||
return "", 0, fmt.Errorf("syncing temp file: %w", err)
|
||||
}
|
||||
|
||||
finalSize, err := p.currentBlob.tempFile.Seek(0, io.SeekCurrent)
|
||||
if err != nil {
|
||||
p.cleanupTempFile()
|
||||
return "", 0, fmt.Errorf("getting file size: %w", err)
|
||||
}
|
||||
if _, err := p.currentBlob.tempFile.Seek(0, io.SeekStart); err != nil {
|
||||
p.cleanupTempFile()
|
||||
return "", 0, fmt.Errorf("seeking to start: %w", err)
|
||||
}
|
||||
|
||||
finalHash := p.currentBlob.writer.Sum256()
|
||||
return hex.EncodeToString(finalHash), finalSize, nil
|
||||
}
|
||||
|
||||
// buildChunkRefs creates BlobChunkRef entries from the current blob's chunks
|
||||
func (p *Packer) buildChunkRefs() []*BlobChunkRef {
|
||||
refs := make([]*BlobChunkRef, 0, len(p.currentBlob.chunks))
|
||||
for _, chunk := range p.currentBlob.chunks {
|
||||
refs = append(refs, &BlobChunkRef{
|
||||
ChunkHash: chunk.Hash, Offset: chunk.Offset, Length: chunk.Size,
|
||||
})
|
||||
}
|
||||
return refs
|
||||
}
|
||||
|
||||
// commitBlobToDatabase inserts pending chunks, blob_chunks, and updates the blob record
|
||||
func (p *Packer) commitBlobToDatabase(blobHash string, finalSize int64, chunksToInsert []PendingChunk) error {
|
||||
if p.repos == nil {
|
||||
return nil
|
||||
}
|
||||
|
||||
blobIDTyped, parseErr := types.ParseBlobID(p.currentBlob.id)
|
||||
if parseErr != nil {
|
||||
p.cleanupTempFile()
|
||||
return fmt.Errorf("parsing blob ID: %w", parseErr)
|
||||
}
|
||||
|
||||
err := p.repos.WithTx(context.Background(), func(ctx context.Context, tx *sql.Tx) error {
|
||||
for _, chunk := range chunksToInsert {
|
||||
dbChunk := &database.Chunk{ChunkHash: types.ChunkHash(chunk.Hash), Size: chunk.Size}
|
||||
if err := p.repos.Chunks.Create(ctx, tx, dbChunk); err != nil {
|
||||
return fmt.Errorf("creating chunk: %w", err)
|
||||
}
|
||||
}
|
||||
|
||||
for _, chunk := range p.currentBlob.chunks {
|
||||
blobChunk := &database.BlobChunk{
|
||||
BlobID: blobIDTyped, ChunkHash: types.ChunkHash(chunk.Hash),
|
||||
Offset: chunk.Offset, Length: chunk.Size,
|
||||
}
|
||||
if err := p.repos.BlobChunks.Create(ctx, tx, blobChunk); err != nil {
|
||||
return fmt.Errorf("creating blob_chunk: %w", err)
|
||||
}
|
||||
}
|
||||
|
||||
return p.repos.Blobs.UpdateFinished(ctx, tx, p.currentBlob.id, blobHash, p.currentBlob.size, finalSize)
|
||||
})
|
||||
if err != nil {
|
||||
p.cleanupTempFile()
|
||||
return fmt.Errorf("finalizing blob transaction: %w", err)
|
||||
}
|
||||
|
||||
log.Debug("Committed blob transaction",
|
||||
"chunks_inserted", len(chunksToInsert), "blob_chunks_inserted", len(p.currentBlob.chunks))
|
||||
return nil
|
||||
}
|
||||
|
||||
// deliverFinishedBlob passes the blob to the handler or stores it internally
|
||||
func (p *Packer) deliverFinishedBlob(finished *FinishedBlob, insertedChunkHashes []string) error {
|
||||
if p.blobHandler != nil {
|
||||
// Reset file position for handler
|
||||
if _, err := p.currentBlob.tempFile.Seek(0, io.SeekStart); err != nil {
|
||||
p.cleanupTempFile()
|
||||
return fmt.Errorf("seeking for handler: %w", err)
|
||||
}
|
||||
|
||||
// Create a blob reader that includes the data stream
|
||||
blobWithReader := &BlobWithReader{
|
||||
FinishedBlob: finished,
|
||||
Reader: p.currentBlob.tempFile,
|
||||
TempFile: p.currentBlob.tempFile,
|
||||
FinishedBlob: finished,
|
||||
Reader: p.currentBlob.tempFile,
|
||||
TempFile: p.currentBlob.tempFile,
|
||||
InsertedChunkHashes: insertedChunkHashes,
|
||||
}
|
||||
|
||||
if err := p.blobHandler(blobWithReader); err != nil {
|
||||
p.cleanupTempFile()
|
||||
return fmt.Errorf("blob handler failed: %w", err)
|
||||
}
|
||||
// Note: blob handler is responsible for closing/cleaning up temp file
|
||||
p.currentBlob = nil
|
||||
} else {
|
||||
log.Debug("No blob handler callback configured", "blob_hash", blobHash[:8]+"...")
|
||||
// No handler, need to read data for legacy behavior
|
||||
if _, err := p.currentBlob.tempFile.Seek(0, io.SeekStart); err != nil {
|
||||
p.cleanupTempFile()
|
||||
return fmt.Errorf("seeking to read data: %w", err)
|
||||
}
|
||||
|
||||
data, err := io.ReadAll(p.currentBlob.tempFile)
|
||||
if err != nil {
|
||||
p.cleanupTempFile()
|
||||
return fmt.Errorf("reading blob data: %w", err)
|
||||
}
|
||||
finished.Data = data
|
||||
|
||||
p.finishedBlobs = append(p.finishedBlobs, finished)
|
||||
|
||||
// Cleanup
|
||||
p.cleanupTempFile()
|
||||
p.currentBlob = nil
|
||||
return nil
|
||||
}
|
||||
|
||||
// No handler - read data for legacy behavior
|
||||
log.Debug("No blob handler callback configured", "blob_hash", finished.Hash[:8]+"...")
|
||||
if _, err := p.currentBlob.tempFile.Seek(0, io.SeekStart); err != nil {
|
||||
p.cleanupTempFile()
|
||||
return fmt.Errorf("seeking to read data: %w", err)
|
||||
}
|
||||
|
||||
data, err := io.ReadAll(p.currentBlob.tempFile)
|
||||
if err != nil {
|
||||
p.cleanupTempFile()
|
||||
return fmt.Errorf("reading blob data: %w", err)
|
||||
}
|
||||
finished.Data = data
|
||||
p.finishedBlobs = append(p.finishedBlobs, finished)
|
||||
p.cleanupTempFile()
|
||||
p.currentBlob = nil
|
||||
return nil
|
||||
}
|
||||
|
||||
|
||||
@@ -10,10 +10,11 @@ import (
|
||||
"testing"
|
||||
|
||||
"filippo.io/age"
|
||||
"git.eeqj.de/sneak/vaultik/internal/database"
|
||||
"git.eeqj.de/sneak/vaultik/internal/log"
|
||||
"github.com/klauspost/compress/zstd"
|
||||
"github.com/spf13/afero"
|
||||
"sneak.berlin/go/vaultik/internal/database"
|
||||
"sneak.berlin/go/vaultik/internal/log"
|
||||
"sneak.berlin/go/vaultik/internal/types"
|
||||
)
|
||||
|
||||
const (
|
||||
@@ -60,7 +61,7 @@ func TestPacker(t *testing.T) {
|
||||
|
||||
// Create chunk in database first
|
||||
dbChunk := &database.Chunk{
|
||||
ChunkHash: hashStr,
|
||||
ChunkHash: types.ChunkHash(hashStr),
|
||||
Size: int64(len(data)),
|
||||
}
|
||||
err = repos.WithTx(context.Background(), func(ctx context.Context, tx *sql.Tx) error {
|
||||
@@ -152,7 +153,7 @@ func TestPacker(t *testing.T) {
|
||||
|
||||
// Create chunk in database first
|
||||
dbChunk := &database.Chunk{
|
||||
ChunkHash: hashStr,
|
||||
ChunkHash: types.ChunkHash(hashStr),
|
||||
Size: int64(len(data)),
|
||||
}
|
||||
err = repos.WithTx(context.Background(), func(ctx context.Context, tx *sql.Tx) error {
|
||||
@@ -235,7 +236,7 @@ func TestPacker(t *testing.T) {
|
||||
|
||||
// Create chunk in database first
|
||||
dbChunk := &database.Chunk{
|
||||
ChunkHash: hashStr,
|
||||
ChunkHash: types.ChunkHash(hashStr),
|
||||
Size: int64(len(data)),
|
||||
}
|
||||
err = repos.WithTx(context.Background(), func(ctx context.Context, tx *sql.Tx) error {
|
||||
@@ -322,7 +323,7 @@ func TestPacker(t *testing.T) {
|
||||
|
||||
// Create chunk in database first
|
||||
dbChunk := &database.Chunk{
|
||||
ChunkHash: hashStr,
|
||||
ChunkHash: types.ChunkHash(hashStr),
|
||||
Size: int64(len(data)),
|
||||
}
|
||||
err = repos.WithTx(context.Background(), func(ctx context.Context, tx *sql.Tx) error {
|
||||
|
||||
@@ -51,7 +51,13 @@ func CompressStream(dst io.Writer, src io.Reader, compressionLevel int, recipien
|
||||
if err != nil {
|
||||
return 0, "", fmt.Errorf("creating writer: %w", err)
|
||||
}
|
||||
defer func() { _ = w.Close() }()
|
||||
|
||||
closed := false
|
||||
defer func() {
|
||||
if !closed {
|
||||
_ = w.Close()
|
||||
}
|
||||
}()
|
||||
|
||||
// Copy data
|
||||
if _, err := io.Copy(w, src); err != nil {
|
||||
@@ -62,6 +68,7 @@ func CompressStream(dst io.Writer, src io.Reader, compressionLevel int, recipien
|
||||
if err := w.Close(); err != nil {
|
||||
return 0, "", fmt.Errorf("closing writer: %w", err)
|
||||
}
|
||||
closed = true
|
||||
|
||||
return w.BytesWritten(), hex.EncodeToString(w.Sum256()), nil
|
||||
}
|
||||
|
||||
64
internal/blobgen/compress_test.go
Normal file
64
internal/blobgen/compress_test.go
Normal file
@@ -0,0 +1,64 @@
|
||||
package blobgen
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"crypto/rand"
|
||||
"strings"
|
||||
"testing"
|
||||
|
||||
"github.com/stretchr/testify/assert"
|
||||
"github.com/stretchr/testify/require"
|
||||
)
|
||||
|
||||
// testRecipient is a static age recipient for tests.
|
||||
const testRecipient = "age1cplgrwj77ta54dnmydvvmzn64ltk83ankxl5sww04mrtmu62kv3s89gmvv"
|
||||
|
||||
// TestCompressStreamNoDoubleClose is a regression test for issue #28.
|
||||
// It verifies that CompressStream does not panic or return an error due to
|
||||
// double-closing the underlying blobgen.Writer. Before the fix in PR #33,
|
||||
// the explicit Close() on the happy path combined with defer Close() would
|
||||
// cause a double close.
|
||||
func TestCompressStreamNoDoubleClose(t *testing.T) {
|
||||
input := []byte("regression test data for issue #28 double-close fix")
|
||||
var buf bytes.Buffer
|
||||
|
||||
written, hash, err := CompressStream(&buf, bytes.NewReader(input), 3, []string{testRecipient})
|
||||
require.NoError(t, err, "CompressStream should not return an error")
|
||||
assert.True(t, written > 0, "expected bytes written > 0")
|
||||
assert.NotEmpty(t, hash, "expected non-empty hash")
|
||||
assert.True(t, buf.Len() > 0, "expected non-empty output")
|
||||
}
|
||||
|
||||
// TestCompressStreamLargeInput exercises CompressStream with a larger payload
|
||||
// to ensure no double-close issues surface under heavier I/O.
|
||||
func TestCompressStreamLargeInput(t *testing.T) {
|
||||
data := make([]byte, 512*1024) // 512 KB
|
||||
_, err := rand.Read(data)
|
||||
require.NoError(t, err)
|
||||
|
||||
var buf bytes.Buffer
|
||||
written, hash, err := CompressStream(&buf, bytes.NewReader(data), 3, []string{testRecipient})
|
||||
require.NoError(t, err)
|
||||
assert.True(t, written > 0)
|
||||
assert.NotEmpty(t, hash)
|
||||
}
|
||||
|
||||
// TestCompressStreamEmptyInput verifies CompressStream handles empty input
|
||||
// without double-close issues.
|
||||
func TestCompressStreamEmptyInput(t *testing.T) {
|
||||
var buf bytes.Buffer
|
||||
_, hash, err := CompressStream(&buf, strings.NewReader(""), 3, []string{testRecipient})
|
||||
require.NoError(t, err)
|
||||
assert.NotEmpty(t, hash)
|
||||
}
|
||||
|
||||
// TestCompressDataNoDoubleClose mirrors the stream test for CompressData,
|
||||
// ensuring the explicit Close + error-path Close pattern is also safe.
|
||||
func TestCompressDataNoDoubleClose(t *testing.T) {
|
||||
input := []byte("CompressData regression test for double-close")
|
||||
result, err := CompressData(input, 3, []string{testRecipient})
|
||||
require.NoError(t, err)
|
||||
assert.True(t, result.CompressedSize > 0)
|
||||
assert.True(t, result.UncompressedSize == int64(len(input)))
|
||||
assert.NotEmpty(t, result.SHA256)
|
||||
}
|
||||
@@ -5,30 +5,33 @@ import (
|
||||
"fmt"
|
||||
"hash"
|
||||
"io"
|
||||
"runtime"
|
||||
|
||||
"filippo.io/age"
|
||||
"github.com/klauspost/compress/zstd"
|
||||
)
|
||||
|
||||
// Writer wraps compression and encryption with SHA256 hashing
|
||||
// Writer wraps compression and encryption with SHA256 hashing.
|
||||
// Data flows: input -> tee(hasher, compressor -> encryptor -> destination)
|
||||
// The hash is computed on the uncompressed input for deterministic content-addressing.
|
||||
type Writer struct {
|
||||
writer io.Writer // Final destination
|
||||
teeWriter io.Writer // Tee to hasher and compressor
|
||||
compressor *zstd.Encoder // Compression layer
|
||||
encryptor io.WriteCloser // Encryption layer
|
||||
hasher hash.Hash // SHA256 hasher
|
||||
teeWriter io.Writer // Tees data to hasher
|
||||
hasher hash.Hash // SHA256 hasher (on uncompressed input)
|
||||
compressionLevel int
|
||||
bytesWritten int64
|
||||
}
|
||||
|
||||
// NewWriter creates a new Writer that compresses, encrypts, and hashes data
|
||||
// NewWriter creates a new Writer that compresses, encrypts, and hashes data.
|
||||
// The hash is computed on the uncompressed input for deterministic content-addressing.
|
||||
func NewWriter(w io.Writer, compressionLevel int, recipients []string) (*Writer, error) {
|
||||
// Validate compression level
|
||||
if err := validateCompressionLevel(compressionLevel); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
// Create SHA256 hasher
|
||||
// Create SHA256 hasher for the uncompressed input
|
||||
hasher := sha256.New()
|
||||
|
||||
// Parse recipients
|
||||
@@ -41,31 +44,36 @@ func NewWriter(w io.Writer, compressionLevel int, recipients []string) (*Writer,
|
||||
ageRecipients = append(ageRecipients, r)
|
||||
}
|
||||
|
||||
// Create encryption writer
|
||||
// Create encryption writer that outputs to destination
|
||||
encWriter, err := age.Encrypt(w, ageRecipients...)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("creating encryption writer: %w", err)
|
||||
}
|
||||
|
||||
// Calculate compression concurrency: CPUs - 2, minimum 1
|
||||
concurrency := runtime.NumCPU() - 2
|
||||
if concurrency < 1 {
|
||||
concurrency = 1
|
||||
}
|
||||
|
||||
// Create compression writer with encryption as destination
|
||||
compressor, err := zstd.NewWriter(encWriter,
|
||||
zstd.WithEncoderLevel(zstd.EncoderLevelFromZstd(compressionLevel)),
|
||||
zstd.WithEncoderConcurrency(1), // Use single thread for streaming
|
||||
zstd.WithEncoderConcurrency(concurrency),
|
||||
)
|
||||
if err != nil {
|
||||
_ = encWriter.Close()
|
||||
return nil, fmt.Errorf("creating compression writer: %w", err)
|
||||
}
|
||||
|
||||
// Create tee writer that writes to both compressor and hasher
|
||||
teeWriter := io.MultiWriter(compressor, hasher)
|
||||
// Create tee writer: input goes to both hasher and compressor
|
||||
teeWriter := io.MultiWriter(hasher, compressor)
|
||||
|
||||
return &Writer{
|
||||
writer: w,
|
||||
teeWriter: teeWriter,
|
||||
compressor: compressor,
|
||||
encryptor: encWriter,
|
||||
hasher: hasher,
|
||||
teeWriter: teeWriter,
|
||||
compressionLevel: compressionLevel,
|
||||
}, nil
|
||||
}
|
||||
@@ -92,9 +100,16 @@ func (w *Writer) Close() error {
|
||||
return nil
|
||||
}
|
||||
|
||||
// Sum256 returns the SHA256 hash of all data written
|
||||
// Sum256 returns the double SHA256 hash of the uncompressed input data.
|
||||
// Double hashing (SHA256(SHA256(data))) prevents information leakage about
|
||||
// the plaintext - an attacker cannot confirm existence of known content
|
||||
// by computing its hash and checking for a matching blob filename.
|
||||
func (w *Writer) Sum256() []byte {
|
||||
return w.hasher.Sum(nil)
|
||||
// First hash: SHA256(plaintext)
|
||||
firstHash := w.hasher.Sum(nil)
|
||||
// Second hash: SHA256(firstHash) - this is the blob ID
|
||||
secondHash := sha256.Sum256(firstHash)
|
||||
return secondHash[:]
|
||||
}
|
||||
|
||||
// BytesWritten returns the number of uncompressed bytes written
|
||||
|
||||
105
internal/blobgen/writer_test.go
Normal file
105
internal/blobgen/writer_test.go
Normal file
@@ -0,0 +1,105 @@
|
||||
package blobgen
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"crypto/rand"
|
||||
"crypto/sha256"
|
||||
"encoding/hex"
|
||||
"testing"
|
||||
|
||||
"github.com/stretchr/testify/assert"
|
||||
"github.com/stretchr/testify/require"
|
||||
)
|
||||
|
||||
// TestWriterHashIsDoubleHash verifies that Writer.Sum256() returns
|
||||
// the double hash SHA256(SHA256(plaintext)) for security.
|
||||
// Double hashing prevents attackers from confirming existence of known content.
|
||||
func TestWriterHashIsDoubleHash(t *testing.T) {
|
||||
// Test data - random data that doesn't compress well
|
||||
testData := make([]byte, 1024*1024) // 1MB
|
||||
_, err := rand.Read(testData)
|
||||
require.NoError(t, err)
|
||||
|
||||
// Test recipient (generated with age-keygen)
|
||||
testRecipient := "age1cplgrwj77ta54dnmydvvmzn64ltk83ankxl5sww04mrtmu62kv3s89gmvv"
|
||||
|
||||
// Create a buffer to capture the encrypted output
|
||||
var encryptedBuf bytes.Buffer
|
||||
|
||||
// Create blobgen writer
|
||||
writer, err := NewWriter(&encryptedBuf, 3, []string{testRecipient})
|
||||
require.NoError(t, err)
|
||||
|
||||
// Write test data
|
||||
n, err := writer.Write(testData)
|
||||
require.NoError(t, err)
|
||||
assert.Equal(t, len(testData), n)
|
||||
|
||||
// Close to flush all data
|
||||
err = writer.Close()
|
||||
require.NoError(t, err)
|
||||
|
||||
// Get the hash from the writer
|
||||
writerHash := hex.EncodeToString(writer.Sum256())
|
||||
|
||||
// Calculate the expected double hash: SHA256(SHA256(plaintext))
|
||||
firstHash := sha256.Sum256(testData)
|
||||
secondHash := sha256.Sum256(firstHash[:])
|
||||
expectedDoubleHash := hex.EncodeToString(secondHash[:])
|
||||
|
||||
// Also compute single hash to verify it's different
|
||||
singleHashStr := hex.EncodeToString(firstHash[:])
|
||||
|
||||
t.Logf("Input size: %d bytes", len(testData))
|
||||
t.Logf("Single hash (SHA256(data)): %s", singleHashStr)
|
||||
t.Logf("Double hash (SHA256(SHA256(data))): %s", expectedDoubleHash)
|
||||
t.Logf("Writer hash: %s", writerHash)
|
||||
|
||||
// The writer hash should match the double hash
|
||||
assert.Equal(t, expectedDoubleHash, writerHash,
|
||||
"Writer.Sum256() should return SHA256(SHA256(plaintext)) for security")
|
||||
|
||||
// Verify it's NOT the single hash (would leak information)
|
||||
assert.NotEqual(t, singleHashStr, writerHash,
|
||||
"Writer hash should not be single hash (would allow content confirmation attacks)")
|
||||
}
|
||||
|
||||
// TestWriterDeterministicHash verifies that the same input always produces
|
||||
// the same hash, even with non-deterministic encryption.
|
||||
func TestWriterDeterministicHash(t *testing.T) {
|
||||
// Test data
|
||||
testData := []byte("Hello, World! This is test data for deterministic hashing.")
|
||||
|
||||
// Test recipient
|
||||
testRecipient := "age1cplgrwj77ta54dnmydvvmzn64ltk83ankxl5sww04mrtmu62kv3s89gmvv"
|
||||
|
||||
// Create two writers and verify they produce the same hash
|
||||
var buf1, buf2 bytes.Buffer
|
||||
|
||||
writer1, err := NewWriter(&buf1, 3, []string{testRecipient})
|
||||
require.NoError(t, err)
|
||||
_, err = writer1.Write(testData)
|
||||
require.NoError(t, err)
|
||||
require.NoError(t, writer1.Close())
|
||||
|
||||
writer2, err := NewWriter(&buf2, 3, []string{testRecipient})
|
||||
require.NoError(t, err)
|
||||
_, err = writer2.Write(testData)
|
||||
require.NoError(t, err)
|
||||
require.NoError(t, writer2.Close())
|
||||
|
||||
hash1 := hex.EncodeToString(writer1.Sum256())
|
||||
hash2 := hex.EncodeToString(writer2.Sum256())
|
||||
|
||||
// Hashes should be identical (deterministic)
|
||||
assert.Equal(t, hash1, hash2, "Same input should produce same hash")
|
||||
|
||||
// Encrypted outputs should be different (non-deterministic encryption)
|
||||
assert.NotEqual(t, buf1.Bytes(), buf2.Bytes(),
|
||||
"Encrypted outputs should differ due to non-deterministic encryption")
|
||||
|
||||
t.Logf("Hash 1: %s", hash1)
|
||||
t.Logf("Hash 2: %s", hash2)
|
||||
t.Logf("Encrypted size 1: %d bytes", buf1.Len())
|
||||
t.Logf("Encrypted size 2: %d bytes", buf2.Len())
|
||||
}
|
||||
@@ -6,8 +6,6 @@ import (
|
||||
"fmt"
|
||||
"io"
|
||||
"os"
|
||||
|
||||
"github.com/jotfs/fastcdc-go"
|
||||
)
|
||||
|
||||
// Chunk represents a single chunk of data produced by the content-defined chunking algorithm.
|
||||
@@ -48,16 +46,8 @@ func NewChunker(avgChunkSize int64) *Chunker {
|
||||
// reasonably sized inputs. For large files or streams, use ChunkReaderStreaming instead.
|
||||
// Returns an error if chunking fails or if reading from the input fails.
|
||||
func (c *Chunker) ChunkReader(r io.Reader) ([]Chunk, error) {
|
||||
opts := fastcdc.Options{
|
||||
MinSize: c.minChunkSize,
|
||||
AverageSize: c.avgChunkSize,
|
||||
MaxSize: c.maxChunkSize,
|
||||
}
|
||||
|
||||
chunker, err := fastcdc.NewChunker(r, opts)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("creating chunker: %w", err)
|
||||
}
|
||||
chunker := AcquireReusableChunker(r, c.minChunkSize, c.avgChunkSize, c.maxChunkSize)
|
||||
defer chunker.Release()
|
||||
|
||||
var chunks []Chunk
|
||||
offset := int64(0)
|
||||
@@ -74,7 +64,7 @@ func (c *Chunker) ChunkReader(r io.Reader) ([]Chunk, error) {
|
||||
// Calculate hash
|
||||
hash := sha256.Sum256(chunk.Data)
|
||||
|
||||
// Make a copy of the data since FastCDC reuses the buffer
|
||||
// Make a copy of the data since the chunker reuses the buffer
|
||||
chunkData := make([]byte, len(chunk.Data))
|
||||
copy(chunkData, chunk.Data)
|
||||
|
||||
@@ -107,16 +97,8 @@ func (c *Chunker) ChunkReaderStreaming(r io.Reader, callback ChunkCallback) (str
|
||||
fileHasher := sha256.New()
|
||||
teeReader := io.TeeReader(r, fileHasher)
|
||||
|
||||
opts := fastcdc.Options{
|
||||
MinSize: c.minChunkSize,
|
||||
AverageSize: c.avgChunkSize,
|
||||
MaxSize: c.maxChunkSize,
|
||||
}
|
||||
|
||||
chunker, err := fastcdc.NewChunker(teeReader, opts)
|
||||
if err != nil {
|
||||
return "", fmt.Errorf("creating chunker: %w", err)
|
||||
}
|
||||
chunker := AcquireReusableChunker(teeReader, c.minChunkSize, c.avgChunkSize, c.maxChunkSize)
|
||||
defer chunker.Release()
|
||||
|
||||
offset := int64(0)
|
||||
|
||||
@@ -132,13 +114,12 @@ func (c *Chunker) ChunkReaderStreaming(r io.Reader, callback ChunkCallback) (str
|
||||
// Calculate chunk hash
|
||||
hash := sha256.Sum256(chunk.Data)
|
||||
|
||||
// Make a copy of the data since FastCDC reuses the buffer
|
||||
chunkData := make([]byte, len(chunk.Data))
|
||||
copy(chunkData, chunk.Data)
|
||||
|
||||
// Pass the data directly - caller must process it before we call Next() again
|
||||
// (chunker reuses its internal buffer, but since we process synchronously
|
||||
// and completely before continuing, no copy is needed)
|
||||
if err := callback(Chunk{
|
||||
Hash: hex.EncodeToString(hash[:]),
|
||||
Data: chunkData,
|
||||
Data: chunk.Data,
|
||||
Offset: offset,
|
||||
Size: int64(len(chunk.Data)),
|
||||
}); err != nil {
|
||||
|
||||
265
internal/chunker/fastcdc.go
Normal file
265
internal/chunker/fastcdc.go
Normal file
@@ -0,0 +1,265 @@
|
||||
package chunker
|
||||
|
||||
import (
|
||||
"io"
|
||||
"math"
|
||||
"sync"
|
||||
)
|
||||
|
||||
// ReusableChunker implements FastCDC with reusable buffers to minimize allocations.
// Unlike the upstream fastcdc-go library which allocates a new buffer per file,
// this implementation uses sync.Pool to reuse buffers across files.
type ReusableChunker struct {
	// Size parameters supplied to AcquireReusableChunker.
	minSize  int // cut-point search starts at this offset within a chunk
	maxSize  int // upper bound on the length of a returned chunk
	normSize int // offset at which the search switches from maskS to maskL
	bufSize  int // full length the buffer is restored to before reading

	// Fingerprint masks: maskS is tested before normSize, maskL after it.
	maskS uint64
	maskL uint64

	// Source of the data being chunked; cleared by Release.
	rd io.Reader

	buf    []byte // read window; truncated to the valid bytes once EOF is seen
	cursor int    // index in buf of the first byte not yet returned in a chunk
	offset int    // sum of the lengths of all chunks returned so far
	eof    bool   // set once the reader has reported EOF
}
|
||||
|
||||
// reusableChunkerPool pools ReusableChunker instances to avoid allocations.
|
||||
var reusableChunkerPool = sync.Pool{
|
||||
New: func() interface{} {
|
||||
return &ReusableChunker{}
|
||||
},
|
||||
}
|
||||
|
||||
// bufferPools contains pools for different buffer sizes.
// Key is the buffer size (int); value is a *sync.Pool of *[]byte.
var bufferPools sync.Map

// getBuffer returns a byte slice whose length and capacity are both size,
// taken from the pool for that size. The pool is created on first use.
// The contents of a recycled buffer are not cleared.
func getBuffer(size int) []byte {
	// Fast path: the pool for this size normally exists already, so avoid
	// building a throwaway sync.Pool (and its closure) on every call.
	poolI, ok := bufferPools.Load(size)
	if !ok {
		poolI, _ = bufferPools.LoadOrStore(size, &sync.Pool{
			New: func() interface{} {
				buf := make([]byte, size)
				return &buf
			},
		})
	}
	pool := poolI.(*sync.Pool)
	return *pool.Get().(*[]byte)
}

// putBuffer returns buf to the pool matching its capacity, restored to full
// length. Buffers whose capacity has no pool yet are dropped and left to the
// garbage collector.
func putBuffer(buf []byte) {
	size := cap(buf)
	poolI, ok := bufferPools.Load(size)
	if ok {
		pool := poolI.(*sync.Pool)
		b := buf[:size]
		pool.Put(&b)
	}
}
|
||||
|
||||
// FastCDCChunk represents a chunk from the FastCDC algorithm.
type FastCDCChunk struct {
	// Offset is the position of the chunk's first byte in the input stream.
	Offset int
	// Length is the number of bytes in the chunk.
	Length int
	// Data holds the chunk bytes. When produced by ReusableChunker.Next it
	// aliases the chunker's internal buffer and is only valid until the next
	// call to Next.
	Data []byte
	// Fingerprint is the rolling-hash value at the point the chunk was cut.
	Fingerprint uint64
}
|
||||
|
||||
// AcquireReusableChunker gets a chunker from the pool and initializes it for the given reader.
|
||||
func AcquireReusableChunker(rd io.Reader, minSize, avgSize, maxSize int) *ReusableChunker {
|
||||
c := reusableChunkerPool.Get().(*ReusableChunker)
|
||||
|
||||
bufSize := maxSize * 2
|
||||
|
||||
// Reuse buffer if it's the right size, otherwise get a new one
|
||||
if c.buf == nil || cap(c.buf) != bufSize {
|
||||
if c.buf != nil {
|
||||
putBuffer(c.buf)
|
||||
}
|
||||
c.buf = getBuffer(bufSize)
|
||||
} else {
|
||||
// Restore buffer to full capacity (may have been truncated by previous EOF)
|
||||
c.buf = c.buf[:cap(c.buf)]
|
||||
}
|
||||
|
||||
bits := int(math.Round(math.Log2(float64(avgSize))))
|
||||
normalization := 2
|
||||
smallBits := bits + normalization
|
||||
largeBits := bits - normalization
|
||||
|
||||
c.minSize = minSize
|
||||
c.maxSize = maxSize
|
||||
c.normSize = avgSize
|
||||
c.bufSize = bufSize
|
||||
c.maskS = (1 << smallBits) - 1
|
||||
c.maskL = (1 << largeBits) - 1
|
||||
c.rd = rd
|
||||
c.cursor = bufSize
|
||||
c.offset = 0
|
||||
c.eof = false
|
||||
|
||||
return c
|
||||
}
|
||||
|
||||
// Release returns the chunker to the pool for reuse.
|
||||
func (c *ReusableChunker) Release() {
|
||||
c.rd = nil
|
||||
reusableChunkerPool.Put(c)
|
||||
}
|
||||
|
||||
func (c *ReusableChunker) fillBuffer() error {
|
||||
n := len(c.buf) - c.cursor
|
||||
if n >= c.maxSize {
|
||||
return nil
|
||||
}
|
||||
|
||||
// Move all data after the cursor to the start of the buffer
|
||||
copy(c.buf[:n], c.buf[c.cursor:])
|
||||
c.cursor = 0
|
||||
|
||||
if c.eof {
|
||||
c.buf = c.buf[:n]
|
||||
return nil
|
||||
}
|
||||
|
||||
// Restore buffer to full capacity for reading
|
||||
c.buf = c.buf[:c.bufSize]
|
||||
|
||||
// Fill the rest of the buffer
|
||||
m, err := io.ReadFull(c.rd, c.buf[n:])
|
||||
if err == io.EOF || err == io.ErrUnexpectedEOF {
|
||||
c.buf = c.buf[:n+m]
|
||||
c.eof = true
|
||||
} else if err != nil {
|
||||
return err
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// Next returns the next chunk or io.EOF when done.
|
||||
// The returned Data slice is only valid until the next call to Next.
|
||||
func (c *ReusableChunker) Next() (FastCDCChunk, error) {
|
||||
if err := c.fillBuffer(); err != nil {
|
||||
return FastCDCChunk{}, err
|
||||
}
|
||||
if len(c.buf) == 0 {
|
||||
return FastCDCChunk{}, io.EOF
|
||||
}
|
||||
|
||||
length, fp := c.nextChunk(c.buf[c.cursor:])
|
||||
|
||||
chunk := FastCDCChunk{
|
||||
Offset: c.offset,
|
||||
Length: length,
|
||||
Data: c.buf[c.cursor : c.cursor+length],
|
||||
Fingerprint: fp,
|
||||
}
|
||||
|
||||
c.cursor += length
|
||||
c.offset += chunk.Length
|
||||
|
||||
return chunk, nil
|
||||
}
|
||||
|
||||
func (c *ReusableChunker) nextChunk(data []byte) (int, uint64) {
|
||||
fp := uint64(0)
|
||||
i := c.minSize
|
||||
|
||||
if len(data) <= c.minSize {
|
||||
return len(data), fp
|
||||
}
|
||||
|
||||
n := min(len(data), c.maxSize)
|
||||
|
||||
for ; i < min(n, c.normSize); i++ {
|
||||
fp = (fp << 1) + table[data[i]]
|
||||
if (fp & c.maskS) == 0 {
|
||||
return i + 1, fp
|
||||
}
|
||||
}
|
||||
|
||||
for ; i < n; i++ {
|
||||
fp = (fp << 1) + table[data[i]]
|
||||
if (fp & c.maskL) == 0 {
|
||||
return i + 1, fp
|
||||
}
|
||||
}
|
||||
|
||||
return i, fp
|
||||
}
|
||||
|
||||
// min returns the smaller of a and b.
func min(a, b int) int {
	if b <= a {
		return b
	}
	return a
}
|
||||
|
||||
// 256 random uint64s for the rolling hash function (from FastCDC paper)
|
||||
var table = [256]uint64{
|
||||
0xe80e8d55032474b3, 0x11b25b61f5924e15, 0x03aa5bd82a9eb669, 0xc45a153ef107a38c,
|
||||
0xeac874b86f0f57b9, 0xa5ccedec95ec79c7, 0xe15a3320ad42ac0a, 0x5ed3583fa63cec15,
|
||||
0xcd497bf624a4451d, 0xf9ade5b059683605, 0x773940c03fb11ca1, 0xa36b16e4a6ae15b2,
|
||||
0x67afd1adb5a89eac, 0xc44c75ee32f0038e, 0x2101790f365c0967, 0x76415c64a222fc4a,
|
||||
0x579929249a1e577a, 0xe4762fc41fdbf750, 0xea52198e57dfcdcc, 0xe2535aafe30b4281,
|
||||
0xcb1a1bd6c77c9056, 0x5a1aa9bfc4612a62, 0x15a728aef8943eb5, 0x2f8f09738a8ec8d9,
|
||||
0x200f3dec9fac8074, 0x0fa9a7b1e0d318df, 0x06c0804ffd0d8e3a, 0x630cbc412669dd25,
|
||||
0x10e34f85f4b10285, 0x2a6fe8164b9b6410, 0xcacb57d857d55810, 0x77f8a3a36ff11b46,
|
||||
0x66af517e0dc3003e, 0x76c073c789b4009a, 0x853230dbb529f22a, 0x1e9e9c09a1f77e56,
|
||||
0x1e871223802ee65d, 0x37fe4588718ff813, 0x10088539f30db464, 0x366f7470b80b72d1,
|
||||
0x33f2634d9a6b31db, 0xd43917751d69ea18, 0xa0f492bc1aa7b8de, 0x3f94e5a8054edd20,
|
||||
0xedfd6e25eb8b1dbf, 0x759517a54f196a56, 0xe81d5006ec7b6b17, 0x8dd8385fa894a6b7,
|
||||
0x45f4d5467b0d6f91, 0xa1f894699de22bc8, 0x33829d09ef93e0fe, 0x3e29e250caed603c,
|
||||
0xf7382cba7f63a45e, 0x970f95412bb569d1, 0xc7fcea456d356b4b, 0x723042513f3e7a57,
|
||||
0x17ae7688de3596f1, 0x27ac1fcd7cd23c1a, 0xf429beeb78b3f71f, 0xd0780692fb93a3f9,
|
||||
0x9f507e28a7c9842f, 0x56001ad536e433ae, 0x7e1dd1ecf58be306, 0x15fee353aa233fc6,
|
||||
0xb033a0730b7638e8, 0xeb593ad6bd2406d1, 0x7c86502574d0f133, 0xce3b008d4ccb4be7,
|
||||
0xf8566e3d383594c8, 0xb2c261e9b7af4429, 0xf685e7e253799dbb, 0x05d33ed60a494cbc,
|
||||
0xeaf88d55a4cb0d1a, 0x3ee9368a902415a1, 0x8980fe6a8493a9a4, 0x358ed008cb448631,
|
||||
0xd0cb7e37b46824b8, 0xe9bc375c0bc94f84, 0xea0bf1d8e6b55bb3, 0xb66a60d0f9f6f297,
|
||||
0x66db2cc4807b3758, 0x7e4e014afbca8b4d, 0xa5686a4938b0c730, 0xa5f0d7353d623316,
|
||||
0x26e38c349242d5e8, 0xeeefa80a29858e30, 0x8915cb912aa67386, 0x4b957a47bfc420d4,
|
||||
0xbb53d051a895f7e1, 0x09f5e3235f6911ce, 0x416b98e695cfb7ce, 0x97a08183344c5c86,
|
||||
0xbf68e0791839a861, 0xea05dde59ed3ed56, 0x0ca732280beda160, 0xac748ed62fe7f4e2,
|
||||
0xc686da075cf6e151, 0xe1ba5658f4af05c8, 0xe9ff09fbeb67cc35, 0xafaea9470323b28d,
|
||||
0x0291e8db5bb0ac2a, 0x342072a9bbee77ae, 0x03147eed6b3d0a9c, 0x21379d4de31dbadb,
|
||||
0x2388d965226fb986, 0x52c96988bfebabfa, 0xa6fc29896595bc2d, 0x38fa4af70aa46b8b,
|
||||
0xa688dd13939421ee, 0x99d5275d9b1415da, 0x453d31bb4fe73631, 0xde51debc1fbe3356,
|
||||
0x75a3c847a06c622f, 0xe80e32755d272579, 0x5444052250d8ec0d, 0x8f17dfda19580a3b,
|
||||
0xf6b3e9363a185e42, 0x7a42adec6868732f, 0x32cb6a07629203a2, 0x1eca8957defe56d9,
|
||||
0x9fa85e4bc78ff9ed, 0x20ff07224a499ca7, 0x3fa6295ff9682c70, 0xe3d5b1e3ce993eff,
|
||||
0xa341209362e0b79a, 0x64bd9eae5712ffe8, 0xceebb537babbd12a, 0x5586ef404315954f,
|
||||
0x46c3085c938ab51a, 0xa82ccb9199907cee, 0x8c51b6690a3523c8, 0xc4dbd4c9ae518332,
|
||||
0x979898dbb23db7b2, 0x1b5b585e6f672a9d, 0xce284da7c4903810, 0x841166e8bb5f1c4f,
|
||||
0xb7d884a3fceca7d0, 0xa76468f5a4572374, 0xc10c45f49ee9513d, 0x68f9a5663c1908c9,
|
||||
0x0095a13476a6339d, 0xd1d7516ffbe9c679, 0xfd94ab0c9726f938, 0x627468bbdb27c959,
|
||||
0xedc3f8988e4a8c9a, 0x58efd33f0dfaa499, 0x21e37d7e2ef4ac8b, 0x297f9ab5586259c6,
|
||||
0xda3ba4dc6cb9617d, 0xae11d8d9de2284d2, 0xcfeed88cb3729865, 0xefc2f9e4f03e2633,
|
||||
0x8226393e8f0855a4, 0xd6e25fd7acf3a767, 0x435784c3bfd6d14a, 0xf97142e6343fe757,
|
||||
0xd73b9fe826352f85, 0x6c3ac444b5b2bd76, 0xd8e88f3e9fd4a3fd, 0x31e50875c36f3460,
|
||||
0xa824f1bf88cf4d44, 0x54a4d2c8f5f25899, 0xbff254637ce3b1e6, 0xa02cfe92561b3caa,
|
||||
0x7bedb4edee9f0af7, 0x879c0620ac49a102, 0xa12c4ccd23b332e7, 0x09a5ff47bf94ed1e,
|
||||
0x7b62f43cd3046fa0, 0xaa3af0476b9c2fb9, 0x22e55301abebba8e, 0x3a6035c42747bd58,
|
||||
0x1705373106c8ec07, 0xb1f660de828d0628, 0x065fe82d89ca563d, 0xf555c2d8074d516d,
|
||||
0x6bb6c186b423ee99, 0x54a807be6f3120a8, 0x8a3c7fe2f88860b8, 0xbeffc344f5118e81,
|
||||
0xd686e80b7d1bd268, 0x661aef4ef5e5e88b, 0x5bf256c654cd1dda, 0x9adb1ab85d7640f4,
|
||||
0x68449238920833a2, 0x843279f4cebcb044, 0xc8710cdefa93f7bb, 0x236943294538f3e6,
|
||||
0x80d7d136c486d0b4, 0x61653956b28851d3, 0x3f843be9a9a956b5, 0xf73cfbbf137987e5,
|
||||
0xcf0cb6dee8ceac2c, 0x50c401f52f185cae, 0xbdbe89ce735c4c1c, 0xeef3ade9c0570bc7,
|
||||
0xbe8b066f8f64cbf6, 0x5238d6131705dcb9, 0x20219086c950e9f6, 0x634468d9ed74de02,
|
||||
0x0aba4b3d705c7fa5, 0x3374416f725a6672, 0xe7378bdf7beb3bc6, 0x0f7b6a1b1cee565b,
|
||||
0x234e4c41b0c33e64, 0x4efa9a0c3f21fe28, 0x1167fc551643e514, 0x9f81a69d3eb01fa4,
|
||||
0xdb75c22b12306ed0, 0xe25055d738fc9686, 0x9f9f167a3f8507bb, 0x195f8336d3fbe4d3,
|
||||
0x8442b6feffdcb6f6, 0x1e07ed24746ffde9, 0x140e31462d555266, 0x8bd0ce515ae1406e,
|
||||
0x2c0be0042b5584b3, 0x35a23d0e15d45a60, 0xc14f1ba147d9bc83, 0xbbf168691264b23f,
|
||||
0xad2cc7b57e589ade, 0x9501963154c7815c, 0x9664afa6b8d67d47, 0x7f9e5101fea0a81c,
|
||||
0x45ecffb610d25bfd, 0x3157f7aecf9b6ab3, 0xc43ca6f88d87501d, 0x9576ff838dee38dc,
|
||||
0x93f21afe0ce1c7d7, 0xceac699df343d8f9, 0x2fec49e29f03398d, 0x8805ccd5730281ed,
|
||||
0xf9fc16fc750a8e59, 0x35308cc771adf736, 0x4a57b7c9ee2b7def, 0x03a4c6cdc937a02a,
|
||||
0x6c9a8a269fc8c4fc, 0x4681decec7a03f43, 0x342eecded1353ef9, 0x8be0552d8413a867,
|
||||
0xc7b4ac51beda8be8, 0xebcc64fb719842c0, 0xde8e4c7fb6d40c1c, 0xcc8263b62f9738b1,
|
||||
0xd3cfc0f86511929a, 0x466024ce8bb226ea, 0x459ff690253a3c18, 0x98b27e9d91284c9c,
|
||||
0x75c3ae8aa3af373d, 0xfbf8f8e79a866ffc, 0x32327f59d0662799, 0x8228b57e729e9830,
|
||||
0x065ceb7a18381b58, 0xd2177671a31dc5ff, 0x90cd801f2f8701f9, 0x9d714428471c65fe,
|
||||
}
|
||||
@@ -10,16 +10,16 @@ import (
|
||||
"syscall"
|
||||
"time"
|
||||
|
||||
"git.eeqj.de/sneak/vaultik/internal/config"
|
||||
"git.eeqj.de/sneak/vaultik/internal/database"
|
||||
"git.eeqj.de/sneak/vaultik/internal/globals"
|
||||
"git.eeqj.de/sneak/vaultik/internal/log"
|
||||
"git.eeqj.de/sneak/vaultik/internal/pidlock"
|
||||
"git.eeqj.de/sneak/vaultik/internal/snapshot"
|
||||
"git.eeqj.de/sneak/vaultik/internal/storage"
|
||||
"git.eeqj.de/sneak/vaultik/internal/vaultik"
|
||||
"github.com/adrg/xdg"
|
||||
"go.uber.org/fx"
|
||||
"sneak.berlin/go/vaultik/internal/config"
|
||||
"sneak.berlin/go/vaultik/internal/database"
|
||||
"sneak.berlin/go/vaultik/internal/globals"
|
||||
"sneak.berlin/go/vaultik/internal/log"
|
||||
"sneak.berlin/go/vaultik/internal/pidlock"
|
||||
"sneak.berlin/go/vaultik/internal/snapshot"
|
||||
"sneak.berlin/go/vaultik/internal/storage"
|
||||
"sneak.berlin/go/vaultik/internal/vaultik"
|
||||
)
|
||||
|
||||
// AppOptions contains common options for creating the fx application.
|
||||
@@ -125,7 +125,7 @@ func RunApp(ctx context.Context, app *fx.App) error {
|
||||
// It acquires a PID lock before starting to prevent concurrent instances.
|
||||
func RunWithApp(ctx context.Context, opts AppOptions) error {
|
||||
// Acquire PID lock to prevent concurrent instances
|
||||
lockDir := filepath.Join(xdg.DataHome, "berlin.sneak.app.vaultik")
|
||||
lockDir := filepath.Join(xdg.DataHome, "vaultik")
|
||||
lock, err := pidlock.Acquire(lockDir)
|
||||
if err != nil {
|
||||
if errors.Is(err, pidlock.ErrAlreadyRunning) {
|
||||
|
||||
522
internal/cli/config.go
Normal file
522
internal/cli/config.go
Normal file
@@ -0,0 +1,522 @@
|
||||
package cli
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"os"
|
||||
"os/exec"
|
||||
"path/filepath"
|
||||
"strconv"
|
||||
"strings"
|
||||
|
||||
"github.com/spf13/cobra"
|
||||
"gopkg.in/yaml.v3"
|
||||
)
|
||||
|
||||
const defaultConfigTemplate = `# vaultik configuration
|
||||
# Documentation: https://sneak.berlin/go/vaultik
|
||||
|
||||
# ─── REQUIRED ────────────────────────────────────────────────────────────────
|
||||
|
||||
# Age recipient public keys for encryption.
|
||||
# Backups are encrypted to ALL listed recipients. Any one of the corresponding
|
||||
# private keys can decrypt. Generate a keypair with:
|
||||
# age-keygen -o vaultik_backup_private_key.txt
|
||||
# grep 'public key' vaultik_backup_private_key.txt
|
||||
age_recipients:
|
||||
- age1REPLACE_WITH_YOUR_PUBLIC_KEY
|
||||
|
||||
# Named snapshots. Each snapshot backs up one or more paths and can have its
|
||||
# own exclude patterns in addition to the global excludes below.
|
||||
#
|
||||
# Exclude pattern semantics:
|
||||
# - Patterns starting with / are anchored to the snapshot path root
|
||||
# (e.g. "/Library/Caches" matches only ~/Library/Caches in a ~ snapshot)
|
||||
# - Patterns without a leading / match anywhere in the tree
|
||||
# (e.g. ".cache" matches any directory named .cache at any depth)
|
||||
# - Globs are supported: *, **, ?
|
||||
snapshots:
|
||||
home:
|
||||
paths:
|
||||
- "~"
|
||||
exclude:
|
||||
# Trash, temp, and filesystem metadata
|
||||
- "/.Trash"
|
||||
- "/.Trashes"
|
||||
- "/.fseventsd"
|
||||
- "/.Spotlight-V100"
|
||||
- "/.TemporaryItems"
|
||||
- "/tmp"
|
||||
- "/.rnd"
|
||||
- ".DS_Store"
|
||||
# Caches and package manager state (rebuildable)
|
||||
- ".cache"
|
||||
- ".bundle"
|
||||
- "/.cpan/build"
|
||||
- "/.cpan/sources"
|
||||
- "/.gradle/caches"
|
||||
- "/.dropbox"
|
||||
- "/.minikube/cache"
|
||||
- "/.local/share/containers/podman/machine"
|
||||
- "/.persepolis"
|
||||
- "/Library/Caches"
|
||||
- "/Library/Logs"
|
||||
- "/Library/Cookies"
|
||||
- "/Library/Metadata"
|
||||
- "/Library/Suggestions"
|
||||
- "/Library/PubSub"
|
||||
- "/Library/Homebrew"
|
||||
- "/Library/Developer"
|
||||
- "/Library/Google/GoogleSoftwareUpdate"
|
||||
- "/Library/Preferences/Macromedia/Flash Player"
|
||||
- "/Library/Preferences/SDMHelpData"
|
||||
- "/Library/VoiceTrigger/SAT"
|
||||
# Language/toolchain package caches (rebuildable from registries)
|
||||
- "/.npm"
|
||||
- "/.cargo/registry"
|
||||
- "/.cargo/git"
|
||||
- "/.rustup/toolchains"
|
||||
- "/go/pkg/mod"
|
||||
- "/.m2/repository"
|
||||
- "/.vagrant.d/boxes"
|
||||
- "node_modules"
|
||||
- "__pycache__"
|
||||
- ".venv"
|
||||
# Virtual machine disk images (huge; remove these lines to back them up)
|
||||
- "/Parallels"
|
||||
- "/Virtual Machines.localized"
|
||||
- "/VirtualBox VMs"
|
||||
- "/.orbstack"
|
||||
- "/Library/Containers/com.utmapp.UTM"
|
||||
# Downloaded LLM models (huge, re-downloadable)
|
||||
- "/.ollama/models"
|
||||
- "/.lmstudio/models"
|
||||
# Cloud-synced storage. These are synced to a provider already, and on
|
||||
# modern macOS may contain dataless placeholder files that the backup
|
||||
# would force-download in full.
|
||||
- "/Library/CloudStorage"
|
||||
- "/Library/Mobile Documents"
|
||||
# Android SDK and emulator images (re-downloadable)
|
||||
- "/Library/Android/sdk"
|
||||
- "/.android/avd"
|
||||
# Cloud-synced or restorable-from-server data
|
||||
- "/Library/Mail"
|
||||
- "/Library/Mail Downloads"
|
||||
- "/Library/Safari"
|
||||
- "/Library/Application Support/Evernote"
|
||||
- "/Library/Application Support/MobileSync"
|
||||
- "/Library/Application Support/SyncServices"
|
||||
- "/Library/Application Support/protonmail/bridge/cache"
|
||||
- "/Library/Application Support/Syncthing/index-*"
|
||||
- "/Library/Syncthing/folders"
|
||||
- "/Documents/Dropbox/.dropbox.cache"
|
||||
# Large rebuildable app data (games, media caches, device backups)
|
||||
- "/Applications/Fortnite"
|
||||
- "/Documents/Steam Content"
|
||||
- "/Library/Application Support/Ableton"
|
||||
- "/Library/Application Support/CrossOver Games"
|
||||
- "/Library/Application Support/SecondLife/cache"
|
||||
- "/Library/Application Support/Steam/SteamApps"
|
||||
- "/Library/Containers/com.docker.docker"
|
||||
- "/Library/Group Containers/group.com.apple.secure-control-center-preferences"
|
||||
- "/Library/iTunes/iPad Software Updates"
|
||||
- "/Library/iTunes/iPhone Software Updates"
|
||||
- "/Movies/CacheClip"
|
||||
- "/Movies/ProxyMedia"
|
||||
- "/Music/iTunes/Album Artwork"
|
||||
- "/Pictures/iPod Photo Cache"
|
||||
|
||||
# Third-party applications. OS-provided apps live in /System/Applications
|
||||
# on modern macOS and are never in /Applications, but Apple-installed
|
||||
# App Store apps (Safari, GarageBand, iWork, iMovie) are excluded since
|
||||
# they are re-downloadable.
|
||||
apps:
|
||||
paths:
|
||||
- /Applications
|
||||
exclude:
|
||||
- ".DS_Store"
|
||||
- "/Safari.app"
|
||||
- "/GarageBand.app"
|
||||
- "/iMovie.app"
|
||||
- "/Keynote.app"
|
||||
- "/Numbers.app"
|
||||
- "/Pages.app"
|
||||
- "/Xcode.app"
|
||||
- "/Spotify.app"
|
||||
- "/Steam.app"
|
||||
- "/VirtualBox.app"
|
||||
- "/Utilities/Adobe Installers"
|
||||
|
||||
# Storage backend (pick ONE of the three forms below).
|
||||
#
|
||||
# S3-compatible:
|
||||
# storage_url: "s3://mybucket/backups?endpoint=s3.example.com&region=us-east-1"
|
||||
# (also set s3.access_key_id and s3.secret_access_key below)
|
||||
#
|
||||
# Local filesystem:
|
||||
# storage_url: "file:///mnt/backups/vaultik"
|
||||
#
|
||||
# Rclone (requires rclone configured separately):
|
||||
# storage_url: "rclone://myremote/path/to/backups"
|
||||
storage_url: ""
|
||||
|
||||
# ─── S3 CREDENTIALS (required for s3:// storage_url) ────────────────────────
|
||||
|
||||
# s3:
|
||||
# access_key_id: YOUR_ACCESS_KEY
|
||||
# secret_access_key: YOUR_SECRET_KEY
|
||||
# # region: us-east-1 # Default: us-east-1
|
||||
# # use_ssl: true # Default: true
|
||||
# # part_size: 5MB # Multipart upload part size. Default: 5MB
|
||||
|
||||
# ─── OPTIONAL ────────────────────────────────────────────────────────────────
|
||||
|
||||
# Global exclude patterns applied to ALL snapshots.
|
||||
# Snapshot-specific excludes are additive.
|
||||
# exclude:
|
||||
# - "*.log"
|
||||
# - "*.tmp"
|
||||
# - ".git"
|
||||
# - "node_modules"
|
||||
|
||||
# Average chunk size for content-defined chunking (FastCDC).
|
||||
# Smaller = better deduplication but more metadata overhead.
|
||||
# Accepts: 1MB, 10M, 64KB, etc.
|
||||
# Default: 10MB
|
||||
# chunk_size: 10MB
|
||||
|
||||
# Maximum blob size before splitting into a new blob.
|
||||
# Accepts: 1GB, 10G, 500MB, etc.
|
||||
# Default: 10GB
|
||||
# blob_size_limit: 10GB
|
||||
|
||||
# Zstd compression level (1-19). Higher = better ratio but slower.
|
||||
# Default: 3
|
||||
# compression_level: 3
|
||||
|
||||
# Hostname used in snapshot IDs. Default: system hostname.
|
||||
# hostname: myserver
|
||||
|
||||
# Path to the local SQLite index database.
|
||||
# Default: the platform data directory, e.g.
|
||||
# macOS: ~/Library/Application Support/vaultik/index.sqlite
|
||||
# Linux: ~/.local/share/vaultik/index.sqlite
|
||||
# index_path: /path/to/index.sqlite
|
||||
`
|
||||
|
||||
// NewConfigCommand creates the config command group.
|
||||
func NewConfigCommand() *cobra.Command {
|
||||
cmd := &cobra.Command{
|
||||
Use: "config",
|
||||
Short: "Manage the configuration file",
|
||||
Long: "Commands for creating, editing, and querying the vaultik config file.",
|
||||
}
|
||||
|
||||
cmd.AddCommand(newConfigInitCommand())
|
||||
cmd.AddCommand(newConfigEditCommand())
|
||||
cmd.AddCommand(newConfigGetCommand())
|
||||
cmd.AddCommand(newConfigSetCommand())
|
||||
|
||||
return cmd
|
||||
}
|
||||
|
||||
// newConfigInitCommand creates the 'config init' subcommand.
|
||||
func newConfigInitCommand() *cobra.Command {
|
||||
return &cobra.Command{
|
||||
Use: "init",
|
||||
Short: "Write a default config file",
|
||||
Long: `Creates a default configuration file with commented explanations
|
||||
for every setting. If a config file already exists at the target path,
|
||||
the command refuses to overwrite it.
|
||||
|
||||
The config is written to the path from --config, $VAULTIK_CONFIG, or
|
||||
the platform default config directory (e.g. ~/Library/Application Support/
|
||||
on macOS, ~/.config/ on Linux, /etc/vaultik/ as root).`,
|
||||
Args: cobra.NoArgs,
|
||||
RunE: func(cmd *cobra.Command, args []string) error {
|
||||
path := configPathForInit()
|
||||
|
||||
if _, err := os.Stat(path); err == nil {
|
||||
return fmt.Errorf("config file already exists: %s", path)
|
||||
}
|
||||
|
||||
dir := filepath.Dir(path)
|
||||
if err := os.MkdirAll(dir, 0o755); err != nil {
|
||||
return fmt.Errorf("creating config directory %s: %w", dir, err)
|
||||
}
|
||||
|
||||
if err := os.WriteFile(path, []byte(defaultConfigTemplate), 0o600); err != nil {
|
||||
return fmt.Errorf("writing config file: %w", err)
|
||||
}
|
||||
|
||||
fmt.Printf("Config written to %s\n", path)
|
||||
fmt.Println("Edit it to set your age_recipients, snapshots, and storage_url.")
|
||||
return nil
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
// newConfigEditCommand creates the 'config edit' subcommand.
|
||||
func newConfigEditCommand() *cobra.Command {
|
||||
return &cobra.Command{
|
||||
Use: "edit",
|
||||
Short: "Open the config file in $EDITOR",
|
||||
Args: cobra.NoArgs,
|
||||
RunE: func(cmd *cobra.Command, args []string) error {
|
||||
path, err := ResolveConfigPath()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
editor := os.Getenv("EDITOR")
|
||||
if editor == "" {
|
||||
editor = "vi"
|
||||
}
|
||||
|
||||
ed := exec.Command(editor, path)
|
||||
ed.Stdin = os.Stdin
|
||||
ed.Stdout = os.Stdout
|
||||
ed.Stderr = os.Stderr
|
||||
return ed.Run()
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
// newConfigGetCommand creates the 'config get' subcommand.
|
||||
func newConfigGetCommand() *cobra.Command {
|
||||
return &cobra.Command{
|
||||
Use: "get <key>",
|
||||
Short: "Print a config value by dotted path (e.g. s3.bucket)",
|
||||
Args: cobra.ExactArgs(1),
|
||||
RunE: func(cmd *cobra.Command, args []string) error {
|
||||
path, err := ResolveConfigPath()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
root, err := loadYAMLFile(path)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
node, err := yamlPathGet(root, strings.Split(args[0], "."))
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if node.Kind == yaml.ScalarNode {
|
||||
fmt.Println(node.Value)
|
||||
return nil
|
||||
}
|
||||
|
||||
out, err := yaml.Marshal(node)
|
||||
if err != nil {
|
||||
return fmt.Errorf("marshaling value: %w", err)
|
||||
}
|
||||
fmt.Print(string(out))
|
||||
return nil
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
// newConfigSetCommand creates the 'config set' subcommand.
|
||||
func newConfigSetCommand() *cobra.Command {
|
||||
return &cobra.Command{
|
||||
Use: "set <key> <value>",
|
||||
Short: "Set a config value by dotted path (e.g. compression_level 5)",
|
||||
Long: `Sets a scalar config value addressed by dotted YAML path and writes
|
||||
the file back, preserving comments and formatting. Intermediate maps
|
||||
are created as needed.
|
||||
|
||||
Examples:
|
||||
vaultik config set compression_level 9
|
||||
vaultik config set s3.bucket mybucket
|
||||
vaultik config set storage_url "file:///mnt/backups"`,
|
||||
Args: cobra.ExactArgs(2),
|
||||
RunE: func(cmd *cobra.Command, args []string) error {
|
||||
path, err := ResolveConfigPath()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
root, err := loadYAMLFile(path)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if err := yamlPathSet(root, strings.Split(args[0], "."), args[1]); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
out, err := yaml.Marshal(root)
|
||||
if err != nil {
|
||||
return fmt.Errorf("marshaling config: %w", err)
|
||||
}
|
||||
|
||||
mode := os.FileMode(0o600)
|
||||
if info, err := os.Stat(path); err == nil {
|
||||
mode = info.Mode().Perm()
|
||||
}
|
||||
|
||||
if err := os.WriteFile(path, out, mode); err != nil {
|
||||
return fmt.Errorf("writing config file: %w", err)
|
||||
}
|
||||
|
||||
fmt.Printf("%s = %s\n", args[0], args[1])
|
||||
return nil
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
// loadYAMLFile parses a YAML file into a yaml.Node document tree,
|
||||
// which preserves comments and ordering for round-tripping.
|
||||
func loadYAMLFile(path string) (*yaml.Node, error) {
|
||||
data, err := os.ReadFile(path)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("reading config file: %w", err)
|
||||
}
|
||||
|
||||
var root yaml.Node
|
||||
if err := yaml.Unmarshal(data, &root); err != nil {
|
||||
return nil, fmt.Errorf("parsing config file: %w", err)
|
||||
}
|
||||
|
||||
// An empty file yields a zero node; normalize to an empty mapping document.
|
||||
if root.Kind == 0 {
|
||||
root = yaml.Node{
|
||||
Kind: yaml.DocumentNode,
|
||||
Content: []*yaml.Node{{Kind: yaml.MappingNode}},
|
||||
}
|
||||
}
|
||||
|
||||
return &root, nil
|
||||
}
|
||||
|
||||
// yamlPathGet navigates a dotted key path through mapping and sequence
|
||||
// nodes and returns the value node. Numeric path components index into
|
||||
// sequences (e.g. "age_recipients.0").
|
||||
func yamlPathGet(root *yaml.Node, keys []string) (*yaml.Node, error) {
|
||||
node := root
|
||||
if node.Kind == yaml.DocumentNode {
|
||||
if len(node.Content) == 0 {
|
||||
return nil, fmt.Errorf("empty config file")
|
||||
}
|
||||
node = node.Content[0]
|
||||
}
|
||||
|
||||
for i, key := range keys {
|
||||
switch node.Kind {
|
||||
case yaml.MappingNode:
|
||||
found := false
|
||||
for j := 0; j+1 < len(node.Content); j += 2 {
|
||||
if node.Content[j].Value == key {
|
||||
node = node.Content[j+1]
|
||||
found = true
|
||||
break
|
||||
}
|
||||
}
|
||||
if !found {
|
||||
return nil, fmt.Errorf("key not found: %s", strings.Join(keys[:i+1], "."))
|
||||
}
|
||||
case yaml.SequenceNode:
|
||||
idx, err := strconv.Atoi(key)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("key %q is a list; use a numeric index", strings.Join(keys[:i], "."))
|
||||
}
|
||||
if idx < 0 || idx >= len(node.Content) {
|
||||
return nil, fmt.Errorf("index %d out of range for %s (len %d)", idx, strings.Join(keys[:i], "."), len(node.Content))
|
||||
}
|
||||
node = node.Content[idx]
|
||||
default:
|
||||
return nil, fmt.Errorf("key %q is not a map or list", strings.Join(keys[:i], "."))
|
||||
}
|
||||
}
|
||||
|
||||
return node, nil
|
||||
}
|
||||
|
||||
// yamlPathSet navigates a dotted key path, creating intermediate maps as
|
||||
// needed, and sets the final key to the given scalar value. Numeric path
|
||||
// components index into sequences; an index equal to the sequence length
|
||||
// appends a new element (e.g. "age_recipients.1" on a 1-element list).
|
||||
func yamlPathSet(root *yaml.Node, keys []string, value string) error {
|
||||
node := root
|
||||
if node.Kind == yaml.DocumentNode {
|
||||
if len(node.Content) == 0 {
|
||||
node.Content = []*yaml.Node{{Kind: yaml.MappingNode}}
|
||||
}
|
||||
node = node.Content[0]
|
||||
}
|
||||
|
||||
for i, key := range keys {
|
||||
last := i == len(keys)-1
|
||||
|
||||
switch node.Kind {
|
||||
case yaml.MappingNode:
|
||||
var valueNode *yaml.Node
|
||||
for j := 0; j+1 < len(node.Content); j += 2 {
|
||||
if node.Content[j].Value == key {
|
||||
valueNode = node.Content[j+1]
|
||||
break
|
||||
}
|
||||
}
|
||||
|
||||
if valueNode == nil {
|
||||
keyNode := &yaml.Node{Kind: yaml.ScalarNode, Value: key}
|
||||
valueNode = &yaml.Node{Kind: yaml.MappingNode}
|
||||
if last {
|
||||
valueNode = &yaml.Node{Kind: yaml.ScalarNode, Value: value}
|
||||
}
|
||||
node.Content = append(node.Content, keyNode, valueNode)
|
||||
} else if last {
|
||||
setScalar(valueNode, value)
|
||||
}
|
||||
|
||||
node = valueNode
|
||||
|
||||
case yaml.SequenceNode:
|
||||
idx, err := strconv.Atoi(key)
|
||||
if err != nil {
|
||||
return fmt.Errorf("key %q is a list; use a numeric index", strings.Join(keys[:i], "."))
|
||||
}
|
||||
if idx < 0 || idx > len(node.Content) {
|
||||
return fmt.Errorf("index %d out of range for %s (len %d)", idx, strings.Join(keys[:i], "."), len(node.Content))
|
||||
}
|
||||
if idx == len(node.Content) {
|
||||
newNode := &yaml.Node{Kind: yaml.MappingNode}
|
||||
if last {
|
||||
newNode = &yaml.Node{Kind: yaml.ScalarNode, Value: value}
|
||||
}
|
||||
node.Content = append(node.Content, newNode)
|
||||
} else if last {
|
||||
setScalar(node.Content[idx], value)
|
||||
}
|
||||
node = node.Content[idx]
|
||||
|
||||
default:
|
||||
return fmt.Errorf("key %q is not a map or list", strings.Join(keys[:i], "."))
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// setScalar overwrites a node in place with a plain scalar value.
|
||||
func setScalar(n *yaml.Node, value string) {
|
||||
n.Kind = yaml.ScalarNode
|
||||
n.Tag = ""
|
||||
n.Value = value
|
||||
n.Content = nil
|
||||
n.Style = 0
|
||||
}
|
||||
|
||||
// configPathForInit returns the config path to write, checking --config flag,
|
||||
// VAULTIK_CONFIG env, and the platform default.
|
||||
func configPathForInit() string {
|
||||
if rootFlags.ConfigPath != "" {
|
||||
return rootFlags.ConfigPath
|
||||
}
|
||||
if envPath := os.Getenv("VAULTIK_CONFIG"); envPath != "" {
|
||||
return envPath
|
||||
}
|
||||
return DefaultConfigPath()
|
||||
}
|
||||
161
internal/cli/config_test.go
Normal file
161
internal/cli/config_test.go
Normal file
@@ -0,0 +1,161 @@
|
||||
package cli
|
||||
|
||||
import (
|
||||
"strings"
|
||||
"testing"
|
||||
|
||||
"gopkg.in/yaml.v3"
|
||||
"sneak.berlin/go/vaultik/internal/config"
|
||||
)
|
||||
|
||||
// TestDefaultConfigTemplateParses ensures the init template is valid YAML
|
||||
// that unmarshals into the Config struct with the expected snapshots.
|
||||
func TestDefaultConfigTemplateParses(t *testing.T) {
|
||||
var cfg config.Config
|
||||
if err := yaml.Unmarshal([]byte(defaultConfigTemplate), &cfg); err != nil {
|
||||
t.Fatalf("default config template is not valid YAML: %v", err)
|
||||
}
|
||||
|
||||
if len(cfg.AgeRecipients) != 1 {
|
||||
t.Errorf("expected 1 placeholder age recipient, got %d", len(cfg.AgeRecipients))
|
||||
}
|
||||
|
||||
home, ok := cfg.Snapshots["home"]
|
||||
if !ok {
|
||||
t.Fatal("expected 'home' snapshot in default config")
|
||||
}
|
||||
if len(home.Paths) == 0 {
|
||||
t.Error("home snapshot should have at least one path")
|
||||
}
|
||||
if len(home.Exclude) == 0 {
|
||||
t.Error("home snapshot should have exclude patterns")
|
||||
}
|
||||
|
||||
apps, ok := cfg.Snapshots["apps"]
|
||||
if !ok {
|
||||
t.Fatal("expected 'apps' snapshot in default config")
|
||||
}
|
||||
if len(apps.Paths) != 1 || apps.Paths[0] != "/Applications" {
|
||||
t.Errorf("apps snapshot should back up /Applications, got %v", apps.Paths)
|
||||
}
|
||||
if len(apps.Exclude) == 0 {
|
||||
t.Error("apps snapshot should have exclude patterns")
|
||||
}
|
||||
}
|
||||
|
||||
const testYAML = `# top comment
|
||||
compression_level: 3
|
||||
age_recipients:
|
||||
- age1aaa
|
||||
s3:
|
||||
bucket: oldbucket # inline comment
|
||||
region: us-east-1
|
||||
snapshots:
|
||||
home:
|
||||
paths:
|
||||
- "~"
|
||||
`
|
||||
|
||||
func parseTestYAML(t *testing.T) *yaml.Node {
|
||||
t.Helper()
|
||||
var root yaml.Node
|
||||
if err := yaml.Unmarshal([]byte(testYAML), &root); err != nil {
|
||||
t.Fatalf("parsing test yaml: %v", err)
|
||||
}
|
||||
return &root
|
||||
}
|
||||
|
||||
func TestYAMLPathGet(t *testing.T) {
|
||||
root := parseTestYAML(t)
|
||||
|
||||
tests := []struct {
|
||||
path string
|
||||
want string
|
||||
err bool
|
||||
}{
|
||||
{"compression_level", "3", false},
|
||||
{"s3.bucket", "oldbucket", false},
|
||||
{"s3.region", "us-east-1", false},
|
||||
{"age_recipients.0", "age1aaa", false},
|
||||
{"age_recipients.5", "", true},
|
||||
{"age_recipients.notanumber", "", true},
|
||||
{"s3.nonexistent", "", true},
|
||||
{"nonexistent", "", true},
|
||||
{"compression_level.sub", "", true},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.path, func(t *testing.T) {
|
||||
node, err := yamlPathGet(root, splitPath(tt.path))
|
||||
if tt.err {
|
||||
if err == nil {
|
||||
t.Fatalf("expected error for %q", tt.path)
|
||||
}
|
||||
return
|
||||
}
|
||||
if err != nil {
|
||||
t.Fatalf("unexpected error: %v", err)
|
||||
}
|
||||
if node.Value != tt.want {
|
||||
t.Errorf("get %q = %q, want %q", tt.path, node.Value, tt.want)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestYAMLPathSet(t *testing.T) {
|
||||
root := parseTestYAML(t)
|
||||
|
||||
// Overwrite existing nested value
|
||||
if err := yamlPathSet(root, splitPath("s3.bucket"), "newbucket"); err != nil {
|
||||
t.Fatalf("set s3.bucket: %v", err)
|
||||
}
|
||||
|
||||
// Create new nested key with intermediate map
|
||||
if err := yamlPathSet(root, splitPath("s3.endpoint"), "s3.example.com"); err != nil {
|
||||
t.Fatalf("set s3.endpoint: %v", err)
|
||||
}
|
||||
if err := yamlPathSet(root, splitPath("newmap.newkey"), "val"); err != nil {
|
||||
t.Fatalf("set newmap.newkey: %v", err)
|
||||
}
|
||||
|
||||
// Overwrite a sequence element and append a new one
|
||||
if err := yamlPathSet(root, splitPath("age_recipients.0"), "age1bbb"); err != nil {
|
||||
t.Fatalf("set age_recipients.0: %v", err)
|
||||
}
|
||||
if err := yamlPathSet(root, splitPath("age_recipients.1"), "age1ccc"); err != nil {
|
||||
t.Fatalf("append age_recipients.1: %v", err)
|
||||
}
|
||||
if err := yamlPathSet(root, splitPath("age_recipients.5"), "age1ddd"); err == nil {
|
||||
t.Error("expected out-of-range append to fail")
|
||||
}
|
||||
|
||||
// Round-trip and verify values + comment preservation
|
||||
out, err := yaml.Marshal(root)
|
||||
if err != nil {
|
||||
t.Fatalf("marshal: %v", err)
|
||||
}
|
||||
text := string(out)
|
||||
|
||||
for _, want := range []string{"newbucket", "s3.example.com", "newkey: val", "# top comment", "# inline comment", "age1bbb", "age1ccc"} {
|
||||
if !contains(text, want) {
|
||||
t.Errorf("round-tripped YAML missing %q:\n%s", want, text)
|
||||
}
|
||||
}
|
||||
|
||||
got, err := yamlPathGet(root, splitPath("s3.bucket"))
|
||||
if err != nil {
|
||||
t.Fatalf("get after set: %v", err)
|
||||
}
|
||||
if got.Value != "newbucket" {
|
||||
t.Errorf("s3.bucket = %q after set, want newbucket", got.Value)
|
||||
}
|
||||
}
|
||||
|
||||
// splitPath breaks a dotted key path into its components.
func splitPath(s string) []string {
	const sep = "."
	parts := strings.Split(s, sep)
	return parts
}
|
||||
|
||||
// contains reports whether needle occurs anywhere within haystack.
func contains(haystack, needle string) bool {
	found := strings.Contains(haystack, needle)
	return found
}
|
||||
102
internal/cli/database.go
Normal file
102
internal/cli/database.go
Normal file
@@ -0,0 +1,102 @@
|
||||
package cli
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"os"
|
||||
|
||||
"github.com/spf13/cobra"
|
||||
"sneak.berlin/go/vaultik/internal/config"
|
||||
"sneak.berlin/go/vaultik/internal/log"
|
||||
)
|
||||
|
||||
// NewDatabaseCommand creates the database command group
|
||||
func NewDatabaseCommand() *cobra.Command {
|
||||
cmd := &cobra.Command{
|
||||
Use: "database",
|
||||
Short: "Manage the local state database",
|
||||
Long: `Commands for managing the local SQLite state database.`,
|
||||
}
|
||||
|
||||
cmd.AddCommand(
|
||||
newDatabasePurgeCommand(),
|
||||
)
|
||||
|
||||
return cmd
|
||||
}
|
||||
|
||||
// newDatabasePurgeCommand creates the database purge command
|
||||
func newDatabasePurgeCommand() *cobra.Command {
|
||||
var force bool
|
||||
|
||||
cmd := &cobra.Command{
|
||||
Use: "purge",
|
||||
Short: "Delete the local state database",
|
||||
Long: `Completely removes the local SQLite state database.
|
||||
|
||||
This will erase all local tracking of:
|
||||
- File metadata and change detection state
|
||||
- Chunk and blob mappings
|
||||
- Local snapshot records
|
||||
|
||||
The remote storage is NOT affected. After purging, the next backup will
|
||||
perform a full scan and re-deduplicate against existing remote blobs.
|
||||
|
||||
Use --force to skip the confirmation prompt.`,
|
||||
Args: cobra.NoArgs,
|
||||
RunE: func(cmd *cobra.Command, args []string) error {
|
||||
// Resolve config path
|
||||
configPath, err := ResolveConfigPath()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// Load config to get database path
|
||||
cfg, err := config.Load(configPath)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to load config: %w", err)
|
||||
}
|
||||
|
||||
dbPath := cfg.IndexPath
|
||||
|
||||
// Check if database exists
|
||||
if _, err := os.Stat(dbPath); os.IsNotExist(err) {
|
||||
fmt.Printf("Database does not exist: %s\n", dbPath)
|
||||
return nil
|
||||
}
|
||||
|
||||
// Confirm unless --force
|
||||
if !force {
|
||||
fmt.Printf("This will delete the local state database at:\n %s\n\n", dbPath)
|
||||
fmt.Print("Are you sure? Type 'yes' to confirm: ")
|
||||
var confirm string
|
||||
if _, err := fmt.Scanln(&confirm); err != nil || confirm != "yes" {
|
||||
fmt.Println("Aborted.")
|
||||
return nil
|
||||
}
|
||||
}
|
||||
|
||||
// Delete the database file
|
||||
if err := os.Remove(dbPath); err != nil {
|
||||
return fmt.Errorf("failed to delete database: %w", err)
|
||||
}
|
||||
|
||||
// Also delete WAL and SHM files if they exist
|
||||
walPath := dbPath + "-wal"
|
||||
shmPath := dbPath + "-shm"
|
||||
_ = os.Remove(walPath) // Ignore errors - files may not exist
|
||||
_ = os.Remove(shmPath)
|
||||
|
||||
rootFlags := GetRootFlags()
|
||||
if !rootFlags.Quiet {
|
||||
fmt.Printf("Database purged: %s\n", dbPath)
|
||||
}
|
||||
|
||||
log.Info("Local state database purged", "path", dbPath)
|
||||
return nil
|
||||
},
|
||||
}
|
||||
|
||||
cmd.Flags().BoolVar(&force, "force", false, "Skip confirmation prompt")
|
||||
|
||||
return cmd
|
||||
}
|
||||
@@ -18,7 +18,7 @@ func TestCLIEntry(t *testing.T) {
|
||||
}
|
||||
|
||||
// Verify all subcommands are registered
|
||||
expectedCommands := []string{"snapshot", "store", "restore", "prune", "verify", "fetch"}
|
||||
expectedCommands := []string{"config", "snapshot", "store", "restore", "prune", "info", "version", "remote", "database"}
|
||||
for _, expected := range expectedCommands {
|
||||
found := false
|
||||
for _, cmd := range cmd.Commands() {
|
||||
@@ -38,7 +38,7 @@ func TestCLIEntry(t *testing.T) {
|
||||
t.Errorf("Failed to find snapshot command: %v", err)
|
||||
} else {
|
||||
// Check snapshot subcommands
|
||||
expectedSubCommands := []string{"create", "list", "purge", "verify"}
|
||||
expectedSubCommands := []string{"create", "list", "purge", "verify", "cleanup"}
|
||||
for _, expected := range expectedSubCommands {
|
||||
found := false
|
||||
for _, subcmd := range snapshotCmd.Commands() {
|
||||
|
||||
@@ -1,138 +0,0 @@
|
||||
package cli
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
|
||||
"git.eeqj.de/sneak/vaultik/internal/config"
|
||||
"git.eeqj.de/sneak/vaultik/internal/database"
|
||||
"git.eeqj.de/sneak/vaultik/internal/globals"
|
||||
"git.eeqj.de/sneak/vaultik/internal/log"
|
||||
"git.eeqj.de/sneak/vaultik/internal/snapshot"
|
||||
"git.eeqj.de/sneak/vaultik/internal/storage"
|
||||
"github.com/spf13/cobra"
|
||||
"go.uber.org/fx"
|
||||
)
|
||||
|
||||
// FetchOptions contains options for the fetch command
|
||||
type FetchOptions struct {
|
||||
}
|
||||
|
||||
// FetchApp contains all dependencies needed for fetch
|
||||
type FetchApp struct {
|
||||
Globals *globals.Globals
|
||||
Config *config.Config
|
||||
Repositories *database.Repositories
|
||||
Storage storage.Storer
|
||||
DB *database.DB
|
||||
Shutdowner fx.Shutdowner
|
||||
}
|
||||
|
||||
// NewFetchCommand creates the fetch command
|
||||
func NewFetchCommand() *cobra.Command {
|
||||
opts := &FetchOptions{}
|
||||
|
||||
cmd := &cobra.Command{
|
||||
Use: "fetch <snapshot-id> <file-path> <target-path>",
|
||||
Short: "Extract single file from backup",
|
||||
Long: `Download and decrypt a single file from a backup snapshot.
|
||||
|
||||
This command extracts a specific file from the snapshot and saves it to the target path.
|
||||
The age_secret_key must be configured in the config file for decryption.`,
|
||||
Args: cobra.ExactArgs(3),
|
||||
RunE: func(cmd *cobra.Command, args []string) error {
|
||||
snapshotID := args[0]
|
||||
filePath := args[1]
|
||||
targetPath := args[2]
|
||||
|
||||
// Use unified config resolution
|
||||
configPath, err := ResolveConfigPath()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// Use the app framework like other commands
|
||||
rootFlags := GetRootFlags()
|
||||
return RunWithApp(cmd.Context(), AppOptions{
|
||||
ConfigPath: configPath,
|
||||
LogOptions: log.LogOptions{
|
||||
Verbose: rootFlags.Verbose,
|
||||
Debug: rootFlags.Debug,
|
||||
},
|
||||
Modules: []fx.Option{
|
||||
snapshot.Module,
|
||||
fx.Provide(fx.Annotate(
|
||||
func(g *globals.Globals, cfg *config.Config, repos *database.Repositories,
|
||||
storer storage.Storer, db *database.DB, shutdowner fx.Shutdowner) *FetchApp {
|
||||
return &FetchApp{
|
||||
Globals: g,
|
||||
Config: cfg,
|
||||
Repositories: repos,
|
||||
Storage: storer,
|
||||
DB: db,
|
||||
Shutdowner: shutdowner,
|
||||
}
|
||||
},
|
||||
)),
|
||||
},
|
||||
Invokes: []fx.Option{
|
||||
fx.Invoke(func(app *FetchApp, lc fx.Lifecycle) {
|
||||
lc.Append(fx.Hook{
|
||||
OnStart: func(ctx context.Context) error {
|
||||
// Start the fetch operation in a goroutine
|
||||
go func() {
|
||||
// Run the fetch operation
|
||||
if err := app.runFetch(ctx, snapshotID, filePath, targetPath, opts); err != nil {
|
||||
if err != context.Canceled {
|
||||
log.Error("Fetch operation failed", "error", err)
|
||||
}
|
||||
}
|
||||
|
||||
// Shutdown the app when fetch completes
|
||||
if err := app.Shutdowner.Shutdown(); err != nil {
|
||||
log.Error("Failed to shutdown", "error", err)
|
||||
}
|
||||
}()
|
||||
return nil
|
||||
},
|
||||
OnStop: func(ctx context.Context) error {
|
||||
log.Debug("Stopping fetch operation")
|
||||
return nil
|
||||
},
|
||||
})
|
||||
}),
|
||||
},
|
||||
})
|
||||
},
|
||||
}
|
||||
|
||||
return cmd
|
||||
}
|
||||
|
||||
// runFetch executes the fetch operation
|
||||
func (app *FetchApp) runFetch(ctx context.Context, snapshotID, filePath, targetPath string, opts *FetchOptions) error {
|
||||
// Check for age_secret_key
|
||||
if app.Config.AgeSecretKey == "" {
|
||||
return fmt.Errorf("age_secret_key missing from config - required for fetch")
|
||||
}
|
||||
|
||||
log.Info("Starting fetch operation",
|
||||
"snapshot_id", snapshotID,
|
||||
"file_path", filePath,
|
||||
"target_path", targetPath,
|
||||
"bucket", app.Config.S3.Bucket,
|
||||
"prefix", app.Config.S3.Prefix,
|
||||
)
|
||||
|
||||
// TODO: Implement fetch logic
|
||||
// 1. Download and decrypt database from S3
|
||||
// 2. Find the file metadata and chunk list
|
||||
// 3. Download and decrypt only the necessary blobs
|
||||
// 4. Reconstruct the file from chunks
|
||||
// 5. Write file to target path with proper metadata
|
||||
|
||||
fmt.Printf("Fetching %s from snapshot %s to %s\n", filePath, snapshotID, targetPath)
|
||||
fmt.Println("TODO: Implement fetch logic")
|
||||
|
||||
return nil
|
||||
}
|
||||
@@ -4,10 +4,10 @@ import (
|
||||
"context"
|
||||
"os"
|
||||
|
||||
"git.eeqj.de/sneak/vaultik/internal/log"
|
||||
"git.eeqj.de/sneak/vaultik/internal/vaultik"
|
||||
"github.com/spf13/cobra"
|
||||
"go.uber.org/fx"
|
||||
"sneak.berlin/go/vaultik/internal/log"
|
||||
"sneak.berlin/go/vaultik/internal/vaultik"
|
||||
)
|
||||
|
||||
// NewInfoCommand creates the info command
|
||||
@@ -36,6 +36,7 @@ func NewInfoCommand() *cobra.Command {
|
||||
LogOptions: log.LogOptions{
|
||||
Verbose: rootFlags.Verbose,
|
||||
Debug: rootFlags.Debug,
|
||||
Quiet: rootFlags.Quiet,
|
||||
},
|
||||
Modules: []fx.Option{},
|
||||
Invokes: []fx.Option{
|
||||
|
||||
@@ -4,10 +4,10 @@ import (
|
||||
"context"
|
||||
"os"
|
||||
|
||||
"git.eeqj.de/sneak/vaultik/internal/log"
|
||||
"git.eeqj.de/sneak/vaultik/internal/vaultik"
|
||||
"github.com/spf13/cobra"
|
||||
"go.uber.org/fx"
|
||||
"sneak.berlin/go/vaultik/internal/log"
|
||||
"sneak.berlin/go/vaultik/internal/vaultik"
|
||||
)
|
||||
|
||||
// NewPruneCommand creates the prune command
|
||||
@@ -19,10 +19,10 @@ func NewPruneCommand() *cobra.Command {
|
||||
Short: "Remove unreferenced blobs",
|
||||
Long: `Removes blobs that are not referenced by any snapshot.
|
||||
|
||||
This command scans all snapshots and their manifests to build a list of
|
||||
This command scans all snapshots and their manifests to build a list of
|
||||
referenced blobs, then removes any blobs in storage that are not in this list.
|
||||
|
||||
Use this command after deleting snapshots with 'vaultik purge' to reclaim
|
||||
Use this command after deleting snapshots with 'vaultik purge' to reclaim
|
||||
storage space.`,
|
||||
Args: cobra.NoArgs,
|
||||
RunE: func(cmd *cobra.Command, args []string) error {
|
||||
@@ -39,6 +39,7 @@ storage space.`,
|
||||
LogOptions: log.LogOptions{
|
||||
Verbose: rootFlags.Verbose,
|
||||
Debug: rootFlags.Debug,
|
||||
Quiet: rootFlags.Quiet || opts.JSON,
|
||||
},
|
||||
Modules: []fx.Option{},
|
||||
Invokes: []fx.Option{
|
||||
@@ -50,7 +51,9 @@ storage space.`,
|
||||
// Run the prune operation
|
||||
if err := v.PruneBlobs(opts); err != nil {
|
||||
if err != context.Canceled {
|
||||
log.Error("Prune operation failed", "error", err)
|
||||
if !opts.JSON {
|
||||
log.Error("Prune operation failed", "error", err)
|
||||
}
|
||||
os.Exit(1)
|
||||
}
|
||||
}
|
||||
@@ -75,6 +78,7 @@ storage space.`,
|
||||
}
|
||||
|
||||
cmd.Flags().BoolVar(&opts.Force, "force", false, "Skip confirmation prompt")
|
||||
cmd.Flags().BoolVar(&opts.JSON, "json", false, "Output pruning stats as JSON")
|
||||
|
||||
return cmd
|
||||
}
|
||||
|
||||
@@ -1,99 +0,0 @@
|
||||
package cli
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"os"
|
||||
|
||||
"git.eeqj.de/sneak/vaultik/internal/log"
|
||||
"git.eeqj.de/sneak/vaultik/internal/vaultik"
|
||||
"github.com/spf13/cobra"
|
||||
"go.uber.org/fx"
|
||||
)
|
||||
|
||||
// PurgeOptions holds the flag values accepted by the purge command.
type PurgeOptions struct {
	// KeepLatest is bound to --keep-latest ("Keep only the latest snapshot").
	KeepLatest bool
	// OlderThan is bound to --older-than; a duration string such as 30d, 6m or 1y.
	OlderThan string
	// Force is bound to --force ("Skip confirmation prompts").
	Force bool
}
|
||||
|
||||
// NewPurgeCommand creates the purge command
|
||||
func NewPurgeCommand() *cobra.Command {
|
||||
opts := &PurgeOptions{}
|
||||
|
||||
cmd := &cobra.Command{
|
||||
Use: "purge",
|
||||
Short: "Purge old snapshots",
|
||||
Long: `Removes snapshots based on age or count criteria.
|
||||
|
||||
This command allows you to:
|
||||
- Keep only the latest snapshot (--keep-latest)
|
||||
- Remove snapshots older than a specific duration (--older-than)
|
||||
|
||||
Config is located at /etc/vaultik/config.yml by default, but can be overridden by
|
||||
specifying a path using --config or by setting VAULTIK_CONFIG to a path.`,
|
||||
Args: cobra.NoArgs,
|
||||
RunE: func(cmd *cobra.Command, args []string) error {
|
||||
// Validate flags
|
||||
if !opts.KeepLatest && opts.OlderThan == "" {
|
||||
return fmt.Errorf("must specify either --keep-latest or --older-than")
|
||||
}
|
||||
if opts.KeepLatest && opts.OlderThan != "" {
|
||||
return fmt.Errorf("cannot specify both --keep-latest and --older-than")
|
||||
}
|
||||
|
||||
// Use unified config resolution
|
||||
configPath, err := ResolveConfigPath()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// Use the app framework like other commands
|
||||
rootFlags := GetRootFlags()
|
||||
return RunWithApp(cmd.Context(), AppOptions{
|
||||
ConfigPath: configPath,
|
||||
LogOptions: log.LogOptions{
|
||||
Verbose: rootFlags.Verbose,
|
||||
Debug: rootFlags.Debug,
|
||||
},
|
||||
Modules: []fx.Option{},
|
||||
Invokes: []fx.Option{
|
||||
fx.Invoke(func(v *vaultik.Vaultik, lc fx.Lifecycle) {
|
||||
lc.Append(fx.Hook{
|
||||
OnStart: func(ctx context.Context) error {
|
||||
// Start the purge operation in a goroutine
|
||||
go func() {
|
||||
// Run the purge operation
|
||||
if err := v.PurgeSnapshots(opts.KeepLatest, opts.OlderThan, opts.Force); err != nil {
|
||||
if err != context.Canceled {
|
||||
log.Error("Purge operation failed", "error", err)
|
||||
os.Exit(1)
|
||||
}
|
||||
}
|
||||
|
||||
// Shutdown the app when purge completes
|
||||
if err := v.Shutdowner.Shutdown(); err != nil {
|
||||
log.Error("Failed to shutdown", "error", err)
|
||||
}
|
||||
}()
|
||||
return nil
|
||||
},
|
||||
OnStop: func(ctx context.Context) error {
|
||||
log.Debug("Stopping purge operation")
|
||||
v.Cancel()
|
||||
return nil
|
||||
},
|
||||
})
|
||||
}),
|
||||
},
|
||||
})
|
||||
},
|
||||
}
|
||||
|
||||
cmd.Flags().BoolVar(&opts.KeepLatest, "keep-latest", false, "Keep only the latest snapshot")
|
||||
cmd.Flags().StringVar(&opts.OlderThan, "older-than", "", "Remove snapshots older than duration (e.g. 30d, 6m, 1y)")
|
||||
cmd.Flags().BoolVar(&opts.Force, "force", false, "Skip confirmation prompts")
|
||||
|
||||
return cmd
|
||||
}
|
||||
89
internal/cli/remote.go
Normal file
89
internal/cli/remote.go
Normal file
@@ -0,0 +1,89 @@
|
||||
package cli
|
||||
|
||||
import (
|
||||
"context"
|
||||
"os"
|
||||
|
||||
"github.com/spf13/cobra"
|
||||
"go.uber.org/fx"
|
||||
"sneak.berlin/go/vaultik/internal/log"
|
||||
"sneak.berlin/go/vaultik/internal/vaultik"
|
||||
)
|
||||
|
||||
// NewRemoteCommand creates the remote command and subcommands
|
||||
func NewRemoteCommand() *cobra.Command {
|
||||
cmd := &cobra.Command{
|
||||
Use: "remote",
|
||||
Short: "Remote storage management commands",
|
||||
Long: "Commands for inspecting and managing remote storage",
|
||||
}
|
||||
|
||||
// Add subcommands
|
||||
cmd.AddCommand(newRemoteInfoCommand())
|
||||
|
||||
return cmd
|
||||
}
|
||||
|
||||
// newRemoteInfoCommand creates the 'remote info' subcommand
|
||||
func newRemoteInfoCommand() *cobra.Command {
|
||||
var jsonOutput bool
|
||||
|
||||
cmd := &cobra.Command{
|
||||
Use: "info",
|
||||
Short: "Display remote storage information",
|
||||
Long: `Shows detailed information about remote storage, including:
|
||||
- Size of all snapshot metadata (per snapshot and total)
|
||||
- Count and total size of all blobs
|
||||
- Count and size of referenced blobs (from all manifests)
|
||||
- Count and size of orphaned blobs (not referenced by any manifest)`,
|
||||
Args: cobra.NoArgs,
|
||||
RunE: func(cmd *cobra.Command, args []string) error {
|
||||
// Use unified config resolution
|
||||
configPath, err := ResolveConfigPath()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
rootFlags := GetRootFlags()
|
||||
return RunWithApp(cmd.Context(), AppOptions{
|
||||
ConfigPath: configPath,
|
||||
LogOptions: log.LogOptions{
|
||||
Verbose: rootFlags.Verbose,
|
||||
Debug: rootFlags.Debug,
|
||||
Quiet: rootFlags.Quiet || jsonOutput,
|
||||
},
|
||||
Modules: []fx.Option{},
|
||||
Invokes: []fx.Option{
|
||||
fx.Invoke(func(v *vaultik.Vaultik, lc fx.Lifecycle) {
|
||||
lc.Append(fx.Hook{
|
||||
OnStart: func(ctx context.Context) error {
|
||||
go func() {
|
||||
if err := v.RemoteInfo(jsonOutput); err != nil {
|
||||
if err != context.Canceled {
|
||||
if !jsonOutput {
|
||||
log.Error("Failed to get remote info", "error", err)
|
||||
}
|
||||
os.Exit(1)
|
||||
}
|
||||
}
|
||||
if err := v.Shutdowner.Shutdown(); err != nil {
|
||||
log.Error("Failed to shutdown", "error", err)
|
||||
}
|
||||
}()
|
||||
return nil
|
||||
},
|
||||
OnStop: func(ctx context.Context) error {
|
||||
v.Cancel()
|
||||
return nil
|
||||
},
|
||||
})
|
||||
}),
|
||||
},
|
||||
})
|
||||
},
|
||||
}
|
||||
|
||||
cmd.Flags().BoolVar(&jsonOutput, "json", false, "Output in JSON format")
|
||||
|
||||
return cmd
|
||||
}
|
||||
@@ -2,31 +2,31 @@ package cli
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"os"
|
||||
|
||||
"git.eeqj.de/sneak/vaultik/internal/config"
|
||||
"git.eeqj.de/sneak/vaultik/internal/database"
|
||||
"git.eeqj.de/sneak/vaultik/internal/globals"
|
||||
"git.eeqj.de/sneak/vaultik/internal/log"
|
||||
"git.eeqj.de/sneak/vaultik/internal/snapshot"
|
||||
"git.eeqj.de/sneak/vaultik/internal/storage"
|
||||
"github.com/spf13/cobra"
|
||||
"go.uber.org/fx"
|
||||
"sneak.berlin/go/vaultik/internal/config"
|
||||
"sneak.berlin/go/vaultik/internal/globals"
|
||||
"sneak.berlin/go/vaultik/internal/log"
|
||||
"sneak.berlin/go/vaultik/internal/storage"
|
||||
"sneak.berlin/go/vaultik/internal/vaultik"
|
||||
)
|
||||
|
||||
// RestoreOptions holds the arguments and flags accepted by the restore command.
type RestoreOptions struct {
	// TargetDir is the directory files are restored into.
	TargetDir string
	// Paths optionally limits the restore to these paths; empty means everything.
	Paths []string
	// Verify requests verification of the restored files after the restore.
	Verify bool
}
|
||||
|
||||
// RestoreApp contains all dependencies needed for restore
|
||||
type RestoreApp struct {
|
||||
Globals *globals.Globals
|
||||
Config *config.Config
|
||||
Repositories *database.Repositories
|
||||
Storage storage.Storer
|
||||
DB *database.DB
|
||||
Shutdowner fx.Shutdowner
|
||||
Globals *globals.Globals
|
||||
Config *config.Config
|
||||
Storage storage.Storer
|
||||
Vaultik *vaultik.Vaultik
|
||||
Shutdowner fx.Shutdowner
|
||||
}
|
||||
|
||||
// NewRestoreCommand creates the restore command
|
||||
@@ -34,103 +34,120 @@ func NewRestoreCommand() *cobra.Command {
|
||||
opts := &RestoreOptions{}
|
||||
|
||||
cmd := &cobra.Command{
|
||||
Use: "restore <snapshot-id> <target-dir>",
|
||||
Use: "restore <snapshot-id> <target-dir> [paths...]",
|
||||
Short: "Restore files from backup",
|
||||
Long: `Download and decrypt files from a backup snapshot.
|
||||
|
||||
This command will restore all files from the specified snapshot to the target directory.
|
||||
The age_secret_key must be configured in the config file for decryption.`,
|
||||
Args: cobra.ExactArgs(2),
|
||||
This command will restore files from the specified snapshot to the target directory.
|
||||
If no paths are specified, all files are restored.
|
||||
If paths are specified, only matching files/directories are restored.
|
||||
|
||||
Requires the VAULTIK_AGE_SECRET_KEY environment variable to be set with the age private key.
|
||||
|
||||
Examples:
|
||||
# Restore entire snapshot
|
||||
vaultik restore myhost_docs_2025-01-01T12:00:00Z /restore
|
||||
|
||||
# Restore specific file
|
||||
vaultik restore myhost_docs_2025-01-01T12:00:00Z /restore /home/user/important.txt
|
||||
|
||||
# Restore specific directory
|
||||
vaultik restore myhost_docs_2025-01-01T12:00:00Z /restore /home/user/documents/
|
||||
|
||||
# Restore and verify all files
|
||||
vaultik restore --verify myhost_docs_2025-01-01T12:00:00Z /restore`,
|
||||
Args: cobra.MinimumNArgs(2),
|
||||
RunE: func(cmd *cobra.Command, args []string) error {
|
||||
snapshotID := args[0]
|
||||
opts.TargetDir = args[1]
|
||||
|
||||
// Use unified config resolution
|
||||
configPath, err := ResolveConfigPath()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// Use the app framework like other commands
|
||||
rootFlags := GetRootFlags()
|
||||
return RunWithApp(cmd.Context(), AppOptions{
|
||||
ConfigPath: configPath,
|
||||
LogOptions: log.LogOptions{
|
||||
Verbose: rootFlags.Verbose,
|
||||
Debug: rootFlags.Debug,
|
||||
},
|
||||
Modules: []fx.Option{
|
||||
snapshot.Module,
|
||||
fx.Provide(fx.Annotate(
|
||||
func(g *globals.Globals, cfg *config.Config, repos *database.Repositories,
|
||||
storer storage.Storer, db *database.DB, shutdowner fx.Shutdowner) *RestoreApp {
|
||||
return &RestoreApp{
|
||||
Globals: g,
|
||||
Config: cfg,
|
||||
Repositories: repos,
|
||||
Storage: storer,
|
||||
DB: db,
|
||||
Shutdowner: shutdowner,
|
||||
}
|
||||
},
|
||||
)),
|
||||
},
|
||||
Invokes: []fx.Option{
|
||||
fx.Invoke(func(app *RestoreApp, lc fx.Lifecycle) {
|
||||
lc.Append(fx.Hook{
|
||||
OnStart: func(ctx context.Context) error {
|
||||
// Start the restore operation in a goroutine
|
||||
go func() {
|
||||
// Run the restore operation
|
||||
if err := app.runRestore(ctx, snapshotID, opts); err != nil {
|
||||
if err != context.Canceled {
|
||||
log.Error("Restore operation failed", "error", err)
|
||||
}
|
||||
}
|
||||
|
||||
// Shutdown the app when restore completes
|
||||
if err := app.Shutdowner.Shutdown(); err != nil {
|
||||
log.Error("Failed to shutdown", "error", err)
|
||||
}
|
||||
}()
|
||||
return nil
|
||||
},
|
||||
OnStop: func(ctx context.Context) error {
|
||||
log.Debug("Stopping restore operation")
|
||||
return nil
|
||||
},
|
||||
})
|
||||
}),
|
||||
},
|
||||
})
|
||||
return runRestore(cmd, args, opts)
|
||||
},
|
||||
}
|
||||
|
||||
cmd.Flags().BoolVar(&opts.Verify, "verify", false, "Verify restored files by checking chunk hashes")
|
||||
|
||||
return cmd
|
||||
}
|
||||
|
||||
// runRestore executes the restore operation
|
||||
func (app *RestoreApp) runRestore(ctx context.Context, snapshotID string, opts *RestoreOptions) error {
|
||||
// Check for age_secret_key
|
||||
if app.Config.AgeSecretKey == "" {
|
||||
return fmt.Errorf("age_secret_key missing from config - required for restore")
|
||||
// runRestore parses arguments and runs the restore operation through the app framework
|
||||
func runRestore(cmd *cobra.Command, args []string, opts *RestoreOptions) error {
|
||||
snapshotID := args[0]
|
||||
opts.TargetDir = args[1]
|
||||
if len(args) > 2 {
|
||||
opts.Paths = args[2:]
|
||||
}
|
||||
|
||||
log.Info("Starting restore operation",
|
||||
"snapshot_id", snapshotID,
|
||||
"target_dir", opts.TargetDir,
|
||||
"bucket", app.Config.S3.Bucket,
|
||||
"prefix", app.Config.S3.Prefix,
|
||||
)
|
||||
// Use unified config resolution
|
||||
configPath, err := ResolveConfigPath()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// TODO: Implement restore logic
|
||||
// 1. Download and decrypt database from S3
|
||||
// 2. Download and decrypt blobs
|
||||
// 3. Reconstruct files from chunks
|
||||
// 4. Write files to target directory with proper metadata
|
||||
|
||||
fmt.Printf("Restoring snapshot %s to %s\n", snapshotID, opts.TargetDir)
|
||||
fmt.Println("TODO: Implement restore logic")
|
||||
|
||||
return nil
|
||||
// Use the app framework like other commands
|
||||
rootFlags := GetRootFlags()
|
||||
return RunWithApp(cmd.Context(), AppOptions{
|
||||
ConfigPath: configPath,
|
||||
LogOptions: log.LogOptions{
|
||||
Verbose: rootFlags.Verbose,
|
||||
Debug: rootFlags.Debug,
|
||||
Quiet: rootFlags.Quiet,
|
||||
},
|
||||
Modules: buildRestoreModules(),
|
||||
Invokes: buildRestoreInvokes(snapshotID, opts),
|
||||
})
|
||||
}
|
||||
|
||||
// buildRestoreModules returns the fx.Options for dependency injection in restore
|
||||
func buildRestoreModules() []fx.Option {
|
||||
return []fx.Option{
|
||||
fx.Provide(fx.Annotate(
|
||||
func(g *globals.Globals, cfg *config.Config,
|
||||
storer storage.Storer, v *vaultik.Vaultik, shutdowner fx.Shutdowner) *RestoreApp {
|
||||
return &RestoreApp{
|
||||
Globals: g,
|
||||
Config: cfg,
|
||||
Storage: storer,
|
||||
Vaultik: v,
|
||||
Shutdowner: shutdowner,
|
||||
}
|
||||
},
|
||||
)),
|
||||
}
|
||||
}
|
||||
|
||||
// buildRestoreInvokes returns the fx.Options that wire up the restore lifecycle
|
||||
func buildRestoreInvokes(snapshotID string, opts *RestoreOptions) []fx.Option {
|
||||
return []fx.Option{
|
||||
fx.Invoke(func(app *RestoreApp, lc fx.Lifecycle) {
|
||||
lc.Append(fx.Hook{
|
||||
OnStart: func(ctx context.Context) error {
|
||||
// Start the restore operation in a goroutine
|
||||
go func() {
|
||||
// Run the restore operation
|
||||
restoreOpts := &vaultik.RestoreOptions{
|
||||
SnapshotID: snapshotID,
|
||||
TargetDir: opts.TargetDir,
|
||||
Paths: opts.Paths,
|
||||
Verify: opts.Verify,
|
||||
}
|
||||
if err := app.Vaultik.Restore(restoreOpts); err != nil {
|
||||
if err != context.Canceled {
|
||||
log.Error("Restore operation failed", "error", err)
|
||||
os.Exit(1)
|
||||
}
|
||||
}
|
||||
|
||||
// Shutdown the app when restore completes
|
||||
if err := app.Shutdowner.Shutdown(); err != nil {
|
||||
log.Error("Failed to shutdown", "error", err)
|
||||
}
|
||||
}()
|
||||
return nil
|
||||
},
|
||||
OnStop: func(ctx context.Context) error {
|
||||
log.Debug("Stopping restore operation")
|
||||
app.Vaultik.Cancel()
|
||||
return nil
|
||||
},
|
||||
})
|
||||
}),
|
||||
}
|
||||
}
|
||||
|
||||
@@ -3,7 +3,9 @@ package cli
|
||||
import (
|
||||
"fmt"
|
||||
"os"
|
||||
"path/filepath"
|
||||
|
||||
"github.com/adrg/xdg"
|
||||
"github.com/spf13/cobra"
|
||||
)
|
||||
|
||||
@@ -13,6 +15,7 @@ type RootFlags struct {
|
||||
ConfigPath string
|
||||
Verbose bool
|
||||
Debug bool
|
||||
Quiet bool
|
||||
}
|
||||
|
||||
var rootFlags RootFlags
|
||||
@@ -24,26 +27,29 @@ func NewRootCommand() *cobra.Command {
|
||||
cmd := &cobra.Command{
|
||||
Use: "vaultik",
|
||||
Short: "Secure incremental backup tool with asymmetric encryption",
|
||||
Long: `vaultik is a secure incremental backup daemon that encrypts data using age
|
||||
Long: `vaultik is a secure incremental backup tool that encrypts data using age
|
||||
public keys and uploads to S3-compatible storage. No private keys are needed
|
||||
on the source system.`,
|
||||
SilenceUsage: true,
|
||||
}
|
||||
|
||||
// Add global flags
|
||||
cmd.PersistentFlags().StringVar(&rootFlags.ConfigPath, "config", "", "Path to config file (default: $VAULTIK_CONFIG or /etc/vaultik/config.yml)")
|
||||
cmd.PersistentFlags().StringVar(&rootFlags.ConfigPath, "config", "", "Path to config file (default: $VAULTIK_CONFIG or platform config dir)")
|
||||
cmd.PersistentFlags().BoolVarP(&rootFlags.Verbose, "verbose", "v", false, "Enable verbose output")
|
||||
cmd.PersistentFlags().BoolVar(&rootFlags.Debug, "debug", false, "Enable debug output")
|
||||
cmd.PersistentFlags().BoolVarP(&rootFlags.Quiet, "quiet", "q", false, "Suppress non-error output")
|
||||
|
||||
// Add subcommands
|
||||
cmd.AddCommand(
|
||||
NewConfigCommand(),
|
||||
NewRestoreCommand(),
|
||||
NewPruneCommand(),
|
||||
NewVerifyCommand(),
|
||||
NewFetchCommand(),
|
||||
NewStoreCommand(),
|
||||
NewSnapshotCommand(),
|
||||
NewInfoCommand(),
|
||||
NewVersionCommand(),
|
||||
NewRemoteCommand(),
|
||||
NewDatabaseCommand(),
|
||||
)
|
||||
|
||||
return cmd
|
||||
@@ -56,25 +62,41 @@ func GetRootFlags() RootFlags {
|
||||
}
|
||||
|
||||
// ResolveConfigPath resolves the config file path from flags, environment, or default.
|
||||
// It checks in order: 1) --config flag, 2) VAULTIK_CONFIG environment variable,
|
||||
// 3) default location /etc/vaultik/config.yml. Returns an error if no valid
|
||||
// config file can be found through any of these methods.
|
||||
// Search order: --config flag, VAULTIK_CONFIG env, XDG config dir, /etc/vaultik/config.yml.
|
||||
func ResolveConfigPath() (string, error) {
|
||||
// First check global flag
|
||||
if rootFlags.ConfigPath != "" {
|
||||
return rootFlags.ConfigPath, nil
|
||||
}
|
||||
|
||||
// Then check environment variable
|
||||
if envPath := os.Getenv("VAULTIK_CONFIG"); envPath != "" {
|
||||
return envPath, nil
|
||||
}
|
||||
|
||||
// Finally check default location
|
||||
defaultPath := "/etc/vaultik/config.yml"
|
||||
if _, err := os.Stat(defaultPath); err == nil {
|
||||
return defaultPath, nil
|
||||
for _, path := range defaultConfigPaths() {
|
||||
if _, err := os.Stat(path); err == nil {
|
||||
return path, nil
|
||||
}
|
||||
}
|
||||
|
||||
return "", fmt.Errorf("no config file specified, VAULTIK_CONFIG not set, and %s not found", defaultPath)
|
||||
return "", fmt.Errorf("no config file found; run 'vaultik config init' to create one, or specify with --config")
|
||||
}
|
||||
|
||||
// defaultConfigPaths returns the ordered list of config paths to search.
|
||||
// On macOS: ~/Library/Application Support/vaultik/config.yml
|
||||
// On Linux: ~/.config/vaultik/config.yml
|
||||
// Fallback: /etc/vaultik/config.yml
|
||||
func defaultConfigPaths() []string {
|
||||
return []string{
|
||||
filepath.Join(xdg.ConfigHome, "vaultik", "config.yml"),
|
||||
"/etc/vaultik/config.yml",
|
||||
}
|
||||
}
|
||||
|
||||
// DefaultConfigPath returns the platform-appropriate default config path.
|
||||
// Used by the init command and in help text.
|
||||
func DefaultConfigPath() string {
|
||||
if os.Getuid() == 0 {
|
||||
return "/etc/vaultik/config.yml"
|
||||
}
|
||||
return filepath.Join(xdg.ConfigHome, "vaultik", "config.yml")
|
||||
}
|
||||
|
||||
@@ -3,12 +3,13 @@ package cli
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"io"
|
||||
"os"
|
||||
|
||||
"git.eeqj.de/sneak/vaultik/internal/log"
|
||||
"git.eeqj.de/sneak/vaultik/internal/vaultik"
|
||||
"github.com/spf13/cobra"
|
||||
"go.uber.org/fx"
|
||||
"sneak.berlin/go/vaultik/internal/log"
|
||||
"sneak.berlin/go/vaultik/internal/vaultik"
|
||||
)
|
||||
|
||||
// NewSnapshotCommand creates the snapshot command and subcommands
|
||||
@@ -24,6 +25,9 @@ func NewSnapshotCommand() *cobra.Command {
|
||||
cmd.AddCommand(newSnapshotListCommand())
|
||||
cmd.AddCommand(newSnapshotPurgeCommand())
|
||||
cmd.AddCommand(newSnapshotVerifyCommand())
|
||||
cmd.AddCommand(newSnapshotRemoveCommand())
|
||||
cmd.AddCommand(newSnapshotPruneCommand())
|
||||
cmd.AddCommand(newSnapshotCleanupCommand())
|
||||
|
||||
return cmd
|
||||
}
|
||||
@@ -33,14 +37,19 @@ func newSnapshotCreateCommand() *cobra.Command {
|
||||
opts := &vaultik.SnapshotCreateOptions{}
|
||||
|
||||
cmd := &cobra.Command{
|
||||
Use: "create",
|
||||
Short: "Create a new snapshot",
|
||||
Long: `Creates a new snapshot of the configured directories.
|
||||
Use: "create [snapshot-names...]",
|
||||
Short: "Create new snapshots",
|
||||
Long: `Creates new snapshots of the configured directories.
|
||||
|
||||
Config is located at /etc/vaultik/config.yml by default, but can be overridden by
|
||||
If snapshot names are provided, only those snapshots are created.
|
||||
If no names are provided, all configured snapshots are created.
|
||||
|
||||
Config is located at /etc/vaultik/config.yml by default, but can be overridden by
|
||||
specifying a path using --config or by setting VAULTIK_CONFIG to a path.`,
|
||||
Args: cobra.NoArgs,
|
||||
Args: cobra.ArbitraryArgs,
|
||||
RunE: func(cmd *cobra.Command, args []string) error {
|
||||
// Pass snapshot names from args
|
||||
opts.Snapshots = args
|
||||
// Use unified config resolution
|
||||
configPath, err := ResolveConfigPath()
|
||||
if err != nil {
|
||||
@@ -55,6 +64,7 @@ specifying a path using --config or by setting VAULTIK_CONFIG to a path.`,
|
||||
Verbose: rootFlags.Verbose,
|
||||
Debug: rootFlags.Debug,
|
||||
Cron: opts.Cron,
|
||||
Quiet: rootFlags.Quiet,
|
||||
},
|
||||
Modules: []fx.Option{},
|
||||
Invokes: []fx.Option{
|
||||
@@ -63,10 +73,13 @@ specifying a path using --config or by setting VAULTIK_CONFIG to a path.`,
|
||||
OnStart: func(ctx context.Context) error {
|
||||
// Start the snapshot creation in a goroutine
|
||||
go func() {
|
||||
// Run the snapshot creation
|
||||
if opts.Cron {
|
||||
v.Stdout = io.Discard
|
||||
}
|
||||
if err := v.CreateSnapshot(opts); err != nil {
|
||||
if err != context.Canceled {
|
||||
log.Error("Snapshot creation failed", "error", err)
|
||||
os.Exit(1)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -90,9 +103,10 @@ specifying a path using --config or by setting VAULTIK_CONFIG to a path.`,
|
||||
},
|
||||
}
|
||||
|
||||
cmd.Flags().BoolVar(&opts.Daemon, "daemon", false, "Run in daemon mode with inotify monitoring")
|
||||
cmd.Flags().BoolVar(&opts.Cron, "cron", false, "Run in cron mode (silent unless error)")
|
||||
cmd.Flags().BoolVar(&opts.Prune, "prune", false, "Delete all previous snapshots and unreferenced blobs after backup")
|
||||
cmd.Flags().BoolVar(&opts.Prune, "prune", false, "After backup, drop older snapshots of the same name and remove orphaned blobs")
|
||||
cmd.Flags().StringVar(&opts.KeepNewerThan, "keep-newer-than", "", "With --prune: keep snapshots newer than this duration (e.g. 4w, 30d, 6mo) instead of only the latest")
|
||||
cmd.Flags().BoolVar(&opts.SkipErrors, "skip-errors", false, "Skip file read errors (log them loudly but continue)")
|
||||
|
||||
return cmd
|
||||
}
|
||||
@@ -102,10 +116,11 @@ func newSnapshotListCommand() *cobra.Command {
|
||||
var jsonOutput bool
|
||||
|
||||
cmd := &cobra.Command{
|
||||
Use: "list",
|
||||
Short: "List all snapshots",
|
||||
Long: "Lists all snapshots with their ID, timestamp, and compressed size",
|
||||
Args: cobra.NoArgs,
|
||||
Use: "list",
|
||||
Aliases: []string{"ls"},
|
||||
Short: "List all snapshots",
|
||||
Long: "Lists all snapshots with their ID, timestamp, and compressed size",
|
||||
Args: cobra.NoArgs,
|
||||
RunE: func(cmd *cobra.Command, args []string) error {
|
||||
// Use unified config resolution
|
||||
configPath, err := ResolveConfigPath()
|
||||
@@ -119,6 +134,7 @@ func newSnapshotListCommand() *cobra.Command {
|
||||
LogOptions: log.LogOptions{
|
||||
Verbose: rootFlags.Verbose,
|
||||
Debug: rootFlags.Debug,
|
||||
Quiet: rootFlags.Quiet,
|
||||
},
|
||||
Modules: []fx.Option{},
|
||||
Invokes: []fx.Option{
|
||||
@@ -156,21 +172,23 @@ func newSnapshotListCommand() *cobra.Command {
|
||||
|
||||
// newSnapshotPurgeCommand creates the 'snapshot purge' subcommand
|
||||
func newSnapshotPurgeCommand() *cobra.Command {
|
||||
var keepLatest bool
|
||||
var olderThan string
|
||||
var force bool
|
||||
opts := &vaultik.SnapshotPurgeOptions{}
|
||||
|
||||
cmd := &cobra.Command{
|
||||
Use: "purge",
|
||||
Short: "Purge old snapshots",
|
||||
Long: "Removes snapshots based on age or count criteria",
|
||||
Args: cobra.NoArgs,
|
||||
Long: `Removes snapshots based on age or count criteria.
|
||||
|
||||
Retention is per-snapshot-name: --keep-latest keeps the latest of each
|
||||
configured snapshot name, not the latest globally. Use --snapshot to
|
||||
restrict the operation to specific snapshot names.`,
|
||||
Args: cobra.NoArgs,
|
||||
RunE: func(cmd *cobra.Command, args []string) error {
|
||||
// Validate flags
|
||||
if !keepLatest && olderThan == "" {
|
||||
if !opts.KeepLatest && opts.OlderThan == "" {
|
||||
return fmt.Errorf("must specify either --keep-latest or --older-than")
|
||||
}
|
||||
if keepLatest && olderThan != "" {
|
||||
if opts.KeepLatest && opts.OlderThan != "" {
|
||||
return fmt.Errorf("cannot specify both --keep-latest and --older-than")
|
||||
}
|
||||
|
||||
@@ -186,6 +204,7 @@ func newSnapshotPurgeCommand() *cobra.Command {
|
||||
LogOptions: log.LogOptions{
|
||||
Verbose: rootFlags.Verbose,
|
||||
Debug: rootFlags.Debug,
|
||||
Quiet: rootFlags.Quiet,
|
||||
},
|
||||
Modules: []fx.Option{},
|
||||
Invokes: []fx.Option{
|
||||
@@ -193,7 +212,7 @@ func newSnapshotPurgeCommand() *cobra.Command {
|
||||
lc.Append(fx.Hook{
|
||||
OnStart: func(ctx context.Context) error {
|
||||
go func() {
|
||||
if err := v.PurgeSnapshots(keepLatest, olderThan, force); err != nil {
|
||||
if err := v.PurgeSnapshotsWithOptions(opts); err != nil {
|
||||
if err != context.Canceled {
|
||||
log.Error("Failed to purge snapshots", "error", err)
|
||||
os.Exit(1)
|
||||
@@ -216,22 +235,32 @@ func newSnapshotPurgeCommand() *cobra.Command {
|
||||
},
|
||||
}
|
||||
|
||||
cmd.Flags().BoolVar(&keepLatest, "keep-latest", false, "Keep only the latest snapshot")
|
||||
cmd.Flags().StringVar(&olderThan, "older-than", "", "Remove snapshots older than duration (e.g., 30d, 6m, 1y)")
|
||||
cmd.Flags().BoolVar(&force, "force", false, "Skip confirmation prompt")
|
||||
cmd.Flags().BoolVar(&opts.KeepLatest, "keep-latest", false, "Keep only the latest snapshot of each name")
|
||||
cmd.Flags().StringVar(&opts.OlderThan, "older-than", "", "Remove snapshots older than duration (e.g., 30d, 6m, 1y)")
|
||||
cmd.Flags().BoolVar(&opts.Force, "force", false, "Skip confirmation prompt")
|
||||
cmd.Flags().StringArrayVar(&opts.Names, "snapshot", nil, "Restrict to snapshots with these names (repeat for multiple)")
|
||||
|
||||
return cmd
|
||||
}
|
||||
|
||||
// newSnapshotVerifyCommand creates the 'snapshot verify' subcommand
|
||||
func newSnapshotVerifyCommand() *cobra.Command {
|
||||
var deep bool
|
||||
opts := &vaultik.VerifyOptions{}
|
||||
|
||||
cmd := &cobra.Command{
|
||||
Use: "verify <snapshot-id>",
|
||||
Short: "Verify snapshot integrity",
|
||||
Long: "Verifies that all blobs referenced in a snapshot exist",
|
||||
Args: cobra.ExactArgs(1),
|
||||
Args: func(cmd *cobra.Command, args []string) error {
|
||||
if len(args) != 1 {
|
||||
_ = cmd.Help()
|
||||
if len(args) == 0 {
|
||||
return fmt.Errorf("snapshot ID required")
|
||||
}
|
||||
return fmt.Errorf("expected 1 argument, got %d", len(args))
|
||||
}
|
||||
return nil
|
||||
},
|
||||
RunE: func(cmd *cobra.Command, args []string) error {
|
||||
snapshotID := args[0]
|
||||
|
||||
@@ -247,6 +276,7 @@ func newSnapshotVerifyCommand() *cobra.Command {
|
||||
LogOptions: log.LogOptions{
|
||||
Verbose: rootFlags.Verbose,
|
||||
Debug: rootFlags.Debug,
|
||||
Quiet: rootFlags.Quiet || opts.JSON,
|
||||
},
|
||||
Modules: []fx.Option{},
|
||||
Invokes: []fx.Option{
|
||||
@@ -254,9 +284,11 @@ func newSnapshotVerifyCommand() *cobra.Command {
|
||||
lc.Append(fx.Hook{
|
||||
OnStart: func(ctx context.Context) error {
|
||||
go func() {
|
||||
if err := v.VerifySnapshot(snapshotID, deep); err != nil {
|
||||
if err := v.VerifySnapshotWithOptions(snapshotID, opts); err != nil {
|
||||
if err != context.Canceled {
|
||||
log.Error("Verification failed", "error", err)
|
||||
if !opts.JSON {
|
||||
log.Error("Verification failed", "error", err)
|
||||
}
|
||||
os.Exit(1)
|
||||
}
|
||||
}
|
||||
@@ -277,7 +309,218 @@ func newSnapshotVerifyCommand() *cobra.Command {
|
||||
},
|
||||
}
|
||||
|
||||
cmd.Flags().BoolVar(&deep, "deep", false, "Download and verify blob hashes")
|
||||
cmd.Flags().BoolVar(&opts.Deep, "deep", false, "Download and verify blob hashes")
|
||||
cmd.Flags().BoolVar(&opts.JSON, "json", false, "Output verification results as JSON")
|
||||
|
||||
return cmd
|
||||
}
|
||||
|
||||
// newSnapshotRemoveCommand creates the 'snapshot remove' subcommand
|
||||
func newSnapshotRemoveCommand() *cobra.Command {
|
||||
opts := &vaultik.RemoveOptions{}
|
||||
|
||||
cmd := &cobra.Command{
|
||||
Use: "remove [snapshot-id]",
|
||||
Aliases: []string{"rm"},
|
||||
Short: "Remove a snapshot from the local database",
|
||||
Long: `Removes a snapshot from the local database.
|
||||
|
||||
By default, only removes from the local database. Use --remote to also remove
|
||||
the snapshot metadata from remote storage.
|
||||
|
||||
Note: This does NOT remove blobs. Use 'vaultik prune' to remove orphaned blobs
|
||||
after removing snapshots.
|
||||
|
||||
Use --all --force to remove all snapshots.`,
|
||||
Args: func(cmd *cobra.Command, args []string) error {
|
||||
all, _ := cmd.Flags().GetBool("all")
|
||||
if all {
|
||||
if len(args) > 0 {
|
||||
_ = cmd.Help()
|
||||
return fmt.Errorf("--all cannot be used with a snapshot ID")
|
||||
}
|
||||
return nil
|
||||
}
|
||||
if len(args) != 1 {
|
||||
_ = cmd.Help()
|
||||
if len(args) == 0 {
|
||||
return fmt.Errorf("snapshot ID required (or use --all --force)")
|
||||
}
|
||||
return fmt.Errorf("expected 1 argument, got %d", len(args))
|
||||
}
|
||||
return nil
|
||||
},
|
||||
RunE: func(cmd *cobra.Command, args []string) error {
|
||||
// Use unified config resolution
|
||||
configPath, err := ResolveConfigPath()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
rootFlags := GetRootFlags()
|
||||
return RunWithApp(cmd.Context(), AppOptions{
|
||||
ConfigPath: configPath,
|
||||
LogOptions: log.LogOptions{
|
||||
Verbose: rootFlags.Verbose,
|
||||
Debug: rootFlags.Debug,
|
||||
Quiet: rootFlags.Quiet || opts.JSON,
|
||||
},
|
||||
Modules: []fx.Option{},
|
||||
Invokes: []fx.Option{
|
||||
fx.Invoke(func(v *vaultik.Vaultik, lc fx.Lifecycle) {
|
||||
lc.Append(fx.Hook{
|
||||
OnStart: func(ctx context.Context) error {
|
||||
go func() {
|
||||
var err error
|
||||
if opts.All {
|
||||
_, err = v.RemoveAllSnapshots(opts)
|
||||
} else {
|
||||
_, err = v.RemoveSnapshot(args[0], opts)
|
||||
}
|
||||
if err != nil {
|
||||
if err != context.Canceled {
|
||||
if !opts.JSON {
|
||||
log.Error("Failed to remove snapshot", "error", err)
|
||||
}
|
||||
os.Exit(1)
|
||||
}
|
||||
}
|
||||
if err := v.Shutdowner.Shutdown(); err != nil {
|
||||
log.Error("Failed to shutdown", "error", err)
|
||||
}
|
||||
}()
|
||||
return nil
|
||||
},
|
||||
OnStop: func(ctx context.Context) error {
|
||||
v.Cancel()
|
||||
return nil
|
||||
},
|
||||
})
|
||||
}),
|
||||
},
|
||||
})
|
||||
},
|
||||
}
|
||||
|
||||
cmd.Flags().BoolVarP(&opts.Force, "force", "f", false, "Skip confirmation prompt")
|
||||
cmd.Flags().BoolVar(&opts.DryRun, "dry-run", false, "Show what would be removed without removing")
|
||||
cmd.Flags().BoolVar(&opts.JSON, "json", false, "Output result as JSON")
|
||||
cmd.Flags().BoolVar(&opts.Remote, "remote", false, "Also remove snapshot metadata from remote storage")
|
||||
cmd.Flags().BoolVar(&opts.All, "all", false, "Remove all snapshots (requires --force)")
|
||||
|
||||
return cmd
|
||||
}
|
||||
|
||||
// newSnapshotPruneCommand creates the 'snapshot prune' subcommand
|
||||
func newSnapshotPruneCommand() *cobra.Command {
|
||||
cmd := &cobra.Command{
|
||||
Use: "prune",
|
||||
Short: "Remove orphaned data from local database",
|
||||
Long: `Removes orphaned files, chunks, and blobs from the local database.
|
||||
|
||||
This cleans up data that is no longer referenced by any snapshot, which can
|
||||
accumulate from incomplete backups or deleted snapshots.`,
|
||||
Args: cobra.NoArgs,
|
||||
RunE: func(cmd *cobra.Command, args []string) error {
|
||||
// Use unified config resolution
|
||||
configPath, err := ResolveConfigPath()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
rootFlags := GetRootFlags()
|
||||
return RunWithApp(cmd.Context(), AppOptions{
|
||||
ConfigPath: configPath,
|
||||
LogOptions: log.LogOptions{
|
||||
Verbose: rootFlags.Verbose,
|
||||
Debug: rootFlags.Debug,
|
||||
Quiet: rootFlags.Quiet,
|
||||
},
|
||||
Modules: []fx.Option{},
|
||||
Invokes: []fx.Option{
|
||||
fx.Invoke(func(v *vaultik.Vaultik, lc fx.Lifecycle) {
|
||||
lc.Append(fx.Hook{
|
||||
OnStart: func(ctx context.Context) error {
|
||||
go func() {
|
||||
if _, err := v.PruneDatabase(); err != nil {
|
||||
if err != context.Canceled {
|
||||
log.Error("Failed to prune database", "error", err)
|
||||
os.Exit(1)
|
||||
}
|
||||
}
|
||||
if err := v.Shutdowner.Shutdown(); err != nil {
|
||||
log.Error("Failed to shutdown", "error", err)
|
||||
}
|
||||
}()
|
||||
return nil
|
||||
},
|
||||
OnStop: func(ctx context.Context) error {
|
||||
v.Cancel()
|
||||
return nil
|
||||
},
|
||||
})
|
||||
}),
|
||||
},
|
||||
})
|
||||
},
|
||||
}
|
||||
|
||||
return cmd
|
||||
}
|
||||
|
||||
// newSnapshotCleanupCommand creates the 'snapshot cleanup' subcommand
|
||||
func newSnapshotCleanupCommand() *cobra.Command {
|
||||
cmd := &cobra.Command{
|
||||
Use: "cleanup",
|
||||
Short: "Remove stale local snapshot records not found in remote storage",
|
||||
Long: `Removes local database records for snapshots whose metadata no longer
|
||||
exists in remote storage. These are typically left behind by incomplete
|
||||
or interrupted backups.
|
||||
|
||||
This command does not delete anything from remote storage.`,
|
||||
Args: cobra.NoArgs,
|
||||
RunE: func(cmd *cobra.Command, args []string) error {
|
||||
configPath, err := ResolveConfigPath()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
rootFlags := GetRootFlags()
|
||||
return RunWithApp(cmd.Context(), AppOptions{
|
||||
ConfigPath: configPath,
|
||||
LogOptions: log.LogOptions{
|
||||
Verbose: rootFlags.Verbose,
|
||||
Debug: rootFlags.Debug,
|
||||
Quiet: rootFlags.Quiet,
|
||||
},
|
||||
Modules: []fx.Option{},
|
||||
Invokes: []fx.Option{
|
||||
fx.Invoke(func(v *vaultik.Vaultik, lc fx.Lifecycle) {
|
||||
lc.Append(fx.Hook{
|
||||
OnStart: func(ctx context.Context) error {
|
||||
go func() {
|
||||
if err := v.CleanupLocalSnapshots(); err != nil {
|
||||
if err != context.Canceled {
|
||||
log.Error("Cleanup failed", "error", err)
|
||||
os.Exit(1)
|
||||
}
|
||||
}
|
||||
if err := v.Shutdowner.Shutdown(); err != nil {
|
||||
log.Error("Failed to shutdown", "error", err)
|
||||
}
|
||||
}()
|
||||
return nil
|
||||
},
|
||||
OnStop: func(ctx context.Context) error {
|
||||
v.Cancel()
|
||||
return nil
|
||||
},
|
||||
})
|
||||
}),
|
||||
},
|
||||
})
|
||||
},
|
||||
}
|
||||
|
||||
return cmd
|
||||
}
|
||||
|
||||
@@ -6,10 +6,10 @@ import (
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"git.eeqj.de/sneak/vaultik/internal/log"
|
||||
"git.eeqj.de/sneak/vaultik/internal/storage"
|
||||
"github.com/spf13/cobra"
|
||||
"go.uber.org/fx"
|
||||
"sneak.berlin/go/vaultik/internal/log"
|
||||
"sneak.berlin/go/vaultik/internal/storage"
|
||||
)
|
||||
|
||||
// StoreApp contains dependencies for store commands
|
||||
@@ -23,7 +23,7 @@ func NewStoreCommand() *cobra.Command {
|
||||
cmd := &cobra.Command{
|
||||
Use: "store",
|
||||
Short: "Storage information commands",
|
||||
Long: "Commands for viewing information about the S3 storage backend",
|
||||
Long: "Commands for viewing information about the storage backend",
|
||||
}
|
||||
|
||||
// Add subcommands
|
||||
@@ -37,7 +37,7 @@ func newStoreInfoCommand() *cobra.Command {
|
||||
return &cobra.Command{
|
||||
Use: "info",
|
||||
Short: "Display storage information",
|
||||
Long: "Shows S3 bucket configuration and storage statistics including snapshots and blobs",
|
||||
Long: "Shows storage configuration and statistics including snapshots and blobs",
|
||||
RunE: func(cmd *cobra.Command, args []string) error {
|
||||
return runWithApp(cmd.Context(), func(app *StoreApp) error {
|
||||
return app.Info(cmd.Context())
|
||||
@@ -127,6 +127,7 @@ func runWithApp(ctx context.Context, fn func(*StoreApp) error) error {
|
||||
LogOptions: log.LogOptions{
|
||||
Verbose: rootFlags.Verbose,
|
||||
Debug: rootFlags.Debug,
|
||||
Quiet: rootFlags.Quiet,
|
||||
},
|
||||
Modules: []fx.Option{
|
||||
fx.Provide(func(storer storage.Storer, shutdowner fx.Shutdowner) *StoreApp {
|
||||
|
||||
@@ -1,94 +0,0 @@
|
||||
package cli
|
||||
|
||||
import (
|
||||
"context"
|
||||
"os"
|
||||
|
||||
"git.eeqj.de/sneak/vaultik/internal/log"
|
||||
"git.eeqj.de/sneak/vaultik/internal/vaultik"
|
||||
"github.com/spf13/cobra"
|
||||
"go.uber.org/fx"
|
||||
)
|
||||
|
||||
// NewVerifyCommand creates the verify command
|
||||
func NewVerifyCommand() *cobra.Command {
|
||||
opts := &vaultik.VerifyOptions{}
|
||||
|
||||
cmd := &cobra.Command{
|
||||
Use: "verify <snapshot-id>",
|
||||
Short: "Verify snapshot integrity",
|
||||
Long: `Verifies that all blobs referenced in a snapshot exist and optionally verifies their contents.
|
||||
|
||||
Shallow verification (default):
|
||||
- Downloads and decompresses manifest
|
||||
- Checks existence of all blobs in S3
|
||||
- Reports missing blobs
|
||||
|
||||
Deep verification (--deep):
|
||||
- Downloads and decrypts database
|
||||
- Verifies blob lists match between manifest and database
|
||||
- Downloads, decrypts, and decompresses each blob
|
||||
- Verifies SHA256 hash of each chunk matches database
|
||||
- Ensures chunks are ordered correctly
|
||||
|
||||
The command will fail immediately on any verification error and exit with non-zero status.`,
|
||||
Args: cobra.ExactArgs(1),
|
||||
RunE: func(cmd *cobra.Command, args []string) error {
|
||||
snapshotID := args[0]
|
||||
|
||||
// Use unified config resolution
|
||||
configPath, err := ResolveConfigPath()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// Use the app framework for all verification
|
||||
rootFlags := GetRootFlags()
|
||||
return RunWithApp(cmd.Context(), AppOptions{
|
||||
ConfigPath: configPath,
|
||||
LogOptions: log.LogOptions{
|
||||
Verbose: rootFlags.Verbose,
|
||||
Debug: rootFlags.Debug,
|
||||
},
|
||||
Modules: []fx.Option{},
|
||||
Invokes: []fx.Option{
|
||||
fx.Invoke(func(v *vaultik.Vaultik, lc fx.Lifecycle) {
|
||||
lc.Append(fx.Hook{
|
||||
OnStart: func(ctx context.Context) error {
|
||||
// Run the verify operation directly
|
||||
go func() {
|
||||
var err error
|
||||
if opts.Deep {
|
||||
err = v.RunDeepVerify(snapshotID, opts)
|
||||
} else {
|
||||
err = v.VerifySnapshot(snapshotID, false)
|
||||
}
|
||||
|
||||
if err != nil {
|
||||
if err != context.Canceled {
|
||||
log.Error("Verification failed", "error", err)
|
||||
os.Exit(1)
|
||||
}
|
||||
}
|
||||
if err := v.Shutdowner.Shutdown(); err != nil {
|
||||
log.Error("Failed to shutdown", "error", err)
|
||||
}
|
||||
}()
|
||||
return nil
|
||||
},
|
||||
OnStop: func(ctx context.Context) error {
|
||||
log.Debug("Stopping verify operation")
|
||||
v.Cancel()
|
||||
return nil
|
||||
},
|
||||
})
|
||||
}),
|
||||
},
|
||||
})
|
||||
},
|
||||
}
|
||||
|
||||
cmd.Flags().BoolVar(&opts.Deep, "deep", false, "Perform deep verification by downloading and verifying all blob contents")
|
||||
|
||||
return cmd
|
||||
}
|
||||
27
internal/cli/version.go
Normal file
27
internal/cli/version.go
Normal file
@@ -0,0 +1,27 @@
|
||||
package cli
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"runtime"
|
||||
|
||||
"github.com/spf13/cobra"
|
||||
"sneak.berlin/go/vaultik/internal/globals"
|
||||
)
|
||||
|
||||
// NewVersionCommand creates the version command
|
||||
func NewVersionCommand() *cobra.Command {
|
||||
cmd := &cobra.Command{
|
||||
Use: "version",
|
||||
Short: "Print version information",
|
||||
Long: `Print version, git commit, and build information for vaultik.`,
|
||||
Args: cobra.NoArgs,
|
||||
Run: func(cmd *cobra.Command, args []string) {
|
||||
fmt.Printf("vaultik %s\n", globals.Version)
|
||||
fmt.Printf(" commit: %s\n", globals.Commit)
|
||||
fmt.Printf(" go: %s\n", runtime.Version())
|
||||
fmt.Printf(" os/arch: %s/%s\n", runtime.GOOS, runtime.GOARCH)
|
||||
},
|
||||
}
|
||||
|
||||
return cmd
|
||||
}
|
||||
@@ -4,16 +4,18 @@ import (
|
||||
"fmt"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"sort"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"filippo.io/age"
|
||||
"git.eeqj.de/sneak/smartconfig"
|
||||
"github.com/adrg/xdg"
|
||||
"go.uber.org/fx"
|
||||
"gopkg.in/yaml.v3"
|
||||
"sneak.berlin/go/vaultik/internal/log"
|
||||
)
|
||||
|
||||
const appName = "berlin.sneak.app.vaultik"
|
||||
const appName = "vaultik"
|
||||
|
||||
// expandTilde expands ~ at the start of a path to the user's home directory.
|
||||
func expandTilde(path string) string {
|
||||
@@ -37,24 +39,59 @@ func expandTildeInURL(url string) string {
|
||||
return url
|
||||
}
|
||||
|
||||
// SnapshotConfig is the configuration of a single named snapshot.
// A snapshot backs up one or more paths and may declare its own exclude
// patterns, which apply in addition to the global excludes.
type SnapshotConfig struct {
	// Paths lists the paths backed up by this snapshot.
	Paths []string `yaml:"paths"`

	// Exclude holds additional exclude patterns for this snapshot only.
	Exclude []string `yaml:"exclude"`
}
|
||||
|
||||
// GetExcludes returns the combined exclude patterns for a named snapshot.
|
||||
// It merges global excludes with the snapshot-specific excludes.
|
||||
func (c *Config) GetExcludes(snapshotName string) []string {
|
||||
snap, ok := c.Snapshots[snapshotName]
|
||||
if !ok {
|
||||
return c.Exclude
|
||||
}
|
||||
|
||||
if len(snap.Exclude) == 0 {
|
||||
return c.Exclude
|
||||
}
|
||||
|
||||
// Combine global and snapshot-specific excludes
|
||||
combined := make([]string, 0, len(c.Exclude)+len(snap.Exclude))
|
||||
combined = append(combined, c.Exclude...)
|
||||
combined = append(combined, snap.Exclude...)
|
||||
return combined
|
||||
}
|
||||
|
||||
// SnapshotNames returns the names of all configured snapshots in sorted order.
|
||||
func (c *Config) SnapshotNames() []string {
|
||||
names := make([]string, 0, len(c.Snapshots))
|
||||
for name := range c.Snapshots {
|
||||
names = append(names, name)
|
||||
}
|
||||
// Sort for deterministic order
|
||||
sort.Strings(names)
|
||||
return names
|
||||
}
|
||||
|
||||
// Config represents the application configuration for Vaultik.
|
||||
// It defines all settings for backup operations, including source directories,
|
||||
// encryption recipients, storage configuration, and performance tuning parameters.
|
||||
// Configuration is typically loaded from a YAML file.
|
||||
type Config struct {
|
||||
AgeRecipients []string `yaml:"age_recipients"`
|
||||
AgeSecretKey string `yaml:"age_secret_key"`
|
||||
BackupInterval time.Duration `yaml:"backup_interval"`
|
||||
BlobSizeLimit Size `yaml:"blob_size_limit"`
|
||||
ChunkSize Size `yaml:"chunk_size"`
|
||||
Exclude []string `yaml:"exclude"`
|
||||
FullScanInterval time.Duration `yaml:"full_scan_interval"`
|
||||
Hostname string `yaml:"hostname"`
|
||||
IndexPath string `yaml:"index_path"`
|
||||
MinTimeBetweenRun time.Duration `yaml:"min_time_between_run"`
|
||||
S3 S3Config `yaml:"s3"`
|
||||
SourceDirs []string `yaml:"source_dirs"`
|
||||
CompressionLevel int `yaml:"compression_level"`
|
||||
AgeRecipients []string `yaml:"age_recipients"`
|
||||
AgeSecretKey string `yaml:"age_secret_key"`
|
||||
BlobSizeLimit Size `yaml:"blob_size_limit"`
|
||||
ChunkSize Size `yaml:"chunk_size"`
|
||||
Exclude []string `yaml:"exclude"` // Global excludes applied to all snapshots
|
||||
Hostname string `yaml:"hostname"`
|
||||
IndexPath string `yaml:"index_path"`
|
||||
S3 S3Config `yaml:"s3"`
|
||||
Snapshots map[string]SnapshotConfig `yaml:"snapshots"`
|
||||
CompressionLevel int `yaml:"compression_level"`
|
||||
|
||||
// StorageURL specifies the storage backend using a URL format.
|
||||
// Takes precedence over S3Config if set.
|
||||
@@ -114,13 +151,10 @@ func Load(path string) (*Config, error) {
|
||||
|
||||
cfg := &Config{
|
||||
// Set defaults
|
||||
BlobSizeLimit: Size(10 * 1024 * 1024 * 1024), // 10GB
|
||||
ChunkSize: Size(10 * 1024 * 1024), // 10MB
|
||||
BackupInterval: 1 * time.Hour,
|
||||
FullScanInterval: 24 * time.Hour,
|
||||
MinTimeBetweenRun: 15 * time.Minute,
|
||||
IndexPath: filepath.Join(xdg.DataHome, appName, "index.sqlite"),
|
||||
CompressionLevel: 3,
|
||||
BlobSizeLimit: Size(10 * 1024 * 1024 * 1024), // 10GB
|
||||
ChunkSize: Size(10 * 1024 * 1024), // 10MB
|
||||
IndexPath: filepath.Join(xdg.DataHome, appName, "index.sqlite"),
|
||||
CompressionLevel: 3,
|
||||
}
|
||||
|
||||
// Convert smartconfig data to YAML then unmarshal
|
||||
@@ -137,8 +171,13 @@ func Load(path string) (*Config, error) {
|
||||
// Expand tilde in all path fields
|
||||
cfg.IndexPath = expandTilde(cfg.IndexPath)
|
||||
cfg.StorageURL = expandTildeInURL(cfg.StorageURL)
|
||||
for i, dir := range cfg.SourceDirs {
|
||||
cfg.SourceDirs[i] = expandTilde(dir)
|
||||
|
||||
// Expand tildes in snapshot paths
|
||||
for name, snap := range cfg.Snapshots {
|
||||
for i, path := range snap.Paths {
|
||||
snap.Paths[i] = expandTilde(path)
|
||||
}
|
||||
cfg.Snapshots[name] = snap
|
||||
}
|
||||
|
||||
// Check for environment variable override for IndexPath
|
||||
@@ -146,6 +185,11 @@ func Load(path string) (*Config, error) {
|
||||
cfg.IndexPath = expandTilde(envIndexPath)
|
||||
}
|
||||
|
||||
// Check for environment variable override for AgeSecretKey
|
||||
if envAgeSecretKey := os.Getenv("VAULTIK_AGE_SECRET_KEY"); envAgeSecretKey != "" {
|
||||
cfg.AgeSecretKey = extractAgeSecretKey(envAgeSecretKey)
|
||||
}
|
||||
|
||||
// Get hostname if not set
|
||||
if cfg.Hostname == "" {
|
||||
hostname, err := os.Hostname()
|
||||
@@ -163,6 +207,17 @@ func Load(path string) (*Config, error) {
|
||||
cfg.S3.PartSize = Size(5 * 1024 * 1024) // 5MB
|
||||
}
|
||||
|
||||
// Check config file permissions (warn if world or group readable)
|
||||
if info, err := os.Stat(path); err == nil {
|
||||
mode := info.Mode().Perm()
|
||||
if mode&0044 != 0 { // group or world readable
|
||||
log.Warn("Config file has insecure permissions (contains S3 credentials)",
|
||||
"path", path,
|
||||
"mode", fmt.Sprintf("%04o", mode),
|
||||
"recommendation", "chmod 600 "+path)
|
||||
}
|
||||
}
|
||||
|
||||
if err := cfg.Validate(); err != nil {
|
||||
return nil, fmt.Errorf("invalid config: %w", err)
|
||||
}
|
||||
@@ -173,7 +228,7 @@ func Load(path string) (*Config, error) {
|
||||
// Validate checks if the configuration is valid and complete.
|
||||
// It ensures all required fields are present and have valid values:
|
||||
// - At least one age recipient must be specified
|
||||
// - At least one source directory must be configured
|
||||
// - At least one snapshot must be configured with at least one path
|
||||
// - Storage must be configured (either storage_url or s3.* fields)
|
||||
// - Chunk size must be at least 1MB
|
||||
// - Blob size limit must be at least the chunk size
|
||||
@@ -181,11 +236,17 @@ func Load(path string) (*Config, error) {
|
||||
// Returns an error describing the first validation failure encountered.
|
||||
func (c *Config) Validate() error {
|
||||
if len(c.AgeRecipients) == 0 {
|
||||
return fmt.Errorf("at least one age_recipient is required")
|
||||
return fmt.Errorf("at least one age_recipient is required (generate with: age-keygen)")
|
||||
}
|
||||
|
||||
if len(c.SourceDirs) == 0 {
|
||||
return fmt.Errorf("at least one source directory is required")
|
||||
if len(c.Snapshots) == 0 {
|
||||
return fmt.Errorf("at least one snapshot must be configured (see config.example.yml)")
|
||||
}
|
||||
|
||||
for name, snap := range c.Snapshots {
|
||||
if len(snap.Paths) == 0 {
|
||||
return fmt.Errorf("snapshot %q must have at least one path", name)
|
||||
}
|
||||
}
|
||||
|
||||
// Validate storage configuration
|
||||
@@ -229,12 +290,16 @@ func (c *Config) validateStorage() error {
|
||||
}
|
||||
return nil
|
||||
}
|
||||
return fmt.Errorf("storage_url must start with s3:// or file://")
|
||||
if strings.HasPrefix(c.StorageURL, "rclone://") {
|
||||
// Rclone storage uses rclone's own config
|
||||
return nil
|
||||
}
|
||||
return fmt.Errorf("storage_url must start with s3://, file://, or rclone://")
|
||||
}
|
||||
|
||||
// Legacy S3 configuration
|
||||
if c.S3.Endpoint == "" {
|
||||
return fmt.Errorf("s3.endpoint is required (or set storage_url)")
|
||||
return fmt.Errorf("storage not configured; set storage_url or provide s3.endpoint + s3.bucket + credentials")
|
||||
}
|
||||
|
||||
if c.S3.Bucket == "" {
|
||||
@@ -252,6 +317,21 @@ func (c *Config) validateStorage() error {
|
||||
return nil
|
||||
}
|
||||
|
||||
// extractAgeSecretKey extracts the AGE-SECRET-KEY from the input using
|
||||
// the age library's parser, which handles comments and whitespace.
|
||||
func extractAgeSecretKey(input string) string {
|
||||
identities, err := age.ParseIdentities(strings.NewReader(input))
|
||||
if err != nil || len(identities) == 0 {
|
||||
// Fall back to trimmed input if parsing fails
|
||||
return strings.TrimSpace(input)
|
||||
}
|
||||
// Return the string representation of the first identity
|
||||
if id, ok := identities[0].(*age.X25519Identity); ok {
|
||||
return id.String()
|
||||
}
|
||||
return strings.TrimSpace(input)
|
||||
}
|
||||
|
||||
// Module exports the config module for fx dependency injection.
|
||||
// It provides the Config type to other modules in the application.
|
||||
var Module = fx.Module("config",
|
||||
|
||||
@@ -45,12 +45,21 @@ func TestConfigLoad(t *testing.T) {
|
||||
t.Errorf("Expected first age recipient to be %s, got '%s'", TEST_SNEAK_AGE_PUBLIC_KEY, cfg.AgeRecipients[0])
|
||||
}
|
||||
|
||||
if len(cfg.SourceDirs) != 2 {
|
||||
t.Errorf("Expected 2 source dirs, got %d", len(cfg.SourceDirs))
|
||||
if len(cfg.Snapshots) != 1 {
|
||||
t.Errorf("Expected 1 snapshot, got %d", len(cfg.Snapshots))
|
||||
}
|
||||
|
||||
if cfg.SourceDirs[0] != "/tmp/vaultik-test-source" {
|
||||
t.Errorf("Expected first source dir to be '/tmp/vaultik-test-source', got '%s'", cfg.SourceDirs[0])
|
||||
testSnap, ok := cfg.Snapshots["test"]
|
||||
if !ok {
|
||||
t.Fatal("Expected 'test' snapshot to exist")
|
||||
}
|
||||
|
||||
if len(testSnap.Paths) != 2 {
|
||||
t.Errorf("Expected 2 paths in test snapshot, got %d", len(testSnap.Paths))
|
||||
}
|
||||
|
||||
if testSnap.Paths[0] != "/tmp/vaultik-test-source" {
|
||||
t.Errorf("Expected first path to be '/tmp/vaultik-test-source', got '%s'", testSnap.Paths[0])
|
||||
}
|
||||
|
||||
if cfg.S3.Bucket != "vaultik-test-bucket" {
|
||||
@@ -74,3 +83,65 @@ func TestConfigFromEnv(t *testing.T) {
|
||||
t.Errorf("Config file does not exist at path from VAULTIK_CONFIG: %s", configPath)
|
||||
}
|
||||
}
|
||||
|
||||
// TestExtractAgeSecretKey tests extraction of AGE-SECRET-KEY from various inputs
|
||||
func TestExtractAgeSecretKey(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
input string
|
||||
expected string
|
||||
}{
|
||||
{
|
||||
name: "plain key",
|
||||
input: "AGE-SECRET-KEY-19CR5YSFW59HM4TLD6GXVEDMZFTVVF7PPHKUT68TXSFPK7APHXA2QS2NJA5",
|
||||
expected: "AGE-SECRET-KEY-19CR5YSFW59HM4TLD6GXVEDMZFTVVF7PPHKUT68TXSFPK7APHXA2QS2NJA5",
|
||||
},
|
||||
{
|
||||
name: "key with trailing newline",
|
||||
input: "AGE-SECRET-KEY-19CR5YSFW59HM4TLD6GXVEDMZFTVVF7PPHKUT68TXSFPK7APHXA2QS2NJA5\n",
|
||||
expected: "AGE-SECRET-KEY-19CR5YSFW59HM4TLD6GXVEDMZFTVVF7PPHKUT68TXSFPK7APHXA2QS2NJA5",
|
||||
},
|
||||
{
|
||||
name: "full age-keygen output",
|
||||
input: `# created: 2025-01-14T12:00:00Z
|
||||
# public key: age1ezrjmfpwsc95svdg0y54mums3zevgzu0x0ecq2f7tp8a05gl0sjq9q9wjg
|
||||
AGE-SECRET-KEY-19CR5YSFW59HM4TLD6GXVEDMZFTVVF7PPHKUT68TXSFPK7APHXA2QS2NJA5
|
||||
`,
|
||||
expected: "AGE-SECRET-KEY-19CR5YSFW59HM4TLD6GXVEDMZFTVVF7PPHKUT68TXSFPK7APHXA2QS2NJA5",
|
||||
},
|
||||
{
|
||||
name: "age-keygen output with extra blank lines",
|
||||
input: `# created: 2025-01-14T12:00:00Z
|
||||
# public key: age1ezrjmfpwsc95svdg0y54mums3zevgzu0x0ecq2f7tp8a05gl0sjq9q9wjg
|
||||
|
||||
AGE-SECRET-KEY-19CR5YSFW59HM4TLD6GXVEDMZFTVVF7PPHKUT68TXSFPK7APHXA2QS2NJA5
|
||||
|
||||
`,
|
||||
expected: "AGE-SECRET-KEY-19CR5YSFW59HM4TLD6GXVEDMZFTVVF7PPHKUT68TXSFPK7APHXA2QS2NJA5",
|
||||
},
|
||||
{
|
||||
name: "key with leading whitespace",
|
||||
input: " AGE-SECRET-KEY-19CR5YSFW59HM4TLD6GXVEDMZFTVVF7PPHKUT68TXSFPK7APHXA2QS2NJA5 ",
|
||||
expected: "AGE-SECRET-KEY-19CR5YSFW59HM4TLD6GXVEDMZFTVVF7PPHKUT68TXSFPK7APHXA2QS2NJA5",
|
||||
},
|
||||
{
|
||||
name: "empty input",
|
||||
input: "",
|
||||
expected: "",
|
||||
},
|
||||
{
|
||||
name: "only comments",
|
||||
input: "# this is a comment\n# another comment",
|
||||
expected: "# this is a comment\n# another comment",
|
||||
},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
result := extractAgeSecretKey(tt.input)
|
||||
if result != tt.expected {
|
||||
t.Errorf("extractAgeSecretKey(%q) = %q, want %q", tt.input, result, tt.expected)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
@@ -5,6 +5,8 @@ import (
|
||||
"strings"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"sneak.berlin/go/vaultik/internal/types"
|
||||
)
|
||||
|
||||
func TestBlobChunkRepository(t *testing.T) {
|
||||
@@ -16,8 +18,8 @@ func TestBlobChunkRepository(t *testing.T) {
|
||||
|
||||
// Create blob first
|
||||
blob := &Blob{
|
||||
ID: "blob1-uuid",
|
||||
Hash: "blob1-hash",
|
||||
ID: types.NewBlobID(),
|
||||
Hash: types.BlobHash("blob1-hash"),
|
||||
CreatedTS: time.Now(),
|
||||
}
|
||||
err := repos.Blobs.Create(ctx, nil, blob)
|
||||
@@ -26,7 +28,7 @@ func TestBlobChunkRepository(t *testing.T) {
|
||||
}
|
||||
|
||||
// Create chunks
|
||||
chunks := []string{"chunk1", "chunk2", "chunk3"}
|
||||
chunks := []types.ChunkHash{"chunk1", "chunk2", "chunk3"}
|
||||
for _, chunkHash := range chunks {
|
||||
chunk := &Chunk{
|
||||
ChunkHash: chunkHash,
|
||||
@@ -41,7 +43,7 @@ func TestBlobChunkRepository(t *testing.T) {
|
||||
// Test Create
|
||||
bc1 := &BlobChunk{
|
||||
BlobID: blob.ID,
|
||||
ChunkHash: "chunk1",
|
||||
ChunkHash: types.ChunkHash("chunk1"),
|
||||
Offset: 0,
|
||||
Length: 1024,
|
||||
}
|
||||
@@ -54,7 +56,7 @@ func TestBlobChunkRepository(t *testing.T) {
|
||||
// Add more chunks to the same blob
|
||||
bc2 := &BlobChunk{
|
||||
BlobID: blob.ID,
|
||||
ChunkHash: "chunk2",
|
||||
ChunkHash: types.ChunkHash("chunk2"),
|
||||
Offset: 1024,
|
||||
Length: 2048,
|
||||
}
|
||||
@@ -65,7 +67,7 @@ func TestBlobChunkRepository(t *testing.T) {
|
||||
|
||||
bc3 := &BlobChunk{
|
||||
BlobID: blob.ID,
|
||||
ChunkHash: "chunk3",
|
||||
ChunkHash: types.ChunkHash("chunk3"),
|
||||
Offset: 3072,
|
||||
Length: 512,
|
||||
}
|
||||
@@ -75,7 +77,7 @@ func TestBlobChunkRepository(t *testing.T) {
|
||||
}
|
||||
|
||||
// Test GetByBlobID
|
||||
blobChunks, err := repos.BlobChunks.GetByBlobID(ctx, blob.ID)
|
||||
blobChunks, err := repos.BlobChunks.GetByBlobID(ctx, blob.ID.String())
|
||||
if err != nil {
|
||||
t.Fatalf("failed to get blob chunks: %v", err)
|
||||
}
|
||||
@@ -134,13 +136,13 @@ func TestBlobChunkRepositoryMultipleBlobs(t *testing.T) {
|
||||
|
||||
// Create blobs
|
||||
blob1 := &Blob{
|
||||
ID: "blob1-uuid",
|
||||
Hash: "blob1-hash",
|
||||
ID: types.NewBlobID(),
|
||||
Hash: types.BlobHash("blob1-hash"),
|
||||
CreatedTS: time.Now(),
|
||||
}
|
||||
blob2 := &Blob{
|
||||
ID: "blob2-uuid",
|
||||
Hash: "blob2-hash",
|
||||
ID: types.NewBlobID(),
|
||||
Hash: types.BlobHash("blob2-hash"),
|
||||
CreatedTS: time.Now(),
|
||||
}
|
||||
|
||||
@@ -154,7 +156,7 @@ func TestBlobChunkRepositoryMultipleBlobs(t *testing.T) {
|
||||
}
|
||||
|
||||
// Create chunks
|
||||
chunkHashes := []string{"chunk1", "chunk2", "chunk3"}
|
||||
chunkHashes := []types.ChunkHash{"chunk1", "chunk2", "chunk3"}
|
||||
for _, chunkHash := range chunkHashes {
|
||||
chunk := &Chunk{
|
||||
ChunkHash: chunkHash,
|
||||
@@ -169,10 +171,10 @@ func TestBlobChunkRepositoryMultipleBlobs(t *testing.T) {
|
||||
// Create chunks across multiple blobs
|
||||
// Some chunks are shared between blobs (deduplication scenario)
|
||||
blobChunks := []BlobChunk{
|
||||
{BlobID: blob1.ID, ChunkHash: "chunk1", Offset: 0, Length: 1024},
|
||||
{BlobID: blob1.ID, ChunkHash: "chunk2", Offset: 1024, Length: 1024},
|
||||
{BlobID: blob2.ID, ChunkHash: "chunk2", Offset: 0, Length: 1024}, // chunk2 is shared
|
||||
{BlobID: blob2.ID, ChunkHash: "chunk3", Offset: 1024, Length: 1024},
|
||||
{BlobID: blob1.ID, ChunkHash: types.ChunkHash("chunk1"), Offset: 0, Length: 1024},
|
||||
{BlobID: blob1.ID, ChunkHash: types.ChunkHash("chunk2"), Offset: 1024, Length: 1024},
|
||||
{BlobID: blob2.ID, ChunkHash: types.ChunkHash("chunk2"), Offset: 0, Length: 1024}, // chunk2 is shared
|
||||
{BlobID: blob2.ID, ChunkHash: types.ChunkHash("chunk3"), Offset: 1024, Length: 1024},
|
||||
}
|
||||
|
||||
for _, bc := range blobChunks {
|
||||
@@ -183,7 +185,7 @@ func TestBlobChunkRepositoryMultipleBlobs(t *testing.T) {
|
||||
}
|
||||
|
||||
// Verify blob1 chunks
|
||||
chunks, err := repos.BlobChunks.GetByBlobID(ctx, blob1.ID)
|
||||
chunks, err := repos.BlobChunks.GetByBlobID(ctx, blob1.ID.String())
|
||||
if err != nil {
|
||||
t.Fatalf("failed to get blob1 chunks: %v", err)
|
||||
}
|
||||
@@ -192,7 +194,7 @@ func TestBlobChunkRepositoryMultipleBlobs(t *testing.T) {
|
||||
}
|
||||
|
||||
// Verify blob2 chunks
|
||||
chunks, err = repos.BlobChunks.GetByBlobID(ctx, blob2.ID)
|
||||
chunks, err = repos.BlobChunks.GetByBlobID(ctx, blob2.ID.String())
|
||||
if err != nil {
|
||||
t.Fatalf("failed to get blob2 chunks: %v", err)
|
||||
}
|
||||
|
||||
@@ -6,7 +6,7 @@ import (
|
||||
"fmt"
|
||||
"time"
|
||||
|
||||
"git.eeqj.de/sneak/vaultik/internal/log"
|
||||
"sneak.berlin/go/vaultik/internal/log"
|
||||
)
|
||||
|
||||
type BlobRepository struct {
|
||||
|
||||
@@ -4,6 +4,8 @@ import (
|
||||
"context"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"sneak.berlin/go/vaultik/internal/types"
|
||||
)
|
||||
|
||||
func TestBlobRepository(t *testing.T) {
|
||||
@@ -15,8 +17,8 @@ func TestBlobRepository(t *testing.T) {
|
||||
|
||||
// Test Create
|
||||
blob := &Blob{
|
||||
ID: "test-blob-id-123",
|
||||
Hash: "blobhash123",
|
||||
ID: types.NewBlobID(),
|
||||
Hash: types.BlobHash("blobhash123"),
|
||||
CreatedTS: time.Now().Truncate(time.Second),
|
||||
}
|
||||
|
||||
@@ -26,7 +28,7 @@ func TestBlobRepository(t *testing.T) {
|
||||
}
|
||||
|
||||
// Test GetByHash
|
||||
retrieved, err := repo.GetByHash(ctx, blob.Hash)
|
||||
retrieved, err := repo.GetByHash(ctx, blob.Hash.String())
|
||||
if err != nil {
|
||||
t.Fatalf("failed to get blob: %v", err)
|
||||
}
|
||||
@@ -41,7 +43,7 @@ func TestBlobRepository(t *testing.T) {
|
||||
}
|
||||
|
||||
// Test GetByID
|
||||
retrievedByID, err := repo.GetByID(ctx, blob.ID)
|
||||
retrievedByID, err := repo.GetByID(ctx, blob.ID.String())
|
||||
if err != nil {
|
||||
t.Fatalf("failed to get blob by ID: %v", err)
|
||||
}
|
||||
@@ -54,8 +56,8 @@ func TestBlobRepository(t *testing.T) {
|
||||
|
||||
// Test with second blob
|
||||
blob2 := &Blob{
|
||||
ID: "test-blob-id-456",
|
||||
Hash: "blobhash456",
|
||||
ID: types.NewBlobID(),
|
||||
Hash: types.BlobHash("blobhash456"),
|
||||
CreatedTS: time.Now().Truncate(time.Second),
|
||||
}
|
||||
err = repo.Create(ctx, nil, blob2)
|
||||
@@ -65,13 +67,13 @@ func TestBlobRepository(t *testing.T) {
|
||||
|
||||
// Test UpdateFinished
|
||||
now := time.Now()
|
||||
err = repo.UpdateFinished(ctx, nil, blob.ID, blob.Hash, 1000, 500)
|
||||
err = repo.UpdateFinished(ctx, nil, blob.ID.String(), blob.Hash.String(), 1000, 500)
|
||||
if err != nil {
|
||||
t.Fatalf("failed to update blob as finished: %v", err)
|
||||
}
|
||||
|
||||
// Verify update
|
||||
updated, err := repo.GetByID(ctx, blob.ID)
|
||||
updated, err := repo.GetByID(ctx, blob.ID.String())
|
||||
if err != nil {
|
||||
t.Fatalf("failed to get updated blob: %v", err)
|
||||
}
|
||||
@@ -86,13 +88,13 @@ func TestBlobRepository(t *testing.T) {
|
||||
}
|
||||
|
||||
// Test UpdateUploaded
|
||||
err = repo.UpdateUploaded(ctx, nil, blob.ID)
|
||||
err = repo.UpdateUploaded(ctx, nil, blob.ID.String())
|
||||
if err != nil {
|
||||
t.Fatalf("failed to update blob as uploaded: %v", err)
|
||||
}
|
||||
|
||||
// Verify upload update
|
||||
uploaded, err := repo.GetByID(ctx, blob.ID)
|
||||
uploaded, err := repo.GetByID(ctx, blob.ID.String())
|
||||
if err != nil {
|
||||
t.Fatalf("failed to get uploaded blob: %v", err)
|
||||
}
|
||||
@@ -113,8 +115,8 @@ func TestBlobRepositoryDuplicate(t *testing.T) {
|
||||
repo := NewBlobRepository(db)
|
||||
|
||||
blob := &Blob{
|
||||
ID: "duplicate-test-id",
|
||||
Hash: "duplicate_blob",
|
||||
ID: types.NewBlobID(),
|
||||
Hash: types.BlobHash("duplicate_blob"),
|
||||
CreatedTS: time.Now().Truncate(time.Second),
|
||||
}
|
||||
|
||||
|
||||
@@ -5,6 +5,8 @@ import (
|
||||
"fmt"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"sneak.berlin/go/vaultik/internal/types"
|
||||
)
|
||||
|
||||
// TestCascadeDeleteDebug tests cascade delete with debug output
|
||||
@@ -27,7 +29,6 @@ func TestCascadeDeleteDebug(t *testing.T) {
|
||||
file := &File{
|
||||
Path: "/cascade-test.txt",
|
||||
MTime: time.Now().Truncate(time.Second),
|
||||
CTime: time.Now().Truncate(time.Second),
|
||||
Size: 1024,
|
||||
Mode: 0644,
|
||||
UID: 1000,
|
||||
@@ -42,7 +43,7 @@ func TestCascadeDeleteDebug(t *testing.T) {
|
||||
// Create chunks and file-chunk mappings
|
||||
for i := 0; i < 3; i++ {
|
||||
chunk := &Chunk{
|
||||
ChunkHash: fmt.Sprintf("cascade-chunk-%d", i),
|
||||
ChunkHash: types.ChunkHash(fmt.Sprintf("cascade-chunk-%d", i)),
|
||||
Size: 1024,
|
||||
}
|
||||
err = repos.Chunks.Create(ctx, nil, chunk)
|
||||
|
||||
@@ -4,6 +4,8 @@ import (
|
||||
"context"
|
||||
"database/sql"
|
||||
"fmt"
|
||||
|
||||
"sneak.berlin/go/vaultik/internal/types"
|
||||
)
|
||||
|
||||
type ChunkFileRepository struct {
|
||||
@@ -23,9 +25,9 @@ func (r *ChunkFileRepository) Create(ctx context.Context, tx *sql.Tx, cf *ChunkF
|
||||
|
||||
var err error
|
||||
if tx != nil {
|
||||
_, err = tx.ExecContext(ctx, query, cf.ChunkHash, cf.FileID, cf.FileOffset, cf.Length)
|
||||
_, err = tx.ExecContext(ctx, query, cf.ChunkHash.String(), cf.FileID.String(), cf.FileOffset, cf.Length)
|
||||
} else {
|
||||
_, err = r.db.ExecWithLog(ctx, query, cf.ChunkHash, cf.FileID, cf.FileOffset, cf.Length)
|
||||
_, err = r.db.ExecWithLog(ctx, query, cf.ChunkHash.String(), cf.FileID.String(), cf.FileOffset, cf.Length)
|
||||
}
|
||||
|
||||
if err != nil {
|
||||
@@ -35,30 +37,20 @@ func (r *ChunkFileRepository) Create(ctx context.Context, tx *sql.Tx, cf *ChunkF
|
||||
return nil
|
||||
}
|
||||
|
||||
func (r *ChunkFileRepository) GetByChunkHash(ctx context.Context, chunkHash string) ([]*ChunkFile, error) {
|
||||
func (r *ChunkFileRepository) GetByChunkHash(ctx context.Context, chunkHash types.ChunkHash) ([]*ChunkFile, error) {
|
||||
query := `
|
||||
SELECT chunk_hash, file_id, file_offset, length
|
||||
FROM chunk_files
|
||||
WHERE chunk_hash = ?
|
||||
`
|
||||
|
||||
rows, err := r.db.conn.QueryContext(ctx, query, chunkHash)
|
||||
rows, err := r.db.conn.QueryContext(ctx, query, chunkHash.String())
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("querying chunk files: %w", err)
|
||||
}
|
||||
defer CloseRows(rows)
|
||||
|
||||
var chunkFiles []*ChunkFile
|
||||
for rows.Next() {
|
||||
var cf ChunkFile
|
||||
err := rows.Scan(&cf.ChunkHash, &cf.FileID, &cf.FileOffset, &cf.Length)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("scanning chunk file: %w", err)
|
||||
}
|
||||
chunkFiles = append(chunkFiles, &cf)
|
||||
}
|
||||
|
||||
return chunkFiles, rows.Err()
|
||||
return r.scanChunkFiles(rows)
|
||||
}
|
||||
|
||||
func (r *ChunkFileRepository) GetByFilePath(ctx context.Context, filePath string) ([]*ChunkFile, error) {
|
||||
@@ -75,40 +67,41 @@ func (r *ChunkFileRepository) GetByFilePath(ctx context.Context, filePath string
|
||||
}
|
||||
defer CloseRows(rows)
|
||||
|
||||
var chunkFiles []*ChunkFile
|
||||
for rows.Next() {
|
||||
var cf ChunkFile
|
||||
err := rows.Scan(&cf.ChunkHash, &cf.FileID, &cf.FileOffset, &cf.Length)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("scanning chunk file: %w", err)
|
||||
}
|
||||
chunkFiles = append(chunkFiles, &cf)
|
||||
}
|
||||
|
||||
return chunkFiles, rows.Err()
|
||||
return r.scanChunkFiles(rows)
|
||||
}
|
||||
|
||||
// GetByFileID retrieves chunk files by file ID
|
||||
func (r *ChunkFileRepository) GetByFileID(ctx context.Context, fileID string) ([]*ChunkFile, error) {
|
||||
func (r *ChunkFileRepository) GetByFileID(ctx context.Context, fileID types.FileID) ([]*ChunkFile, error) {
|
||||
query := `
|
||||
SELECT chunk_hash, file_id, file_offset, length
|
||||
FROM chunk_files
|
||||
WHERE file_id = ?
|
||||
`
|
||||
|
||||
rows, err := r.db.conn.QueryContext(ctx, query, fileID)
|
||||
rows, err := r.db.conn.QueryContext(ctx, query, fileID.String())
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("querying chunk files: %w", err)
|
||||
}
|
||||
defer CloseRows(rows)
|
||||
|
||||
return r.scanChunkFiles(rows)
|
||||
}
|
||||
|
||||
// scanChunkFiles is a helper that scans chunk file rows
|
||||
func (r *ChunkFileRepository) scanChunkFiles(rows *sql.Rows) ([]*ChunkFile, error) {
|
||||
var chunkFiles []*ChunkFile
|
||||
for rows.Next() {
|
||||
var cf ChunkFile
|
||||
err := rows.Scan(&cf.ChunkHash, &cf.FileID, &cf.FileOffset, &cf.Length)
|
||||
var chunkHashStr, fileIDStr string
|
||||
err := rows.Scan(&chunkHashStr, &fileIDStr, &cf.FileOffset, &cf.Length)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("scanning chunk file: %w", err)
|
||||
}
|
||||
cf.ChunkHash = types.ChunkHash(chunkHashStr)
|
||||
cf.FileID, err = types.ParseFileID(fileIDStr)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("parsing file ID: %w", err)
|
||||
}
|
||||
chunkFiles = append(chunkFiles, &cf)
|
||||
}
|
||||
|
||||
@@ -116,14 +109,14 @@ func (r *ChunkFileRepository) GetByFileID(ctx context.Context, fileID string) ([
|
||||
}
|
||||
|
||||
// DeleteByFileID deletes all chunk_files entries for a given file ID
|
||||
func (r *ChunkFileRepository) DeleteByFileID(ctx context.Context, tx *sql.Tx, fileID string) error {
|
||||
func (r *ChunkFileRepository) DeleteByFileID(ctx context.Context, tx *sql.Tx, fileID types.FileID) error {
|
||||
query := `DELETE FROM chunk_files WHERE file_id = ?`
|
||||
|
||||
var err error
|
||||
if tx != nil {
|
||||
_, err = tx.ExecContext(ctx, query, fileID)
|
||||
_, err = tx.ExecContext(ctx, query, fileID.String())
|
||||
} else {
|
||||
_, err = r.db.ExecWithLog(ctx, query, fileID)
|
||||
_, err = r.db.ExecWithLog(ctx, query, fileID.String())
|
||||
}
|
||||
|
||||
if err != nil {
|
||||
@@ -132,3 +125,80 @@ func (r *ChunkFileRepository) DeleteByFileID(ctx context.Context, tx *sql.Tx, fi
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// DeleteByFileIDs deletes all chunk_files for multiple files in a single statement.
|
||||
func (r *ChunkFileRepository) DeleteByFileIDs(ctx context.Context, tx *sql.Tx, fileIDs []types.FileID) error {
|
||||
if len(fileIDs) == 0 {
|
||||
return nil
|
||||
}
|
||||
|
||||
// Batch at 500 to stay within SQLite's variable limit
|
||||
const batchSize = 500
|
||||
|
||||
for i := 0; i < len(fileIDs); i += batchSize {
|
||||
end := i + batchSize
|
||||
if end > len(fileIDs) {
|
||||
end = len(fileIDs)
|
||||
}
|
||||
batch := fileIDs[i:end]
|
||||
|
||||
query := "DELETE FROM chunk_files WHERE file_id IN (?" + repeatPlaceholder(len(batch)-1) + ")"
|
||||
args := make([]interface{}, len(batch))
|
||||
for j, id := range batch {
|
||||
args[j] = id.String()
|
||||
}
|
||||
|
||||
var err error
|
||||
if tx != nil {
|
||||
_, err = tx.ExecContext(ctx, query, args...)
|
||||
} else {
|
||||
_, err = r.db.ExecWithLog(ctx, query, args...)
|
||||
}
|
||||
if err != nil {
|
||||
return fmt.Errorf("batch deleting chunk_files: %w", err)
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// CreateBatch inserts multiple chunk_files in a single statement for efficiency.
|
||||
func (r *ChunkFileRepository) CreateBatch(ctx context.Context, tx *sql.Tx, cfs []ChunkFile) error {
|
||||
if len(cfs) == 0 {
|
||||
return nil
|
||||
}
|
||||
|
||||
// Each ChunkFile has 4 values, so batch at 200 to be safe with SQLite's variable limit
|
||||
const batchSize = 200
|
||||
|
||||
for i := 0; i < len(cfs); i += batchSize {
|
||||
end := i + batchSize
|
||||
if end > len(cfs) {
|
||||
end = len(cfs)
|
||||
}
|
||||
batch := cfs[i:end]
|
||||
|
||||
query := "INSERT INTO chunk_files (chunk_hash, file_id, file_offset, length) VALUES "
|
||||
args := make([]interface{}, 0, len(batch)*4)
|
||||
for j, cf := range batch {
|
||||
if j > 0 {
|
||||
query += ", "
|
||||
}
|
||||
query += "(?, ?, ?, ?)"
|
||||
args = append(args, cf.ChunkHash.String(), cf.FileID.String(), cf.FileOffset, cf.Length)
|
||||
}
|
||||
query += " ON CONFLICT(chunk_hash, file_id) DO NOTHING"
|
||||
|
||||
var err error
|
||||
if tx != nil {
|
||||
_, err = tx.ExecContext(ctx, query, args...)
|
||||
} else {
|
||||
_, err = r.db.ExecWithLog(ctx, query, args...)
|
||||
}
|
||||
if err != nil {
|
||||
return fmt.Errorf("batch inserting chunk_files: %w", err)
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
@@ -4,6 +4,8 @@ import (
|
||||
"context"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"sneak.berlin/go/vaultik/internal/types"
|
||||
)
|
||||
|
||||
func TestChunkFileRepository(t *testing.T) {
|
||||
@@ -20,7 +22,6 @@ func TestChunkFileRepository(t *testing.T) {
|
||||
file1 := &File{
|
||||
Path: "/file1.txt",
|
||||
MTime: testTime,
|
||||
CTime: testTime,
|
||||
Size: 1024,
|
||||
Mode: 0644,
|
||||
UID: 1000,
|
||||
@@ -35,7 +36,6 @@ func TestChunkFileRepository(t *testing.T) {
|
||||
file2 := &File{
|
||||
Path: "/file2.txt",
|
||||
MTime: testTime,
|
||||
CTime: testTime,
|
||||
Size: 1024,
|
||||
Mode: 0644,
|
||||
UID: 1000,
|
||||
@@ -49,7 +49,7 @@ func TestChunkFileRepository(t *testing.T) {
|
||||
|
||||
// Create chunk first
|
||||
chunk := &Chunk{
|
||||
ChunkHash: "chunk1",
|
||||
ChunkHash: types.ChunkHash("chunk1"),
|
||||
Size: 1024,
|
||||
}
|
||||
err = chunksRepo.Create(ctx, nil, chunk)
|
||||
@@ -59,7 +59,7 @@ func TestChunkFileRepository(t *testing.T) {
|
||||
|
||||
// Test Create
|
||||
cf1 := &ChunkFile{
|
||||
ChunkHash: "chunk1",
|
||||
ChunkHash: types.ChunkHash("chunk1"),
|
||||
FileID: file1.ID,
|
||||
FileOffset: 0,
|
||||
Length: 1024,
|
||||
@@ -72,7 +72,7 @@ func TestChunkFileRepository(t *testing.T) {
|
||||
|
||||
// Add same chunk in different file (deduplication scenario)
|
||||
cf2 := &ChunkFile{
|
||||
ChunkHash: "chunk1",
|
||||
ChunkHash: types.ChunkHash("chunk1"),
|
||||
FileID: file2.ID,
|
||||
FileOffset: 2048,
|
||||
Length: 1024,
|
||||
@@ -114,7 +114,7 @@ func TestChunkFileRepository(t *testing.T) {
|
||||
if len(chunkFiles) != 1 {
|
||||
t.Errorf("expected 1 chunk for file, got %d", len(chunkFiles))
|
||||
}
|
||||
if chunkFiles[0].ChunkHash != "chunk1" {
|
||||
if chunkFiles[0].ChunkHash != types.ChunkHash("chunk1") {
|
||||
t.Errorf("wrong chunk hash: expected chunk1, got %s", chunkFiles[0].ChunkHash)
|
||||
}
|
||||
|
||||
@@ -136,9 +136,9 @@ func TestChunkFileRepositoryComplexDeduplication(t *testing.T) {
|
||||
|
||||
// Create test files
|
||||
testTime := time.Now().Truncate(time.Second)
|
||||
file1 := &File{Path: "/file1.txt", MTime: testTime, CTime: testTime, Size: 3072, Mode: 0644, UID: 1000, GID: 1000}
|
||||
file2 := &File{Path: "/file2.txt", MTime: testTime, CTime: testTime, Size: 3072, Mode: 0644, UID: 1000, GID: 1000}
|
||||
file3 := &File{Path: "/file3.txt", MTime: testTime, CTime: testTime, Size: 2048, Mode: 0644, UID: 1000, GID: 1000}
|
||||
file1 := &File{Path: "/file1.txt", MTime: testTime, Size: 3072, Mode: 0644, UID: 1000, GID: 1000}
|
||||
file2 := &File{Path: "/file2.txt", MTime: testTime, Size: 3072, Mode: 0644, UID: 1000, GID: 1000}
|
||||
file3 := &File{Path: "/file3.txt", MTime: testTime, Size: 2048, Mode: 0644, UID: 1000, GID: 1000}
|
||||
|
||||
if err := fileRepo.Create(ctx, nil, file1); err != nil {
|
||||
t.Fatalf("failed to create file1: %v", err)
|
||||
@@ -151,7 +151,7 @@ func TestChunkFileRepositoryComplexDeduplication(t *testing.T) {
|
||||
}
|
||||
|
||||
// Create chunks first
|
||||
chunks := []string{"chunk1", "chunk2", "chunk3", "chunk4"}
|
||||
chunks := []types.ChunkHash{"chunk1", "chunk2", "chunk3", "chunk4"}
|
||||
for _, chunkHash := range chunks {
|
||||
chunk := &Chunk{
|
||||
ChunkHash: chunkHash,
|
||||
@@ -170,16 +170,16 @@ func TestChunkFileRepositoryComplexDeduplication(t *testing.T) {
|
||||
|
||||
chunkFiles := []ChunkFile{
|
||||
// File1
|
||||
{ChunkHash: "chunk1", FileID: file1.ID, FileOffset: 0, Length: 1024},
|
||||
{ChunkHash: "chunk2", FileID: file1.ID, FileOffset: 1024, Length: 1024},
|
||||
{ChunkHash: "chunk3", FileID: file1.ID, FileOffset: 2048, Length: 1024},
|
||||
{ChunkHash: types.ChunkHash("chunk1"), FileID: file1.ID, FileOffset: 0, Length: 1024},
|
||||
{ChunkHash: types.ChunkHash("chunk2"), FileID: file1.ID, FileOffset: 1024, Length: 1024},
|
||||
{ChunkHash: types.ChunkHash("chunk3"), FileID: file1.ID, FileOffset: 2048, Length: 1024},
|
||||
// File2
|
||||
{ChunkHash: "chunk2", FileID: file2.ID, FileOffset: 0, Length: 1024},
|
||||
{ChunkHash: "chunk3", FileID: file2.ID, FileOffset: 1024, Length: 1024},
|
||||
{ChunkHash: "chunk4", FileID: file2.ID, FileOffset: 2048, Length: 1024},
|
||||
{ChunkHash: types.ChunkHash("chunk2"), FileID: file2.ID, FileOffset: 0, Length: 1024},
|
||||
{ChunkHash: types.ChunkHash("chunk3"), FileID: file2.ID, FileOffset: 1024, Length: 1024},
|
||||
{ChunkHash: types.ChunkHash("chunk4"), FileID: file2.ID, FileOffset: 2048, Length: 1024},
|
||||
// File3
|
||||
{ChunkHash: "chunk1", FileID: file3.ID, FileOffset: 0, Length: 1024},
|
||||
{ChunkHash: "chunk4", FileID: file3.ID, FileOffset: 1024, Length: 1024},
|
||||
{ChunkHash: types.ChunkHash("chunk1"), FileID: file3.ID, FileOffset: 0, Length: 1024},
|
||||
{ChunkHash: types.ChunkHash("chunk4"), FileID: file3.ID, FileOffset: 1024, Length: 1024},
|
||||
}
|
||||
|
||||
for _, cf := range chunkFiles {
|
||||
|
||||
@@ -5,7 +5,7 @@ import (
|
||||
"database/sql"
|
||||
"fmt"
|
||||
|
||||
"git.eeqj.de/sneak/vaultik/internal/log"
|
||||
"sneak.berlin/go/vaultik/internal/log"
|
||||
)
|
||||
|
||||
type ChunkRepository struct {
|
||||
|
||||
@@ -3,6 +3,8 @@ package database
|
||||
import (
|
||||
"context"
|
||||
"testing"
|
||||
|
||||
"sneak.berlin/go/vaultik/internal/types"
|
||||
)
|
||||
|
||||
func TestChunkRepository(t *testing.T) {
|
||||
@@ -14,7 +16,7 @@ func TestChunkRepository(t *testing.T) {
|
||||
|
||||
// Test Create
|
||||
chunk := &Chunk{
|
||||
ChunkHash: "chunkhash123",
|
||||
ChunkHash: types.ChunkHash("chunkhash123"),
|
||||
Size: 4096,
|
||||
}
|
||||
|
||||
@@ -24,7 +26,7 @@ func TestChunkRepository(t *testing.T) {
|
||||
}
|
||||
|
||||
// Test GetByHash
|
||||
retrieved, err := repo.GetByHash(ctx, chunk.ChunkHash)
|
||||
retrieved, err := repo.GetByHash(ctx, chunk.ChunkHash.String())
|
||||
if err != nil {
|
||||
t.Fatalf("failed to get chunk: %v", err)
|
||||
}
|
||||
@@ -46,7 +48,7 @@ func TestChunkRepository(t *testing.T) {
|
||||
|
||||
// Test GetByHashes
|
||||
chunk2 := &Chunk{
|
||||
ChunkHash: "chunkhash456",
|
||||
ChunkHash: types.ChunkHash("chunkhash456"),
|
||||
Size: 8192,
|
||||
}
|
||||
err = repo.Create(ctx, nil, chunk2)
|
||||
@@ -54,7 +56,7 @@ func TestChunkRepository(t *testing.T) {
|
||||
t.Fatalf("failed to create second chunk: %v", err)
|
||||
}
|
||||
|
||||
chunks, err := repo.GetByHashes(ctx, []string{chunk.ChunkHash, chunk2.ChunkHash})
|
||||
chunks, err := repo.GetByHashes(ctx, []string{chunk.ChunkHash.String(), chunk2.ChunkHash.String()})
|
||||
if err != nil {
|
||||
t.Fatalf("failed to get chunks by hashes: %v", err)
|
||||
}
|
||||
|
||||
@@ -6,24 +6,32 @@
|
||||
// multiple source files. Blobs are content-addressed, meaning their filename
|
||||
// is derived from their SHA256 hash after compression and encryption.
|
||||
//
|
||||
// The database does not support migrations. If the schema changes, delete
|
||||
// the local database and perform a full backup to recreate it.
|
||||
// Schema is managed via numbered SQL migrations embedded in the schema/
|
||||
// directory. Migration 000.sql bootstraps the schema_migrations tracking
|
||||
// table; subsequent migrations (001, 002, …) are applied in order.
|
||||
package database
|
||||
|
||||
import (
|
||||
"context"
|
||||
"database/sql"
|
||||
_ "embed"
|
||||
"embed"
|
||||
"fmt"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"sort"
|
||||
"strconv"
|
||||
"strings"
|
||||
|
||||
"git.eeqj.de/sneak/vaultik/internal/log"
|
||||
_ "modernc.org/sqlite"
|
||||
"sneak.berlin/go/vaultik/internal/log"
|
||||
)
|
||||
|
||||
//go:embed schema.sql
|
||||
var schemaSQL string
|
||||
//go:embed schema/*.sql
|
||||
var schemaFS embed.FS
|
||||
|
||||
// bootstrapVersion is the migration that creates the schema_migrations
|
||||
// table itself. It is applied before the normal migration loop.
|
||||
const bootstrapVersion = 0
|
||||
|
||||
// DB represents the Vaultik local index database connection.
|
||||
// It uses SQLite to track file metadata, content-defined chunks, and blob associations.
|
||||
@@ -35,27 +43,58 @@ type DB struct {
|
||||
path string
|
||||
}
|
||||
|
||||
// ParseMigrationVersion extracts the numeric version prefix from a migration
// filename. Filenames must follow the pattern "<version>.sql" or
// "<version>_<description>.sql", where version is a non-empty string of
// ASCII digits, conventionally zero-padded (e.g. "001", "002").
//
// It returns the version as an integer. An error is returned when the
// filename does not end in the ".sql" extension, when the version prefix is
// empty or contains a non-digit character, or when the digits do not fit in
// an int.
func ParseMigrationVersion(filename string) (int, error) {
	const ext = ".sql"

	// Enforce the documented extension; without this check any extension
	// (or none at all) would be stripped and accepted silently.
	if filepath.Ext(filename) != ext {
		return 0, fmt.Errorf("invalid migration filename %q: missing %s extension", filename, ext)
	}

	name := strings.TrimSuffix(filename, ext)
	if name == "" {
		return 0, fmt.Errorf("invalid migration filename %q: empty name", filename)
	}

	// Split on underscore to separate version from description.
	// If there's no underscore, the entire stem is the version.
	versionStr := name
	if idx := strings.IndexByte(name, '_'); idx >= 0 {
		versionStr = name[:idx]
	}

	if versionStr == "" {
		return 0, fmt.Errorf("invalid migration filename %q: empty version prefix", filename)
	}

	// Validate the version is purely numeric. strconv.Atoi alone would also
	// accept a leading sign such as "+1" or "-1".
	for _, ch := range versionStr {
		if ch < '0' || ch > '9' {
			return 0, fmt.Errorf(
				"invalid migration filename %q: version %q contains non-numeric character %q",
				filename, versionStr, string(ch),
			)
		}
	}

	// Atoi can still fail here when the digit string overflows int.
	version, err := strconv.Atoi(versionStr)
	if err != nil {
		return 0, fmt.Errorf("invalid migration filename %q: %w", filename, err)
	}

	return version, nil
}
|
||||
|
||||
// New creates a new database connection at the specified path.
|
||||
// It automatically handles database recovery, creates the schema if needed,
|
||||
// and configures SQLite with appropriate settings for performance and reliability.
|
||||
// The database uses WAL mode for better concurrency and sets a busy timeout
|
||||
// to handle concurrent access gracefully.
|
||||
//
|
||||
// If the database appears locked, it will attempt recovery by removing stale
|
||||
// lock files and switching temporarily to TRUNCATE journal mode.
|
||||
//
|
||||
// New creates a new database connection at the specified path.
|
||||
// It automatically handles recovery from stale locks, creates the schema if needed,
|
||||
// and configures SQLite with WAL mode for better concurrency.
|
||||
// It creates the schema if needed and configures SQLite with WAL mode for
|
||||
// better concurrency. SQLite handles crash recovery automatically when
|
||||
// opening a database with journal/WAL files present.
|
||||
// The path parameter can be a file path for persistent storage or ":memory:"
|
||||
// for an in-memory database (useful for testing).
|
||||
func New(ctx context.Context, path string) (*DB, error) {
|
||||
log.Debug("Opening database connection", "path", path)
|
||||
|
||||
// First, try to recover from any stale locks
|
||||
if err := recoverDatabase(ctx, path); err != nil {
|
||||
log.Warn("Failed to recover database", "error", err)
|
||||
}
|
||||
// Note: We do NOT delete journal/WAL files before opening.
|
||||
// SQLite handles crash recovery automatically when the database is opened.
|
||||
// Deleting these files would corrupt the database after an unclean shutdown.
|
||||
|
||||
// First attempt with standard WAL mode
|
||||
log.Debug("Attempting to open database with WAL mode", "path", path)
|
||||
@@ -81,9 +120,9 @@ func New(ctx context.Context, path string) (*DB, error) {
|
||||
}
|
||||
|
||||
db := &DB{conn: conn, path: path}
|
||||
if err := db.createSchema(ctx); err != nil {
|
||||
if err := applyMigrations(ctx, conn); err != nil {
|
||||
_ = conn.Close()
|
||||
return nil, fmt.Errorf("creating schema: %w", err)
|
||||
return nil, fmt.Errorf("applying migrations: %w", err)
|
||||
}
|
||||
return db, nil
|
||||
}
|
||||
@@ -134,9 +173,9 @@ func New(ctx context.Context, path string) (*DB, error) {
|
||||
}
|
||||
|
||||
db := &DB{conn: conn, path: path}
|
||||
if err := db.createSchema(ctx); err != nil {
|
||||
if err := applyMigrations(ctx, conn); err != nil {
|
||||
_ = conn.Close()
|
||||
return nil, fmt.Errorf("creating schema: %w", err)
|
||||
return nil, fmt.Errorf("applying migrations: %w", err)
|
||||
}
|
||||
|
||||
log.Debug("Database connection established successfully", "path", path)
|
||||
@@ -156,62 +195,6 @@ func (db *DB) Close() error {
|
||||
return nil
|
||||
}
|
||||
|
||||
// recoverDatabase attempts to recover a locked database
|
||||
func recoverDatabase(ctx context.Context, path string) error {
|
||||
// Check if database file exists
|
||||
if _, err := os.Stat(path); os.IsNotExist(err) {
|
||||
// No database file, nothing to recover
|
||||
return nil
|
||||
}
|
||||
|
||||
// Remove stale lock files
|
||||
// SQLite creates -wal and -shm files for WAL mode
|
||||
walPath := path + "-wal"
|
||||
shmPath := path + "-shm"
|
||||
journalPath := path + "-journal"
|
||||
|
||||
log.Info("Attempting database recovery", "path", path)
|
||||
|
||||
// Always remove lock files on startup to ensure clean state
|
||||
removed := false
|
||||
|
||||
// Check for and remove journal file (from non-WAL mode)
|
||||
if _, err := os.Stat(journalPath); err == nil {
|
||||
log.Info("Found journal file, removing", "path", journalPath)
|
||||
if err := os.Remove(journalPath); err != nil {
|
||||
log.Warn("Failed to remove journal file", "error", err)
|
||||
} else {
|
||||
removed = true
|
||||
}
|
||||
}
|
||||
|
||||
// Remove WAL file
|
||||
if _, err := os.Stat(walPath); err == nil {
|
||||
log.Info("Found WAL file, removing", "path", walPath)
|
||||
if err := os.Remove(walPath); err != nil {
|
||||
log.Warn("Failed to remove WAL file", "error", err)
|
||||
} else {
|
||||
removed = true
|
||||
}
|
||||
}
|
||||
|
||||
// Remove SHM file
|
||||
if _, err := os.Stat(shmPath); err == nil {
|
||||
log.Info("Found shared memory file, removing", "path", shmPath)
|
||||
if err := os.Remove(shmPath); err != nil {
|
||||
log.Warn("Failed to remove shared memory file", "error", err)
|
||||
} else {
|
||||
removed = true
|
||||
}
|
||||
}
|
||||
|
||||
if removed {
|
||||
log.Info("Database lock files removed")
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// Conn returns the underlying *sql.DB connection.
|
||||
// This should be used sparingly and primarily for read operations.
|
||||
// For write operations, prefer using the ExecWithLog method.
|
||||
@@ -219,6 +202,11 @@ func (db *DB) Conn() *sql.DB {
|
||||
return db.conn
|
||||
}
|
||||
|
||||
// Path returns the path to the database file.
|
||||
func (db *DB) Path() string {
|
||||
return db.path
|
||||
}
|
||||
|
||||
// BeginTx starts a new database transaction with the given options.
|
||||
// The caller is responsible for committing or rolling back the transaction.
|
||||
// For write transactions, consider using the Repositories.WithTx method instead,
|
||||
@@ -258,9 +246,120 @@ func (db *DB) QueryRowWithLog(
|
||||
return db.conn.QueryRowContext(ctx, query, args...)
|
||||
}
|
||||
|
||||
func (db *DB) createSchema(ctx context.Context) error {
|
||||
_, err := db.conn.ExecContext(ctx, schemaSQL)
|
||||
return err
|
||||
// collectMigrations reads the embedded schema directory and returns
|
||||
// migration filenames sorted lexicographically.
|
||||
func collectMigrations() ([]string, error) {
|
||||
entries, err := schemaFS.ReadDir("schema")
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to read schema directory: %w", err)
|
||||
}
|
||||
|
||||
var migrations []string
|
||||
|
||||
for _, entry := range entries {
|
||||
if !entry.IsDir() && strings.HasSuffix(entry.Name(), ".sql") {
|
||||
migrations = append(migrations, entry.Name())
|
||||
}
|
||||
}
|
||||
|
||||
sort.Strings(migrations)
|
||||
|
||||
return migrations, nil
|
||||
}
|
||||
|
||||
// bootstrapMigrationsTable ensures the schema_migrations table exists
|
||||
// by applying 000.sql if the table is missing.
|
||||
func bootstrapMigrationsTable(ctx context.Context, db *sql.DB) error {
|
||||
var tableExists int
|
||||
|
||||
err := db.QueryRowContext(ctx,
|
||||
"SELECT COUNT(*) FROM sqlite_master WHERE type='table' AND name='schema_migrations'",
|
||||
).Scan(&tableExists)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to check for migrations table: %w", err)
|
||||
}
|
||||
|
||||
if tableExists > 0 {
|
||||
return nil
|
||||
}
|
||||
|
||||
content, err := schemaFS.ReadFile("schema/000.sql")
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to read bootstrap migration 000.sql: %w", err)
|
||||
}
|
||||
|
||||
log.Info("applying bootstrap migration", "version", bootstrapVersion)
|
||||
|
||||
_, err = db.ExecContext(ctx, string(content))
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to apply bootstrap migration: %w", err)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// applyMigrations applies all pending migrations to db. It first bootstraps
|
||||
// the schema_migrations table via 000.sql, then iterates through remaining
|
||||
// migration files in order.
|
||||
func applyMigrations(ctx context.Context, db *sql.DB) error {
|
||||
if err := bootstrapMigrationsTable(ctx, db); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
migrations, err := collectMigrations()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
for _, migration := range migrations {
|
||||
version, parseErr := ParseMigrationVersion(migration)
|
||||
if parseErr != nil {
|
||||
return parseErr
|
||||
}
|
||||
|
||||
// Check if already applied.
|
||||
var count int
|
||||
|
||||
err := db.QueryRowContext(ctx,
|
||||
"SELECT COUNT(*) FROM schema_migrations WHERE version = ?",
|
||||
version,
|
||||
).Scan(&count)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to check migration status: %w", err)
|
||||
}
|
||||
|
||||
if count > 0 {
|
||||
log.Debug("migration already applied", "version", version)
|
||||
|
||||
continue
|
||||
}
|
||||
|
||||
// Read and apply migration.
|
||||
content, readErr := schemaFS.ReadFile(filepath.Join("schema", migration))
|
||||
if readErr != nil {
|
||||
return fmt.Errorf("failed to read migration %s: %w", migration, readErr)
|
||||
}
|
||||
|
||||
log.Info("applying migration", "version", version)
|
||||
|
||||
_, execErr := db.ExecContext(ctx, string(content))
|
||||
if execErr != nil {
|
||||
return fmt.Errorf("failed to apply migration %s: %w", migration, execErr)
|
||||
}
|
||||
|
||||
// Record migration as applied.
|
||||
_, recErr := db.ExecContext(ctx,
|
||||
"INSERT INTO schema_migrations (version) VALUES (?)",
|
||||
version,
|
||||
)
|
||||
if recErr != nil {
|
||||
return fmt.Errorf("failed to record migration %s: %w", migration, recErr)
|
||||
}
|
||||
|
||||
log.Info("migration applied successfully", "version", version)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// NewTestDB creates an in-memory SQLite database for testing purposes.
|
||||
@@ -270,6 +369,15 @@ func NewTestDB() (*DB, error) {
|
||||
return New(context.Background(), ":memory:")
|
||||
}
|
||||
|
||||
// repeatPlaceholder builds the tail of a SQL IN (...) placeholder list: the
// fragment ", ?" written n times. For example, repeatPlaceholder(2) yields
// ", ?, ?". Any n of zero or less yields the empty string.
func repeatPlaceholder(n int) string {
	var tail strings.Builder
	for written := 0; written < n; written++ {
		tail.WriteString(", ?")
	}
	return tail.String()
}
|
||||
|
||||
// LogSQL logs SQL queries and their arguments when debug mode is enabled.
|
||||
// Debug mode is activated by setting the GODEBUG environment variable to include "vaultik".
|
||||
// This is useful for troubleshooting database operations and understanding query patterns.
|
||||
|
||||
@@ -2,6 +2,7 @@ package database
|
||||
|
||||
import (
|
||||
"context"
|
||||
"database/sql"
|
||||
"fmt"
|
||||
"path/filepath"
|
||||
"testing"
|
||||
@@ -26,9 +27,10 @@ func TestDatabase(t *testing.T) {
|
||||
t.Fatal("database connection is nil")
|
||||
}
|
||||
|
||||
// Test schema creation (already done in New)
|
||||
// Test schema creation (already done in New via migrations)
|
||||
// Verify tables exist
|
||||
tables := []string{
|
||||
"schema_migrations",
|
||||
"files", "file_chunks", "chunks", "blobs",
|
||||
"blob_chunks", "chunk_files", "snapshots",
|
||||
}
|
||||
@@ -99,3 +101,139 @@ func TestDatabaseConcurrentAccess(t *testing.T) {
|
||||
t.Errorf("expected 10 chunks, got %d", count)
|
||||
}
|
||||
}
|
||||
|
||||
func TestParseMigrationVersion(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
filename string
|
||||
wantVer int
|
||||
wantError bool
|
||||
}{
|
||||
{name: "valid 000.sql", filename: "000.sql", wantVer: 0, wantError: false},
|
||||
{name: "valid 001.sql", filename: "001.sql", wantVer: 1, wantError: false},
|
||||
{name: "valid 099.sql", filename: "099.sql", wantVer: 99, wantError: false},
|
||||
{name: "valid with description", filename: "001_initial_schema.sql", wantVer: 1, wantError: false},
|
||||
{name: "valid large version", filename: "123_big_migration.sql", wantVer: 123, wantError: false},
|
||||
{name: "invalid alpha version", filename: "abc.sql", wantVer: 0, wantError: true},
|
||||
{name: "invalid mixed chars", filename: "12a.sql", wantVer: 0, wantError: true},
|
||||
{name: "invalid no extension", filename: "schema.sql", wantVer: 0, wantError: true},
|
||||
{name: "empty string", filename: "", wantVer: 0, wantError: true},
|
||||
}
|
||||
|
||||
for _, tc := range tests {
|
||||
t.Run(tc.name, func(t *testing.T) {
|
||||
got, err := ParseMigrationVersion(tc.filename)
|
||||
if tc.wantError {
|
||||
if err == nil {
|
||||
t.Errorf("ParseMigrationVersion(%q) = %d, nil; want error", tc.filename, got)
|
||||
}
|
||||
return
|
||||
}
|
||||
if err != nil {
|
||||
t.Errorf("ParseMigrationVersion(%q) unexpected error: %v", tc.filename, err)
|
||||
return
|
||||
}
|
||||
if got != tc.wantVer {
|
||||
t.Errorf("ParseMigrationVersion(%q) = %d; want %d", tc.filename, got, tc.wantVer)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestApplyMigrations_Idempotent(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
|
||||
conn, err := sql.Open("sqlite", ":memory:?_foreign_keys=ON")
|
||||
if err != nil {
|
||||
t.Fatalf("failed to open database: %v", err)
|
||||
}
|
||||
defer func() {
|
||||
if err := conn.Close(); err != nil {
|
||||
t.Errorf("failed to close database: %v", err)
|
||||
}
|
||||
}()
|
||||
|
||||
conn.SetMaxOpenConns(1)
|
||||
conn.SetMaxIdleConns(1)
|
||||
|
||||
// First run: apply all migrations.
|
||||
if err := applyMigrations(ctx, conn); err != nil {
|
||||
t.Fatalf("first applyMigrations failed: %v", err)
|
||||
}
|
||||
|
||||
// Count rows in schema_migrations after first run.
|
||||
var countBefore int
|
||||
if err := conn.QueryRowContext(ctx, "SELECT COUNT(*) FROM schema_migrations").Scan(&countBefore); err != nil {
|
||||
t.Fatalf("failed to count schema_migrations after first run: %v", err)
|
||||
}
|
||||
|
||||
// Second run: must be a no-op.
|
||||
if err := applyMigrations(ctx, conn); err != nil {
|
||||
t.Fatalf("second applyMigrations failed: %v", err)
|
||||
}
|
||||
|
||||
// Count rows in schema_migrations after second run — must be unchanged.
|
||||
var countAfter int
|
||||
if err := conn.QueryRowContext(ctx, "SELECT COUNT(*) FROM schema_migrations").Scan(&countAfter); err != nil {
|
||||
t.Fatalf("failed to count schema_migrations after second run: %v", err)
|
||||
}
|
||||
|
||||
if countBefore != countAfter {
|
||||
t.Errorf("schema_migrations row count changed: before=%d, after=%d", countBefore, countAfter)
|
||||
}
|
||||
}
|
||||
|
||||
func TestBootstrapMigrationsTable_FreshDatabase(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
|
||||
conn, err := sql.Open("sqlite", ":memory:?_foreign_keys=ON")
|
||||
if err != nil {
|
||||
t.Fatalf("failed to open database: %v", err)
|
||||
}
|
||||
defer func() {
|
||||
if err := conn.Close(); err != nil {
|
||||
t.Errorf("failed to close database: %v", err)
|
||||
}
|
||||
}()
|
||||
|
||||
conn.SetMaxOpenConns(1)
|
||||
conn.SetMaxIdleConns(1)
|
||||
|
||||
// Verify schema_migrations does NOT exist yet.
|
||||
var tableBefore int
|
||||
if err := conn.QueryRowContext(ctx,
|
||||
"SELECT COUNT(*) FROM sqlite_master WHERE type='table' AND name='schema_migrations'",
|
||||
).Scan(&tableBefore); err != nil {
|
||||
t.Fatalf("failed to check for table before bootstrap: %v", err)
|
||||
}
|
||||
if tableBefore != 0 {
|
||||
t.Fatal("schema_migrations table should not exist before bootstrap")
|
||||
}
|
||||
|
||||
// Run bootstrap.
|
||||
if err := bootstrapMigrationsTable(ctx, conn); err != nil {
|
||||
t.Fatalf("bootstrapMigrationsTable failed: %v", err)
|
||||
}
|
||||
|
||||
// Verify schema_migrations now exists.
|
||||
var tableAfter int
|
||||
if err := conn.QueryRowContext(ctx,
|
||||
"SELECT COUNT(*) FROM sqlite_master WHERE type='table' AND name='schema_migrations'",
|
||||
).Scan(&tableAfter); err != nil {
|
||||
t.Fatalf("failed to check for table after bootstrap: %v", err)
|
||||
}
|
||||
if tableAfter != 1 {
|
||||
t.Fatalf("schema_migrations table should exist after bootstrap, got count=%d", tableAfter)
|
||||
}
|
||||
|
||||
// Verify version 0 row exists.
|
||||
var version int
|
||||
if err := conn.QueryRowContext(ctx,
|
||||
"SELECT version FROM schema_migrations WHERE version = 0",
|
||||
).Scan(&version); err != nil {
|
||||
t.Fatalf("version 0 row not found in schema_migrations: %v", err)
|
||||
}
|
||||
if version != 0 {
|
||||
t.Errorf("expected version 0, got %d", version)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -4,6 +4,8 @@ import (
|
||||
"context"
|
||||
"database/sql"
|
||||
"fmt"
|
||||
|
||||
"sneak.berlin/go/vaultik/internal/types"
|
||||
)
|
||||
|
||||
type FileChunkRepository struct {
|
||||
@@ -23,9 +25,9 @@ func (r *FileChunkRepository) Create(ctx context.Context, tx *sql.Tx, fc *FileCh
|
||||
|
||||
var err error
|
||||
if tx != nil {
|
||||
_, err = tx.ExecContext(ctx, query, fc.FileID, fc.Idx, fc.ChunkHash)
|
||||
_, err = tx.ExecContext(ctx, query, fc.FileID.String(), fc.Idx, fc.ChunkHash.String())
|
||||
} else {
|
||||
_, err = r.db.ExecWithLog(ctx, query, fc.FileID, fc.Idx, fc.ChunkHash)
|
||||
_, err = r.db.ExecWithLog(ctx, query, fc.FileID.String(), fc.Idx, fc.ChunkHash.String())
|
||||
}
|
||||
|
||||
if err != nil {
|
||||
@@ -50,21 +52,11 @@ func (r *FileChunkRepository) GetByPath(ctx context.Context, path string) ([]*Fi
|
||||
}
|
||||
defer CloseRows(rows)
|
||||
|
||||
var fileChunks []*FileChunk
|
||||
for rows.Next() {
|
||||
var fc FileChunk
|
||||
err := rows.Scan(&fc.FileID, &fc.Idx, &fc.ChunkHash)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("scanning file chunk: %w", err)
|
||||
}
|
||||
fileChunks = append(fileChunks, &fc)
|
||||
}
|
||||
|
||||
return fileChunks, rows.Err()
|
||||
return r.scanFileChunks(rows)
|
||||
}
|
||||
|
||||
// GetByFileID retrieves file chunks by file ID
|
||||
func (r *FileChunkRepository) GetByFileID(ctx context.Context, fileID string) ([]*FileChunk, error) {
|
||||
func (r *FileChunkRepository) GetByFileID(ctx context.Context, fileID types.FileID) ([]*FileChunk, error) {
|
||||
query := `
|
||||
SELECT file_id, idx, chunk_hash
|
||||
FROM file_chunks
|
||||
@@ -72,23 +64,13 @@ func (r *FileChunkRepository) GetByFileID(ctx context.Context, fileID string) ([
|
||||
ORDER BY idx
|
||||
`
|
||||
|
||||
rows, err := r.db.conn.QueryContext(ctx, query, fileID)
|
||||
rows, err := r.db.conn.QueryContext(ctx, query, fileID.String())
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("querying file chunks: %w", err)
|
||||
}
|
||||
defer CloseRows(rows)
|
||||
|
||||
var fileChunks []*FileChunk
|
||||
for rows.Next() {
|
||||
var fc FileChunk
|
||||
err := rows.Scan(&fc.FileID, &fc.Idx, &fc.ChunkHash)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("scanning file chunk: %w", err)
|
||||
}
|
||||
fileChunks = append(fileChunks, &fc)
|
||||
}
|
||||
|
||||
return fileChunks, rows.Err()
|
||||
return r.scanFileChunks(rows)
|
||||
}
|
||||
|
||||
// GetByPathTx retrieves file chunks within a transaction
|
||||
@@ -108,16 +90,28 @@ func (r *FileChunkRepository) GetByPathTx(ctx context.Context, tx *sql.Tx, path
|
||||
}
|
||||
defer CloseRows(rows)
|
||||
|
||||
fileChunks, err := r.scanFileChunks(rows)
|
||||
LogSQL("GetByPathTx", "Complete", path, "count", len(fileChunks))
|
||||
return fileChunks, err
|
||||
}
|
||||
|
||||
// scanFileChunks is a helper that scans file chunk rows
|
||||
func (r *FileChunkRepository) scanFileChunks(rows *sql.Rows) ([]*FileChunk, error) {
|
||||
var fileChunks []*FileChunk
|
||||
for rows.Next() {
|
||||
var fc FileChunk
|
||||
err := rows.Scan(&fc.FileID, &fc.Idx, &fc.ChunkHash)
|
||||
var fileIDStr, chunkHashStr string
|
||||
err := rows.Scan(&fileIDStr, &fc.Idx, &chunkHashStr)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("scanning file chunk: %w", err)
|
||||
}
|
||||
fc.FileID, err = types.ParseFileID(fileIDStr)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("parsing file ID: %w", err)
|
||||
}
|
||||
fc.ChunkHash = types.ChunkHash(chunkHashStr)
|
||||
fileChunks = append(fileChunks, &fc)
|
||||
}
|
||||
LogSQL("GetByPathTx", "Complete", path, "count", len(fileChunks))
|
||||
|
||||
return fileChunks, rows.Err()
|
||||
}
|
||||
@@ -140,14 +134,14 @@ func (r *FileChunkRepository) DeleteByPath(ctx context.Context, tx *sql.Tx, path
|
||||
}
|
||||
|
||||
// DeleteByFileID deletes all chunks for a file by its UUID
|
||||
func (r *FileChunkRepository) DeleteByFileID(ctx context.Context, tx *sql.Tx, fileID string) error {
|
||||
func (r *FileChunkRepository) DeleteByFileID(ctx context.Context, tx *sql.Tx, fileID types.FileID) error {
|
||||
query := `DELETE FROM file_chunks WHERE file_id = ?`
|
||||
|
||||
var err error
|
||||
if tx != nil {
|
||||
_, err = tx.ExecContext(ctx, query, fileID)
|
||||
_, err = tx.ExecContext(ctx, query, fileID.String())
|
||||
} else {
|
||||
_, err = r.db.ExecWithLog(ctx, query, fileID)
|
||||
_, err = r.db.ExecWithLog(ctx, query, fileID.String())
|
||||
}
|
||||
|
||||
if err != nil {
|
||||
@@ -157,6 +151,86 @@ func (r *FileChunkRepository) DeleteByFileID(ctx context.Context, tx *sql.Tx, fi
|
||||
return nil
|
||||
}
|
||||
|
||||
// DeleteByFileIDs deletes all chunks for multiple files in a single statement.
|
||||
func (r *FileChunkRepository) DeleteByFileIDs(ctx context.Context, tx *sql.Tx, fileIDs []types.FileID) error {
|
||||
if len(fileIDs) == 0 {
|
||||
return nil
|
||||
}
|
||||
|
||||
// Batch at 500 to stay within SQLite's variable limit
|
||||
const batchSize = 500
|
||||
|
||||
for i := 0; i < len(fileIDs); i += batchSize {
|
||||
end := i + batchSize
|
||||
if end > len(fileIDs) {
|
||||
end = len(fileIDs)
|
||||
}
|
||||
batch := fileIDs[i:end]
|
||||
|
||||
query := "DELETE FROM file_chunks WHERE file_id IN (?" + repeatPlaceholder(len(batch)-1) + ")"
|
||||
args := make([]interface{}, len(batch))
|
||||
for j, id := range batch {
|
||||
args[j] = id.String()
|
||||
}
|
||||
|
||||
var err error
|
||||
if tx != nil {
|
||||
_, err = tx.ExecContext(ctx, query, args...)
|
||||
} else {
|
||||
_, err = r.db.ExecWithLog(ctx, query, args...)
|
||||
}
|
||||
if err != nil {
|
||||
return fmt.Errorf("batch deleting file_chunks: %w", err)
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// CreateBatch inserts multiple file_chunks in a single statement for efficiency.
|
||||
// Batches are automatically split to stay within SQLite's variable limit.
|
||||
func (r *FileChunkRepository) CreateBatch(ctx context.Context, tx *sql.Tx, fcs []FileChunk) error {
|
||||
if len(fcs) == 0 {
|
||||
return nil
|
||||
}
|
||||
|
||||
// SQLite has a limit on variables (typically 999 or 32766).
|
||||
// Each FileChunk has 3 values, so batch at 300 to be safe.
|
||||
const batchSize = 300
|
||||
|
||||
for i := 0; i < len(fcs); i += batchSize {
|
||||
end := i + batchSize
|
||||
if end > len(fcs) {
|
||||
end = len(fcs)
|
||||
}
|
||||
batch := fcs[i:end]
|
||||
|
||||
// Build the query with multiple value sets
|
||||
query := "INSERT INTO file_chunks (file_id, idx, chunk_hash) VALUES "
|
||||
args := make([]interface{}, 0, len(batch)*3)
|
||||
for j, fc := range batch {
|
||||
if j > 0 {
|
||||
query += ", "
|
||||
}
|
||||
query += "(?, ?, ?)"
|
||||
args = append(args, fc.FileID.String(), fc.Idx, fc.ChunkHash.String())
|
||||
}
|
||||
query += " ON CONFLICT(file_id, idx) DO NOTHING"
|
||||
|
||||
var err error
|
||||
if tx != nil {
|
||||
_, err = tx.ExecContext(ctx, query, args...)
|
||||
} else {
|
||||
_, err = r.db.ExecWithLog(ctx, query, args...)
|
||||
}
|
||||
if err != nil {
|
||||
return fmt.Errorf("batch inserting file_chunks: %w", err)
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// GetByFile is an alias for GetByPath for compatibility
|
||||
func (r *FileChunkRepository) GetByFile(ctx context.Context, path string) ([]*FileChunk, error) {
|
||||
LogSQL("GetByFile", "Starting", path)
|
||||
|
||||
@@ -5,6 +5,8 @@ import (
|
||||
"fmt"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"sneak.berlin/go/vaultik/internal/types"
|
||||
)
|
||||
|
||||
func TestFileChunkRepository(t *testing.T) {
|
||||
@@ -20,7 +22,6 @@ func TestFileChunkRepository(t *testing.T) {
|
||||
file := &File{
|
||||
Path: "/test/file.txt",
|
||||
MTime: testTime,
|
||||
CTime: testTime,
|
||||
Size: 3072,
|
||||
Mode: 0644,
|
||||
UID: 1000,
|
||||
@@ -33,7 +34,7 @@ func TestFileChunkRepository(t *testing.T) {
|
||||
}
|
||||
|
||||
// Create chunks first
|
||||
chunks := []string{"chunk1", "chunk2", "chunk3"}
|
||||
chunks := []types.ChunkHash{"chunk1", "chunk2", "chunk3"}
|
||||
chunkRepo := NewChunkRepository(db)
|
||||
for _, chunkHash := range chunks {
|
||||
chunk := &Chunk{
|
||||
@@ -50,7 +51,7 @@ func TestFileChunkRepository(t *testing.T) {
|
||||
fc1 := &FileChunk{
|
||||
FileID: file.ID,
|
||||
Idx: 0,
|
||||
ChunkHash: "chunk1",
|
||||
ChunkHash: types.ChunkHash("chunk1"),
|
||||
}
|
||||
|
||||
err = repo.Create(ctx, nil, fc1)
|
||||
@@ -62,7 +63,7 @@ func TestFileChunkRepository(t *testing.T) {
|
||||
fc2 := &FileChunk{
|
||||
FileID: file.ID,
|
||||
Idx: 1,
|
||||
ChunkHash: "chunk2",
|
||||
ChunkHash: types.ChunkHash("chunk2"),
|
||||
}
|
||||
err = repo.Create(ctx, nil, fc2)
|
||||
if err != nil {
|
||||
@@ -72,7 +73,7 @@ func TestFileChunkRepository(t *testing.T) {
|
||||
fc3 := &FileChunk{
|
||||
FileID: file.ID,
|
||||
Idx: 2,
|
||||
ChunkHash: "chunk3",
|
||||
ChunkHash: types.ChunkHash("chunk3"),
|
||||
}
|
||||
err = repo.Create(ctx, nil, fc3)
|
||||
if err != nil {
|
||||
@@ -131,9 +132,8 @@ func TestFileChunkRepositoryMultipleFiles(t *testing.T) {
|
||||
|
||||
for i, path := range filePaths {
|
||||
file := &File{
|
||||
Path: path,
|
||||
Path: types.FilePath(path),
|
||||
MTime: testTime,
|
||||
CTime: testTime,
|
||||
Size: 2048,
|
||||
Mode: 0644,
|
||||
UID: 1000,
|
||||
@@ -151,7 +151,7 @@ func TestFileChunkRepositoryMultipleFiles(t *testing.T) {
|
||||
chunkRepo := NewChunkRepository(db)
|
||||
for i := range files {
|
||||
for j := 0; j < 2; j++ {
|
||||
chunkHash := fmt.Sprintf("file%d_chunk%d", i, j)
|
||||
chunkHash := types.ChunkHash(fmt.Sprintf("file%d_chunk%d", i, j))
|
||||
chunk := &Chunk{
|
||||
ChunkHash: chunkHash,
|
||||
Size: 1024,
|
||||
@@ -169,7 +169,7 @@ func TestFileChunkRepositoryMultipleFiles(t *testing.T) {
|
||||
fc := &FileChunk{
|
||||
FileID: file.ID,
|
||||
Idx: j,
|
||||
ChunkHash: fmt.Sprintf("file%d_chunk%d", i, j),
|
||||
ChunkHash: types.ChunkHash(fmt.Sprintf("file%d_chunk%d", i, j)),
|
||||
}
|
||||
err := repo.Create(ctx, nil, fc)
|
||||
if err != nil {
|
||||
|
||||
@@ -6,8 +6,8 @@ import (
|
||||
"fmt"
|
||||
"time"
|
||||
|
||||
"git.eeqj.de/sneak/vaultik/internal/log"
|
||||
"github.com/google/uuid"
|
||||
"sneak.berlin/go/vaultik/internal/log"
|
||||
"sneak.berlin/go/vaultik/internal/types"
|
||||
)
|
||||
|
||||
type FileRepository struct {
|
||||
@@ -20,16 +20,16 @@ func NewFileRepository(db *DB) *FileRepository {
|
||||
|
||||
func (r *FileRepository) Create(ctx context.Context, tx *sql.Tx, file *File) error {
|
||||
// Generate UUID if not provided
|
||||
if file.ID == "" {
|
||||
file.ID = uuid.New().String()
|
||||
if file.ID.IsZero() {
|
||||
file.ID = types.NewFileID()
|
||||
}
|
||||
|
||||
query := `
|
||||
INSERT INTO files (id, path, mtime, ctime, size, mode, uid, gid, link_target)
|
||||
INSERT INTO files (id, path, source_path, mtime, size, mode, uid, gid, link_target)
|
||||
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)
|
||||
ON CONFLICT(path) DO UPDATE SET
|
||||
source_path = excluded.source_path,
|
||||
mtime = excluded.mtime,
|
||||
ctime = excluded.ctime,
|
||||
size = excluded.size,
|
||||
mode = excluded.mode,
|
||||
uid = excluded.uid,
|
||||
@@ -38,44 +38,36 @@ func (r *FileRepository) Create(ctx context.Context, tx *sql.Tx, file *File) err
|
||||
RETURNING id
|
||||
`
|
||||
|
||||
var idStr string
|
||||
var err error
|
||||
if tx != nil {
|
||||
LogSQL("Execute", query, file.ID, file.Path, file.MTime.Unix(), file.CTime.Unix(), file.Size, file.Mode, file.UID, file.GID, file.LinkTarget)
|
||||
err = tx.QueryRowContext(ctx, query, file.ID, file.Path, file.MTime.Unix(), file.CTime.Unix(), file.Size, file.Mode, file.UID, file.GID, file.LinkTarget).Scan(&file.ID)
|
||||
LogSQL("Execute", query, file.ID.String(), file.Path.String(), file.SourcePath.String(), file.MTime.Unix(), file.Size, file.Mode, file.UID, file.GID, file.LinkTarget.String())
|
||||
err = tx.QueryRowContext(ctx, query, file.ID.String(), file.Path.String(), file.SourcePath.String(), file.MTime.Unix(), file.Size, file.Mode, file.UID, file.GID, file.LinkTarget.String()).Scan(&idStr)
|
||||
} else {
|
||||
err = r.db.QueryRowWithLog(ctx, query, file.ID, file.Path, file.MTime.Unix(), file.CTime.Unix(), file.Size, file.Mode, file.UID, file.GID, file.LinkTarget).Scan(&file.ID)
|
||||
err = r.db.QueryRowWithLog(ctx, query, file.ID.String(), file.Path.String(), file.SourcePath.String(), file.MTime.Unix(), file.Size, file.Mode, file.UID, file.GID, file.LinkTarget.String()).Scan(&idStr)
|
||||
}
|
||||
|
||||
if err != nil {
|
||||
return fmt.Errorf("inserting file: %w", err)
|
||||
}
|
||||
|
||||
// Parse the returned ID
|
||||
file.ID, err = types.ParseFileID(idStr)
|
||||
if err != nil {
|
||||
return fmt.Errorf("parsing file ID: %w", err)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (r *FileRepository) GetByPath(ctx context.Context, path string) (*File, error) {
|
||||
query := `
|
||||
SELECT id, path, mtime, ctime, size, mode, uid, gid, link_target
|
||||
SELECT id, path, source_path, mtime, size, mode, uid, gid, link_target
|
||||
FROM files
|
||||
WHERE path = ?
|
||||
`
|
||||
|
||||
var file File
|
||||
var mtimeUnix, ctimeUnix int64
|
||||
var linkTarget sql.NullString
|
||||
|
||||
err := r.db.conn.QueryRowContext(ctx, query, path).Scan(
|
||||
&file.ID,
|
||||
&file.Path,
|
||||
&mtimeUnix,
|
||||
&ctimeUnix,
|
||||
&file.Size,
|
||||
&file.Mode,
|
||||
&file.UID,
|
||||
&file.GID,
|
||||
&linkTarget,
|
||||
)
|
||||
|
||||
file, err := r.scanFile(r.db.conn.QueryRowContext(ctx, query, path))
|
||||
if err == sql.ErrNoRows {
|
||||
return nil, nil
|
||||
}
|
||||
@@ -83,39 +75,18 @@ func (r *FileRepository) GetByPath(ctx context.Context, path string) (*File, err
|
||||
return nil, fmt.Errorf("querying file: %w", err)
|
||||
}
|
||||
|
||||
file.MTime = time.Unix(mtimeUnix, 0).UTC()
|
||||
file.CTime = time.Unix(ctimeUnix, 0).UTC()
|
||||
if linkTarget.Valid {
|
||||
file.LinkTarget = linkTarget.String
|
||||
}
|
||||
|
||||
return &file, nil
|
||||
return file, nil
|
||||
}
|
||||
|
||||
// GetByID retrieves a file by its UUID
|
||||
func (r *FileRepository) GetByID(ctx context.Context, id string) (*File, error) {
|
||||
func (r *FileRepository) GetByID(ctx context.Context, id types.FileID) (*File, error) {
|
||||
query := `
|
||||
SELECT id, path, mtime, ctime, size, mode, uid, gid, link_target
|
||||
SELECT id, path, source_path, mtime, size, mode, uid, gid, link_target
|
||||
FROM files
|
||||
WHERE id = ?
|
||||
`
|
||||
|
||||
var file File
|
||||
var mtimeUnix, ctimeUnix int64
|
||||
var linkTarget sql.NullString
|
||||
|
||||
err := r.db.conn.QueryRowContext(ctx, query, id).Scan(
|
||||
&file.ID,
|
||||
&file.Path,
|
||||
&mtimeUnix,
|
||||
&ctimeUnix,
|
||||
&file.Size,
|
||||
&file.Mode,
|
||||
&file.UID,
|
||||
&file.GID,
|
||||
&linkTarget,
|
||||
)
|
||||
|
||||
file, err := r.scanFile(r.db.conn.QueryRowContext(ctx, query, id.String()))
|
||||
if err == sql.ErrNoRows {
|
||||
return nil, nil
|
||||
}
|
||||
@@ -123,38 +94,18 @@ func (r *FileRepository) GetByID(ctx context.Context, id string) (*File, error)
|
||||
return nil, fmt.Errorf("querying file: %w", err)
|
||||
}
|
||||
|
||||
file.MTime = time.Unix(mtimeUnix, 0).UTC()
|
||||
file.CTime = time.Unix(ctimeUnix, 0).UTC()
|
||||
if linkTarget.Valid {
|
||||
file.LinkTarget = linkTarget.String
|
||||
}
|
||||
|
||||
return &file, nil
|
||||
return file, nil
|
||||
}
|
||||
|
||||
func (r *FileRepository) GetByPathTx(ctx context.Context, tx *sql.Tx, path string) (*File, error) {
|
||||
query := `
|
||||
SELECT id, path, mtime, ctime, size, mode, uid, gid, link_target
|
||||
SELECT id, path, source_path, mtime, size, mode, uid, gid, link_target
|
||||
FROM files
|
||||
WHERE path = ?
|
||||
`
|
||||
|
||||
var file File
|
||||
var mtimeUnix, ctimeUnix int64
|
||||
var linkTarget sql.NullString
|
||||
|
||||
LogSQL("GetByPathTx QueryRowContext", query, path)
|
||||
err := tx.QueryRowContext(ctx, query, path).Scan(
|
||||
&file.ID,
|
||||
&file.Path,
|
||||
&mtimeUnix,
|
||||
&ctimeUnix,
|
||||
&file.Size,
|
||||
&file.Mode,
|
||||
&file.UID,
|
||||
&file.GID,
|
||||
&linkTarget,
|
||||
)
|
||||
file, err := r.scanFile(tx.QueryRowContext(ctx, query, path))
|
||||
LogSQL("GetByPathTx Scan complete", query, path)
|
||||
|
||||
if err == sql.ErrNoRows {
|
||||
@@ -164,10 +115,76 @@ func (r *FileRepository) GetByPathTx(ctx context.Context, tx *sql.Tx, path strin
|
||||
return nil, fmt.Errorf("querying file: %w", err)
|
||||
}
|
||||
|
||||
return file, nil
|
||||
}
|
||||
|
||||
// scanFile is a helper that scans a single file row
|
||||
func (r *FileRepository) scanFile(row *sql.Row) (*File, error) {
|
||||
var file File
|
||||
var idStr, pathStr, sourcePathStr string
|
||||
var mtimeUnix int64
|
||||
var linkTarget sql.NullString
|
||||
|
||||
err := row.Scan(
|
||||
&idStr,
|
||||
&pathStr,
|
||||
&sourcePathStr,
|
||||
&mtimeUnix,
|
||||
&file.Size,
|
||||
&file.Mode,
|
||||
&file.UID,
|
||||
&file.GID,
|
||||
&linkTarget,
|
||||
)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
file.ID, err = types.ParseFileID(idStr)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("parsing file ID: %w", err)
|
||||
}
|
||||
file.Path = types.FilePath(pathStr)
|
||||
file.SourcePath = types.SourcePath(sourcePathStr)
|
||||
file.MTime = time.Unix(mtimeUnix, 0).UTC()
|
||||
file.CTime = time.Unix(ctimeUnix, 0).UTC()
|
||||
if linkTarget.Valid {
|
||||
file.LinkTarget = linkTarget.String
|
||||
file.LinkTarget = types.FilePath(linkTarget.String)
|
||||
}
|
||||
|
||||
return &file, nil
|
||||
}
|
||||
|
||||
// scanFileRows is a helper that scans a file row from rows iterator
|
||||
func (r *FileRepository) scanFileRows(rows *sql.Rows) (*File, error) {
|
||||
var file File
|
||||
var idStr, pathStr, sourcePathStr string
|
||||
var mtimeUnix int64
|
||||
var linkTarget sql.NullString
|
||||
|
||||
err := rows.Scan(
|
||||
&idStr,
|
||||
&pathStr,
|
||||
&sourcePathStr,
|
||||
&mtimeUnix,
|
||||
&file.Size,
|
||||
&file.Mode,
|
||||
&file.UID,
|
||||
&file.GID,
|
||||
&linkTarget,
|
||||
)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
file.ID, err = types.ParseFileID(idStr)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("parsing file ID: %w", err)
|
||||
}
|
||||
file.Path = types.FilePath(pathStr)
|
||||
file.SourcePath = types.SourcePath(sourcePathStr)
|
||||
file.MTime = time.Unix(mtimeUnix, 0).UTC()
|
||||
if linkTarget.Valid {
|
||||
file.LinkTarget = types.FilePath(linkTarget.String)
|
||||
}
|
||||
|
||||
return &file, nil
|
||||
@@ -175,7 +192,7 @@ func (r *FileRepository) GetByPathTx(ctx context.Context, tx *sql.Tx, path strin
|
||||
|
||||
func (r *FileRepository) ListModifiedSince(ctx context.Context, since time.Time) ([]*File, error) {
|
||||
query := `
|
||||
SELECT id, path, mtime, ctime, size, mode, uid, gid, link_target
|
||||
SELECT id, path, source_path, mtime, size, mode, uid, gid, link_target
|
||||
FROM files
|
||||
WHERE mtime >= ?
|
||||
ORDER BY path
|
||||
@@ -189,32 +206,11 @@ func (r *FileRepository) ListModifiedSince(ctx context.Context, since time.Time)
|
||||
|
||||
var files []*File
|
||||
for rows.Next() {
|
||||
var file File
|
||||
var mtimeUnix, ctimeUnix int64
|
||||
var linkTarget sql.NullString
|
||||
|
||||
err := rows.Scan(
|
||||
&file.ID,
|
||||
&file.Path,
|
||||
&mtimeUnix,
|
||||
&ctimeUnix,
|
||||
&file.Size,
|
||||
&file.Mode,
|
||||
&file.UID,
|
||||
&file.GID,
|
||||
&linkTarget,
|
||||
)
|
||||
file, err := r.scanFileRows(rows)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("scanning file: %w", err)
|
||||
}
|
||||
|
||||
file.MTime = time.Unix(mtimeUnix, 0)
|
||||
file.CTime = time.Unix(ctimeUnix, 0)
|
||||
if linkTarget.Valid {
|
||||
file.LinkTarget = linkTarget.String
|
||||
}
|
||||
|
||||
files = append(files, &file)
|
||||
files = append(files, file)
|
||||
}
|
||||
|
||||
return files, rows.Err()
|
||||
@@ -238,14 +234,14 @@ func (r *FileRepository) Delete(ctx context.Context, tx *sql.Tx, path string) er
|
||||
}
|
||||
|
||||
// DeleteByID deletes a file by its UUID
|
||||
func (r *FileRepository) DeleteByID(ctx context.Context, tx *sql.Tx, id string) error {
|
||||
func (r *FileRepository) DeleteByID(ctx context.Context, tx *sql.Tx, id types.FileID) error {
|
||||
query := `DELETE FROM files WHERE id = ?`
|
||||
|
||||
var err error
|
||||
if tx != nil {
|
||||
_, err = tx.ExecContext(ctx, query, id)
|
||||
_, err = tx.ExecContext(ctx, query, id.String())
|
||||
} else {
|
||||
_, err = r.db.ExecWithLog(ctx, query, id)
|
||||
_, err = r.db.ExecWithLog(ctx, query, id.String())
|
||||
}
|
||||
|
||||
if err != nil {
|
||||
@@ -257,7 +253,7 @@ func (r *FileRepository) DeleteByID(ctx context.Context, tx *sql.Tx, id string)
|
||||
|
||||
func (r *FileRepository) ListByPrefix(ctx context.Context, prefix string) ([]*File, error) {
|
||||
query := `
|
||||
SELECT id, path, mtime, ctime, size, mode, uid, gid, link_target
|
||||
SELECT id, path, source_path, mtime, size, mode, uid, gid, link_target
|
||||
FROM files
|
||||
WHERE path LIKE ? || '%'
|
||||
ORDER BY path
|
||||
@@ -271,43 +267,97 @@ func (r *FileRepository) ListByPrefix(ctx context.Context, prefix string) ([]*Fi
|
||||
|
||||
var files []*File
|
||||
for rows.Next() {
|
||||
var file File
|
||||
var mtimeUnix, ctimeUnix int64
|
||||
var linkTarget sql.NullString
|
||||
|
||||
err := rows.Scan(
|
||||
&file.ID,
|
||||
&file.Path,
|
||||
&mtimeUnix,
|
||||
&ctimeUnix,
|
||||
&file.Size,
|
||||
&file.Mode,
|
||||
&file.UID,
|
||||
&file.GID,
|
||||
&linkTarget,
|
||||
)
|
||||
file, err := r.scanFileRows(rows)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("scanning file: %w", err)
|
||||
}
|
||||
|
||||
file.MTime = time.Unix(mtimeUnix, 0)
|
||||
file.CTime = time.Unix(ctimeUnix, 0)
|
||||
if linkTarget.Valid {
|
||||
file.LinkTarget = linkTarget.String
|
||||
}
|
||||
|
||||
files = append(files, &file)
|
||||
files = append(files, file)
|
||||
}
|
||||
|
||||
return files, rows.Err()
|
||||
}
|
||||
|
||||
// ListAll returns all files in the database
|
||||
func (r *FileRepository) ListAll(ctx context.Context) ([]*File, error) {
|
||||
query := `
|
||||
SELECT id, path, source_path, mtime, size, mode, uid, gid, link_target
|
||||
FROM files
|
||||
ORDER BY path
|
||||
`
|
||||
|
||||
rows, err := r.db.conn.QueryContext(ctx, query)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("querying files: %w", err)
|
||||
}
|
||||
defer CloseRows(rows)
|
||||
|
||||
var files []*File
|
||||
for rows.Next() {
|
||||
file, err := r.scanFileRows(rows)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("scanning file: %w", err)
|
||||
}
|
||||
files = append(files, file)
|
||||
}
|
||||
|
||||
return files, rows.Err()
|
||||
}
|
||||
|
||||
// CreateBatch inserts or updates multiple files in a single statement for efficiency.
|
||||
// File IDs must be pre-generated before calling this method.
|
||||
func (r *FileRepository) CreateBatch(ctx context.Context, tx *sql.Tx, files []*File) error {
|
||||
if len(files) == 0 {
|
||||
return nil
|
||||
}
|
||||
|
||||
// Each File has 9 values, so batch at 100 to be safe with SQLite's variable limit
|
||||
const batchSize = 100
|
||||
|
||||
for i := 0; i < len(files); i += batchSize {
|
||||
end := i + batchSize
|
||||
if end > len(files) {
|
||||
end = len(files)
|
||||
}
|
||||
batch := files[i:end]
|
||||
|
||||
query := `INSERT INTO files (id, path, source_path, mtime, size, mode, uid, gid, link_target) VALUES `
|
||||
args := make([]interface{}, 0, len(batch)*9)
|
||||
for j, f := range batch {
|
||||
if j > 0 {
|
||||
query += ", "
|
||||
}
|
||||
query += "(?, ?, ?, ?, ?, ?, ?, ?, ?)"
|
||||
args = append(args, f.ID.String(), f.Path.String(), f.SourcePath.String(), f.MTime.Unix(), f.Size, f.Mode, f.UID, f.GID, f.LinkTarget.String())
|
||||
}
|
||||
query += ` ON CONFLICT(path) DO UPDATE SET
|
||||
source_path = excluded.source_path,
|
||||
mtime = excluded.mtime,
|
||||
size = excluded.size,
|
||||
mode = excluded.mode,
|
||||
uid = excluded.uid,
|
||||
gid = excluded.gid,
|
||||
link_target = excluded.link_target`
|
||||
|
||||
var err error
|
||||
if tx != nil {
|
||||
_, err = tx.ExecContext(ctx, query, args...)
|
||||
} else {
|
||||
_, err = r.db.ExecWithLog(ctx, query, args...)
|
||||
}
|
||||
if err != nil {
|
||||
return fmt.Errorf("batch inserting files: %w", err)
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// DeleteOrphaned deletes files that are not referenced by any snapshot
|
||||
func (r *FileRepository) DeleteOrphaned(ctx context.Context) error {
|
||||
query := `
|
||||
DELETE FROM files
|
||||
DELETE FROM files
|
||||
WHERE NOT EXISTS (
|
||||
SELECT 1 FROM snapshot_files
|
||||
SELECT 1 FROM snapshot_files
|
||||
WHERE snapshot_files.file_id = files.id
|
||||
)
|
||||
`
|
||||
|
||||
@@ -39,7 +39,6 @@ func TestFileRepository(t *testing.T) {
|
||||
file := &File{
|
||||
Path: "/test/file.txt",
|
||||
MTime: time.Now().Truncate(time.Second),
|
||||
CTime: time.Now().Truncate(time.Second),
|
||||
Size: 1024,
|
||||
Mode: 0644,
|
||||
UID: 1000,
|
||||
@@ -53,7 +52,7 @@ func TestFileRepository(t *testing.T) {
|
||||
}
|
||||
|
||||
// Test GetByPath
|
||||
retrieved, err := repo.GetByPath(ctx, file.Path)
|
||||
retrieved, err := repo.GetByPath(ctx, file.Path.String())
|
||||
if err != nil {
|
||||
t.Fatalf("failed to get file: %v", err)
|
||||
}
|
||||
@@ -81,7 +80,7 @@ func TestFileRepository(t *testing.T) {
|
||||
t.Fatalf("failed to update file: %v", err)
|
||||
}
|
||||
|
||||
retrieved, err = repo.GetByPath(ctx, file.Path)
|
||||
retrieved, err = repo.GetByPath(ctx, file.Path.String())
|
||||
if err != nil {
|
||||
t.Fatalf("failed to get updated file: %v", err)
|
||||
}
|
||||
@@ -99,12 +98,12 @@ func TestFileRepository(t *testing.T) {
|
||||
}
|
||||
|
||||
// Test Delete
|
||||
err = repo.Delete(ctx, nil, file.Path)
|
||||
err = repo.Delete(ctx, nil, file.Path.String())
|
||||
if err != nil {
|
||||
t.Fatalf("failed to delete file: %v", err)
|
||||
}
|
||||
|
||||
retrieved, err = repo.GetByPath(ctx, file.Path)
|
||||
retrieved, err = repo.GetByPath(ctx, file.Path.String())
|
||||
if err != nil {
|
||||
t.Fatalf("error getting deleted file: %v", err)
|
||||
}
|
||||
@@ -124,7 +123,6 @@ func TestFileRepositorySymlink(t *testing.T) {
|
||||
symlink := &File{
|
||||
Path: "/test/link",
|
||||
MTime: time.Now().Truncate(time.Second),
|
||||
CTime: time.Now().Truncate(time.Second),
|
||||
Size: 0,
|
||||
Mode: uint32(0777 | os.ModeSymlink),
|
||||
UID: 1000,
|
||||
@@ -137,7 +135,7 @@ func TestFileRepositorySymlink(t *testing.T) {
|
||||
t.Fatalf("failed to create symlink: %v", err)
|
||||
}
|
||||
|
||||
retrieved, err := repo.GetByPath(ctx, symlink.Path)
|
||||
retrieved, err := repo.GetByPath(ctx, symlink.Path.String())
|
||||
if err != nil {
|
||||
t.Fatalf("failed to get symlink: %v", err)
|
||||
}
|
||||
@@ -161,7 +159,6 @@ func TestFileRepositoryTransaction(t *testing.T) {
|
||||
file := &File{
|
||||
Path: "/test/tx_file.txt",
|
||||
MTime: time.Now().Truncate(time.Second),
|
||||
CTime: time.Now().Truncate(time.Second),
|
||||
Size: 1024,
|
||||
Mode: 0644,
|
||||
UID: 1000,
|
||||
|
||||
@@ -2,22 +2,26 @@
|
||||
// It includes types for files, chunks, blobs, snapshots, and their relationships.
|
||||
package database
|
||||
|
||||
import "time"
|
||||
import (
|
||||
"time"
|
||||
|
||||
"sneak.berlin/go/vaultik/internal/types"
|
||||
)
|
||||
|
||||
// File represents a file or directory in the backup system.
|
||||
// It stores metadata about files including timestamps, permissions, ownership,
|
||||
// and symlink targets. This information is used to restore files with their
|
||||
// original attributes.
|
||||
type File struct {
|
||||
ID string // UUID primary key
|
||||
Path string
|
||||
ID types.FileID // UUID primary key
|
||||
Path types.FilePath // Absolute path of the file
|
||||
SourcePath types.SourcePath // The source directory this file came from (for restore path stripping)
|
||||
MTime time.Time
|
||||
CTime time.Time
|
||||
Size int64
|
||||
Mode uint32
|
||||
UID uint32
|
||||
GID uint32
|
||||
LinkTarget string // empty for regular files, target path for symlinks
|
||||
LinkTarget types.FilePath // empty for regular files, target path for symlinks
|
||||
}
|
||||
|
||||
// IsSymlink returns true if this file is a symbolic link.
|
||||
@@ -30,16 +34,16 @@ func (f *File) IsSymlink() bool {
|
||||
// Large files are split into multiple chunks for efficient deduplication and storage.
|
||||
// The Idx field maintains the order of chunks within a file.
|
||||
type FileChunk struct {
|
||||
FileID string
|
||||
FileID types.FileID
|
||||
Idx int
|
||||
ChunkHash string
|
||||
ChunkHash types.ChunkHash
|
||||
}
|
||||
|
||||
// Chunk represents a data chunk in the deduplication system.
|
||||
// Files are split into chunks which are content-addressed by their hash.
|
||||
// The ChunkHash is the SHA256 hash of the chunk content, used for deduplication.
|
||||
type Chunk struct {
|
||||
ChunkHash string
|
||||
ChunkHash types.ChunkHash
|
||||
Size int64
|
||||
}
|
||||
|
||||
@@ -51,13 +55,13 @@ type Chunk struct {
|
||||
// The blob creation process is: chunks are accumulated -> compressed with zstd
|
||||
// -> encrypted with age -> hashed -> uploaded to S3 with the hash as filename.
|
||||
type Blob struct {
|
||||
ID string // UUID assigned when blob creation starts
|
||||
Hash string // SHA256 of final compressed+encrypted content (empty until finalized)
|
||||
CreatedTS time.Time // When blob creation started
|
||||
FinishedTS *time.Time // When blob was finalized (nil if still packing)
|
||||
UncompressedSize int64 // Total size of raw chunks before compression
|
||||
CompressedSize int64 // Size after compression and encryption
|
||||
UploadedTS *time.Time // When blob was uploaded to S3 (nil if not uploaded)
|
||||
ID types.BlobID // UUID assigned when blob creation starts
|
||||
Hash types.BlobHash // SHA256 of final compressed+encrypted content (empty until finalized)
|
||||
CreatedTS time.Time // When blob creation started
|
||||
FinishedTS *time.Time // When blob was finalized (nil if still packing)
|
||||
UncompressedSize int64 // Total size of raw chunks before compression
|
||||
CompressedSize int64 // Size after compression and encryption
|
||||
UploadedTS *time.Time // When blob was uploaded to S3 (nil if not uploaded)
|
||||
}
|
||||
|
||||
// BlobChunk represents the mapping between blobs and the chunks they contain.
|
||||
@@ -65,8 +69,8 @@ type Blob struct {
|
||||
// their position and size within the blob. The offset and length fields
|
||||
// enable extracting specific chunks from a blob without processing the entire blob.
|
||||
type BlobChunk struct {
|
||||
BlobID string
|
||||
ChunkHash string
|
||||
BlobID types.BlobID
|
||||
ChunkHash types.ChunkHash
|
||||
Offset int64
|
||||
Length int64
|
||||
}
|
||||
@@ -75,18 +79,18 @@ type BlobChunk struct {
|
||||
// This is used during deduplication to identify all files that share a chunk,
|
||||
// which is important for garbage collection and integrity verification.
|
||||
type ChunkFile struct {
|
||||
ChunkHash string
|
||||
FileID string
|
||||
ChunkHash types.ChunkHash
|
||||
FileID types.FileID
|
||||
FileOffset int64
|
||||
Length int64
|
||||
}
|
||||
|
||||
// Snapshot represents a snapshot record in the database
|
||||
type Snapshot struct {
|
||||
ID string
|
||||
Hostname string
|
||||
VaultikVersion string
|
||||
VaultikGitRevision string
|
||||
ID types.SnapshotID
|
||||
Hostname types.Hostname
|
||||
VaultikVersion types.Version
|
||||
VaultikGitRevision types.GitRevision
|
||||
StartedAt time.Time
|
||||
CompletedAt *time.Time // nil if still in progress
|
||||
FileCount int64
|
||||
@@ -108,13 +112,13 @@ func (s *Snapshot) IsComplete() bool {
|
||||
|
||||
// SnapshotFile represents the mapping between snapshots and files
|
||||
type SnapshotFile struct {
|
||||
SnapshotID string
|
||||
FileID string
|
||||
SnapshotID types.SnapshotID
|
||||
FileID types.FileID
|
||||
}
|
||||
|
||||
// SnapshotBlob represents the mapping between snapshots and blobs
|
||||
type SnapshotBlob struct {
|
||||
SnapshotID string
|
||||
BlobID string
|
||||
BlobHash string // Denormalized for easier manifest generation
|
||||
SnapshotID types.SnapshotID
|
||||
BlobID types.BlobID
|
||||
BlobHash types.BlobHash // Denormalized for easier manifest generation
|
||||
}
|
||||
|
||||
@@ -6,9 +6,9 @@ import (
|
||||
"os"
|
||||
"path/filepath"
|
||||
|
||||
"git.eeqj.de/sneak/vaultik/internal/config"
|
||||
"git.eeqj.de/sneak/vaultik/internal/log"
|
||||
"go.uber.org/fx"
|
||||
"sneak.berlin/go/vaultik/internal/config"
|
||||
"sneak.berlin/go/vaultik/internal/log"
|
||||
)
|
||||
|
||||
// Module provides database dependencies
|
||||
|
||||
@@ -75,6 +75,11 @@ func (r *Repositories) WithTx(ctx context.Context, fn TxFunc) error {
|
||||
return tx.Commit()
|
||||
}
|
||||
|
||||
// DB returns the underlying database for direct queries
|
||||
func (r *Repositories) DB() *DB {
|
||||
return r.db
|
||||
}
|
||||
|
||||
// WithReadTx executes a function within a read-only transaction.
|
||||
// Read transactions can run concurrently with other read transactions
|
||||
// but will be blocked by write transactions. The transaction is
|
||||
|
||||
@@ -6,6 +6,8 @@ import (
|
||||
"fmt"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"sneak.berlin/go/vaultik/internal/types"
|
||||
)
|
||||
|
||||
func TestRepositoriesTransaction(t *testing.T) {
|
||||
@@ -21,7 +23,6 @@ func TestRepositoriesTransaction(t *testing.T) {
|
||||
file := &File{
|
||||
Path: "/test/tx_file.txt",
|
||||
MTime: time.Now().Truncate(time.Second),
|
||||
CTime: time.Now().Truncate(time.Second),
|
||||
Size: 1024,
|
||||
Mode: 0644,
|
||||
UID: 1000,
|
||||
@@ -33,7 +34,7 @@ func TestRepositoriesTransaction(t *testing.T) {
|
||||
|
||||
// Create chunks
|
||||
chunk1 := &Chunk{
|
||||
ChunkHash: "tx_chunk1",
|
||||
ChunkHash: types.ChunkHash("tx_chunk1"),
|
||||
Size: 512,
|
||||
}
|
||||
if err := repos.Chunks.Create(ctx, tx, chunk1); err != nil {
|
||||
@@ -41,7 +42,7 @@ func TestRepositoriesTransaction(t *testing.T) {
|
||||
}
|
||||
|
||||
chunk2 := &Chunk{
|
||||
ChunkHash: "tx_chunk2",
|
||||
ChunkHash: types.ChunkHash("tx_chunk2"),
|
||||
Size: 512,
|
||||
}
|
||||
if err := repos.Chunks.Create(ctx, tx, chunk2); err != nil {
|
||||
@@ -69,8 +70,8 @@ func TestRepositoriesTransaction(t *testing.T) {
|
||||
|
||||
// Create blob
|
||||
blob := &Blob{
|
||||
ID: "tx-blob-id-1",
|
||||
Hash: "tx_blob1",
|
||||
ID: types.NewBlobID(),
|
||||
Hash: types.BlobHash("tx_blob1"),
|
||||
CreatedTS: time.Now().Truncate(time.Second),
|
||||
}
|
||||
if err := repos.Blobs.Create(ctx, tx, blob); err != nil {
|
||||
@@ -144,7 +145,6 @@ func TestRepositoriesTransactionRollback(t *testing.T) {
|
||||
file := &File{
|
||||
Path: "/test/rollback_file.txt",
|
||||
MTime: time.Now().Truncate(time.Second),
|
||||
CTime: time.Now().Truncate(time.Second),
|
||||
Size: 1024,
|
||||
Mode: 0644,
|
||||
UID: 1000,
|
||||
@@ -156,7 +156,7 @@ func TestRepositoriesTransactionRollback(t *testing.T) {
|
||||
|
||||
// Create a chunk
|
||||
chunk := &Chunk{
|
||||
ChunkHash: "rollback_chunk",
|
||||
ChunkHash: types.ChunkHash("rollback_chunk"),
|
||||
Size: 1024,
|
||||
}
|
||||
if err := repos.Chunks.Create(ctx, tx, chunk); err != nil {
|
||||
@@ -200,7 +200,6 @@ func TestRepositoriesReadTransaction(t *testing.T) {
|
||||
file := &File{
|
||||
Path: "/test/read_file.txt",
|
||||
MTime: time.Now().Truncate(time.Second),
|
||||
CTime: time.Now().Truncate(time.Second),
|
||||
Size: 1024,
|
||||
Mode: 0644,
|
||||
UID: 1000,
|
||||
@@ -224,7 +223,6 @@ func TestRepositoriesReadTransaction(t *testing.T) {
|
||||
_ = repos.Files.Create(ctx, tx, &File{
|
||||
Path: "/test/should_fail.txt",
|
||||
MTime: time.Now(),
|
||||
CTime: time.Now(),
|
||||
Size: 0,
|
||||
Mode: 0644,
|
||||
UID: 1000,
|
||||
|
||||
@@ -6,6 +6,8 @@ import (
|
||||
"fmt"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"sneak.berlin/go/vaultik/internal/types"
|
||||
)
|
||||
|
||||
// TestFileRepositoryUUIDGeneration tests that files get unique UUIDs
|
||||
@@ -21,7 +23,6 @@ func TestFileRepositoryUUIDGeneration(t *testing.T) {
|
||||
{
|
||||
Path: "/file1.txt",
|
||||
MTime: time.Now().Truncate(time.Second),
|
||||
CTime: time.Now().Truncate(time.Second),
|
||||
Size: 1024,
|
||||
Mode: 0644,
|
||||
UID: 1000,
|
||||
@@ -30,7 +31,6 @@ func TestFileRepositoryUUIDGeneration(t *testing.T) {
|
||||
{
|
||||
Path: "/file2.txt",
|
||||
MTime: time.Now().Truncate(time.Second),
|
||||
CTime: time.Now().Truncate(time.Second),
|
||||
Size: 2048,
|
||||
Mode: 0644,
|
||||
UID: 1000,
|
||||
@@ -46,15 +46,15 @@ func TestFileRepositoryUUIDGeneration(t *testing.T) {
|
||||
}
|
||||
|
||||
// Check UUID was generated
|
||||
if file.ID == "" {
|
||||
if file.ID.IsZero() {
|
||||
t.Error("file ID was not generated")
|
||||
}
|
||||
|
||||
// Check UUID is unique
|
||||
if uuids[file.ID] {
|
||||
if uuids[file.ID.String()] {
|
||||
t.Errorf("duplicate UUID generated: %s", file.ID)
|
||||
}
|
||||
uuids[file.ID] = true
|
||||
uuids[file.ID.String()] = true
|
||||
}
|
||||
}
|
||||
|
||||
@@ -70,7 +70,6 @@ func TestFileRepositoryGetByID(t *testing.T) {
|
||||
file := &File{
|
||||
Path: "/test.txt",
|
||||
MTime: time.Now().Truncate(time.Second),
|
||||
CTime: time.Now().Truncate(time.Second),
|
||||
Size: 1024,
|
||||
Mode: 0644,
|
||||
UID: 1000,
|
||||
@@ -96,7 +95,8 @@ func TestFileRepositoryGetByID(t *testing.T) {
|
||||
}
|
||||
|
||||
// Test non-existent ID
|
||||
nonExistent, err := repo.GetByID(ctx, "non-existent-uuid")
|
||||
nonExistentID := types.NewFileID() // Generate a new UUID that won't exist in the database
|
||||
nonExistent, err := repo.GetByID(ctx, nonExistentID)
|
||||
if err != nil {
|
||||
t.Fatalf("GetByID should not return error for non-existent ID: %v", err)
|
||||
}
|
||||
@@ -117,7 +117,6 @@ func TestOrphanedFileCleanup(t *testing.T) {
|
||||
file1 := &File{
|
||||
Path: "/orphaned.txt",
|
||||
MTime: time.Now().Truncate(time.Second),
|
||||
CTime: time.Now().Truncate(time.Second),
|
||||
Size: 1024,
|
||||
Mode: 0644,
|
||||
UID: 1000,
|
||||
@@ -126,7 +125,6 @@ func TestOrphanedFileCleanup(t *testing.T) {
|
||||
file2 := &File{
|
||||
Path: "/referenced.txt",
|
||||
MTime: time.Now().Truncate(time.Second),
|
||||
CTime: time.Now().Truncate(time.Second),
|
||||
Size: 2048,
|
||||
Mode: 0644,
|
||||
UID: 1000,
|
||||
@@ -154,7 +152,7 @@ func TestOrphanedFileCleanup(t *testing.T) {
|
||||
}
|
||||
|
||||
// Add file2 to snapshot
|
||||
err = repos.Snapshots.AddFileByID(ctx, nil, snapshot.ID, file2.ID)
|
||||
err = repos.Snapshots.AddFileByID(ctx, nil, snapshot.ID.String(), file2.ID)
|
||||
if err != nil {
|
||||
t.Fatalf("failed to add file to snapshot: %v", err)
|
||||
}
|
||||
@@ -194,11 +192,11 @@ func TestOrphanedChunkCleanup(t *testing.T) {
|
||||
|
||||
// Create chunks
|
||||
chunk1 := &Chunk{
|
||||
ChunkHash: "orphaned-chunk",
|
||||
ChunkHash: types.ChunkHash("orphaned-chunk"),
|
||||
Size: 1024,
|
||||
}
|
||||
chunk2 := &Chunk{
|
||||
ChunkHash: "referenced-chunk",
|
||||
ChunkHash: types.ChunkHash("referenced-chunk"),
|
||||
Size: 1024,
|
||||
}
|
||||
|
||||
@@ -215,7 +213,6 @@ func TestOrphanedChunkCleanup(t *testing.T) {
|
||||
file := &File{
|
||||
Path: "/test.txt",
|
||||
MTime: time.Now().Truncate(time.Second),
|
||||
CTime: time.Now().Truncate(time.Second),
|
||||
Size: 1024,
|
||||
Mode: 0644,
|
||||
UID: 1000,
|
||||
@@ -244,7 +241,7 @@ func TestOrphanedChunkCleanup(t *testing.T) {
|
||||
}
|
||||
|
||||
// Check that orphaned chunk is gone
|
||||
orphanedChunk, err := repos.Chunks.GetByHash(ctx, chunk1.ChunkHash)
|
||||
orphanedChunk, err := repos.Chunks.GetByHash(ctx, chunk1.ChunkHash.String())
|
||||
if err != nil {
|
||||
t.Fatalf("error getting chunk: %v", err)
|
||||
}
|
||||
@@ -253,7 +250,7 @@ func TestOrphanedChunkCleanup(t *testing.T) {
|
||||
}
|
||||
|
||||
// Check that referenced chunk still exists
|
||||
referencedChunk, err := repos.Chunks.GetByHash(ctx, chunk2.ChunkHash)
|
||||
referencedChunk, err := repos.Chunks.GetByHash(ctx, chunk2.ChunkHash.String())
|
||||
if err != nil {
|
||||
t.Fatalf("error getting chunk: %v", err)
|
||||
}
|
||||
@@ -272,13 +269,13 @@ func TestOrphanedBlobCleanup(t *testing.T) {
|
||||
|
||||
// Create blobs
|
||||
blob1 := &Blob{
|
||||
ID: "orphaned-blob-id",
|
||||
Hash: "orphaned-blob",
|
||||
ID: types.NewBlobID(),
|
||||
Hash: types.BlobHash("orphaned-blob"),
|
||||
CreatedTS: time.Now().Truncate(time.Second),
|
||||
}
|
||||
blob2 := &Blob{
|
||||
ID: "referenced-blob-id",
|
||||
Hash: "referenced-blob",
|
||||
ID: types.NewBlobID(),
|
||||
Hash: types.BlobHash("referenced-blob"),
|
||||
CreatedTS: time.Now().Truncate(time.Second),
|
||||
}
|
||||
|
||||
@@ -303,7 +300,7 @@ func TestOrphanedBlobCleanup(t *testing.T) {
|
||||
}
|
||||
|
||||
// Add blob2 to snapshot
|
||||
err = repos.Snapshots.AddBlob(ctx, nil, snapshot.ID, blob2.ID, blob2.Hash)
|
||||
err = repos.Snapshots.AddBlob(ctx, nil, snapshot.ID.String(), blob2.ID, blob2.Hash)
|
||||
if err != nil {
|
||||
t.Fatalf("failed to add blob to snapshot: %v", err)
|
||||
}
|
||||
@@ -315,7 +312,7 @@ func TestOrphanedBlobCleanup(t *testing.T) {
|
||||
}
|
||||
|
||||
// Check that orphaned blob is gone
|
||||
orphanedBlob, err := repos.Blobs.GetByID(ctx, blob1.ID)
|
||||
orphanedBlob, err := repos.Blobs.GetByID(ctx, blob1.ID.String())
|
||||
if err != nil {
|
||||
t.Fatalf("error getting blob: %v", err)
|
||||
}
|
||||
@@ -324,7 +321,7 @@ func TestOrphanedBlobCleanup(t *testing.T) {
|
||||
}
|
||||
|
||||
// Check that referenced blob still exists
|
||||
referencedBlob, err := repos.Blobs.GetByID(ctx, blob2.ID)
|
||||
referencedBlob, err := repos.Blobs.GetByID(ctx, blob2.ID.String())
|
||||
if err != nil {
|
||||
t.Fatalf("error getting blob: %v", err)
|
||||
}
|
||||
@@ -345,7 +342,6 @@ func TestFileChunkRepositoryWithUUIDs(t *testing.T) {
|
||||
file := &File{
|
||||
Path: "/test.txt",
|
||||
MTime: time.Now().Truncate(time.Second),
|
||||
CTime: time.Now().Truncate(time.Second),
|
||||
Size: 3072,
|
||||
Mode: 0644,
|
||||
UID: 1000,
|
||||
@@ -357,7 +353,7 @@ func TestFileChunkRepositoryWithUUIDs(t *testing.T) {
|
||||
}
|
||||
|
||||
// Create chunks
|
||||
chunks := []string{"chunk1", "chunk2", "chunk3"}
|
||||
chunks := []types.ChunkHash{"chunk1", "chunk2", "chunk3"}
|
||||
for i, chunkHash := range chunks {
|
||||
chunk := &Chunk{
|
||||
ChunkHash: chunkHash,
|
||||
@@ -416,7 +412,6 @@ func TestChunkFileRepositoryWithUUIDs(t *testing.T) {
|
||||
file1 := &File{
|
||||
Path: "/file1.txt",
|
||||
MTime: time.Now().Truncate(time.Second),
|
||||
CTime: time.Now().Truncate(time.Second),
|
||||
Size: 1024,
|
||||
Mode: 0644,
|
||||
UID: 1000,
|
||||
@@ -425,7 +420,6 @@ func TestChunkFileRepositoryWithUUIDs(t *testing.T) {
|
||||
file2 := &File{
|
||||
Path: "/file2.txt",
|
||||
MTime: time.Now().Truncate(time.Second),
|
||||
CTime: time.Now().Truncate(time.Second),
|
||||
Size: 1024,
|
||||
Mode: 0644,
|
||||
UID: 1000,
|
||||
@@ -443,7 +437,7 @@ func TestChunkFileRepositoryWithUUIDs(t *testing.T) {
|
||||
|
||||
// Create a chunk that appears in both files (deduplication)
|
||||
chunk := &Chunk{
|
||||
ChunkHash: "shared-chunk",
|
||||
ChunkHash: types.ChunkHash("shared-chunk"),
|
||||
Size: 1024,
|
||||
}
|
||||
err = repos.Chunks.Create(ctx, nil, chunk)
|
||||
@@ -526,7 +520,7 @@ func TestSnapshotRepositoryExtendedFields(t *testing.T) {
|
||||
}
|
||||
|
||||
// Retrieve and verify
|
||||
retrieved, err := repo.GetByID(ctx, snapshot.ID)
|
||||
retrieved, err := repo.GetByID(ctx, snapshot.ID.String())
|
||||
if err != nil {
|
||||
t.Fatalf("failed to get snapshot: %v", err)
|
||||
}
|
||||
@@ -581,9 +575,8 @@ func TestComplexOrphanedDataScenario(t *testing.T) {
|
||||
files := make([]*File, 3)
|
||||
for i := range files {
|
||||
files[i] = &File{
|
||||
Path: fmt.Sprintf("/file%d.txt", i),
|
||||
Path: types.FilePath(fmt.Sprintf("/file%d.txt", i)),
|
||||
MTime: time.Now().Truncate(time.Second),
|
||||
CTime: time.Now().Truncate(time.Second),
|
||||
Size: 1024,
|
||||
Mode: 0644,
|
||||
UID: 1000,
|
||||
@@ -601,29 +594,29 @@ func TestComplexOrphanedDataScenario(t *testing.T) {
|
||||
// file0: only in snapshot1
|
||||
// file1: in both snapshots
|
||||
// file2: only in snapshot2
|
||||
err = repos.Snapshots.AddFileByID(ctx, nil, snapshot1.ID, files[0].ID)
|
||||
err = repos.Snapshots.AddFileByID(ctx, nil, snapshot1.ID.String(), files[0].ID)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
err = repos.Snapshots.AddFileByID(ctx, nil, snapshot1.ID, files[1].ID)
|
||||
err = repos.Snapshots.AddFileByID(ctx, nil, snapshot1.ID.String(), files[1].ID)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
err = repos.Snapshots.AddFileByID(ctx, nil, snapshot2.ID, files[1].ID)
|
||||
err = repos.Snapshots.AddFileByID(ctx, nil, snapshot2.ID.String(), files[1].ID)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
err = repos.Snapshots.AddFileByID(ctx, nil, snapshot2.ID, files[2].ID)
|
||||
err = repos.Snapshots.AddFileByID(ctx, nil, snapshot2.ID.String(), files[2].ID)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
// Delete snapshot1
|
||||
err = repos.Snapshots.DeleteSnapshotFiles(ctx, snapshot1.ID)
|
||||
err = repos.Snapshots.DeleteSnapshotFiles(ctx, snapshot1.ID.String())
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
err = repos.Snapshots.Delete(ctx, snapshot1.ID)
|
||||
err = repos.Snapshots.Delete(ctx, snapshot1.ID.String())
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
@@ -675,7 +668,6 @@ func TestCascadeDelete(t *testing.T) {
|
||||
file := &File{
|
||||
Path: "/cascade-test.txt",
|
||||
MTime: time.Now().Truncate(time.Second),
|
||||
CTime: time.Now().Truncate(time.Second),
|
||||
Size: 1024,
|
||||
Mode: 0644,
|
||||
UID: 1000,
|
||||
@@ -689,7 +681,7 @@ func TestCascadeDelete(t *testing.T) {
|
||||
// Create chunks and file-chunk mappings
|
||||
for i := 0; i < 3; i++ {
|
||||
chunk := &Chunk{
|
||||
ChunkHash: fmt.Sprintf("cascade-chunk-%d", i),
|
||||
ChunkHash: types.ChunkHash(fmt.Sprintf("cascade-chunk-%d", i)),
|
||||
Size: 1024,
|
||||
}
|
||||
err = repos.Chunks.Create(ctx, nil, chunk)
|
||||
@@ -747,7 +739,6 @@ func TestTransactionIsolation(t *testing.T) {
|
||||
file := &File{
|
||||
Path: "/tx-test.txt",
|
||||
MTime: time.Now().Truncate(time.Second),
|
||||
CTime: time.Now().Truncate(time.Second),
|
||||
Size: 1024,
|
||||
Mode: 0644,
|
||||
UID: 1000,
|
||||
@@ -807,9 +798,8 @@ func TestConcurrentOrphanedCleanup(t *testing.T) {
|
||||
// Create many files, some orphaned
|
||||
for i := 0; i < 20; i++ {
|
||||
file := &File{
|
||||
Path: fmt.Sprintf("/concurrent-%d.txt", i),
|
||||
Path: types.FilePath(fmt.Sprintf("/concurrent-%d.txt", i)),
|
||||
MTime: time.Now().Truncate(time.Second),
|
||||
CTime: time.Now().Truncate(time.Second),
|
||||
Size: 1024,
|
||||
Mode: 0644,
|
||||
UID: 1000,
|
||||
@@ -822,7 +812,7 @@ func TestConcurrentOrphanedCleanup(t *testing.T) {
|
||||
|
||||
// Add even-numbered files to snapshot
|
||||
if i%2 == 0 {
|
||||
err = repos.Snapshots.AddFileByID(ctx, nil, snapshot.ID, file.ID)
|
||||
err = repos.Snapshots.AddFileByID(ctx, nil, snapshot.ID.String(), file.ID)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
@@ -860,7 +850,7 @@ func TestConcurrentOrphanedCleanup(t *testing.T) {
|
||||
// Verify all remaining files are even-numbered
|
||||
for _, file := range files {
|
||||
var num int
|
||||
_, err := fmt.Sscanf(file.Path, "/concurrent-%d.txt", &num)
|
||||
_, err := fmt.Sscanf(file.Path.String(), "/concurrent-%d.txt", &num)
|
||||
if err != nil {
|
||||
t.Logf("failed to parse file number from %s: %v", file.Path, err)
|
||||
}
|
||||
|
||||
@@ -18,7 +18,6 @@ func TestOrphanedFileCleanupDebug(t *testing.T) {
|
||||
file1 := &File{
|
||||
Path: "/orphaned.txt",
|
||||
MTime: time.Now().Truncate(time.Second),
|
||||
CTime: time.Now().Truncate(time.Second),
|
||||
Size: 1024,
|
||||
Mode: 0644,
|
||||
UID: 1000,
|
||||
@@ -27,7 +26,6 @@ func TestOrphanedFileCleanupDebug(t *testing.T) {
|
||||
file2 := &File{
|
||||
Path: "/referenced.txt",
|
||||
MTime: time.Now().Truncate(time.Second),
|
||||
CTime: time.Now().Truncate(time.Second),
|
||||
Size: 2048,
|
||||
Mode: 0644,
|
||||
UID: 1000,
|
||||
@@ -67,7 +65,7 @@ func TestOrphanedFileCleanupDebug(t *testing.T) {
|
||||
t.Logf("snapshot_files count before add: %d", count)
|
||||
|
||||
// Add file2 to snapshot
|
||||
err = repos.Snapshots.AddFileByID(ctx, nil, snapshot.ID, file2.ID)
|
||||
err = repos.Snapshots.AddFileByID(ctx, nil, snapshot.ID.String(), file2.ID)
|
||||
if err != nil {
|
||||
t.Fatalf("failed to add file to snapshot: %v", err)
|
||||
}
|
||||
|
||||
@@ -6,6 +6,8 @@ import (
|
||||
"strings"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"sneak.berlin/go/vaultik/internal/types"
|
||||
)
|
||||
|
||||
// TestFileRepositoryEdgeCases tests edge cases for file repository
|
||||
@@ -27,7 +29,6 @@ func TestFileRepositoryEdgeCases(t *testing.T) {
|
||||
file: &File{
|
||||
Path: "",
|
||||
MTime: time.Now(),
|
||||
CTime: time.Now(),
|
||||
Size: 1024,
|
||||
Mode: 0644,
|
||||
UID: 1000,
|
||||
@@ -38,9 +39,8 @@ func TestFileRepositoryEdgeCases(t *testing.T) {
|
||||
{
|
||||
name: "very long path",
|
||||
file: &File{
|
||||
Path: "/" + strings.Repeat("a", 4096),
|
||||
Path: types.FilePath("/" + strings.Repeat("a", 4096)),
|
||||
MTime: time.Now(),
|
||||
CTime: time.Now(),
|
||||
Size: 1024,
|
||||
Mode: 0644,
|
||||
UID: 1000,
|
||||
@@ -53,7 +53,6 @@ func TestFileRepositoryEdgeCases(t *testing.T) {
|
||||
file: &File{
|
||||
Path: "/test/file with spaces and 特殊文字.txt",
|
||||
MTime: time.Now(),
|
||||
CTime: time.Now(),
|
||||
Size: 1024,
|
||||
Mode: 0644,
|
||||
UID: 1000,
|
||||
@@ -66,7 +65,6 @@ func TestFileRepositoryEdgeCases(t *testing.T) {
|
||||
file: &File{
|
||||
Path: "/empty.txt",
|
||||
MTime: time.Now(),
|
||||
CTime: time.Now(),
|
||||
Size: 0,
|
||||
Mode: 0644,
|
||||
UID: 1000,
|
||||
@@ -79,7 +77,6 @@ func TestFileRepositoryEdgeCases(t *testing.T) {
|
||||
file: &File{
|
||||
Path: "/link",
|
||||
MTime: time.Now(),
|
||||
CTime: time.Now(),
|
||||
Size: 0,
|
||||
Mode: 0777 | 0120000, // symlink mode
|
||||
UID: 1000,
|
||||
@@ -94,7 +91,7 @@ func TestFileRepositoryEdgeCases(t *testing.T) {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
// Add a unique suffix to paths to avoid UNIQUE constraint violations
|
||||
if tt.file.Path != "" {
|
||||
tt.file.Path = fmt.Sprintf("%s_%d_%d", tt.file.Path, i, time.Now().UnixNano())
|
||||
tt.file.Path = types.FilePath(fmt.Sprintf("%s_%d_%d", tt.file.Path, i, time.Now().UnixNano()))
|
||||
}
|
||||
|
||||
err := repo.Create(ctx, nil, tt.file)
|
||||
@@ -121,7 +118,6 @@ func TestDuplicateHandling(t *testing.T) {
|
||||
file1 := &File{
|
||||
Path: "/duplicate.txt",
|
||||
MTime: time.Now(),
|
||||
CTime: time.Now(),
|
||||
Size: 1024,
|
||||
Mode: 0644,
|
||||
UID: 1000,
|
||||
@@ -130,7 +126,6 @@ func TestDuplicateHandling(t *testing.T) {
|
||||
file2 := &File{
|
||||
Path: "/duplicate.txt", // Same path
|
||||
MTime: time.Now().Add(time.Hour),
|
||||
CTime: time.Now().Add(time.Hour),
|
||||
Size: 2048,
|
||||
Mode: 0644,
|
||||
UID: 1000,
|
||||
@@ -169,7 +164,7 @@ func TestDuplicateHandling(t *testing.T) {
|
||||
// Test duplicate chunk hashes
|
||||
t.Run("duplicate chunk hashes", func(t *testing.T) {
|
||||
chunk := &Chunk{
|
||||
ChunkHash: "duplicate-chunk",
|
||||
ChunkHash: types.ChunkHash("duplicate-chunk"),
|
||||
Size: 1024,
|
||||
}
|
||||
|
||||
@@ -190,7 +185,6 @@ func TestDuplicateHandling(t *testing.T) {
|
||||
file := &File{
|
||||
Path: "/test-dup-fc.txt",
|
||||
MTime: time.Now(),
|
||||
CTime: time.Now(),
|
||||
Size: 1024,
|
||||
Mode: 0644,
|
||||
UID: 1000,
|
||||
@@ -202,7 +196,7 @@ func TestDuplicateHandling(t *testing.T) {
|
||||
}
|
||||
|
||||
chunk := &Chunk{
|
||||
ChunkHash: "test-chunk-dup",
|
||||
ChunkHash: types.ChunkHash("test-chunk-dup"),
|
||||
Size: 1024,
|
||||
}
|
||||
err = repos.Chunks.Create(ctx, nil, chunk)
|
||||
@@ -242,7 +236,6 @@ func TestNullHandling(t *testing.T) {
|
||||
file := &File{
|
||||
Path: "/regular.txt",
|
||||
MTime: time.Now(),
|
||||
CTime: time.Now(),
|
||||
Size: 1024,
|
||||
Mode: 0644,
|
||||
UID: 1000,
|
||||
@@ -279,7 +272,7 @@ func TestNullHandling(t *testing.T) {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
retrieved, err := repos.Snapshots.GetByID(ctx, snapshot.ID)
|
||||
retrieved, err := repos.Snapshots.GetByID(ctx, snapshot.ID.String())
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
@@ -292,8 +285,8 @@ func TestNullHandling(t *testing.T) {
|
||||
// Test blob with NULL uploaded_ts
|
||||
t.Run("blob not uploaded", func(t *testing.T) {
|
||||
blob := &Blob{
|
||||
ID: "not-uploaded",
|
||||
Hash: "test-hash",
|
||||
ID: types.NewBlobID(),
|
||||
Hash: types.BlobHash("test-hash"),
|
||||
CreatedTS: time.Now(),
|
||||
UploadedTS: nil, // Not uploaded yet
|
||||
}
|
||||
@@ -303,7 +296,7 @@ func TestNullHandling(t *testing.T) {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
retrieved, err := repos.Blobs.GetByID(ctx, blob.ID)
|
||||
retrieved, err := repos.Blobs.GetByID(ctx, blob.ID.String())
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
@@ -339,15 +332,14 @@ func TestLargeDatasets(t *testing.T) {
|
||||
|
||||
// Create many files
|
||||
const fileCount = 1000
|
||||
fileIDs := make([]string, fileCount)
|
||||
fileIDs := make([]types.FileID, fileCount)
|
||||
|
||||
t.Run("create many files", func(t *testing.T) {
|
||||
start := time.Now()
|
||||
for i := 0; i < fileCount; i++ {
|
||||
file := &File{
|
||||
Path: fmt.Sprintf("/large/file%05d.txt", i),
|
||||
Path: types.FilePath(fmt.Sprintf("/large/file%05d.txt", i)),
|
||||
MTime: time.Now(),
|
||||
CTime: time.Now(),
|
||||
Size: int64(i * 1024),
|
||||
Mode: 0644,
|
||||
UID: uint32(1000 + (i % 10)),
|
||||
@@ -361,7 +353,7 @@ func TestLargeDatasets(t *testing.T) {
|
||||
|
||||
// Add half to snapshot
|
||||
if i%2 == 0 {
|
||||
err = repos.Snapshots.AddFileByID(ctx, nil, snapshot.ID, file.ID)
|
||||
err = repos.Snapshots.AddFileByID(ctx, nil, snapshot.ID.String(), file.ID)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
@@ -413,7 +405,7 @@ func TestErrorPropagation(t *testing.T) {
|
||||
|
||||
// Test GetByID with non-existent ID
|
||||
t.Run("GetByID non-existent", func(t *testing.T) {
|
||||
file, err := repos.Files.GetByID(ctx, "non-existent-uuid")
|
||||
file, err := repos.Files.GetByID(ctx, types.NewFileID())
|
||||
if err != nil {
|
||||
t.Errorf("GetByID should not return error for non-existent ID, got: %v", err)
|
||||
}
|
||||
@@ -436,9 +428,9 @@ func TestErrorPropagation(t *testing.T) {
|
||||
// Test invalid foreign key reference
|
||||
t.Run("invalid foreign key", func(t *testing.T) {
|
||||
fc := &FileChunk{
|
||||
FileID: "non-existent-file-id",
|
||||
FileID: types.NewFileID(),
|
||||
Idx: 0,
|
||||
ChunkHash: "some-chunk",
|
||||
ChunkHash: types.ChunkHash("some-chunk"),
|
||||
}
|
||||
err := repos.FileChunks.Create(ctx, nil, fc)
|
||||
if err == nil {
|
||||
@@ -470,9 +462,8 @@ func TestQueryInjection(t *testing.T) {
|
||||
t.Run("injection attempt", func(t *testing.T) {
|
||||
// Try injection in file path
|
||||
file := &File{
|
||||
Path: injection,
|
||||
Path: types.FilePath(injection),
|
||||
MTime: time.Now(),
|
||||
CTime: time.Now(),
|
||||
Size: 1024,
|
||||
Mode: 0644,
|
||||
UID: 1000,
|
||||
@@ -511,7 +502,6 @@ func TestTimezoneHandling(t *testing.T) {
|
||||
file := &File{
|
||||
Path: "/timezone-test.txt",
|
||||
MTime: nyTime,
|
||||
CTime: nyTime,
|
||||
Size: 1024,
|
||||
Mode: 0644,
|
||||
UID: 1000,
|
||||
|
||||
9
internal/database/schema/000.sql
Normal file
9
internal/database/schema/000.sql
Normal file
@@ -0,0 +1,9 @@
|
||||
-- Migration 000: Schema migrations tracking table
|
||||
-- Applied as a bootstrap step before the normal migration loop.
|
||||
|
||||
CREATE TABLE IF NOT EXISTS schema_migrations (
|
||||
version INTEGER PRIMARY KEY,
|
||||
applied_at DATETIME DEFAULT CURRENT_TIMESTAMP
|
||||
);
|
||||
|
||||
INSERT OR IGNORE INTO schema_migrations (version) VALUES (0);
|
||||
@@ -1,13 +1,12 @@
|
||||
-- Vaultik Database Schema
|
||||
-- Note: This database does not support migrations. If the schema changes,
|
||||
-- delete the local database and perform a full backup to recreate it.
|
||||
-- Migration 001: Initial Vaultik schema
|
||||
-- All core tables for tracking files, chunks, blobs, snapshots, and uploads.
|
||||
|
||||
-- Files table: stores metadata about files in the filesystem
|
||||
CREATE TABLE IF NOT EXISTS files (
|
||||
id TEXT PRIMARY KEY, -- UUID
|
||||
path TEXT NOT NULL UNIQUE,
|
||||
source_path TEXT NOT NULL DEFAULT '', -- The source directory this file came from (for restore path stripping)
|
||||
mtime INTEGER NOT NULL,
|
||||
ctime INTEGER NOT NULL,
|
||||
size INTEGER NOT NULL,
|
||||
mode INTEGER NOT NULL,
|
||||
uid INTEGER NOT NULL,
|
||||
@@ -28,6 +27,9 @@ CREATE TABLE IF NOT EXISTS file_chunks (
|
||||
FOREIGN KEY (chunk_hash) REFERENCES chunks(chunk_hash)
|
||||
);
|
||||
|
||||
-- Index for efficient chunk lookups (used in orphan detection)
|
||||
CREATE INDEX IF NOT EXISTS idx_file_chunks_chunk_hash ON file_chunks(chunk_hash);
|
||||
|
||||
-- Chunks table: stores unique content-defined chunks
|
||||
CREATE TABLE IF NOT EXISTS chunks (
|
||||
chunk_hash TEXT PRIMARY KEY,
|
||||
@@ -56,6 +58,9 @@ CREATE TABLE IF NOT EXISTS blob_chunks (
|
||||
FOREIGN KEY (chunk_hash) REFERENCES chunks(chunk_hash)
|
||||
);
|
||||
|
||||
-- Index for efficient chunk lookups (used in orphan detection)
|
||||
CREATE INDEX IF NOT EXISTS idx_blob_chunks_chunk_hash ON blob_chunks(chunk_hash);
|
||||
|
||||
-- Chunk files table: reverse mapping of chunks to files
|
||||
CREATE TABLE IF NOT EXISTS chunk_files (
|
||||
chunk_hash TEXT NOT NULL,
|
||||
@@ -67,6 +72,9 @@ CREATE TABLE IF NOT EXISTS chunk_files (
|
||||
FOREIGN KEY (file_id) REFERENCES files(id) ON DELETE CASCADE
|
||||
);
|
||||
|
||||
-- Index for efficient file lookups (used in orphan detection)
|
||||
CREATE INDEX IF NOT EXISTS idx_chunk_files_file_id ON chunk_files(file_id);
|
||||
|
||||
-- Snapshots table: tracks backup snapshots
|
||||
CREATE TABLE IF NOT EXISTS snapshots (
|
||||
id TEXT PRIMARY KEY,
|
||||
@@ -93,9 +101,12 @@ CREATE TABLE IF NOT EXISTS snapshot_files (
|
||||
file_id TEXT NOT NULL,
|
||||
PRIMARY KEY (snapshot_id, file_id),
|
||||
FOREIGN KEY (snapshot_id) REFERENCES snapshots(id) ON DELETE CASCADE,
|
||||
FOREIGN KEY (file_id) REFERENCES files(id)
|
||||
FOREIGN KEY (file_id) REFERENCES files(id) ON DELETE CASCADE
|
||||
);
|
||||
|
||||
-- Index for efficient file lookups (used in orphan detection)
|
||||
CREATE INDEX IF NOT EXISTS idx_snapshot_files_file_id ON snapshot_files(file_id);
|
||||
|
||||
-- Snapshot blobs table: maps snapshots to blobs
|
||||
CREATE TABLE IF NOT EXISTS snapshot_blobs (
|
||||
snapshot_id TEXT NOT NULL,
|
||||
@@ -103,9 +114,12 @@ CREATE TABLE IF NOT EXISTS snapshot_blobs (
|
||||
blob_hash TEXT NOT NULL,
|
||||
PRIMARY KEY (snapshot_id, blob_id),
|
||||
FOREIGN KEY (snapshot_id) REFERENCES snapshots(id) ON DELETE CASCADE,
|
||||
FOREIGN KEY (blob_id) REFERENCES blobs(id)
|
||||
FOREIGN KEY (blob_id) REFERENCES blobs(id) ON DELETE CASCADE
|
||||
);
|
||||
|
||||
-- Index for efficient blob lookups (used in orphan detection)
|
||||
CREATE INDEX IF NOT EXISTS idx_snapshot_blobs_blob_id ON snapshot_blobs(blob_id);
|
||||
|
||||
-- Uploads table: tracks blob upload metrics
|
||||
CREATE TABLE IF NOT EXISTS uploads (
|
||||
blob_hash TEXT PRIMARY KEY,
|
||||
@@ -114,5 +128,8 @@ CREATE TABLE IF NOT EXISTS uploads (
|
||||
size INTEGER NOT NULL,
|
||||
duration_ms INTEGER NOT NULL,
|
||||
FOREIGN KEY (blob_hash) REFERENCES blobs(blob_hash),
|
||||
FOREIGN KEY (snapshot_id) REFERENCES snapshots(id)
|
||||
);
|
||||
FOREIGN KEY (snapshot_id) REFERENCES snapshots(id) ON DELETE CASCADE
|
||||
);
|
||||
|
||||
-- Index for efficient snapshot lookups
|
||||
CREATE INDEX IF NOT EXISTS idx_uploads_snapshot_id ON uploads(snapshot_id);
|
||||
@@ -1,11 +0,0 @@
|
||||
-- Track blob upload metrics
|
||||
CREATE TABLE IF NOT EXISTS uploads (
|
||||
blob_hash TEXT PRIMARY KEY,
|
||||
uploaded_at TIMESTAMP NOT NULL,
|
||||
size INTEGER NOT NULL,
|
||||
duration_ms INTEGER NOT NULL,
|
||||
FOREIGN KEY (blob_hash) REFERENCES blobs(blob_hash)
|
||||
);
|
||||
|
||||
CREATE INDEX idx_uploads_uploaded_at ON uploads(uploaded_at);
|
||||
CREATE INDEX idx_uploads_duration ON uploads(duration_ms);
|
||||
@@ -5,6 +5,8 @@ import (
|
||||
"database/sql"
|
||||
"fmt"
|
||||
"time"
|
||||
|
||||
"sneak.berlin/go/vaultik/internal/types"
|
||||
)
|
||||
|
||||
type SnapshotRepository struct {
|
||||
@@ -269,7 +271,7 @@ func (r *SnapshotRepository) AddFile(ctx context.Context, tx *sql.Tx, snapshotID
|
||||
}
|
||||
|
||||
// AddFileByID adds a file to a snapshot by file ID
|
||||
func (r *SnapshotRepository) AddFileByID(ctx context.Context, tx *sql.Tx, snapshotID string, fileID string) error {
|
||||
func (r *SnapshotRepository) AddFileByID(ctx context.Context, tx *sql.Tx, snapshotID string, fileID types.FileID) error {
|
||||
query := `
|
||||
INSERT OR IGNORE INTO snapshot_files (snapshot_id, file_id)
|
||||
VALUES (?, ?)
|
||||
@@ -277,9 +279,9 @@ func (r *SnapshotRepository) AddFileByID(ctx context.Context, tx *sql.Tx, snapsh
|
||||
|
||||
var err error
|
||||
if tx != nil {
|
||||
_, err = tx.ExecContext(ctx, query, snapshotID, fileID)
|
||||
_, err = tx.ExecContext(ctx, query, snapshotID, fileID.String())
|
||||
} else {
|
||||
_, err = r.db.ExecWithLog(ctx, query, snapshotID, fileID)
|
||||
_, err = r.db.ExecWithLog(ctx, query, snapshotID, fileID.String())
|
||||
}
|
||||
|
||||
if err != nil {
|
||||
@@ -289,8 +291,48 @@ func (r *SnapshotRepository) AddFileByID(ctx context.Context, tx *sql.Tx, snapsh
|
||||
return nil
|
||||
}
|
||||
|
||||
// AddFilesByIDBatch adds multiple files to a snapshot in batched inserts
|
||||
func (r *SnapshotRepository) AddFilesByIDBatch(ctx context.Context, tx *sql.Tx, snapshotID string, fileIDs []types.FileID) error {
|
||||
if len(fileIDs) == 0 {
|
||||
return nil
|
||||
}
|
||||
|
||||
// Each entry has 2 values, so batch at 400 to be safe
|
||||
const batchSize = 400
|
||||
|
||||
for i := 0; i < len(fileIDs); i += batchSize {
|
||||
end := i + batchSize
|
||||
if end > len(fileIDs) {
|
||||
end = len(fileIDs)
|
||||
}
|
||||
batch := fileIDs[i:end]
|
||||
|
||||
query := "INSERT OR IGNORE INTO snapshot_files (snapshot_id, file_id) VALUES "
|
||||
args := make([]interface{}, 0, len(batch)*2)
|
||||
for j, fileID := range batch {
|
||||
if j > 0 {
|
||||
query += ", "
|
||||
}
|
||||
query += "(?, ?)"
|
||||
args = append(args, snapshotID, fileID.String())
|
||||
}
|
||||
|
||||
var err error
|
||||
if tx != nil {
|
||||
_, err = tx.ExecContext(ctx, query, args...)
|
||||
} else {
|
||||
_, err = r.db.ExecWithLog(ctx, query, args...)
|
||||
}
|
||||
if err != nil {
|
||||
return fmt.Errorf("batch adding files to snapshot: %w", err)
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// AddBlob adds a blob to a snapshot
|
||||
func (r *SnapshotRepository) AddBlob(ctx context.Context, tx *sql.Tx, snapshotID string, blobID string, blobHash string) error {
|
||||
func (r *SnapshotRepository) AddBlob(ctx context.Context, tx *sql.Tx, snapshotID string, blobID types.BlobID, blobHash types.BlobHash) error {
|
||||
query := `
|
||||
INSERT OR IGNORE INTO snapshot_blobs (snapshot_id, blob_id, blob_hash)
|
||||
VALUES (?, ?, ?)
|
||||
@@ -298,9 +340,9 @@ func (r *SnapshotRepository) AddBlob(ctx context.Context, tx *sql.Tx, snapshotID
|
||||
|
||||
var err error
|
||||
if tx != nil {
|
||||
_, err = tx.ExecContext(ctx, query, snapshotID, blobID, blobHash)
|
||||
_, err = tx.ExecContext(ctx, query, snapshotID, blobID.String(), blobHash.String())
|
||||
} else {
|
||||
_, err = r.db.ExecWithLog(ctx, query, snapshotID, blobID, blobHash)
|
||||
_, err = r.db.ExecWithLog(ctx, query, snapshotID, blobID.String(), blobHash.String())
|
||||
}
|
||||
|
||||
if err != nil {
|
||||
|
||||
@@ -6,6 +6,8 @@ import (
|
||||
"math"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"sneak.berlin/go/vaultik/internal/types"
|
||||
)
|
||||
|
||||
const (
|
||||
@@ -46,7 +48,7 @@ func TestSnapshotRepository(t *testing.T) {
|
||||
}
|
||||
|
||||
// Test GetByID
|
||||
retrieved, err := repo.GetByID(ctx, snapshot.ID)
|
||||
retrieved, err := repo.GetByID(ctx, snapshot.ID.String())
|
||||
if err != nil {
|
||||
t.Fatalf("failed to get snapshot: %v", err)
|
||||
}
|
||||
@@ -64,12 +66,12 @@ func TestSnapshotRepository(t *testing.T) {
|
||||
}
|
||||
|
||||
// Test UpdateCounts
|
||||
err = repo.UpdateCounts(ctx, nil, snapshot.ID, 200, 1000, 20, twoHundredMebibytes, sixtyMebibytes)
|
||||
err = repo.UpdateCounts(ctx, nil, snapshot.ID.String(), 200, 1000, 20, twoHundredMebibytes, sixtyMebibytes)
|
||||
if err != nil {
|
||||
t.Fatalf("failed to update counts: %v", err)
|
||||
}
|
||||
|
||||
retrieved, err = repo.GetByID(ctx, snapshot.ID)
|
||||
retrieved, err = repo.GetByID(ctx, snapshot.ID.String())
|
||||
if err != nil {
|
||||
t.Fatalf("failed to get updated snapshot: %v", err)
|
||||
}
|
||||
@@ -97,7 +99,7 @@ func TestSnapshotRepository(t *testing.T) {
|
||||
// Add more snapshots
|
||||
for i := 2; i <= 5; i++ {
|
||||
s := &Snapshot{
|
||||
ID: fmt.Sprintf("2024-01-0%dT12:00:00Z", i),
|
||||
ID: types.SnapshotID(fmt.Sprintf("2024-01-0%dT12:00:00Z", i)),
|
||||
Hostname: "test-host",
|
||||
VaultikVersion: "1.0.0",
|
||||
StartedAt: time.Now().Add(time.Duration(i) * time.Hour).Truncate(time.Second),
|
||||
|
||||
@@ -5,7 +5,7 @@ import (
|
||||
"database/sql"
|
||||
"time"
|
||||
|
||||
"git.eeqj.de/sneak/vaultik/internal/log"
|
||||
"sneak.berlin/go/vaultik/internal/log"
|
||||
)
|
||||
|
||||
// Upload represents a blob upload record
|
||||
|
||||
@@ -35,6 +35,7 @@ type Config struct {
|
||||
Verbose bool
|
||||
Debug bool
|
||||
Cron bool
|
||||
Quiet bool
|
||||
}
|
||||
|
||||
var logger *slog.Logger
|
||||
@@ -44,8 +45,8 @@ func Initialize(cfg Config) {
|
||||
// Determine log level based on configuration
|
||||
var level slog.Level
|
||||
|
||||
if cfg.Cron {
|
||||
// In cron mode, only show fatal errors (which we'll handle specially)
|
||||
if cfg.Cron || cfg.Quiet {
|
||||
// In quiet/cron mode, only show errors
|
||||
level = slog.LevelError
|
||||
} else if cfg.Debug || strings.Contains(os.Getenv("GODEBUG"), "vaultik") {
|
||||
level = slog.LevelDebug
|
||||
|
||||
@@ -21,4 +21,5 @@ type LogOptions struct {
|
||||
Verbose bool
|
||||
Debug bool
|
||||
Cron bool
|
||||
Quiet bool
|
||||
}
|
||||
|
||||
@@ -63,10 +63,3 @@ type Chunk struct {
|
||||
Offset int64
|
||||
Length int64
|
||||
}
|
||||
|
||||
// DirtyPath represents a path marked for backup by inotify
|
||||
type DirtyPath struct {
|
||||
Path string
|
||||
MarkedAt time.Time
|
||||
EventType string // "create", "modify", "delete"
|
||||
}
|
||||
|
||||
@@ -2,6 +2,7 @@ package s3
|
||||
|
||||
import (
|
||||
"context"
|
||||
"errors"
|
||||
"io"
|
||||
"sync/atomic"
|
||||
|
||||
@@ -10,6 +11,8 @@ import (
|
||||
"github.com/aws/aws-sdk-go-v2/credentials"
|
||||
"github.com/aws/aws-sdk-go-v2/feature/s3/manager"
|
||||
"github.com/aws/aws-sdk-go-v2/service/s3"
|
||||
s3types "github.com/aws/aws-sdk-go-v2/service/s3/types"
|
||||
"github.com/aws/smithy-go/logging"
|
||||
)
|
||||
|
||||
// Client wraps the AWS S3 client for vaultik operations.
|
||||
@@ -35,12 +38,18 @@ type Config struct {
|
||||
Region string
|
||||
}
|
||||
|
||||
// nopLogger is a logger that discards all output.
|
||||
// Used to suppress SDK warnings about checksums.
|
||||
type nopLogger struct{}
|
||||
|
||||
func (nopLogger) Logf(classification logging.Classification, format string, v ...interface{}) {}
|
||||
|
||||
// NewClient creates a new S3 client with the provided configuration.
|
||||
// It establishes a connection to the S3-compatible storage service and
|
||||
// validates the credentials. The client uses static credentials and
|
||||
// path-style URLs for compatibility with various S3-compatible services.
|
||||
func NewClient(ctx context.Context, cfg Config) (*Client, error) {
|
||||
// Create AWS config
|
||||
// Create AWS config with a nop logger to suppress SDK warnings
|
||||
awsCfg, err := config.LoadDefaultConfig(ctx,
|
||||
config.WithRegion(cfg.Region),
|
||||
config.WithCredentialsProvider(credentials.NewStaticCredentialsProvider(
|
||||
@@ -48,6 +57,7 @@ func NewClient(ctx context.Context, cfg Config) (*Client, error) {
|
||||
cfg.SecretAccessKey,
|
||||
"",
|
||||
)),
|
||||
config.WithLogger(nopLogger{}),
|
||||
)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
@@ -195,9 +205,12 @@ func (c *Client) HeadObject(ctx context.Context, key string) (bool, error) {
|
||||
Key: aws.String(fullKey),
|
||||
})
|
||||
if err != nil {
|
||||
// Check if it's a not found error
|
||||
// TODO: Add proper error type checking
|
||||
return false, nil
|
||||
var notFound *s3types.NotFound
|
||||
var noSuchKey *s3types.NoSuchKey
|
||||
if errors.As(err, &notFound) || errors.As(err, &noSuchKey) {
|
||||
return false, nil
|
||||
}
|
||||
return false, err
|
||||
}
|
||||
return true, nil
|
||||
}
|
||||
|
||||
@@ -6,7 +6,7 @@ import (
|
||||
"io"
|
||||
"testing"
|
||||
|
||||
"git.eeqj.de/sneak/vaultik/internal/s3"
|
||||
"sneak.berlin/go/vaultik/internal/s3"
|
||||
)
|
||||
|
||||
func TestClient(t *testing.T) {
|
||||
|
||||
@@ -3,8 +3,8 @@ package s3
|
||||
import (
|
||||
"context"
|
||||
|
||||
"git.eeqj.de/sneak/vaultik/internal/config"
|
||||
"go.uber.org/fx"
|
||||
"sneak.berlin/go/vaultik/internal/config"
|
||||
)
|
||||
|
||||
// Module exports S3 functionality as an fx module.
|
||||
|
||||
@@ -13,7 +13,8 @@ import (
|
||||
"testing/fstest"
|
||||
"time"
|
||||
|
||||
"git.eeqj.de/sneak/vaultik/internal/database"
|
||||
"sneak.berlin/go/vaultik/internal/database"
|
||||
"sneak.berlin/go/vaultik/internal/types"
|
||||
)
|
||||
|
||||
// MockS3Client is a mock implementation of S3 operations for testing
|
||||
@@ -138,13 +139,13 @@ func TestBackupWithInMemoryFS(t *testing.T) {
|
||||
}
|
||||
|
||||
for _, file := range files {
|
||||
if !expectedFiles[file.Path] {
|
||||
if !expectedFiles[file.Path.String()] {
|
||||
t.Errorf("Unexpected file in database: %s", file.Path)
|
||||
}
|
||||
delete(expectedFiles, file.Path)
|
||||
delete(expectedFiles, file.Path.String())
|
||||
|
||||
// Verify file metadata
|
||||
fsFile := testFS[file.Path]
|
||||
fsFile := testFS[file.Path.String()]
|
||||
if fsFile == nil {
|
||||
t.Errorf("File %s not found in test filesystem", file.Path)
|
||||
continue
|
||||
@@ -294,8 +295,8 @@ func (b *BackupEngine) Backup(ctx context.Context, fsys fs.FS, root string) (str
|
||||
hostname, _ := os.Hostname()
|
||||
snapshotID := time.Now().Format(time.RFC3339)
|
||||
snapshot := &database.Snapshot{
|
||||
ID: snapshotID,
|
||||
Hostname: hostname,
|
||||
ID: types.SnapshotID(snapshotID),
|
||||
Hostname: types.Hostname(hostname),
|
||||
VaultikVersion: "test",
|
||||
StartedAt: time.Now(),
|
||||
CompletedAt: nil,
|
||||
@@ -340,13 +341,12 @@ func (b *BackupEngine) Backup(ctx context.Context, fsys fs.FS, root string) (str
|
||||
|
||||
// Create file record in a short transaction
|
||||
file := &database.File{
|
||||
Path: path,
|
||||
Path: types.FilePath(path),
|
||||
Size: info.Size(),
|
||||
Mode: uint32(info.Mode()),
|
||||
MTime: info.ModTime(),
|
||||
CTime: info.ModTime(), // Use mtime as ctime for test
|
||||
UID: 1000, // Default UID for test
|
||||
GID: 1000, // Default GID for test
|
||||
UID: 1000, // Default UID for test
|
||||
GID: 1000, // Default GID for test
|
||||
}
|
||||
err = b.repos.WithTx(ctx, func(ctx context.Context, tx *sql.Tx) error {
|
||||
return b.repos.Files.Create(ctx, tx, file)
|
||||
@@ -392,7 +392,7 @@ func (b *BackupEngine) Backup(ctx context.Context, fsys fs.FS, root string) (str
|
||||
// Create new chunk in a short transaction
|
||||
err = b.repos.WithTx(ctx, func(ctx context.Context, tx *sql.Tx) error {
|
||||
chunk := &database.Chunk{
|
||||
ChunkHash: chunkHash,
|
||||
ChunkHash: types.ChunkHash(chunkHash),
|
||||
Size: int64(n),
|
||||
}
|
||||
return b.repos.Chunks.Create(ctx, tx, chunk)
|
||||
@@ -408,7 +408,7 @@ func (b *BackupEngine) Backup(ctx context.Context, fsys fs.FS, root string) (str
|
||||
fileChunk := &database.FileChunk{
|
||||
FileID: file.ID,
|
||||
Idx: chunkIndex,
|
||||
ChunkHash: chunkHash,
|
||||
ChunkHash: types.ChunkHash(chunkHash),
|
||||
}
|
||||
return b.repos.FileChunks.Create(ctx, tx, fileChunk)
|
||||
})
|
||||
@@ -419,7 +419,7 @@ func (b *BackupEngine) Backup(ctx context.Context, fsys fs.FS, root string) (str
|
||||
// Create chunk-file mapping in a short transaction
|
||||
err = b.repos.WithTx(ctx, func(ctx context.Context, tx *sql.Tx) error {
|
||||
chunkFile := &database.ChunkFile{
|
||||
ChunkHash: chunkHash,
|
||||
ChunkHash: types.ChunkHash(chunkHash),
|
||||
FileID: file.ID,
|
||||
FileOffset: int64(chunkIndex * defaultChunkSize),
|
||||
Length: int64(n),
|
||||
@@ -463,10 +463,11 @@ func (b *BackupEngine) Backup(ctx context.Context, fsys fs.FS, root string) (str
|
||||
}
|
||||
|
||||
// Create blob entry in a short transaction
|
||||
blobID := types.NewBlobID()
|
||||
err = b.repos.WithTx(ctx, func(ctx context.Context, tx *sql.Tx) error {
|
||||
blob := &database.Blob{
|
||||
ID: "test-blob-" + blobHash[:8],
|
||||
Hash: blobHash,
|
||||
ID: blobID,
|
||||
Hash: types.BlobHash(blobHash),
|
||||
CreatedTS: time.Now(),
|
||||
}
|
||||
return b.repos.Blobs.Create(ctx, tx, blob)
|
||||
@@ -481,8 +482,8 @@ func (b *BackupEngine) Backup(ctx context.Context, fsys fs.FS, root string) (str
|
||||
// Create blob-chunk mapping in a short transaction
|
||||
err = b.repos.WithTx(ctx, func(ctx context.Context, tx *sql.Tx) error {
|
||||
blobChunk := &database.BlobChunk{
|
||||
BlobID: "test-blob-" + blobHash[:8],
|
||||
ChunkHash: chunkHash,
|
||||
BlobID: blobID,
|
||||
ChunkHash: types.ChunkHash(chunkHash),
|
||||
Offset: 0,
|
||||
Length: chunk.Size,
|
||||
}
|
||||
@@ -494,7 +495,7 @@ func (b *BackupEngine) Backup(ctx context.Context, fsys fs.FS, root string) (str
|
||||
|
||||
// Add blob to snapshot in a short transaction
|
||||
err = b.repos.WithTx(ctx, func(ctx context.Context, tx *sql.Tx) error {
|
||||
return b.repos.Snapshots.AddBlob(ctx, tx, snapshotID, "test-blob-"+blobHash[:8], blobHash)
|
||||
return b.repos.Snapshots.AddBlob(ctx, tx, snapshotID, blobID, types.BlobHash(blobHash))
|
||||
})
|
||||
if err != nil {
|
||||
return "", err
|
||||
|
||||
454
internal/snapshot/exclude_test.go
Normal file
454
internal/snapshot/exclude_test.go
Normal file
@@ -0,0 +1,454 @@
|
||||
package snapshot_test
|
||||
|
||||
import (
|
||||
"context"
|
||||
"database/sql"
|
||||
"path/filepath"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/spf13/afero"
|
||||
"github.com/stretchr/testify/require"
|
||||
"sneak.berlin/go/vaultik/internal/database"
|
||||
"sneak.berlin/go/vaultik/internal/log"
|
||||
"sneak.berlin/go/vaultik/internal/snapshot"
|
||||
"sneak.berlin/go/vaultik/internal/types"
|
||||
)
|
||||
|
||||
func setupExcludeTestFS(t *testing.T) afero.Fs {
|
||||
t.Helper()
|
||||
|
||||
// Create in-memory filesystem
|
||||
fs := afero.NewMemMapFs()
|
||||
|
||||
// Create test directory structure:
|
||||
// /backup/
|
||||
// file1.txt (should be backed up)
|
||||
// file2.log (should be excluded if *.log is in patterns)
|
||||
// .git/
|
||||
// config (should be excluded if .git is in patterns)
|
||||
// objects/
|
||||
// pack/
|
||||
// data.pack (should be excluded if .git is in patterns)
|
||||
// src/
|
||||
// main.go (should be backed up)
|
||||
// test.go (should be backed up)
|
||||
// node_modules/
|
||||
// package/
|
||||
// index.js (should be excluded if node_modules is in patterns)
|
||||
// cache/
|
||||
// temp.dat (should be excluded if cache/ is in patterns)
|
||||
// build/
|
||||
// output.bin (should be excluded if build is in patterns)
|
||||
// docs/
|
||||
// readme.md (should be backed up)
|
||||
// .DS_Store (should be excluded if .DS_Store is in patterns)
|
||||
// thumbs.db (should be excluded if thumbs.db is in patterns)
|
||||
|
||||
files := map[string]string{
|
||||
"/backup/file1.txt": "content1",
|
||||
"/backup/file2.log": "log content",
|
||||
"/backup/.git/config": "git config",
|
||||
"/backup/.git/objects/pack/data.pack": "pack data",
|
||||
"/backup/src/main.go": "package main",
|
||||
"/backup/src/test.go": "package main_test",
|
||||
"/backup/node_modules/package/index.js": "module.exports = {}",
|
||||
"/backup/cache/temp.dat": "cached data",
|
||||
"/backup/build/output.bin": "binary data",
|
||||
"/backup/docs/readme.md": "# Documentation",
|
||||
"/backup/.DS_Store": "ds store data",
|
||||
"/backup/thumbs.db": "thumbs data",
|
||||
"/backup/src/.hidden": "hidden file",
|
||||
"/backup/important.log.bak": "backup of log",
|
||||
}
|
||||
|
||||
testTime := time.Date(2024, 1, 1, 12, 0, 0, 0, time.UTC)
|
||||
for path, content := range files {
|
||||
dir := filepath.Dir(path)
|
||||
err := fs.MkdirAll(dir, 0755)
|
||||
require.NoError(t, err)
|
||||
err = afero.WriteFile(fs, path, []byte(content), 0644)
|
||||
require.NoError(t, err)
|
||||
err = fs.Chtimes(path, testTime, testTime)
|
||||
require.NoError(t, err)
|
||||
}
|
||||
|
||||
return fs
|
||||
}
|
||||
|
||||
func createTestScanner(t *testing.T, fs afero.Fs, excludePatterns []string) (*snapshot.Scanner, *database.Repositories, func()) {
|
||||
t.Helper()
|
||||
|
||||
// Initialize logger
|
||||
log.Initialize(log.Config{})
|
||||
|
||||
// Create test database
|
||||
db, err := database.NewTestDB()
|
||||
require.NoError(t, err)
|
||||
|
||||
repos := database.NewRepositories(db)
|
||||
|
||||
scanner := snapshot.NewScanner(snapshot.ScannerConfig{
|
||||
FS: fs,
|
||||
ChunkSize: 64 * 1024,
|
||||
Repositories: repos,
|
||||
MaxBlobSize: 1024 * 1024,
|
||||
CompressionLevel: 3,
|
||||
AgeRecipients: []string{"age1ql3z7hjy54pw3hyww5ayyfg7zqgvc7w3j2elw8zmrj2kg5sfn9aqmcac8p"},
|
||||
Exclude: excludePatterns,
|
||||
})
|
||||
|
||||
cleanup := func() {
|
||||
_ = db.Close()
|
||||
}
|
||||
|
||||
return scanner, repos, cleanup
|
||||
}
|
||||
|
||||
func createSnapshotRecord(t *testing.T, ctx context.Context, repos *database.Repositories, snapshotID string) {
|
||||
t.Helper()
|
||||
err := repos.WithTx(ctx, func(ctx context.Context, tx *sql.Tx) error {
|
||||
snap := &database.Snapshot{
|
||||
ID: types.SnapshotID(snapshotID),
|
||||
Hostname: "test-host",
|
||||
VaultikVersion: "test",
|
||||
StartedAt: time.Now(),
|
||||
CompletedAt: nil,
|
||||
FileCount: 0,
|
||||
ChunkCount: 0,
|
||||
BlobCount: 0,
|
||||
TotalSize: 0,
|
||||
BlobSize: 0,
|
||||
CompressionRatio: 1.0,
|
||||
}
|
||||
return repos.Snapshots.Create(ctx, tx, snap)
|
||||
})
|
||||
require.NoError(t, err)
|
||||
}
|
||||
|
||||
func TestExcludePatterns_ExcludeGitDirectory(t *testing.T) {
|
||||
fs := setupExcludeTestFS(t)
|
||||
scanner, repos, cleanup := createTestScanner(t, fs, []string{".git"})
|
||||
defer cleanup()
|
||||
require.NotNil(t, scanner)
|
||||
|
||||
ctx := context.Background()
|
||||
createSnapshotRecord(t, ctx, repos, "test-snapshot")
|
||||
|
||||
result, err := scanner.Scan(ctx, "/backup", "test-snapshot")
|
||||
require.NoError(t, err)
|
||||
|
||||
// Should have scanned files but NOT .git directory contents
|
||||
// Expected: file1.txt, file2.log, src/main.go, src/test.go, node_modules/package/index.js,
|
||||
// cache/temp.dat, build/output.bin, docs/readme.md, .DS_Store, thumbs.db,
|
||||
// src/.hidden, important.log.bak
|
||||
// Excluded: .git/config, .git/objects/pack/data.pack
|
||||
require.Equal(t, 12, result.FilesScanned, "Should exclude .git directory contents")
|
||||
}
|
||||
|
||||
func TestExcludePatterns_ExcludeByExtension(t *testing.T) {
|
||||
fs := setupExcludeTestFS(t)
|
||||
scanner, repos, cleanup := createTestScanner(t, fs, []string{"*.log"})
|
||||
defer cleanup()
|
||||
require.NotNil(t, scanner)
|
||||
|
||||
ctx := context.Background()
|
||||
createSnapshotRecord(t, ctx, repos, "test-snapshot")
|
||||
|
||||
result, err := scanner.Scan(ctx, "/backup", "test-snapshot")
|
||||
require.NoError(t, err)
|
||||
|
||||
// Should exclude file2.log but NOT important.log.bak (different extension)
|
||||
// Total files: 14, excluded: 1 (file2.log)
|
||||
require.Equal(t, 13, result.FilesScanned, "Should exclude *.log files")
|
||||
}
|
||||
|
||||
func TestExcludePatterns_ExcludeNodeModules(t *testing.T) {
|
||||
fs := setupExcludeTestFS(t)
|
||||
scanner, repos, cleanup := createTestScanner(t, fs, []string{"node_modules"})
|
||||
defer cleanup()
|
||||
require.NotNil(t, scanner)
|
||||
|
||||
ctx := context.Background()
|
||||
createSnapshotRecord(t, ctx, repos, "test-snapshot")
|
||||
|
||||
result, err := scanner.Scan(ctx, "/backup", "test-snapshot")
|
||||
require.NoError(t, err)
|
||||
|
||||
// Should exclude node_modules/package/index.js
|
||||
// Total files: 14, excluded: 1
|
||||
require.Equal(t, 13, result.FilesScanned, "Should exclude node_modules directory")
|
||||
}
|
||||
|
||||
func TestExcludePatterns_MultiplePatterns(t *testing.T) {
|
||||
fs := setupExcludeTestFS(t)
|
||||
scanner, repos, cleanup := createTestScanner(t, fs, []string{".git", "node_modules", "*.log", ".DS_Store", "thumbs.db", "cache", "build"})
|
||||
defer cleanup()
|
||||
require.NotNil(t, scanner)
|
||||
|
||||
ctx := context.Background()
|
||||
createSnapshotRecord(t, ctx, repos, "test-snapshot")
|
||||
|
||||
result, err := scanner.Scan(ctx, "/backup", "test-snapshot")
|
||||
require.NoError(t, err)
|
||||
|
||||
// Should only have: file1.txt, src/main.go, src/test.go, docs/readme.md, src/.hidden, important.log.bak
|
||||
// Excluded: .git/*, node_modules/*, *.log (file2.log), .DS_Store, thumbs.db, cache/*, build/*
|
||||
require.Equal(t, 6, result.FilesScanned, "Should exclude multiple patterns")
|
||||
}
|
||||
|
||||
func TestExcludePatterns_NoExclusions(t *testing.T) {
|
||||
fs := setupExcludeTestFS(t)
|
||||
scanner, repos, cleanup := createTestScanner(t, fs, []string{})
|
||||
defer cleanup()
|
||||
require.NotNil(t, scanner)
|
||||
|
||||
ctx := context.Background()
|
||||
createSnapshotRecord(t, ctx, repos, "test-snapshot")
|
||||
|
||||
result, err := scanner.Scan(ctx, "/backup", "test-snapshot")
|
||||
require.NoError(t, err)
|
||||
|
||||
// Should scan all 14 files
|
||||
require.Equal(t, 14, result.FilesScanned, "Should scan all files when no exclusions")
|
||||
}
|
||||
|
||||
func TestExcludePatterns_ExcludeHiddenFiles(t *testing.T) {
|
||||
fs := setupExcludeTestFS(t)
|
||||
scanner, repos, cleanup := createTestScanner(t, fs, []string{".*"})
|
||||
defer cleanup()
|
||||
require.NotNil(t, scanner)
|
||||
|
||||
ctx := context.Background()
|
||||
createSnapshotRecord(t, ctx, repos, "test-snapshot")
|
||||
|
||||
result, err := scanner.Scan(ctx, "/backup", "test-snapshot")
|
||||
require.NoError(t, err)
|
||||
|
||||
// Should exclude: .git/*, .DS_Store, src/.hidden
|
||||
// Total files: 14, excluded: 4 (.git/config, .git/objects/pack/data.pack, .DS_Store, src/.hidden)
|
||||
require.Equal(t, 10, result.FilesScanned, "Should exclude hidden files and directories")
|
||||
}
|
||||
|
||||
func TestExcludePatterns_DoubleStarGlob(t *testing.T) {
|
||||
fs := setupExcludeTestFS(t)
|
||||
scanner, repos, cleanup := createTestScanner(t, fs, []string{"**/*.pack"})
|
||||
defer cleanup()
|
||||
require.NotNil(t, scanner)
|
||||
|
||||
ctx := context.Background()
|
||||
createSnapshotRecord(t, ctx, repos, "test-snapshot")
|
||||
|
||||
result, err := scanner.Scan(ctx, "/backup", "test-snapshot")
|
||||
require.NoError(t, err)
|
||||
|
||||
// Should exclude .git/objects/pack/data.pack
|
||||
// Total files: 14, excluded: 1
|
||||
require.Equal(t, 13, result.FilesScanned, "Should exclude **/*.pack files")
|
||||
}
|
||||
|
||||
func TestExcludePatterns_ExactFileName(t *testing.T) {
|
||||
fs := setupExcludeTestFS(t)
|
||||
scanner, repos, cleanup := createTestScanner(t, fs, []string{"thumbs.db", ".DS_Store"})
|
||||
defer cleanup()
|
||||
require.NotNil(t, scanner)
|
||||
|
||||
ctx := context.Background()
|
||||
createSnapshotRecord(t, ctx, repos, "test-snapshot")
|
||||
|
||||
result, err := scanner.Scan(ctx, "/backup", "test-snapshot")
|
||||
require.NoError(t, err)
|
||||
|
||||
// Should exclude thumbs.db and .DS_Store
|
||||
// Total files: 14, excluded: 2
|
||||
require.Equal(t, 12, result.FilesScanned, "Should exclude exact file names")
|
||||
}
|
||||
|
||||
func TestExcludePatterns_CaseSensitive(t *testing.T) {
|
||||
// Pattern matching should be case-sensitive
|
||||
fs := setupExcludeTestFS(t)
|
||||
scanner, repos, cleanup := createTestScanner(t, fs, []string{"THUMBS.DB"})
|
||||
defer cleanup()
|
||||
require.NotNil(t, scanner)
|
||||
|
||||
ctx := context.Background()
|
||||
createSnapshotRecord(t, ctx, repos, "test-snapshot")
|
||||
|
||||
result, err := scanner.Scan(ctx, "/backup", "test-snapshot")
|
||||
require.NoError(t, err)
|
||||
|
||||
// Case-sensitive matching: THUMBS.DB should NOT match thumbs.db
|
||||
// All 14 files should be scanned
|
||||
require.Equal(t, 14, result.FilesScanned, "Pattern matching should be case-sensitive")
|
||||
}
|
||||
|
||||
func TestExcludePatterns_DirectoryWithTrailingSlash(t *testing.T) {
|
||||
fs := setupExcludeTestFS(t)
|
||||
// Some users might add trailing slashes to directory patterns
|
||||
scanner, repos, cleanup := createTestScanner(t, fs, []string{"cache/", "build/"})
|
||||
defer cleanup()
|
||||
require.NotNil(t, scanner)
|
||||
|
||||
ctx := context.Background()
|
||||
createSnapshotRecord(t, ctx, repos, "test-snapshot")
|
||||
|
||||
result, err := scanner.Scan(ctx, "/backup", "test-snapshot")
|
||||
require.NoError(t, err)
|
||||
|
||||
// Should exclude cache/temp.dat and build/output.bin
|
||||
// Total files: 14, excluded: 2
|
||||
require.Equal(t, 12, result.FilesScanned, "Should handle directory patterns with trailing slashes")
|
||||
}
|
||||
|
||||
func TestExcludePatterns_PatternInSubdirectory(t *testing.T) {
|
||||
fs := setupExcludeTestFS(t)
|
||||
// Exclude .hidden file specifically in src directory
|
||||
scanner, repos, cleanup := createTestScanner(t, fs, []string{"src/.hidden"})
|
||||
defer cleanup()
|
||||
require.NotNil(t, scanner)
|
||||
|
||||
ctx := context.Background()
|
||||
createSnapshotRecord(t, ctx, repos, "test-snapshot")
|
||||
|
||||
result, err := scanner.Scan(ctx, "/backup", "test-snapshot")
|
||||
require.NoError(t, err)
|
||||
|
||||
// Should exclude only src/.hidden
|
||||
// Total files: 14, excluded: 1
|
||||
require.Equal(t, 13, result.FilesScanned, "Should exclude specific subdirectory files")
|
||||
}
|
||||
|
||||
// setupAnchoredTestFS creates a filesystem for testing anchored patterns
|
||||
// Source dir: /backup
|
||||
// Structure:
|
||||
//
|
||||
// /backup/
|
||||
// projectname/
|
||||
// file.txt (should be excluded with /projectname)
|
||||
// otherproject/
|
||||
// projectname/
|
||||
// file.txt (should NOT be excluded with /projectname, only with projectname)
|
||||
// src/
|
||||
// file.go
|
||||
func setupAnchoredTestFS(t *testing.T) afero.Fs {
|
||||
t.Helper()
|
||||
|
||||
fs := afero.NewMemMapFs()
|
||||
|
||||
files := map[string]string{
|
||||
"/backup/projectname/file.txt": "root project file",
|
||||
"/backup/otherproject/projectname/file.txt": "nested project file",
|
||||
"/backup/src/file.go": "source file",
|
||||
"/backup/file.txt": "root file",
|
||||
}
|
||||
|
||||
testTime := time.Date(2024, 1, 1, 12, 0, 0, 0, time.UTC)
|
||||
for path, content := range files {
|
||||
dir := filepath.Dir(path)
|
||||
err := fs.MkdirAll(dir, 0755)
|
||||
require.NoError(t, err)
|
||||
err = afero.WriteFile(fs, path, []byte(content), 0644)
|
||||
require.NoError(t, err)
|
||||
err = fs.Chtimes(path, testTime, testTime)
|
||||
require.NoError(t, err)
|
||||
}
|
||||
|
||||
return fs
|
||||
}
|
||||
|
||||
func TestExcludePatterns_AnchoredPattern(t *testing.T) {
|
||||
// Pattern starting with / should only match from root of source dir
|
||||
fs := setupAnchoredTestFS(t)
|
||||
scanner, repos, cleanup := createTestScanner(t, fs, []string{"/projectname"})
|
||||
defer cleanup()
|
||||
require.NotNil(t, scanner)
|
||||
|
||||
ctx := context.Background()
|
||||
createSnapshotRecord(t, ctx, repos, "test-snapshot")
|
||||
|
||||
result, err := scanner.Scan(ctx, "/backup", "test-snapshot")
|
||||
require.NoError(t, err)
|
||||
|
||||
// /projectname should ONLY exclude /backup/projectname/file.txt (1 file)
|
||||
// /backup/otherproject/projectname/file.txt should NOT be excluded
|
||||
// Total files: 4, excluded: 1
|
||||
require.Equal(t, 3, result.FilesScanned, "Anchored pattern /projectname should only match at root of source dir")
|
||||
}
|
||||
|
||||
func TestExcludePatterns_UnanchoredPattern(t *testing.T) {
|
||||
// Pattern without leading / should match anywhere in path
|
||||
fs := setupAnchoredTestFS(t)
|
||||
scanner, repos, cleanup := createTestScanner(t, fs, []string{"projectname"})
|
||||
defer cleanup()
|
||||
require.NotNil(t, scanner)
|
||||
|
||||
ctx := context.Background()
|
||||
createSnapshotRecord(t, ctx, repos, "test-snapshot")
|
||||
|
||||
result, err := scanner.Scan(ctx, "/backup", "test-snapshot")
|
||||
require.NoError(t, err)
|
||||
|
||||
// projectname (without /) should exclude BOTH:
|
||||
// - /backup/projectname/file.txt
|
||||
// - /backup/otherproject/projectname/file.txt
|
||||
// Total files: 4, excluded: 2
|
||||
require.Equal(t, 2, result.FilesScanned, "Unanchored pattern should match anywhere in path")
|
||||
}
|
||||
|
||||
func TestExcludePatterns_AnchoredPatternWithGlob(t *testing.T) {
|
||||
// Anchored pattern with glob
|
||||
fs := setupAnchoredTestFS(t)
|
||||
scanner, repos, cleanup := createTestScanner(t, fs, []string{"/src/*.go"})
|
||||
defer cleanup()
|
||||
require.NotNil(t, scanner)
|
||||
|
||||
ctx := context.Background()
|
||||
createSnapshotRecord(t, ctx, repos, "test-snapshot")
|
||||
|
||||
result, err := scanner.Scan(ctx, "/backup", "test-snapshot")
|
||||
require.NoError(t, err)
|
||||
|
||||
// /src/*.go should exclude /backup/src/file.go
|
||||
// Total files: 4, excluded: 1
|
||||
require.Equal(t, 3, result.FilesScanned, "Anchored pattern with glob should work")
|
||||
}
|
||||
|
||||
func TestExcludePatterns_AnchoredPatternFile(t *testing.T) {
|
||||
// Anchored pattern for exact file at root
|
||||
fs := setupAnchoredTestFS(t)
|
||||
scanner, repos, cleanup := createTestScanner(t, fs, []string{"/file.txt"})
|
||||
defer cleanup()
|
||||
require.NotNil(t, scanner)
|
||||
|
||||
ctx := context.Background()
|
||||
createSnapshotRecord(t, ctx, repos, "test-snapshot")
|
||||
|
||||
result, err := scanner.Scan(ctx, "/backup", "test-snapshot")
|
||||
require.NoError(t, err)
|
||||
|
||||
// /file.txt should ONLY exclude /backup/file.txt
|
||||
// NOT /backup/projectname/file.txt or /backup/otherproject/projectname/file.txt
|
||||
// Total files: 4, excluded: 1
|
||||
require.Equal(t, 3, result.FilesScanned, "Anchored pattern for file should only match at root")
|
||||
}
|
||||
|
||||
func TestExcludePatterns_UnanchoredPatternFile(t *testing.T) {
|
||||
// Unanchored pattern for file should match anywhere
|
||||
fs := setupAnchoredTestFS(t)
|
||||
scanner, repos, cleanup := createTestScanner(t, fs, []string{"file.txt"})
|
||||
defer cleanup()
|
||||
require.NotNil(t, scanner)
|
||||
|
||||
ctx := context.Background()
|
||||
createSnapshotRecord(t, ctx, repos, "test-snapshot")
|
||||
|
||||
result, err := scanner.Scan(ctx, "/backup", "test-snapshot")
|
||||
require.NoError(t, err)
|
||||
|
||||
// file.txt should exclude ALL file.txt files:
|
||||
// - /backup/file.txt
|
||||
// - /backup/projectname/file.txt
|
||||
// - /backup/otherproject/projectname/file.txt
|
||||
// Total files: 4, excluded: 3
|
||||
require.Equal(t, 1, result.FilesScanned, "Unanchored pattern for file should match anywhere")
|
||||
}
|
||||
@@ -6,12 +6,13 @@ import (
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"git.eeqj.de/sneak/vaultik/internal/database"
|
||||
"git.eeqj.de/sneak/vaultik/internal/log"
|
||||
"git.eeqj.de/sneak/vaultik/internal/snapshot"
|
||||
"github.com/spf13/afero"
|
||||
"github.com/stretchr/testify/assert"
|
||||
"github.com/stretchr/testify/require"
|
||||
"sneak.berlin/go/vaultik/internal/database"
|
||||
"sneak.berlin/go/vaultik/internal/log"
|
||||
"sneak.berlin/go/vaultik/internal/snapshot"
|
||||
"sneak.berlin/go/vaultik/internal/types"
|
||||
)
|
||||
|
||||
// TestFileContentChange verifies that when a file's content changes,
|
||||
@@ -53,7 +54,7 @@ func TestFileContentChange(t *testing.T) {
|
||||
snapshotID1 := "snapshot1"
|
||||
err = repos.WithTx(ctx, func(ctx context.Context, tx *sql.Tx) error {
|
||||
snapshot := &database.Snapshot{
|
||||
ID: snapshotID1,
|
||||
ID: types.SnapshotID(snapshotID1),
|
||||
Hostname: "test-host",
|
||||
VaultikVersion: "test",
|
||||
StartedAt: time.Now(),
|
||||
@@ -87,7 +88,7 @@ func TestFileContentChange(t *testing.T) {
|
||||
snapshotID2 := "snapshot2"
|
||||
err = repos.WithTx(ctx, func(ctx context.Context, tx *sql.Tx) error {
|
||||
snapshot := &database.Snapshot{
|
||||
ID: snapshotID2,
|
||||
ID: types.SnapshotID(snapshotID2),
|
||||
Hostname: "test-host",
|
||||
VaultikVersion: "test",
|
||||
StartedAt: time.Now(),
|
||||
@@ -117,12 +118,12 @@ func TestFileContentChange(t *testing.T) {
|
||||
assert.Equal(t, newChunkHash, chunkFiles2[0].ChunkHash)
|
||||
|
||||
// Verify old chunk still exists (it's still valid data)
|
||||
oldChunk, err := repos.Chunks.GetByHash(ctx, oldChunkHash)
|
||||
oldChunk, err := repos.Chunks.GetByHash(ctx, oldChunkHash.String())
|
||||
require.NoError(t, err)
|
||||
assert.NotNil(t, oldChunk)
|
||||
|
||||
// Verify new chunk exists
|
||||
newChunk, err := repos.Chunks.GetByHash(ctx, newChunkHash)
|
||||
newChunk, err := repos.Chunks.GetByHash(ctx, newChunkHash.String())
|
||||
require.NoError(t, err)
|
||||
assert.NotNil(t, newChunk)
|
||||
|
||||
@@ -182,7 +183,7 @@ func TestMultipleFileChanges(t *testing.T) {
|
||||
snapshotID1 := "snapshot1"
|
||||
err = repos.WithTx(ctx, func(ctx context.Context, tx *sql.Tx) error {
|
||||
snapshot := &database.Snapshot{
|
||||
ID: snapshotID1,
|
||||
ID: types.SnapshotID(snapshotID1),
|
||||
Hostname: "test-host",
|
||||
VaultikVersion: "test",
|
||||
StartedAt: time.Now(),
|
||||
@@ -208,7 +209,7 @@ func TestMultipleFileChanges(t *testing.T) {
|
||||
snapshotID2 := "snapshot2"
|
||||
err = repos.WithTx(ctx, func(ctx context.Context, tx *sql.Tx) error {
|
||||
snapshot := &database.Snapshot{
|
||||
ID: snapshotID2,
|
||||
ID: types.SnapshotID(snapshotID2),
|
||||
Hostname: "test-host",
|
||||
VaultikVersion: "test",
|
||||
StartedAt: time.Now(),
|
||||
|
||||
@@ -1,17 +1,19 @@
|
||||
package snapshot
|
||||
|
||||
import (
|
||||
"git.eeqj.de/sneak/vaultik/internal/config"
|
||||
"git.eeqj.de/sneak/vaultik/internal/database"
|
||||
"git.eeqj.de/sneak/vaultik/internal/storage"
|
||||
"github.com/spf13/afero"
|
||||
"go.uber.org/fx"
|
||||
"sneak.berlin/go/vaultik/internal/config"
|
||||
"sneak.berlin/go/vaultik/internal/database"
|
||||
"sneak.berlin/go/vaultik/internal/storage"
|
||||
)
|
||||
|
||||
// ScannerParams holds parameters for scanner creation
|
||||
type ScannerParams struct {
|
||||
EnableProgress bool
|
||||
Fs afero.Fs
|
||||
Exclude []string // Exclude patterns (combined global + snapshot-specific)
|
||||
SkipErrors bool // Skip file read errors (log loudly but continue)
|
||||
}
|
||||
|
||||
// Module exports backup functionality as an fx module.
|
||||
@@ -29,6 +31,12 @@ type ScannerFactory func(params ScannerParams) *Scanner
|
||||
|
||||
func provideScannerFactory(cfg *config.Config, repos *database.Repositories, storer storage.Storer) ScannerFactory {
|
||||
return func(params ScannerParams) *Scanner {
|
||||
// Use provided excludes, or fall back to global config excludes
|
||||
excludes := params.Exclude
|
||||
if len(excludes) == 0 {
|
||||
excludes = cfg.Exclude
|
||||
}
|
||||
|
||||
return NewScanner(ScannerConfig{
|
||||
FS: params.Fs,
|
||||
ChunkSize: cfg.ChunkSize.Int64(),
|
||||
@@ -38,6 +46,8 @@ func provideScannerFactory(cfg *config.Config, repos *database.Repositories, sto
|
||||
CompressionLevel: cfg.CompressionLevel,
|
||||
AgeRecipients: cfg.AgeRecipients,
|
||||
EnableProgress: params.EnableProgress,
|
||||
Exclude: excludes,
|
||||
SkipErrors: params.SkipErrors,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
@@ -10,8 +10,8 @@ import (
|
||||
"syscall"
|
||||
"time"
|
||||
|
||||
"git.eeqj.de/sneak/vaultik/internal/log"
|
||||
"github.com/dustin/go-humanize"
|
||||
"sneak.berlin/go/vaultik/internal/log"
|
||||
)
|
||||
|
||||
const (
|
||||
@@ -39,7 +39,7 @@ type ProgressStats struct {
|
||||
BlobsCreated atomic.Int64
|
||||
BlobsUploaded atomic.Int64
|
||||
BytesUploaded atomic.Int64
|
||||
UploadDurationMs atomic.Int64 // Total milliseconds spent uploading to S3
|
||||
UploadDurationMs atomic.Int64 // Total milliseconds spent uploading
|
||||
CurrentFile atomic.Value // stores string
|
||||
TotalSize atomic.Int64 // Total size to process (set after scan phase)
|
||||
TotalFiles atomic.Int64 // Total files to process in phase 2
|
||||
@@ -273,7 +273,7 @@ func (pr *ProgressReporter) printDetailedStatus() {
|
||||
"created", blobsCreated,
|
||||
"uploaded", blobsUploaded,
|
||||
"pending", blobsCreated-blobsUploaded)
|
||||
log.Info("Total uploaded to S3",
|
||||
log.Info("Total uploaded to remote",
|
||||
"uploaded", humanize.Bytes(uint64(bytesUploaded)),
|
||||
"compression_ratio", formatRatio(bytesUploaded, bytesScanned))
|
||||
if currentFile != "" {
|
||||
@@ -336,7 +336,7 @@ func (pr *ProgressReporter) ReportUploadStart(blobHash string, size int64) {
|
||||
pr.stats.CurrentUpload.Store(info)
|
||||
|
||||
// Log the start of upload
|
||||
log.Info("Starting blob upload to S3",
|
||||
log.Info("Starting blob upload",
|
||||
"hash", blobHash[:8]+"...",
|
||||
"size", humanize.Bytes(uint64(size)))
|
||||
}
|
||||
|
||||
File diff suppressed because it is too large
Load Diff
@@ -7,10 +7,11 @@ import (
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"git.eeqj.de/sneak/vaultik/internal/database"
|
||||
"git.eeqj.de/sneak/vaultik/internal/log"
|
||||
"git.eeqj.de/sneak/vaultik/internal/snapshot"
|
||||
"github.com/spf13/afero"
|
||||
"sneak.berlin/go/vaultik/internal/database"
|
||||
"sneak.berlin/go/vaultik/internal/log"
|
||||
"sneak.berlin/go/vaultik/internal/snapshot"
|
||||
"sneak.berlin/go/vaultik/internal/types"
|
||||
)
|
||||
|
||||
func TestScannerSimpleDirectory(t *testing.T) {
|
||||
@@ -74,7 +75,7 @@ func TestScannerSimpleDirectory(t *testing.T) {
|
||||
snapshotID := "test-snapshot-001"
|
||||
err = repos.WithTx(ctx, func(ctx context.Context, tx *sql.Tx) error {
|
||||
snapshot := &database.Snapshot{
|
||||
ID: snapshotID,
|
||||
ID: types.SnapshotID(snapshotID),
|
||||
Hostname: "test-host",
|
||||
VaultikVersion: "test",
|
||||
StartedAt: time.Now(),
|
||||
@@ -109,15 +110,15 @@ func TestScannerSimpleDirectory(t *testing.T) {
|
||||
t.Errorf("expected at least 97 bytes scanned, got %d", result.BytesScanned)
|
||||
}
|
||||
|
||||
// Verify files in database - only regular files are stored
|
||||
// Verify files in database - includes regular files and directories
|
||||
files, err := repos.Files.ListByPrefix(ctx, "/source")
|
||||
if err != nil {
|
||||
t.Fatalf("failed to list files: %v", err)
|
||||
}
|
||||
|
||||
// We should have 6 files (directories are not stored)
|
||||
if len(files) != 6 {
|
||||
t.Errorf("expected 6 files in database, got %d", len(files))
|
||||
// 6 regular files + 3 directories (/source, /source/subdir, /source/subdir2)
|
||||
if len(files) != 9 {
|
||||
t.Errorf("expected 9 entries in database (6 files + 3 dirs), got %d", len(files))
|
||||
}
|
||||
|
||||
// Verify specific file
|
||||
@@ -209,7 +210,7 @@ func TestScannerLargeFile(t *testing.T) {
|
||||
snapshotID := "test-snapshot-001"
|
||||
err = repos.WithTx(ctx, func(ctx context.Context, tx *sql.Tx) error {
|
||||
snapshot := &database.Snapshot{
|
||||
ID: snapshotID,
|
||||
ID: types.SnapshotID(snapshotID),
|
||||
Hostname: "test-host",
|
||||
VaultikVersion: "test",
|
||||
StartedAt: time.Now(),
|
||||
|
||||
@@ -19,24 +19,19 @@ package snapshot
|
||||
// - Blobs not containing any remaining chunks
|
||||
// - All related mapping tables (file_chunks, chunk_files, blob_chunks)
|
||||
// 7. Close the temporary database
|
||||
// 8. Use sqlite3 to dump the cleaned database to SQL
|
||||
// 9. Delete the temporary database file
|
||||
// 10. Compress the SQL dump with zstd
|
||||
// 11. Encrypt the compressed dump with age (if encryption is enabled)
|
||||
// 12. Upload to S3 as: snapshots/{snapshot-id}.sql.zst[.age]
|
||||
// 13. Reopen the main database
|
||||
// 8. VACUUM the database to remove deleted data and compact (security critical)
|
||||
// 9. Compress the binary database with zstd
|
||||
// 10. Encrypt the compressed database with age (if encryption is enabled)
|
||||
// 11. Upload to S3 as: metadata/{snapshot-id}/db.zst.age
|
||||
// 12. Reopen the main database
|
||||
//
|
||||
// Advantages of this approach:
|
||||
// - No custom metadata format needed
|
||||
// - Reuses existing database schema and relationships
|
||||
// - SQL dumps are portable and compress well
|
||||
// - Restore process can simply execute the SQL
|
||||
// - Binary SQLite files are portable and compress well
|
||||
// - Fast restore - just decompress and open (no SQL parsing)
|
||||
// - VACUUM ensures no deleted data leaks
|
||||
// - Atomic and consistent snapshot of all metadata
|
||||
//
|
||||
// TODO: Future improvements:
|
||||
// - Add snapshot-file relationships to track which files belong to which snapshot
|
||||
// - Implement incremental snapshots that reference previous snapshots
|
||||
// - Add snapshot manifest with additional metadata (size, chunk count, etc.)
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
@@ -46,16 +41,18 @@ import (
|
||||
"io"
|
||||
"os/exec"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"git.eeqj.de/sneak/vaultik/internal/blobgen"
|
||||
"git.eeqj.de/sneak/vaultik/internal/config"
|
||||
"git.eeqj.de/sneak/vaultik/internal/database"
|
||||
"git.eeqj.de/sneak/vaultik/internal/log"
|
||||
"git.eeqj.de/sneak/vaultik/internal/storage"
|
||||
"github.com/dustin/go-humanize"
|
||||
"github.com/spf13/afero"
|
||||
"go.uber.org/fx"
|
||||
"sneak.berlin/go/vaultik/internal/blobgen"
|
||||
"sneak.berlin/go/vaultik/internal/config"
|
||||
"sneak.berlin/go/vaultik/internal/database"
|
||||
"sneak.berlin/go/vaultik/internal/log"
|
||||
"sneak.berlin/go/vaultik/internal/storage"
|
||||
"sneak.berlin/go/vaultik/internal/types"
|
||||
)
|
||||
|
||||
// SnapshotManager handles snapshot creation and metadata export
|
||||
@@ -89,15 +86,35 @@ func (sm *SnapshotManager) SetFilesystem(fs afero.Fs) {
|
||||
sm.fs = fs
|
||||
}
|
||||
|
||||
// CreateSnapshot creates a new snapshot record in the database at the start of a backup
|
||||
// CreateSnapshot creates a new snapshot record in the database at the start of a backup.
|
||||
// Deprecated: Use CreateSnapshotWithName instead for multi-snapshot support.
|
||||
func (sm *SnapshotManager) CreateSnapshot(ctx context.Context, hostname, version, gitRevision string) (string, error) {
|
||||
snapshotID := fmt.Sprintf("%s-%s", hostname, time.Now().UTC().Format("20060102-150405Z"))
|
||||
return sm.CreateSnapshotWithName(ctx, hostname, "", version, gitRevision)
|
||||
}
|
||||
|
||||
// CreateSnapshotWithName creates a new snapshot record with an optional snapshot name.
|
||||
// The snapshot ID format is: hostname_name_timestamp or hostname_timestamp if name is empty.
|
||||
func (sm *SnapshotManager) CreateSnapshotWithName(ctx context.Context, hostname, name, version, gitRevision string) (string, error) {
|
||||
// Use short hostname (strip domain if present)
|
||||
shortHostname := hostname
|
||||
if idx := strings.Index(hostname, "."); idx != -1 {
|
||||
shortHostname = hostname[:idx]
|
||||
}
|
||||
|
||||
// Build snapshot ID with optional name
|
||||
timestamp := time.Now().UTC().Format("2006-01-02T15:04:05Z")
|
||||
var snapshotID string
|
||||
if name != "" {
|
||||
snapshotID = fmt.Sprintf("%s_%s_%s", shortHostname, name, timestamp)
|
||||
} else {
|
||||
snapshotID = fmt.Sprintf("%s_%s", shortHostname, timestamp)
|
||||
}
|
||||
|
||||
snapshot := &database.Snapshot{
|
||||
ID: snapshotID,
|
||||
Hostname: hostname,
|
||||
VaultikVersion: version,
|
||||
VaultikGitRevision: gitRevision,
|
||||
ID: types.SnapshotID(snapshotID),
|
||||
Hostname: types.Hostname(hostname),
|
||||
VaultikVersion: types.Version(version),
|
||||
VaultikGitRevision: types.GitRevision(gitRevision),
|
||||
StartedAt: time.Now().UTC(),
|
||||
CompletedAt: nil, // Not completed yet
|
||||
FileCount: 0,
|
||||
@@ -210,12 +227,39 @@ func (sm *SnapshotManager) ExportSnapshotMetadata(ctx context.Context, dbPath st
|
||||
}
|
||||
}()
|
||||
|
||||
// Steps 1-5: Copy, clean, vacuum, compress, and read the database
|
||||
finalData, tempDBPath, err := sm.prepareExportDB(ctx, dbPath, snapshotID, tempDir)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// Step 6: Generate blob manifest (before closing temp DB)
|
||||
blobManifest, err := sm.generateBlobManifest(ctx, tempDBPath, snapshotID)
|
||||
if err != nil {
|
||||
return fmt.Errorf("generating blob manifest: %w", err)
|
||||
}
|
||||
|
||||
// Step 7: Upload to S3 in snapshot subdirectory
|
||||
if err := sm.uploadSnapshotArtifacts(ctx, snapshotID, finalData, blobManifest); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
log.Info("Uploaded snapshot metadata",
|
||||
"snapshot_id", snapshotID,
|
||||
"db_size", len(finalData),
|
||||
"manifest_size", len(blobManifest))
|
||||
return nil
|
||||
}
|
||||
|
||||
// prepareExportDB copies, cleans, vacuums, and compresses the snapshot database for export.
|
||||
// Returns the compressed data and the path to the temporary database (needed for manifest generation).
|
||||
func (sm *SnapshotManager) prepareExportDB(ctx context.Context, dbPath, snapshotID, tempDir string) ([]byte, string, error) {
|
||||
// Step 1: Copy database to temp file
|
||||
// The main database should be closed at this point
|
||||
tempDBPath := filepath.Join(tempDir, "snapshot.db")
|
||||
log.Debug("Copying database to temporary location", "source", dbPath, "destination", tempDBPath)
|
||||
if err := sm.copyFile(dbPath, tempDBPath); err != nil {
|
||||
return fmt.Errorf("copying database: %w", err)
|
||||
return nil, "", fmt.Errorf("copying database: %w", err)
|
||||
}
|
||||
log.Debug("Database copy complete", "size", sm.getFileSize(tempDBPath))
|
||||
|
||||
@@ -223,7 +267,7 @@ func (sm *SnapshotManager) ExportSnapshotMetadata(ctx context.Context, dbPath st
|
||||
log.Debug("Cleaning temporary database", "snapshot_id", snapshotID)
|
||||
stats, err := sm.cleanSnapshotDB(ctx, tempDBPath, snapshotID)
|
||||
if err != nil {
|
||||
return fmt.Errorf("cleaning snapshot database: %w", err)
|
||||
return nil, "", fmt.Errorf("cleaning snapshot database: %w", err)
|
||||
}
|
||||
log.Info("Temporary database cleanup complete",
|
||||
"db_path", tempDBPath,
|
||||
@@ -235,68 +279,62 @@ func (sm *SnapshotManager) ExportSnapshotMetadata(ctx context.Context, dbPath st
|
||||
"total_uncompressed_size", humanize.Bytes(uint64(stats.UncompressedSize)),
|
||||
"compression_ratio", fmt.Sprintf("%.2fx", float64(stats.UncompressedSize)/float64(stats.CompressedSize)))
|
||||
|
||||
// Step 3: Dump the cleaned database to SQL
|
||||
dumpPath := filepath.Join(tempDir, "snapshot.sql")
|
||||
if err := sm.dumpDatabase(tempDBPath, dumpPath); err != nil {
|
||||
return fmt.Errorf("dumping database: %w", err)
|
||||
// Step 3: VACUUM the database to remove deleted data and compact
|
||||
// This is critical for security - ensures no stale/deleted data is uploaded
|
||||
if err := sm.vacuumDatabase(tempDBPath); err != nil {
|
||||
return nil, "", fmt.Errorf("vacuuming database: %w", err)
|
||||
}
|
||||
log.Debug("SQL dump complete", "size", humanize.Bytes(uint64(sm.getFileSize(dumpPath))))
|
||||
log.Debug("Database vacuumed", "size", humanize.Bytes(uint64(sm.getFileSize(tempDBPath))))
|
||||
|
||||
// Step 4: Compress and encrypt the SQL dump
|
||||
compressedPath := filepath.Join(tempDir, "snapshot.sql.zst.age")
|
||||
if err := sm.compressDump(dumpPath, compressedPath); err != nil {
|
||||
return fmt.Errorf("compressing dump: %w", err)
|
||||
// Step 4: Compress and encrypt the binary database file
|
||||
compressedPath := filepath.Join(tempDir, "db.zst.age")
|
||||
if err := sm.compressFile(tempDBPath, compressedPath); err != nil {
|
||||
return nil, "", fmt.Errorf("compressing database: %w", err)
|
||||
}
|
||||
log.Debug("Compression complete",
|
||||
"original_size", humanize.Bytes(uint64(sm.getFileSize(dumpPath))),
|
||||
"original_size", humanize.Bytes(uint64(sm.getFileSize(tempDBPath))),
|
||||
"compressed_size", humanize.Bytes(uint64(sm.getFileSize(compressedPath))))
|
||||
|
||||
// Step 5: Read compressed and encrypted data for upload
|
||||
finalData, err := afero.ReadFile(sm.fs, compressedPath)
|
||||
if err != nil {
|
||||
return fmt.Errorf("reading compressed dump: %w", err)
|
||||
return nil, "", fmt.Errorf("reading compressed dump: %w", err)
|
||||
}
|
||||
|
||||
// Step 6: Generate blob manifest (before closing temp DB)
|
||||
blobManifest, err := sm.generateBlobManifest(ctx, tempDBPath, snapshotID)
|
||||
if err != nil {
|
||||
return fmt.Errorf("generating blob manifest: %w", err)
|
||||
}
|
||||
return finalData, tempDBPath, nil
|
||||
}
|
||||
|
||||
// Step 7: Upload to S3 in snapshot subdirectory
|
||||
// uploadSnapshotArtifacts uploads the database backup and blob manifest to S3
|
||||
func (sm *SnapshotManager) uploadSnapshotArtifacts(ctx context.Context, snapshotID string, dbData, manifestData []byte) error {
|
||||
// Upload database backup (compressed and encrypted)
|
||||
dbKey := fmt.Sprintf("metadata/%s/db.zst.age", snapshotID)
|
||||
|
||||
dbUploadStart := time.Now()
|
||||
if err := sm.storage.Put(ctx, dbKey, bytes.NewReader(finalData)); err != nil {
|
||||
if err := sm.storage.Put(ctx, dbKey, bytes.NewReader(dbData)); err != nil {
|
||||
return fmt.Errorf("uploading snapshot database: %w", err)
|
||||
}
|
||||
dbUploadDuration := time.Since(dbUploadStart)
|
||||
dbUploadSpeed := float64(len(finalData)) * 8 / dbUploadDuration.Seconds() // bits per second
|
||||
log.Info("Uploaded snapshot database to S3",
|
||||
dbUploadSpeed := float64(len(dbData)) * 8 / dbUploadDuration.Seconds() // bits per second
|
||||
log.Info("Uploaded snapshot database",
|
||||
"path", dbKey,
|
||||
"size", humanize.Bytes(uint64(len(finalData))),
|
||||
"size", humanize.Bytes(uint64(len(dbData))),
|
||||
"duration", dbUploadDuration,
|
||||
"speed", humanize.SI(dbUploadSpeed, "bps"))
|
||||
|
||||
// Upload blob manifest (compressed only, not encrypted)
|
||||
manifestKey := fmt.Sprintf("metadata/%s/manifest.json.zst", snapshotID)
|
||||
manifestUploadStart := time.Now()
|
||||
if err := sm.storage.Put(ctx, manifestKey, bytes.NewReader(blobManifest)); err != nil {
|
||||
if err := sm.storage.Put(ctx, manifestKey, bytes.NewReader(manifestData)); err != nil {
|
||||
return fmt.Errorf("uploading blob manifest: %w", err)
|
||||
}
|
||||
manifestUploadDuration := time.Since(manifestUploadStart)
|
||||
manifestUploadSpeed := float64(len(blobManifest)) * 8 / manifestUploadDuration.Seconds() // bits per second
|
||||
log.Info("Uploaded blob manifest to S3",
|
||||
manifestUploadSpeed := float64(len(manifestData)) * 8 / manifestUploadDuration.Seconds() // bits per second
|
||||
log.Info("Uploaded blob manifest",
|
||||
"path", manifestKey,
|
||||
"size", humanize.Bytes(uint64(len(blobManifest))),
|
||||
"size", humanize.Bytes(uint64(len(manifestData))),
|
||||
"duration", manifestUploadDuration,
|
||||
"speed", humanize.SI(manifestUploadSpeed, "bps"))
|
||||
|
||||
log.Info("Uploaded snapshot metadata",
|
||||
"snapshot_id", snapshotID,
|
||||
"db_size", len(finalData),
|
||||
"manifest_size", len(blobManifest))
|
||||
return nil
|
||||
}
|
||||
|
||||
@@ -416,26 +454,21 @@ func (sm *SnapshotManager) cleanSnapshotDB(ctx context.Context, dbPath string, s
|
||||
return stats, nil
|
||||
}
|
||||
|
||||
// dumpDatabase creates a SQL dump of the database
|
||||
func (sm *SnapshotManager) dumpDatabase(dbPath, dumpPath string) error {
|
||||
log.Debug("Running sqlite3 dump command", "source", dbPath, "destination", dumpPath)
|
||||
cmd := exec.Command("sqlite3", dbPath, ".dump")
|
||||
// vacuumDatabase runs VACUUM on the database to remove deleted data and compact
|
||||
// This is critical for security - ensures no stale/deleted data pages are uploaded
|
||||
func (sm *SnapshotManager) vacuumDatabase(dbPath string) error {
|
||||
log.Debug("Running VACUUM on database", "path", dbPath)
|
||||
cmd := exec.Command("sqlite3", dbPath, "VACUUM;")
|
||||
|
||||
output, err := cmd.Output()
|
||||
if err != nil {
|
||||
return fmt.Errorf("running sqlite3 dump: %w", err)
|
||||
}
|
||||
|
||||
log.Debug("SQL dump generated", "size", humanize.Bytes(uint64(len(output))))
|
||||
if err := afero.WriteFile(sm.fs, dumpPath, output, 0644); err != nil {
|
||||
return fmt.Errorf("writing dump file: %w", err)
|
||||
if output, err := cmd.CombinedOutput(); err != nil {
|
||||
return fmt.Errorf("running VACUUM: %w (output: %s)", err, string(output))
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// compressDump compresses the SQL dump using zstd
|
||||
func (sm *SnapshotManager) compressDump(inputPath, outputPath string) error {
|
||||
// compressFile compresses a file using zstd and encrypts with age
|
||||
func (sm *SnapshotManager) compressFile(inputPath, outputPath string) error {
|
||||
input, err := sm.fs.Open(inputPath)
|
||||
if err != nil {
|
||||
return fmt.Errorf("opening input file: %w", err)
|
||||
@@ -646,7 +679,7 @@ func (sm *SnapshotManager) CleanupIncompleteSnapshots(ctx context.Context, hostn
|
||||
log.Info("Cleaning up incomplete snapshot record", "snapshot_id", snapshot.ID, "started_at", snapshot.StartedAt)
|
||||
|
||||
// Delete the snapshot and all its associations
|
||||
if err := sm.deleteSnapshot(ctx, snapshot.ID); err != nil {
|
||||
if err := sm.deleteSnapshot(ctx, snapshot.ID.String()); err != nil {
|
||||
return fmt.Errorf("deleting incomplete snapshot %s: %w", snapshot.ID, err)
|
||||
}
|
||||
|
||||
@@ -654,8 +687,8 @@ func (sm *SnapshotManager) CleanupIncompleteSnapshots(ctx context.Context, hostn
|
||||
} else {
|
||||
// Metadata exists - this snapshot was completed but database wasn't updated
|
||||
// This shouldn't happen in normal operation, but mark it complete
|
||||
log.Warn("Found snapshot with S3 metadata but incomplete in database", "snapshot_id", snapshot.ID)
|
||||
if err := sm.repos.Snapshots.MarkComplete(ctx, nil, snapshot.ID); err != nil {
|
||||
log.Warn("Found snapshot with remote metadata but incomplete in database", "snapshot_id", snapshot.ID)
|
||||
if err := sm.repos.Snapshots.MarkComplete(ctx, nil, snapshot.ID.String()); err != nil {
|
||||
log.Error("Failed to mark snapshot as complete in database", "snapshot_id", snapshot.ID, "error", err)
|
||||
}
|
||||
}
|
||||
@@ -688,15 +721,16 @@ func (sm *SnapshotManager) deleteSnapshot(ctx context.Context, snapshotID string
|
||||
|
||||
// Clean up orphaned data
|
||||
log.Debug("Cleaning up orphaned records in main database")
|
||||
if err := sm.cleanupOrphanedData(ctx); err != nil {
|
||||
if err := sm.CleanupOrphanedData(ctx); err != nil {
|
||||
return fmt.Errorf("cleaning up orphaned data: %w", err)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// cleanupOrphanedData removes files, chunks, and blobs that are no longer referenced by any snapshot
|
||||
func (sm *SnapshotManager) cleanupOrphanedData(ctx context.Context) error {
|
||||
// CleanupOrphanedData removes files, chunks, and blobs that are no longer referenced by any snapshot.
|
||||
// This should be called periodically to clean up data from deleted or incomplete snapshots.
|
||||
func (sm *SnapshotManager) CleanupOrphanedData(ctx context.Context) error {
|
||||
// Order is important to respect foreign key constraints:
|
||||
// 1. Delete orphaned files (will cascade delete file_chunks)
|
||||
// 2. Delete orphaned blobs (will cascade delete blob_chunks for deleted blobs)
|
||||
|
||||
@@ -7,10 +7,10 @@ import (
|
||||
"path/filepath"
|
||||
"testing"
|
||||
|
||||
"git.eeqj.de/sneak/vaultik/internal/config"
|
||||
"git.eeqj.de/sneak/vaultik/internal/database"
|
||||
"git.eeqj.de/sneak/vaultik/internal/log"
|
||||
"github.com/spf13/afero"
|
||||
"sneak.berlin/go/vaultik/internal/config"
|
||||
"sneak.berlin/go/vaultik/internal/database"
|
||||
"sneak.berlin/go/vaultik/internal/log"
|
||||
)
|
||||
|
||||
const (
|
||||
@@ -101,7 +101,7 @@ func TestCleanSnapshotDBEmptySnapshot(t *testing.T) {
|
||||
config: cfg,
|
||||
fs: fs,
|
||||
}
|
||||
if _, err := sm.cleanSnapshotDB(ctx, tempDBPath, snapshot.ID); err != nil {
|
||||
if _, err := sm.cleanSnapshotDB(ctx, tempDBPath, snapshot.ID.String()); err != nil {
|
||||
t.Fatalf("failed to clean snapshot database: %v", err)
|
||||
}
|
||||
|
||||
@@ -119,7 +119,7 @@ func TestCleanSnapshotDBEmptySnapshot(t *testing.T) {
|
||||
cleanedRepos := database.NewRepositories(cleanedDB)
|
||||
|
||||
// Verify snapshot exists
|
||||
verifySnapshot, err := cleanedRepos.Snapshots.GetByID(ctx, snapshot.ID)
|
||||
verifySnapshot, err := cleanedRepos.Snapshots.GetByID(ctx, snapshot.ID.String())
|
||||
if err != nil {
|
||||
t.Fatalf("failed to get snapshot: %v", err)
|
||||
}
|
||||
@@ -128,7 +128,7 @@ func TestCleanSnapshotDBEmptySnapshot(t *testing.T) {
|
||||
}
|
||||
|
||||
// Verify orphan file is gone
|
||||
f, err := cleanedRepos.Files.GetByPath(ctx, file.Path)
|
||||
f, err := cleanedRepos.Files.GetByPath(ctx, file.Path.String())
|
||||
if err != nil {
|
||||
t.Fatalf("failed to check file: %v", err)
|
||||
}
|
||||
@@ -137,7 +137,7 @@ func TestCleanSnapshotDBEmptySnapshot(t *testing.T) {
|
||||
}
|
||||
|
||||
// Verify orphan chunk is gone
|
||||
c, err := cleanedRepos.Chunks.GetByHash(ctx, chunk.ChunkHash)
|
||||
c, err := cleanedRepos.Chunks.GetByHash(ctx, chunk.ChunkHash.String())
|
||||
if err != nil {
|
||||
t.Fatalf("failed to check chunk: %v", err)
|
||||
}
|
||||
|
||||
@@ -5,9 +5,9 @@ import (
|
||||
"fmt"
|
||||
"strings"
|
||||
|
||||
"git.eeqj.de/sneak/vaultik/internal/config"
|
||||
"git.eeqj.de/sneak/vaultik/internal/s3"
|
||||
"go.uber.org/fx"
|
||||
"sneak.berlin/go/vaultik/internal/config"
|
||||
"sneak.berlin/go/vaultik/internal/s3"
|
||||
)
|
||||
|
||||
// Module exports storage functionality as an fx module.
|
||||
@@ -73,6 +73,9 @@ func storerFromURL(rawURL string, cfg *config.Config) (Storer, error) {
|
||||
}
|
||||
return NewS3Storer(client), nil
|
||||
|
||||
case "rclone":
|
||||
return NewRcloneStorer(context.Background(), parsed.RcloneRemote, parsed.Prefix)
|
||||
|
||||
default:
|
||||
return nil, fmt.Errorf("unsupported storage scheme: %s", parsed.Scheme)
|
||||
}
|
||||
|
||||
236
internal/storage/rclone.go
Normal file
236
internal/storage/rclone.go
Normal file
@@ -0,0 +1,236 @@
|
||||
package storage
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"context"
|
||||
"errors"
|
||||
"fmt"
|
||||
"io"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/rclone/rclone/fs"
|
||||
"github.com/rclone/rclone/fs/config/configfile"
|
||||
"github.com/rclone/rclone/fs/operations"
|
||||
|
||||
// Import all rclone backends
|
||||
_ "github.com/rclone/rclone/backend/all"
|
||||
)
|
||||
|
||||
// ErrRemoteNotFound is returned when an rclone remote is not configured.
|
||||
var ErrRemoteNotFound = errors.New("rclone remote not found in config")
|
||||
|
||||
// RcloneStorer implements Storer using rclone's filesystem abstraction.
|
||||
// This allows vaultik to use any of rclone's 70+ supported storage providers.
|
||||
type RcloneStorer struct {
|
||||
fsys fs.Fs // rclone filesystem
|
||||
remote string // remote name (for Info())
|
||||
path string // path within remote (for Info())
|
||||
}
|
||||
|
||||
// NewRcloneStorer creates a new rclone storage backend.
|
||||
// The remote parameter is the rclone remote name (as configured via `rclone config`).
|
||||
// The path parameter is the path within the remote.
|
||||
func NewRcloneStorer(ctx context.Context, remote, path string) (*RcloneStorer, error) {
|
||||
// Install the default config file handler
|
||||
configfile.Install()
|
||||
|
||||
// Build the rclone path string (e.g., "myremote:path/to/backups")
|
||||
rclonePath := remote + ":"
|
||||
if path != "" {
|
||||
rclonePath += path
|
||||
}
|
||||
|
||||
// Create the rclone filesystem
|
||||
fsys, err := fs.NewFs(ctx, rclonePath)
|
||||
if err != nil {
|
||||
// Check for remote not found error
|
||||
if strings.Contains(err.Error(), "didn't find section in config file") ||
|
||||
strings.Contains(err.Error(), "failed to find remote") {
|
||||
return nil, fmt.Errorf("%w: %s", ErrRemoteNotFound, remote)
|
||||
}
|
||||
return nil, fmt.Errorf("creating rclone filesystem: %w", err)
|
||||
}
|
||||
|
||||
return &RcloneStorer{
|
||||
fsys: fsys,
|
||||
remote: remote,
|
||||
path: path,
|
||||
}, nil
|
||||
}
|
||||
|
||||
// Put stores data at the specified key.
|
||||
func (r *RcloneStorer) Put(ctx context.Context, key string, data io.Reader) error {
|
||||
// Read all data into memory to get size (required by rclone)
|
||||
buf, err := io.ReadAll(data)
|
||||
if err != nil {
|
||||
return fmt.Errorf("reading data: %w", err)
|
||||
}
|
||||
|
||||
// Upload the object
|
||||
_, err = operations.Rcat(ctx, r.fsys, key, io.NopCloser(bytes.NewReader(buf)), time.Now(), nil)
|
||||
if err != nil {
|
||||
return fmt.Errorf("uploading object: %w", err)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// PutWithProgress stores data with progress reporting.
|
||||
func (r *RcloneStorer) PutWithProgress(ctx context.Context, key string, data io.Reader, size int64, progress ProgressCallback) error {
|
||||
// Wrap reader with progress tracking
|
||||
pr := &progressReader{
|
||||
reader: data,
|
||||
callback: progress,
|
||||
}
|
||||
|
||||
// Upload the object
|
||||
_, err := operations.Rcat(ctx, r.fsys, key, io.NopCloser(pr), time.Now(), nil)
|
||||
if err != nil {
|
||||
return fmt.Errorf("uploading object: %w", err)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// Get retrieves data from the specified key.
|
||||
func (r *RcloneStorer) Get(ctx context.Context, key string) (io.ReadCloser, error) {
|
||||
// Get the object
|
||||
obj, err := r.fsys.NewObject(ctx, key)
|
||||
if err != nil {
|
||||
if errors.Is(err, fs.ErrorObjectNotFound) {
|
||||
return nil, ErrNotFound
|
||||
}
|
||||
if errors.Is(err, fs.ErrorDirNotFound) {
|
||||
return nil, ErrNotFound
|
||||
}
|
||||
return nil, fmt.Errorf("getting object: %w", err)
|
||||
}
|
||||
|
||||
// Open the object for reading
|
||||
reader, err := obj.Open(ctx)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("opening object: %w", err)
|
||||
}
|
||||
|
||||
return reader, nil
|
||||
}
|
||||
|
||||
// Stat returns metadata about an object without retrieving its contents.
|
||||
func (r *RcloneStorer) Stat(ctx context.Context, key string) (*ObjectInfo, error) {
|
||||
obj, err := r.fsys.NewObject(ctx, key)
|
||||
if err != nil {
|
||||
if errors.Is(err, fs.ErrorObjectNotFound) {
|
||||
return nil, ErrNotFound
|
||||
}
|
||||
if errors.Is(err, fs.ErrorDirNotFound) {
|
||||
return nil, ErrNotFound
|
||||
}
|
||||
return nil, fmt.Errorf("getting object: %w", err)
|
||||
}
|
||||
|
||||
return &ObjectInfo{
|
||||
Key: key,
|
||||
Size: obj.Size(),
|
||||
}, nil
|
||||
}
|
||||
|
||||
// Delete removes an object.
|
||||
func (r *RcloneStorer) Delete(ctx context.Context, key string) error {
|
||||
obj, err := r.fsys.NewObject(ctx, key)
|
||||
if err != nil {
|
||||
if errors.Is(err, fs.ErrorObjectNotFound) {
|
||||
return nil // Match S3 behavior: no error if doesn't exist
|
||||
}
|
||||
if errors.Is(err, fs.ErrorDirNotFound) {
|
||||
return nil
|
||||
}
|
||||
return fmt.Errorf("getting object: %w", err)
|
||||
}
|
||||
|
||||
if err := obj.Remove(ctx); err != nil {
|
||||
return fmt.Errorf("removing object: %w", err)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// List returns all keys with the given prefix.
|
||||
func (r *RcloneStorer) List(ctx context.Context, prefix string) ([]string, error) {
|
||||
var keys []string
|
||||
|
||||
err := operations.ListFn(ctx, r.fsys, func(obj fs.Object) {
|
||||
key := obj.Remote()
|
||||
if prefix == "" || strings.HasPrefix(key, prefix) {
|
||||
keys = append(keys, key)
|
||||
}
|
||||
})
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("listing objects: %w", err)
|
||||
}
|
||||
|
||||
return keys, nil
|
||||
}
|
||||
|
||||
// ListStream returns a channel of ObjectInfo for large result sets.
|
||||
func (r *RcloneStorer) ListStream(ctx context.Context, prefix string) <-chan ObjectInfo {
|
||||
ch := make(chan ObjectInfo)
|
||||
|
||||
go func() {
|
||||
defer close(ch)
|
||||
|
||||
err := operations.ListFn(ctx, r.fsys, func(obj fs.Object) {
|
||||
// Check context cancellation
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
return
|
||||
default:
|
||||
}
|
||||
|
||||
key := obj.Remote()
|
||||
if prefix == "" || strings.HasPrefix(key, prefix) {
|
||||
ch <- ObjectInfo{
|
||||
Key: key,
|
||||
Size: obj.Size(),
|
||||
}
|
||||
}
|
||||
})
|
||||
if err != nil {
|
||||
ch <- ObjectInfo{Err: fmt.Errorf("listing objects: %w", err)}
|
||||
}
|
||||
}()
|
||||
|
||||
return ch
|
||||
}
|
||||
|
||||
// Info returns human-readable storage location information.
|
||||
func (r *RcloneStorer) Info() StorageInfo {
|
||||
location := r.remote
|
||||
if r.path != "" {
|
||||
location += ":" + r.path
|
||||
}
|
||||
return StorageInfo{
|
||||
Type: "rclone",
|
||||
Location: location,
|
||||
}
|
||||
}
|
||||
|
||||
// progressReader wraps an io.Reader to track read progress.
|
||||
type progressReader struct {
|
||||
reader io.Reader
|
||||
read int64
|
||||
callback ProgressCallback
|
||||
}
|
||||
|
||||
func (pr *progressReader) Read(p []byte) (int, error) {
|
||||
n, err := pr.reader.Read(p)
|
||||
if n > 0 {
|
||||
pr.read += int64(n)
|
||||
if pr.callback != nil {
|
||||
if callbackErr := pr.callback(pr.read); callbackErr != nil {
|
||||
return n, callbackErr
|
||||
}
|
||||
}
|
||||
}
|
||||
return n, err
|
||||
}
|
||||
@@ -5,7 +5,7 @@ import (
|
||||
"fmt"
|
||||
"io"
|
||||
|
||||
"git.eeqj.de/sneak/vaultik/internal/s3"
|
||||
"sneak.berlin/go/vaultik/internal/s3"
|
||||
)
|
||||
|
||||
// S3Storer wraps the existing s3.Client to implement Storer.
|
||||
|
||||
@@ -8,18 +8,20 @@ import (
|
||||
|
||||
// StorageURL represents a parsed storage URL.
|
||||
type StorageURL struct {
|
||||
Scheme string // "s3" or "file"
|
||||
Bucket string // S3 bucket name (empty for file)
|
||||
Prefix string // Path within bucket or filesystem base path
|
||||
Endpoint string // S3 endpoint (optional, default AWS)
|
||||
Region string // S3 region (optional)
|
||||
UseSSL bool // Use HTTPS for S3 (default true)
|
||||
Scheme string // "s3", "file", or "rclone"
|
||||
Bucket string // S3 bucket name (empty for file/rclone)
|
||||
Prefix string // Path within bucket or filesystem base path
|
||||
Endpoint string // S3 endpoint (optional, default AWS)
|
||||
Region string // S3 region (optional)
|
||||
UseSSL bool // Use HTTPS for S3 (default true)
|
||||
RcloneRemote string // rclone remote name (for rclone:// URLs)
|
||||
}
|
||||
|
||||
// ParseStorageURL parses a storage URL string.
|
||||
// Supported formats:
|
||||
// - s3://bucket/prefix?endpoint=host&region=us-east-1&ssl=true
|
||||
// - file:///absolute/path/to/backup
|
||||
// - rclone://remote/path/to/backups
|
||||
func ParseStorageURL(rawURL string) (*StorageURL, error) {
|
||||
if rawURL == "" {
|
||||
return nil, fmt.Errorf("storage URL is empty")
|
||||
@@ -67,7 +69,28 @@ func ParseStorageURL(rawURL string) (*StorageURL, error) {
|
||||
}, nil
|
||||
}
|
||||
|
||||
return nil, fmt.Errorf("unsupported URL scheme: must start with s3:// or file://")
|
||||
// Handle rclone:// URLs
|
||||
if strings.HasPrefix(rawURL, "rclone://") {
|
||||
u, err := url.Parse(rawURL)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("invalid URL: %w", err)
|
||||
}
|
||||
|
||||
remote := u.Host
|
||||
if remote == "" {
|
||||
return nil, fmt.Errorf("rclone URL missing remote name")
|
||||
}
|
||||
|
||||
path := strings.TrimPrefix(u.Path, "/")
|
||||
|
||||
return &StorageURL{
|
||||
Scheme: "rclone",
|
||||
Prefix: path,
|
||||
RcloneRemote: remote,
|
||||
}, nil
|
||||
}
|
||||
|
||||
return nil, fmt.Errorf("unsupported URL scheme: must start with s3://, file://, or rclone://")
|
||||
}
|
||||
|
||||
// String returns a human-readable representation of the storage URL.
|
||||
@@ -84,6 +107,11 @@ func (u *StorageURL) String() string {
|
||||
return fmt.Sprintf("s3://%s/%s (endpoint: %s)", u.Bucket, u.Prefix, endpoint)
|
||||
}
|
||||
return fmt.Sprintf("s3://%s (endpoint: %s)", u.Bucket, endpoint)
|
||||
case "rclone":
|
||||
if u.Prefix != "" {
|
||||
return fmt.Sprintf("rclone://%s/%s", u.RcloneRemote, u.Prefix)
|
||||
}
|
||||
return fmt.Sprintf("rclone://%s", u.RcloneRemote)
|
||||
default:
|
||||
return fmt.Sprintf("%s://?", u.Scheme)
|
||||
}
|
||||
|
||||
203
internal/types/types.go
Normal file
203
internal/types/types.go
Normal file
@@ -0,0 +1,203 @@
|
||||
// Package types provides custom types for better type safety across the vaultik codebase.
|
||||
// Using distinct types for IDs, hashes, paths, and credentials prevents accidental
|
||||
// mixing of semantically different values that happen to share the same underlying type.
|
||||
package types
|
||||
|
||||
import (
|
||||
"database/sql/driver"
|
||||
"fmt"
|
||||
|
||||
"github.com/google/uuid"
|
||||
)
|
||||
|
||||
// FileID is a UUID identifying a file record in the database.
|
||||
type FileID uuid.UUID
|
||||
|
||||
// NewFileID generates a new random FileID.
|
||||
func NewFileID() FileID {
|
||||
return FileID(uuid.New())
|
||||
}
|
||||
|
||||
// ParseFileID parses a string into a FileID.
|
||||
func ParseFileID(s string) (FileID, error) {
|
||||
id, err := uuid.Parse(s)
|
||||
if err != nil {
|
||||
return FileID{}, err
|
||||
}
|
||||
return FileID(id), nil
|
||||
}
|
||||
|
||||
// IsZero returns true if the FileID is the zero value.
|
||||
func (id FileID) IsZero() bool {
|
||||
return uuid.UUID(id) == uuid.Nil
|
||||
}
|
||||
|
||||
// Value implements driver.Valuer for database serialization.
|
||||
func (id FileID) Value() (driver.Value, error) {
|
||||
return uuid.UUID(id).String(), nil
|
||||
}
|
||||
|
||||
// Scan implements sql.Scanner for database deserialization.
|
||||
func (id *FileID) Scan(src interface{}) error {
|
||||
if src == nil {
|
||||
*id = FileID{}
|
||||
return nil
|
||||
}
|
||||
|
||||
var s string
|
||||
switch v := src.(type) {
|
||||
case string:
|
||||
s = v
|
||||
case []byte:
|
||||
s = string(v)
|
||||
default:
|
||||
return fmt.Errorf("cannot scan %T into FileID", src)
|
||||
}
|
||||
|
||||
parsed, err := uuid.Parse(s)
|
||||
if err != nil {
|
||||
return fmt.Errorf("invalid FileID: %w", err)
|
||||
}
|
||||
*id = FileID(parsed)
|
||||
return nil
|
||||
}
|
||||
|
||||
// BlobID is a UUID identifying a blob record in the database.
|
||||
// This is distinct from BlobHash which is the content-addressed hash of the blob.
|
||||
type BlobID uuid.UUID
|
||||
|
||||
// NewBlobID generates a new random BlobID.
|
||||
func NewBlobID() BlobID {
|
||||
return BlobID(uuid.New())
|
||||
}
|
||||
|
||||
// ParseBlobID parses a string into a BlobID.
|
||||
func ParseBlobID(s string) (BlobID, error) {
|
||||
id, err := uuid.Parse(s)
|
||||
if err != nil {
|
||||
return BlobID{}, err
|
||||
}
|
||||
return BlobID(id), nil
|
||||
}
|
||||
|
||||
// IsZero returns true if the BlobID is the zero value.
|
||||
func (id BlobID) IsZero() bool {
|
||||
return uuid.UUID(id) == uuid.Nil
|
||||
}
|
||||
|
||||
// Value implements driver.Valuer for database serialization.
|
||||
func (id BlobID) Value() (driver.Value, error) {
|
||||
return uuid.UUID(id).String(), nil
|
||||
}
|
||||
|
||||
// Scan implements sql.Scanner for database deserialization.
|
||||
func (id *BlobID) Scan(src interface{}) error {
|
||||
if src == nil {
|
||||
*id = BlobID{}
|
||||
return nil
|
||||
}
|
||||
|
||||
var s string
|
||||
switch v := src.(type) {
|
||||
case string:
|
||||
s = v
|
||||
case []byte:
|
||||
s = string(v)
|
||||
default:
|
||||
return fmt.Errorf("cannot scan %T into BlobID", src)
|
||||
}
|
||||
|
||||
parsed, err := uuid.Parse(s)
|
||||
if err != nil {
|
||||
return fmt.Errorf("invalid BlobID: %w", err)
|
||||
}
|
||||
*id = BlobID(parsed)
|
||||
return nil
|
||||
}
|
||||
|
||||
// Distinct string-backed types for values that would otherwise all be plain
// strings.
type (
	// SnapshotID identifies a snapshot; IDs typically have the form
	// "hostname_name_timestamp".
	SnapshotID string

	// ChunkHash is the SHA256 hash of a chunk's content, used to
	// content-address and deduplicate file chunks.
	ChunkHash string

	// BlobHash is the SHA256 hash of a blob's compressed and encrypted
	// content. It serves as the blob's filename in S3 storage.
	BlobHash string

	// FilePath is an absolute path to a file or directory.
	FilePath string

	// SourcePath is the root directory files are backed up from. Restore
	// uses it to strip the source prefix from paths.
	SourcePath string

	// AgeRecipient is an age public key used for encryption, in the form
	// age1... (Bech32-encoded X25519 public key).
	AgeRecipient string

	// AgeSecretKey is an age private key used for decryption, in the form
	// AGE-SECRET-KEY-... (Bech32-encoded X25519 private key).
	// Never log or serialize this type in plaintext.
	AgeSecretKey string

	// S3Endpoint is the URL of an S3-compatible storage endpoint.
	S3Endpoint string

	// BucketName is the name of an S3 bucket.
	BucketName string

	// S3Prefix is a path prefix inside an S3 bucket.
	S3Prefix string

	// AWSRegion is an AWS region identifier such as "us-east-1".
	AWSRegion string

	// AWSAccessKeyID is an AWS access key ID used for authentication.
	AWSAccessKeyID string

	// AWSSecretAccessKey is an AWS secret access key used for
	// authentication. Never log or serialize this type in plaintext.
	AWSSecretAccessKey string

	// Hostname identifies a host machine.
	Hostname string

	// Version is a semantic version string.
	Version string

	// GitRevision is a git commit SHA.
	GitRevision string

	// GlobPattern is a glob pattern for file matching, e.g. "*.log" or
	// "node_modules".
	GlobPattern string
)
|
||||
|
||||
// String methods for Stringer interface
|
||||
|
||||
func (id FileID) String() string { return uuid.UUID(id).String() }
|
||||
func (id BlobID) String() string { return uuid.UUID(id).String() }
|
||||
func (id SnapshotID) String() string { return string(id) }
|
||||
func (h ChunkHash) String() string { return string(h) }
|
||||
func (h BlobHash) String() string { return string(h) }
|
||||
func (p FilePath) String() string { return string(p) }
|
||||
func (p SourcePath) String() string { return string(p) }
|
||||
func (r AgeRecipient) String() string { return string(r) }
|
||||
func (e S3Endpoint) String() string { return string(e) }
|
||||
func (b BucketName) String() string { return string(b) }
|
||||
func (p S3Prefix) String() string { return string(p) }
|
||||
func (r AWSRegion) String() string { return string(r) }
|
||||
func (k AWSAccessKeyID) String() string { return string(k) }
|
||||
func (h Hostname) String() string { return string(h) }
|
||||
func (v Version) String() string { return string(v) }
|
||||
func (r GitRevision) String() string { return string(r) }
|
||||
func (p GlobPattern) String() string { return string(p) }
|
||||
|
||||
// Redacted String methods for sensitive types - prevents accidental logging
|
||||
|
||||
func (k AgeSecretKey) String() string { return "[REDACTED]" }
|
||||
func (k AWSSecretAccessKey) String() string { return "[REDACTED]" }
|
||||
|
||||
// Raw returns the actual value for sensitive types when explicitly needed
|
||||
func (k AgeSecretKey) Raw() string { return string(k) }
|
||||
func (k AWSSecretAccessKey) Raw() string { return string(k) }
|
||||
93
internal/vaultik/blob_fetch.go
Normal file
93
internal/vaultik/blob_fetch.go
Normal file
@@ -0,0 +1,93 @@
|
||||
package vaultik
|
||||
|
||||
import (
|
||||
"context"
|
||||
"crypto/sha256"
|
||||
"encoding/hex"
|
||||
"fmt"
|
||||
"io"
|
||||
|
||||
"filippo.io/age"
|
||||
"sneak.berlin/go/vaultik/internal/blobgen"
|
||||
)
|
||||
|
||||
// hashVerifyReader wraps a blobgen.Reader and verifies the double-SHA-256 hash
|
||||
// of decrypted plaintext when Close is called. It reuses the hash that
|
||||
// blobgen.Reader already computes internally via its TeeReader, avoiding
|
||||
// redundant SHA-256 computation.
|
||||
type hashVerifyReader struct {
|
||||
reader *blobgen.Reader // underlying decrypted blob reader (has internal hasher)
|
||||
fetcher io.ReadCloser // raw fetched stream (closed on Close)
|
||||
blobHash string // expected double-SHA-256 hex
|
||||
done bool // EOF reached
|
||||
}
|
||||
|
||||
func (h *hashVerifyReader) Read(p []byte) (int, error) {
|
||||
n, err := h.reader.Read(p)
|
||||
if err == io.EOF {
|
||||
h.done = true
|
||||
}
|
||||
return n, err
|
||||
}
|
||||
|
||||
// Close verifies the hash (if the stream was fully read) and closes underlying readers.
|
||||
func (h *hashVerifyReader) Close() error {
|
||||
readerErr := h.reader.Close()
|
||||
fetcherErr := h.fetcher.Close()
|
||||
|
||||
if h.done {
|
||||
firstHash := h.reader.Sum256()
|
||||
secondHasher := sha256.New()
|
||||
secondHasher.Write(firstHash)
|
||||
actualHashHex := hex.EncodeToString(secondHasher.Sum(nil))
|
||||
if actualHashHex != h.blobHash {
|
||||
return fmt.Errorf("blob hash mismatch: expected %s, got %s", h.blobHash[:16], actualHashHex[:16])
|
||||
}
|
||||
}
|
||||
|
||||
if readerErr != nil {
|
||||
return readerErr
|
||||
}
|
||||
return fetcherErr
|
||||
}
|
||||
|
||||
// FetchAndDecryptBlob downloads a blob, decrypts and decompresses it, and
|
||||
// returns a streaming reader that computes the double-SHA-256 hash on the fly.
|
||||
// The hash is verified when the returned reader is closed (after fully reading).
|
||||
// This avoids buffering the entire blob in memory.
|
||||
func (v *Vaultik) FetchAndDecryptBlob(ctx context.Context, blobHash string, expectedSize int64, identity age.Identity) (io.ReadCloser, error) {
|
||||
rc, _, err := v.FetchBlob(ctx, blobHash, expectedSize)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
reader, err := blobgen.NewReader(rc, identity)
|
||||
if err != nil {
|
||||
_ = rc.Close()
|
||||
return nil, fmt.Errorf("creating blob reader: %w", err)
|
||||
}
|
||||
|
||||
return &hashVerifyReader{
|
||||
reader: reader,
|
||||
fetcher: rc,
|
||||
blobHash: blobHash,
|
||||
}, nil
|
||||
}
|
||||
|
||||
// FetchBlob downloads a blob and returns a reader for the encrypted data.
|
||||
func (v *Vaultik) FetchBlob(ctx context.Context, blobHash string, expectedSize int64) (io.ReadCloser, int64, error) {
|
||||
blobPath := fmt.Sprintf("blobs/%s/%s/%s", blobHash[:2], blobHash[2:4], blobHash)
|
||||
|
||||
rc, err := v.Storage.Get(ctx, blobPath)
|
||||
if err != nil {
|
||||
return nil, 0, fmt.Errorf("downloading blob %s: %w", blobHash[:16], err)
|
||||
}
|
||||
|
||||
info, err := v.Storage.Stat(ctx, blobPath)
|
||||
if err != nil {
|
||||
_ = rc.Close()
|
||||
return nil, 0, fmt.Errorf("stat blob %s: %w", blobHash[:16], err)
|
||||
}
|
||||
|
||||
return rc, info.Size, nil
|
||||
}
|
||||
100
internal/vaultik/blob_fetch_hash_test.go
Normal file
100
internal/vaultik/blob_fetch_hash_test.go
Normal file
@@ -0,0 +1,100 @@
|
||||
package vaultik_test
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"context"
|
||||
"crypto/sha256"
|
||||
"encoding/hex"
|
||||
"io"
|
||||
"strings"
|
||||
"testing"
|
||||
|
||||
"filippo.io/age"
|
||||
"sneak.berlin/go/vaultik/internal/blobgen"
|
||||
"sneak.berlin/go/vaultik/internal/vaultik"
|
||||
)
|
||||
|
||||
// TestFetchAndDecryptBlobVerifiesHash verifies that FetchAndDecryptBlob checks
|
||||
// the double-SHA-256 hash of the decrypted plaintext against the expected blob hash.
|
||||
func TestFetchAndDecryptBlobVerifiesHash(t *testing.T) {
|
||||
identity, err := age.GenerateX25519Identity()
|
||||
if err != nil {
|
||||
t.Fatalf("generating identity: %v", err)
|
||||
}
|
||||
|
||||
// Create test data and encrypt it using blobgen.Writer
|
||||
plaintext := []byte("hello world test data for blob hash verification")
|
||||
var encBuf bytes.Buffer
|
||||
writer, err := blobgen.NewWriter(&encBuf, 1, []string{identity.Recipient().String()})
|
||||
if err != nil {
|
||||
t.Fatalf("creating blobgen writer: %v", err)
|
||||
}
|
||||
if _, err := writer.Write(plaintext); err != nil {
|
||||
t.Fatalf("writing plaintext: %v", err)
|
||||
}
|
||||
if err := writer.Close(); err != nil {
|
||||
t.Fatalf("closing writer: %v", err)
|
||||
}
|
||||
encryptedData := encBuf.Bytes()
|
||||
|
||||
// Compute correct double-SHA-256 hash of the plaintext (matches blobgen.Writer.Sum256)
|
||||
firstHash := sha256.Sum256(plaintext)
|
||||
secondHash := sha256.Sum256(firstHash[:])
|
||||
correctHash := hex.EncodeToString(secondHash[:])
|
||||
|
||||
// Verify our hash matches what blobgen.Writer produces
|
||||
writerHash := hex.EncodeToString(writer.Sum256())
|
||||
if correctHash != writerHash {
|
||||
t.Fatalf("hash computation mismatch: manual=%s, writer=%s", correctHash, writerHash)
|
||||
}
|
||||
|
||||
// Set up mock storage with the blob at the correct path
|
||||
mockStorage := NewMockStorer()
|
||||
blobPath := "blobs/" + correctHash[:2] + "/" + correctHash[2:4] + "/" + correctHash
|
||||
mockStorage.mu.Lock()
|
||||
mockStorage.data[blobPath] = encryptedData
|
||||
mockStorage.mu.Unlock()
|
||||
|
||||
tv := vaultik.NewForTesting(mockStorage)
|
||||
ctx := context.Background()
|
||||
|
||||
t.Run("correct hash succeeds", func(t *testing.T) {
|
||||
rc, err := tv.FetchAndDecryptBlob(ctx, correctHash, int64(len(encryptedData)), identity)
|
||||
if err != nil {
|
||||
t.Fatalf("expected success, got error: %v", err)
|
||||
}
|
||||
data, err := io.ReadAll(rc)
|
||||
if err != nil {
|
||||
t.Fatalf("reading stream: %v", err)
|
||||
}
|
||||
if err := rc.Close(); err != nil {
|
||||
t.Fatalf("close (hash verification) failed: %v", err)
|
||||
}
|
||||
if !bytes.Equal(data, plaintext) {
|
||||
t.Fatalf("decrypted data mismatch: got %q, want %q", data, plaintext)
|
||||
}
|
||||
})
|
||||
|
||||
t.Run("wrong hash fails", func(t *testing.T) {
|
||||
// Use a fake hash that doesn't match the actual plaintext
|
||||
fakeHash := strings.Repeat("ab", 32) // 64 hex chars
|
||||
fakePath := "blobs/" + fakeHash[:2] + "/" + fakeHash[2:4] + "/" + fakeHash
|
||||
mockStorage.mu.Lock()
|
||||
mockStorage.data[fakePath] = encryptedData
|
||||
mockStorage.mu.Unlock()
|
||||
|
||||
rc, err := tv.FetchAndDecryptBlob(ctx, fakeHash, int64(len(encryptedData)), identity)
|
||||
if err != nil {
|
||||
t.Fatalf("unexpected error opening stream: %v", err)
|
||||
}
|
||||
// Read all data — hash is verified on Close
|
||||
_, _ = io.ReadAll(rc)
|
||||
err = rc.Close()
|
||||
if err == nil {
|
||||
t.Fatal("expected error for mismatched hash, got nil")
|
||||
}
|
||||
if !strings.Contains(err.Error(), "hash mismatch") {
|
||||
t.Fatalf("expected hash mismatch error, got: %v", err)
|
||||
}
|
||||
})
|
||||
}
|
||||
207
internal/vaultik/blobcache.go
Normal file
207
internal/vaultik/blobcache.go
Normal file
@@ -0,0 +1,207 @@
|
||||
package vaultik
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"sync"
|
||||
)
|
||||
|
||||
// blobDiskCacheEntry is one node of the cache's doubly linked recency list
// and records the size of the corresponding file on disk.
type blobDiskCacheEntry struct {
	key  string              // cache key; also the file name inside the cache directory
	size int64               // number of bytes stored for this entry
	prev *blobDiskCacheEntry // neighbour towards head (more recently used)
	next *blobDiskCacheEntry // neighbour towards tail (less recently used)
}

// blobDiskCache is an LRU cache that stores blobs on disk instead of in
// memory. Each blob is a file named after its key inside a temporary
// directory. When the total size exceeds maxBytes, least-recently-used
// entries are evicted and their files deleted.
type blobDiskCache struct {
	mu       sync.Mutex
	dir      string // temporary directory holding the cached files
	maxBytes int64  // size budget for all entries together
	curBytes int64  // sum of the sizes of all tracked entries
	items    map[string]*blobDiskCacheEntry
	head     *blobDiskCacheEntry // most recent
	tail     *blobDiskCacheEntry // least recent
}

// newBlobDiskCache creates a fresh temporary directory and returns an empty
// cache limited to maxBytes.
func newBlobDiskCache(maxBytes int64) (*blobDiskCache, error) {
	dir, err := os.MkdirTemp("", "vaultik-blobcache-*")
	if err != nil {
		return nil, fmt.Errorf("creating blob cache dir: %w", err)
	}
	return &blobDiskCache{
		dir:      dir,
		maxBytes: maxBytes,
		items:    make(map[string]*blobDiskCacheEntry),
	}, nil
}

// path returns the on-disk location used for key.
func (c *blobDiskCache) path(key string) string {
	return filepath.Join(c.dir, key)
}

// unlink detaches e from the recency list, fixing up head and tail as
// needed. The caller must hold c.mu.
func (c *blobDiskCache) unlink(e *blobDiskCacheEntry) {
	if e.prev != nil {
		e.prev.next = e.next
	} else {
		c.head = e.next
	}
	if e.next != nil {
		e.next.prev = e.prev
	} else {
		c.tail = e.prev
	}
	e.prev = nil
	e.next = nil
}

// pushFront inserts e at the head (most recent end) of the recency list.
// The caller must hold c.mu and e must not currently be linked.
func (c *blobDiskCache) pushFront(e *blobDiskCacheEntry) {
	e.prev = nil
	e.next = c.head
	if c.head != nil {
		c.head.prev = e
	}
	c.head = e
	if c.tail == nil {
		c.tail = e
	}
}

// evictLRU drops the least recently used entry, if there is one, and deletes
// its file. The caller must hold c.mu.
func (c *blobDiskCache) evictLRU() {
	if c.tail == nil {
		return
	}
	victim := c.tail
	c.unlink(victim)
	delete(c.items, victim.key)
	c.curBytes -= victim.size
	// Best effort: the entry is untracked regardless of whether the file
	// could be deleted.
	_ = os.Remove(c.path(victim.key))
}

// Put writes blob data to the disk cache under key, replacing any previous
// entry for that key, and then evicts least-recently-used entries until the
// cache fits within maxBytes again. Entries larger than maxBytes are
// silently skipped. A failed write returns an error and leaves key uncached.
func (c *blobDiskCache) Put(key string, data []byte) error {
	entrySize := int64(len(data))

	c.mu.Lock()
	defer c.mu.Unlock()

	if entrySize > c.maxBytes {
		return nil
	}

	// Remove old entry if updating.
	if e, ok := c.items[key]; ok {
		c.unlink(e)
		c.curBytes -= e.size
		_ = os.Remove(c.path(key))
		delete(c.items, key)
	}

	if err := os.WriteFile(c.path(key), data, 0600); err != nil {
		// Best effort: do not leave a partial, untracked file behind that
		// would occupy disk space until Close.
		_ = os.Remove(c.path(key))
		return fmt.Errorf("writing blob to cache: %w", err)
	}

	e := &blobDiskCacheEntry{key: key, size: entrySize}
	c.pushFront(e)
	c.items[key] = e
	c.curBytes += entrySize

	for c.curBytes > c.maxBytes && c.tail != nil {
		c.evictLRU()
	}

	return nil
}

// Get reads a cached blob from disk and marks it as most recently used.
// It returns the data and true on a hit. If the entry is tracked but its
// file cannot be read, the entry is dropped and a miss is reported.
func (c *blobDiskCache) Get(key string) ([]byte, bool) {
	c.mu.Lock()
	e, ok := c.items[key]
	if !ok {
		c.mu.Unlock()
		return nil, false
	}
	c.unlink(e)
	c.pushFront(e)
	c.mu.Unlock()

	// The file is read without holding the lock.
	data, err := os.ReadFile(c.path(key))
	if err != nil {
		c.mu.Lock()
		// Only drop the entry if it is still the one looked up above; it
		// may have been replaced or evicted while the lock was released.
		if e2, ok2 := c.items[key]; ok2 && e2 == e {
			c.unlink(e)
			delete(c.items, key)
			c.curBytes -= e.size
		}
		c.mu.Unlock()
		return nil, false
	}
	return data, true
}

// ReadAt returns length bytes starting at offset from the cached blob for
// key, without loading the entire blob into memory, and marks the entry as
// most recently used. It returns an error when offset or length is
// negative, when key is not cached, when the requested range extends past
// the recorded blob size, or when the file cannot be opened or read.
func (c *blobDiskCache) ReadAt(key string, offset, length int64) ([]byte, error) {
	// A negative length would make the buffer allocation below panic, and a
	// negative offset can never address cached data.
	if offset < 0 || length < 0 {
		return nil, fmt.Errorf("invalid read range: offset=%d length=%d", offset, length)
	}

	c.mu.Lock()
	e, ok := c.items[key]
	if !ok {
		c.mu.Unlock()
		return nil, fmt.Errorf("key %q not in cache", key)
	}
	// Phrased as a subtraction so that offset+length cannot overflow int64
	// and slip past the bounds check.
	if length > e.size || offset > e.size-length {
		c.mu.Unlock()
		return nil, fmt.Errorf("read beyond blob size: offset=%d length=%d size=%d", offset, length, e.size)
	}
	c.unlink(e)
	c.pushFront(e)
	c.mu.Unlock()

	f, err := os.Open(c.path(key))
	if err != nil {
		return nil, err
	}
	defer func() { _ = f.Close() }()

	buf := make([]byte, length)
	if _, err := f.ReadAt(buf, offset); err != nil {
		return nil, err
	}
	return buf, nil
}

// Has returns whether a key exists in the cache. It does not affect recency.
func (c *blobDiskCache) Has(key string) bool {
	c.mu.Lock()
	defer c.mu.Unlock()
	_, ok := c.items[key]
	return ok
}

// Size returns current total cached bytes.
func (c *blobDiskCache) Size() int64 {
	c.mu.Lock()
	defer c.mu.Unlock()
	return c.curBytes
}

// Len returns number of cached entries.
func (c *blobDiskCache) Len() int {
	c.mu.Lock()
	defer c.mu.Unlock()
	return len(c.items)
}

// Close resets the in-memory bookkeeping and removes the cache directory
// together with all cached blobs.
func (c *blobDiskCache) Close() error {
	c.mu.Lock()
	defer c.mu.Unlock()
	c.items = nil
	c.head = nil
	c.tail = nil
	c.curBytes = 0
	return os.RemoveAll(c.dir)
}
|
||||
189
internal/vaultik/blobcache_test.go
Normal file
189
internal/vaultik/blobcache_test.go
Normal file
@@ -0,0 +1,189 @@
|
||||
package vaultik
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"crypto/rand"
|
||||
"fmt"
|
||||
"testing"
|
||||
)
|
||||
|
||||
func TestBlobDiskCache_BasicGetPut(t *testing.T) {
|
||||
cache, err := newBlobDiskCache(1 << 20)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
defer func() { _ = cache.Close() }()
|
||||
|
||||
data := []byte("hello world")
|
||||
if err := cache.Put("key1", data); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
got, ok := cache.Get("key1")
|
||||
if !ok {
|
||||
t.Fatal("expected cache hit")
|
||||
}
|
||||
if !bytes.Equal(got, data) {
|
||||
t.Fatalf("got %q, want %q", got, data)
|
||||
}
|
||||
|
||||
_, ok = cache.Get("nonexistent")
|
||||
if ok {
|
||||
t.Fatal("expected cache miss")
|
||||
}
|
||||
}
|
||||
|
||||
func TestBlobDiskCache_EvictionUnderPressure(t *testing.T) {
|
||||
maxBytes := int64(1000)
|
||||
cache, err := newBlobDiskCache(maxBytes)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
defer func() { _ = cache.Close() }()
|
||||
|
||||
for i := 0; i < 5; i++ {
|
||||
data := make([]byte, 300)
|
||||
if err := cache.Put(fmt.Sprintf("key%d", i), data); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
}
|
||||
|
||||
if cache.Size() > maxBytes {
|
||||
t.Fatalf("cache size %d exceeds max %d", cache.Size(), maxBytes)
|
||||
}
|
||||
|
||||
if !cache.Has("key4") {
|
||||
t.Fatal("expected key4 to be cached")
|
||||
}
|
||||
if cache.Has("key0") {
|
||||
t.Fatal("expected key0 to be evicted")
|
||||
}
|
||||
}
|
||||
|
||||
func TestBlobDiskCache_OversizedEntryRejected(t *testing.T) {
|
||||
cache, err := newBlobDiskCache(100)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
defer func() { _ = cache.Close() }()
|
||||
|
||||
data := make([]byte, 200)
|
||||
if err := cache.Put("big", data); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
if cache.Has("big") {
|
||||
t.Fatal("oversized entry should not be cached")
|
||||
}
|
||||
}
|
||||
|
||||
func TestBlobDiskCache_UpdateInPlace(t *testing.T) {
|
||||
cache, err := newBlobDiskCache(1 << 20)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
defer func() { _ = cache.Close() }()
|
||||
|
||||
if err := cache.Put("key1", []byte("v1")); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
if err := cache.Put("key1", []byte("version2")); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
got, ok := cache.Get("key1")
|
||||
if !ok {
|
||||
t.Fatal("expected hit")
|
||||
}
|
||||
if string(got) != "version2" {
|
||||
t.Fatalf("got %q, want %q", got, "version2")
|
||||
}
|
||||
if cache.Len() != 1 {
|
||||
t.Fatalf("expected 1 entry, got %d", cache.Len())
|
||||
}
|
||||
if cache.Size() != int64(len("version2")) {
|
||||
t.Fatalf("expected size %d, got %d", len("version2"), cache.Size())
|
||||
}
|
||||
}
|
||||
|
||||
func TestBlobDiskCache_ReadAt(t *testing.T) {
|
||||
cache, err := newBlobDiskCache(1 << 20)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
defer func() { _ = cache.Close() }()
|
||||
|
||||
data := make([]byte, 1024)
|
||||
if _, err := rand.Read(data); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
if err := cache.Put("blob1", data); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
chunk, err := cache.ReadAt("blob1", 100, 200)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
if !bytes.Equal(chunk, data[100:300]) {
|
||||
t.Fatal("ReadAt returned wrong data")
|
||||
}
|
||||
|
||||
_, err = cache.ReadAt("blob1", 900, 200)
|
||||
if err == nil {
|
||||
t.Fatal("expected error for out-of-bounds read")
|
||||
}
|
||||
|
||||
_, err = cache.ReadAt("missing", 0, 10)
|
||||
if err == nil {
|
||||
t.Fatal("expected error for missing key")
|
||||
}
|
||||
}
|
||||
|
||||
func TestBlobDiskCache_Close(t *testing.T) {
|
||||
cache, err := newBlobDiskCache(1 << 20)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
if err := cache.Put("key1", []byte("data")); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
if err := cache.Close(); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
}
|
||||
|
||||
func TestBlobDiskCache_LRUOrder(t *testing.T) {
|
||||
cache, err := newBlobDiskCache(200)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
defer func() { _ = cache.Close() }()
|
||||
|
||||
d := make([]byte, 100)
|
||||
if err := cache.Put("a", d); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
if err := cache.Put("b", d); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
// Access "a" to make it most recently used
|
||||
cache.Get("a")
|
||||
|
||||
// Adding "c" should evict "b" (LRU), not "a"
|
||||
if err := cache.Put("c", d); err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
|
||||
if !cache.Has("a") {
|
||||
t.Fatal("expected 'a' to survive")
|
||||
}
|
||||
if !cache.Has("c") {
|
||||
t.Fatal("expected 'c' to be present")
|
||||
}
|
||||
if cache.Has("b") {
|
||||
t.Fatal("expected 'b' to be evicted")
|
||||
}
|
||||
}
|
||||
@@ -2,16 +2,19 @@ package vaultik
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"regexp"
|
||||
"strconv"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"sneak.berlin/go/vaultik/internal/types"
|
||||
)
|
||||
|
||||
// SnapshotInfo contains information about a snapshot
|
||||
type SnapshotInfo struct {
|
||||
ID string `json:"id"`
|
||||
Timestamp time.Time `json:"timestamp"`
|
||||
CompressedSize int64 `json:"compressed_size"`
|
||||
ID types.SnapshotID `json:"id"`
|
||||
Timestamp time.Time `json:"timestamp"`
|
||||
CompressedSize int64 `json:"compressed_size"`
|
||||
}
|
||||
|
||||
// formatNumber formats a number with commas
|
||||
@@ -60,44 +63,72 @@ func formatBytes(bytes int64) string {
|
||||
}
|
||||
|
||||
// parseSnapshotTimestamp extracts the timestamp from a snapshot ID.
//
// Expected format: hostname_snapshotname_2026-01-12T14:41:15Z. Only the text
// after the final underscore is inspected; it must be a valid RFC3339
// timestamp. The result is normalized to UTC.
//
// An error is returned when the ID contains no underscore at all, or when the
// trailing component does not parse as RFC3339.
func parseSnapshotTimestamp(snapshotID string) (time.Time, error) {
	sep := strings.LastIndex(snapshotID, "_")
	if sep < 0 {
		return time.Time{}, fmt.Errorf("invalid snapshot ID format: expected hostname_snapshotname_timestamp")
	}

	// Everything after the last underscore is the RFC3339 timestamp.
	timestamp, err := time.Parse(time.RFC3339, snapshotID[sep+1:])
	if err != nil {
		return time.Time{}, fmt.Errorf("invalid timestamp: %w", err)
	}

	return timestamp.UTC(), nil
}
|
||||
// parseSnapshotName extracts the snapshot name from a snapshot ID.
//
// Expected format: hostname_snapshotname_timestamp. The text before the first
// underscore is treated as the hostname and the text after the last underscore
// as the timestamp; everything in between is the snapshot name, which may
// itself contain underscores.
//
// Returns the empty string when the ID has fewer than two underscores (for
// example hostname_timestamp, which carries no snapshot name).
func parseSnapshotName(snapshotID string) string {
	first := strings.Index(snapshotID, "_")
	last := strings.LastIndex(snapshotID, "_")
	if first < 0 || first == last {
		// Zero or one underscore: there is no middle component.
		return ""
	}
	return snapshotID[first+1 : last]
}
|
||||
|
||||
// durationTokenRe matches a single "<number><unit>" token such as "30d" or
// "2 weeks". It is compiled once instead of on every parseDuration call.
var durationTokenRe = regexp.MustCompile(`(\d+)\s*([a-zA-Z]+)`)

// durationShapeRe accepts only strings that consist entirely of such tokens,
// optionally separated by whitespace. It is used to reject inputs where the
// token scan would otherwise silently skip characters (e.g. "-3d", "1.5d").
var durationShapeRe = regexp.MustCompile(`^\s*(?:\d+\s*[a-zA-Z]+\s*)+$`)

// parseDuration parses a duration string with support for human-friendly units:
// d/day/days, w/week/weeks, mo/month/months, y/year/years, plus standard Go
// duration units (h, m, s).
//
// Anything time.ParseDuration accepts is returned as-is. Otherwise the input
// must be a sequence of "<integer><unit>" tokens (units are case-insensitive,
// whitespace between tokens is allowed) whose values are summed. A month is
// counted as 30 days and a year as 365 days.
//
// An error is returned for input that is not made up solely of such tokens,
// for an unknown unit, or when the total does not fit in a time.Duration.
func parseDuration(s string) (time.Duration, error) {
	// Fast path: plain Go durations such as "1h30m" or "30s".
	if d, err := time.ParseDuration(s); err == nil {
		return d, nil
	}

	if !durationShapeRe.MatchString(s) {
		return 0, fmt.Errorf("invalid duration: %q", s)
	}

	const (
		day         = 24 * time.Hour
		maxDuration = time.Duration(1<<63 - 1)
	)

	var total time.Duration
	for _, match := range durationTokenRe.FindAllStringSubmatch(s, -1) {
		n, err := strconv.ParseInt(match[1], 10, 64)
		if err != nil {
			return 0, fmt.Errorf("invalid number %q: %w", match[1], err)
		}

		var unit time.Duration
		switch name := strings.ToLower(match[2]); name {
		case "s":
			unit = time.Second
		case "m":
			unit = time.Minute
		case "h":
			unit = time.Hour
		case "d", "day", "days":
			unit = day
		case "w", "week", "weeks":
			unit = 7 * day
		case "mo", "month", "months":
			unit = 30 * day
		case "y", "year", "years":
			unit = 365 * day
		default:
			return 0, fmt.Errorf("unknown time unit %q", name)
		}

		// Reject values that would wrap around int64 nanoseconds rather than
		// returning a silently wrong (possibly negative) duration.
		if time.Duration(n) > (maxDuration-total)/unit {
			return 0, fmt.Errorf("duration %q overflows", s)
		}
		total += time.Duration(n) * unit
	}

	return total, nil
}
|
||||
|
||||
112
internal/vaultik/helpers_test.go
Normal file
112
internal/vaultik/helpers_test.go
Normal file
@@ -0,0 +1,112 @@
|
||||
package vaultik
|
||||
|
||||
import (
|
||||
"testing"
|
||||
"time"
|
||||
)
|
||||
|
||||
func TestParseSnapshotName(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
snapshotID string
|
||||
want string
|
||||
}{
|
||||
{
|
||||
name: "standard format with name",
|
||||
snapshotID: "myhost_home_2026-01-12T14:41:15Z",
|
||||
want: "home",
|
||||
},
|
||||
{
|
||||
name: "standard format with different name",
|
||||
snapshotID: "server1_system_2026-02-15T09:30:00Z",
|
||||
want: "system",
|
||||
},
|
||||
{
|
||||
name: "name with underscores",
|
||||
snapshotID: "myhost_my_special_backup_2026-03-01T00:00:00Z",
|
||||
want: "my_special_backup",
|
||||
},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
got := parseSnapshotName(tt.snapshotID)
|
||||
if got != tt.want {
|
||||
t.Errorf("parseSnapshotName(%q) = %q, want %q", tt.snapshotID, got, tt.want)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestParseDuration(t *testing.T) {
|
||||
tests := []struct {
|
||||
input string
|
||||
want time.Duration
|
||||
err bool
|
||||
}{
|
||||
{"30d", 30 * 24 * time.Hour, false},
|
||||
{"4w", 4 * 7 * 24 * time.Hour, false},
|
||||
{"6mo", 6 * 30 * 24 * time.Hour, false},
|
||||
{"1y", 365 * 24 * time.Hour, false},
|
||||
{"2w3d", 2*7*24*time.Hour + 3*24*time.Hour, false},
|
||||
{"1h", time.Hour, false},
|
||||
{"30s", 30 * time.Second, false},
|
||||
{"garbage", 0, true},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.input, func(t *testing.T) {
|
||||
got, err := parseDuration(tt.input)
|
||||
if tt.err {
|
||||
if err == nil {
|
||||
t.Fatalf("expected error for %q, got %v", tt.input, got)
|
||||
}
|
||||
return
|
||||
}
|
||||
if err != nil {
|
||||
t.Fatalf("unexpected error for %q: %v", tt.input, err)
|
||||
}
|
||||
if got != tt.want {
|
||||
t.Errorf("parseDuration(%q) = %v, want %v", tt.input, got, tt.want)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestParseSnapshotTimestamp(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
snapshotID string
|
||||
wantErr bool
|
||||
}{
|
||||
{
|
||||
name: "valid with name",
|
||||
snapshotID: "myhost_home_2026-01-12T14:41:15Z",
|
||||
wantErr: false,
|
||||
},
|
||||
{
|
||||
name: "valid without name",
|
||||
snapshotID: "myhost_2026-01-12T14:41:15Z",
|
||||
wantErr: false,
|
||||
},
|
||||
{
|
||||
name: "invalid - single part",
|
||||
snapshotID: "nounderscore",
|
||||
wantErr: true,
|
||||
},
|
||||
{
|
||||
name: "invalid - bad timestamp",
|
||||
snapshotID: "myhost_home_notadate",
|
||||
wantErr: true,
|
||||
},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
_, err := parseSnapshotTimestamp(tt.snapshotID)
|
||||
if (err != nil) != tt.wantErr {
|
||||
t.Errorf("parseSnapshotTimestamp(%q) error = %v, wantErr %v", tt.snapshotID, err, tt.wantErr)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
@@ -1,101 +1,353 @@
|
||||
package vaultik
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"runtime"
|
||||
"sort"
|
||||
"strings"
|
||||
|
||||
"github.com/dustin/go-humanize"
|
||||
"sneak.berlin/go/vaultik/internal/log"
|
||||
"sneak.berlin/go/vaultik/internal/snapshot"
|
||||
)
|
||||
|
||||
// ShowInfo displays system and configuration information
|
||||
func (v *Vaultik) ShowInfo() error {
|
||||
// System Information
|
||||
fmt.Printf("=== System Information ===\n")
|
||||
fmt.Printf("OS/Architecture: %s/%s\n", runtime.GOOS, runtime.GOARCH)
|
||||
fmt.Printf("Version: %s\n", v.Globals.Version)
|
||||
fmt.Printf("Commit: %s\n", v.Globals.Commit)
|
||||
fmt.Printf("Go Version: %s\n", runtime.Version())
|
||||
fmt.Println()
|
||||
v.printfStdout("=== System Information ===\n")
|
||||
v.printfStdout("OS/Architecture: %s/%s\n", runtime.GOOS, runtime.GOARCH)
|
||||
v.printfStdout("Version: %s\n", v.Globals.Version)
|
||||
v.printfStdout("Commit: %s\n", v.Globals.Commit)
|
||||
v.printfStdout("Go Version: %s\n", runtime.Version())
|
||||
v.printlnStdout()
|
||||
|
||||
// Storage Configuration
|
||||
fmt.Printf("=== Storage Configuration ===\n")
|
||||
fmt.Printf("S3 Bucket: %s\n", v.Config.S3.Bucket)
|
||||
v.printfStdout("=== Storage Configuration ===\n")
|
||||
v.printfStdout("S3 Bucket: %s\n", v.Config.S3.Bucket)
|
||||
if v.Config.S3.Prefix != "" {
|
||||
fmt.Printf("S3 Prefix: %s\n", v.Config.S3.Prefix)
|
||||
v.printfStdout("S3 Prefix: %s\n", v.Config.S3.Prefix)
|
||||
}
|
||||
fmt.Printf("S3 Endpoint: %s\n", v.Config.S3.Endpoint)
|
||||
fmt.Printf("S3 Region: %s\n", v.Config.S3.Region)
|
||||
fmt.Println()
|
||||
v.printfStdout("S3 Endpoint: %s\n", v.Config.S3.Endpoint)
|
||||
v.printfStdout("S3 Region: %s\n", v.Config.S3.Region)
|
||||
v.printlnStdout()
|
||||
|
||||
// Backup Settings
|
||||
fmt.Printf("=== Backup Settings ===\n")
|
||||
fmt.Printf("Source Directories:\n")
|
||||
for _, dir := range v.Config.SourceDirs {
|
||||
fmt.Printf(" - %s\n", dir)
|
||||
v.printfStdout("=== Backup Settings ===\n")
|
||||
|
||||
// Show configured snapshots
|
||||
v.printfStdout("Snapshots:\n")
|
||||
for _, name := range v.Config.SnapshotNames() {
|
||||
snap := v.Config.Snapshots[name]
|
||||
v.printfStdout(" %s:\n", name)
|
||||
for _, path := range snap.Paths {
|
||||
v.printfStdout(" - %s\n", path)
|
||||
}
|
||||
if len(snap.Exclude) > 0 {
|
||||
v.printfStdout(" exclude: %s\n", strings.Join(snap.Exclude, ", "))
|
||||
}
|
||||
}
|
||||
|
||||
// Global exclude patterns
|
||||
if len(v.Config.Exclude) > 0 {
|
||||
fmt.Printf("Exclude Patterns: %s\n", strings.Join(v.Config.Exclude, ", "))
|
||||
v.printfStdout("Global Exclude: %s\n", strings.Join(v.Config.Exclude, ", "))
|
||||
}
|
||||
|
||||
fmt.Printf("Compression: zstd level %d\n", v.Config.CompressionLevel)
|
||||
fmt.Printf("Chunk Size: %s\n", humanize.Bytes(uint64(v.Config.ChunkSize)))
|
||||
fmt.Printf("Blob Size Limit: %s\n", humanize.Bytes(uint64(v.Config.BlobSizeLimit)))
|
||||
fmt.Println()
|
||||
v.printfStdout("Compression: zstd level %d\n", v.Config.CompressionLevel)
|
||||
v.printfStdout("Chunk Size: %s\n", humanize.Bytes(uint64(v.Config.ChunkSize)))
|
||||
v.printfStdout("Blob Size Limit: %s\n", humanize.Bytes(uint64(v.Config.BlobSizeLimit)))
|
||||
v.printlnStdout()
|
||||
|
||||
// Encryption Configuration
|
||||
fmt.Printf("=== Encryption Configuration ===\n")
|
||||
fmt.Printf("Recipients:\n")
|
||||
v.printfStdout("=== Encryption Configuration ===\n")
|
||||
v.printfStdout("Recipients:\n")
|
||||
for _, recipient := range v.Config.AgeRecipients {
|
||||
fmt.Printf(" - %s\n", recipient)
|
||||
}
|
||||
fmt.Println()
|
||||
|
||||
// Daemon Settings (if applicable)
|
||||
if v.Config.BackupInterval > 0 || v.Config.MinTimeBetweenRun > 0 {
|
||||
fmt.Printf("=== Daemon Settings ===\n")
|
||||
if v.Config.BackupInterval > 0 {
|
||||
fmt.Printf("Backup Interval: %s\n", v.Config.BackupInterval)
|
||||
}
|
||||
if v.Config.MinTimeBetweenRun > 0 {
|
||||
fmt.Printf("Minimum Time: %s\n", v.Config.MinTimeBetweenRun)
|
||||
}
|
||||
fmt.Println()
|
||||
v.printfStdout(" - %s\n", recipient)
|
||||
}
|
||||
v.printlnStdout()
|
||||
|
||||
// Local Database
|
||||
fmt.Printf("=== Local Database ===\n")
|
||||
fmt.Printf("Index Path: %s\n", v.Config.IndexPath)
|
||||
v.printfStdout("=== Local Database ===\n")
|
||||
v.printfStdout("Index Path: %s\n", v.Config.IndexPath)
|
||||
|
||||
// Check if index file exists and get its size
|
||||
if info, err := v.Fs.Stat(v.Config.IndexPath); err == nil {
|
||||
fmt.Printf("Index Size: %s\n", humanize.Bytes(uint64(info.Size())))
|
||||
v.printfStdout("Index Size: %s\n", humanize.Bytes(uint64(info.Size())))
|
||||
|
||||
// Get snapshot count from database
|
||||
query := `SELECT COUNT(*) FROM snapshots WHERE completed_at IS NOT NULL`
|
||||
var snapshotCount int
|
||||
if err := v.DB.Conn().QueryRowContext(v.ctx, query).Scan(&snapshotCount); err == nil {
|
||||
fmt.Printf("Snapshots: %d\n", snapshotCount)
|
||||
v.printfStdout("Snapshots: %d\n", snapshotCount)
|
||||
}
|
||||
|
||||
// Get blob count from database
|
||||
query = `SELECT COUNT(*) FROM blobs`
|
||||
var blobCount int
|
||||
if err := v.DB.Conn().QueryRowContext(v.ctx, query).Scan(&blobCount); err == nil {
|
||||
fmt.Printf("Blobs: %d\n", blobCount)
|
||||
v.printfStdout("Blobs: %d\n", blobCount)
|
||||
}
|
||||
|
||||
// Get file count from database
|
||||
query = `SELECT COUNT(*) FROM files`
|
||||
var fileCount int
|
||||
if err := v.DB.Conn().QueryRowContext(v.ctx, query).Scan(&fileCount); err == nil {
|
||||
fmt.Printf("Files: %d\n", fileCount)
|
||||
v.printfStdout("Files: %d\n", fileCount)
|
||||
}
|
||||
} else {
|
||||
fmt.Printf("Index Size: (not created)\n")
|
||||
v.printfStdout("Index Size: (not created)\n")
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// SnapshotMetadataInfo contains information about a single snapshot's metadata.
type SnapshotMetadataInfo struct {
	// SnapshotID identifies the snapshot this entry describes.
	SnapshotID string `json:"snapshot_id"`
	// ManifestSize is the stored size of the snapshot's manifest object.
	ManifestSize int64 `json:"manifest_size"`
	// DatabaseSize is the stored size of the snapshot's database object.
	DatabaseSize int64 `json:"database_size"`
	// TotalSize is ManifestSize plus DatabaseSize.
	TotalSize int64 `json:"total_size"`
	// BlobCount is the blob count recorded in the snapshot's manifest.
	BlobCount int `json:"blob_count"`
	// BlobsSize is the sum of the compressed sizes of the manifest's blobs.
	BlobsSize int64 `json:"blobs_size"`
}
|
||||
|
||||
// RemoteInfoResult contains all remote storage information
|
||||
type RemoteInfoResult struct {
|
||||
// Storage info
|
||||
StorageType string `json:"storage_type"`
|
||||
StorageLocation string `json:"storage_location"`
|
||||
|
||||
// Snapshot metadata
|
||||
Snapshots []SnapshotMetadataInfo `json:"snapshots"`
|
||||
TotalMetadataSize int64 `json:"total_metadata_size"`
|
||||
TotalMetadataCount int `json:"total_metadata_count"`
|
||||
|
||||
// All blobs on remote
|
||||
TotalBlobCount int `json:"total_blob_count"`
|
||||
TotalBlobSize int64 `json:"total_blob_size"`
|
||||
|
||||
// Referenced blobs (from manifests)
|
||||
ReferencedBlobCount int `json:"referenced_blob_count"`
|
||||
ReferencedBlobSize int64 `json:"referenced_blob_size"`
|
||||
|
||||
// Orphaned blobs
|
||||
OrphanedBlobCount int `json:"orphaned_blob_count"`
|
||||
OrphanedBlobSize int64 `json:"orphaned_blob_size"`
|
||||
}
|
||||
|
||||
// RemoteInfo displays information about remote storage
|
||||
func (v *Vaultik) RemoteInfo(jsonOutput bool) error {
|
||||
log.Info("Starting remote storage info gathering")
|
||||
result := &RemoteInfoResult{}
|
||||
|
||||
storageInfo := v.Storage.Info()
|
||||
result.StorageType = storageInfo.Type
|
||||
result.StorageLocation = storageInfo.Location
|
||||
|
||||
if !jsonOutput {
|
||||
v.printfStdout("=== Remote Storage ===\n")
|
||||
v.printfStdout("Type: %s\n", storageInfo.Type)
|
||||
v.printfStdout("Location: %s\n", storageInfo.Location)
|
||||
v.printlnStdout()
|
||||
v.printfStdout("Scanning snapshot metadata...\n")
|
||||
}
|
||||
|
||||
snapshotMetadata, snapshotIDs, err := v.collectSnapshotMetadata()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
if !jsonOutput {
|
||||
v.printfStdout("Downloading %d manifest(s)...\n", len(snapshotIDs))
|
||||
}
|
||||
|
||||
referencedBlobs := v.collectReferencedBlobsFromManifests(snapshotIDs, snapshotMetadata)
|
||||
|
||||
v.populateRemoteInfoResult(result, snapshotMetadata, snapshotIDs, referencedBlobs)
|
||||
|
||||
if err := v.scanRemoteBlobStorage(result, referencedBlobs, jsonOutput); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
log.Info("Remote info complete",
|
||||
"snapshots", result.TotalMetadataCount,
|
||||
"total_blobs", result.TotalBlobCount,
|
||||
"referenced_blobs", result.ReferencedBlobCount,
|
||||
"orphaned_blobs", result.OrphanedBlobCount)
|
||||
|
||||
if jsonOutput {
|
||||
enc := json.NewEncoder(v.Stdout)
|
||||
enc.SetIndent("", " ")
|
||||
return enc.Encode(result)
|
||||
}
|
||||
|
||||
v.printRemoteInfoTable(result)
|
||||
return nil
|
||||
}
|
||||
|
||||
// collectSnapshotMetadata scans remote metadata and returns per-snapshot info and sorted IDs
|
||||
func (v *Vaultik) collectSnapshotMetadata() (map[string]*SnapshotMetadataInfo, []string, error) {
|
||||
snapshotMetadata := make(map[string]*SnapshotMetadataInfo)
|
||||
|
||||
metadataCh := v.Storage.ListStream(v.ctx, "metadata/")
|
||||
for obj := range metadataCh {
|
||||
if obj.Err != nil {
|
||||
return nil, nil, fmt.Errorf("listing metadata: %w", obj.Err)
|
||||
}
|
||||
|
||||
parts := strings.Split(obj.Key, "/")
|
||||
if len(parts) < 3 {
|
||||
continue
|
||||
}
|
||||
snapshotID := parts[1]
|
||||
|
||||
if _, exists := snapshotMetadata[snapshotID]; !exists {
|
||||
snapshotMetadata[snapshotID] = &SnapshotMetadataInfo{SnapshotID: snapshotID}
|
||||
}
|
||||
|
||||
info := snapshotMetadata[snapshotID]
|
||||
filename := parts[2]
|
||||
if strings.HasPrefix(filename, "manifest") {
|
||||
info.ManifestSize = obj.Size
|
||||
} else if strings.HasPrefix(filename, "db") {
|
||||
info.DatabaseSize = obj.Size
|
||||
}
|
||||
info.TotalSize = info.ManifestSize + info.DatabaseSize
|
||||
}
|
||||
|
||||
var snapshotIDs []string
|
||||
for id := range snapshotMetadata {
|
||||
snapshotIDs = append(snapshotIDs, id)
|
||||
}
|
||||
sort.Strings(snapshotIDs)
|
||||
|
||||
return snapshotMetadata, snapshotIDs, nil
|
||||
}
|
||||
|
||||
// collectReferencedBlobsFromManifests downloads manifests and returns referenced blob hashes with sizes
|
||||
func (v *Vaultik) collectReferencedBlobsFromManifests(snapshotIDs []string, snapshotMetadata map[string]*SnapshotMetadataInfo) map[string]int64 {
|
||||
referencedBlobs := make(map[string]int64)
|
||||
|
||||
for _, snapshotID := range snapshotIDs {
|
||||
manifestKey := fmt.Sprintf("metadata/%s/manifest.json.zst", snapshotID)
|
||||
reader, err := v.Storage.Get(v.ctx, manifestKey)
|
||||
if err != nil {
|
||||
log.Warn("Failed to get manifest", "snapshot", snapshotID, "error", err)
|
||||
continue
|
||||
}
|
||||
|
||||
manifest, err := snapshot.DecodeManifest(reader)
|
||||
_ = reader.Close()
|
||||
if err != nil {
|
||||
log.Warn("Failed to decode manifest", "snapshot", snapshotID, "error", err)
|
||||
continue
|
||||
}
|
||||
|
||||
info := snapshotMetadata[snapshotID]
|
||||
info.BlobCount = manifest.BlobCount
|
||||
var blobsSize int64
|
||||
for _, blob := range manifest.Blobs {
|
||||
referencedBlobs[blob.Hash] = blob.CompressedSize
|
||||
blobsSize += blob.CompressedSize
|
||||
}
|
||||
info.BlobsSize = blobsSize
|
||||
}
|
||||
|
||||
return referencedBlobs
|
||||
}
|
||||
|
||||
// populateRemoteInfoResult fills in the result's snapshot and referenced blob stats
|
||||
func (v *Vaultik) populateRemoteInfoResult(result *RemoteInfoResult, snapshotMetadata map[string]*SnapshotMetadataInfo, snapshotIDs []string, referencedBlobs map[string]int64) {
|
||||
var totalMetadataSize int64
|
||||
for _, id := range snapshotIDs {
|
||||
info := snapshotMetadata[id]
|
||||
result.Snapshots = append(result.Snapshots, *info)
|
||||
totalMetadataSize += info.TotalSize
|
||||
}
|
||||
result.TotalMetadataSize = totalMetadataSize
|
||||
result.TotalMetadataCount = len(snapshotIDs)
|
||||
|
||||
for _, size := range referencedBlobs {
|
||||
result.ReferencedBlobCount++
|
||||
result.ReferencedBlobSize += size
|
||||
}
|
||||
}
|
||||
|
||||
// scanRemoteBlobStorage lists all blobs on remote and computes orphan stats
|
||||
func (v *Vaultik) scanRemoteBlobStorage(result *RemoteInfoResult, referencedBlobs map[string]int64, jsonOutput bool) error {
|
||||
if !jsonOutput {
|
||||
v.printfStdout("Scanning blobs...\n")
|
||||
}
|
||||
|
||||
blobCh := v.Storage.ListStream(v.ctx, "blobs/")
|
||||
allBlobs := make(map[string]int64)
|
||||
|
||||
for obj := range blobCh {
|
||||
if obj.Err != nil {
|
||||
return fmt.Errorf("listing blobs: %w", obj.Err)
|
||||
}
|
||||
parts := strings.Split(obj.Key, "/")
|
||||
if len(parts) < 4 {
|
||||
continue
|
||||
}
|
||||
hash := parts[3]
|
||||
allBlobs[hash] = obj.Size
|
||||
result.TotalBlobCount++
|
||||
result.TotalBlobSize += obj.Size
|
||||
}
|
||||
|
||||
for hash, size := range allBlobs {
|
||||
if _, referenced := referencedBlobs[hash]; !referenced {
|
||||
result.OrphanedBlobCount++
|
||||
result.OrphanedBlobSize += size
|
||||
}
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// printRemoteInfoTable renders the human-readable remote info output
|
||||
func (v *Vaultik) printRemoteInfoTable(result *RemoteInfoResult) {
|
||||
v.printfStdout("\n=== Snapshot Metadata ===\n")
|
||||
if len(result.Snapshots) == 0 {
|
||||
v.printfStdout("No snapshots found\n")
|
||||
} else {
|
||||
v.printfStdout("%-45s %12s %12s %12s %10s %12s\n", "SNAPSHOT", "MANIFEST", "DATABASE", "TOTAL", "BLOBS", "BLOB SIZE")
|
||||
v.printfStdout("%-45s %12s %12s %12s %10s %12s\n", strings.Repeat("-", 45), strings.Repeat("-", 12), strings.Repeat("-", 12), strings.Repeat("-", 12), strings.Repeat("-", 10), strings.Repeat("-", 12))
|
||||
for _, info := range result.Snapshots {
|
||||
v.printfStdout("%-45s %12s %12s %12s %10s %12s\n",
|
||||
truncateString(info.SnapshotID, 45),
|
||||
humanize.Bytes(uint64(info.ManifestSize)),
|
||||
humanize.Bytes(uint64(info.DatabaseSize)),
|
||||
humanize.Bytes(uint64(info.TotalSize)),
|
||||
humanize.Comma(int64(info.BlobCount)),
|
||||
humanize.Bytes(uint64(info.BlobsSize)),
|
||||
)
|
||||
}
|
||||
v.printfStdout("%-45s %12s %12s %12s %10s %12s\n", strings.Repeat("-", 45), strings.Repeat("-", 12), strings.Repeat("-", 12), strings.Repeat("-", 12), strings.Repeat("-", 10), strings.Repeat("-", 12))
|
||||
v.printfStdout("%-45s %12s %12s %12s\n", fmt.Sprintf("Total (%d snapshots)", result.TotalMetadataCount), "", "", humanize.Bytes(uint64(result.TotalMetadataSize)))
|
||||
}
|
||||
|
||||
v.printfStdout("\n=== Blob Storage ===\n")
|
||||
v.printfStdout("Total blobs on remote: %s (%s)\n",
|
||||
humanize.Comma(int64(result.TotalBlobCount)), humanize.Bytes(uint64(result.TotalBlobSize)))
|
||||
v.printfStdout("Referenced by snapshots: %s (%s)\n",
|
||||
humanize.Comma(int64(result.ReferencedBlobCount)), humanize.Bytes(uint64(result.ReferencedBlobSize)))
|
||||
v.printfStdout("Orphaned (unreferenced): %s (%s)\n",
|
||||
humanize.Comma(int64(result.OrphanedBlobCount)), humanize.Bytes(uint64(result.OrphanedBlobSize)))
|
||||
|
||||
if result.OrphanedBlobCount > 0 {
|
||||
v.printfStdout("\nRun 'vaultik prune --remote' to remove orphaned blobs.\n")
|
||||
}
|
||||
}
|
||||
|
||||
// truncateString truncates a string to maxLen, adding "..." if truncated.
//
// Length is measured in runes rather than bytes so multi-byte UTF-8 characters
// are never split and the result lines up with fmt's width handling. When
// maxLen is 3 or less there is no room for the ellipsis, so the string is
// simply cut to maxLen runes. A maxLen of zero or less yields the empty
// string.
func truncateString(s string, maxLen int) string {
	if maxLen <= 0 {
		return ""
	}
	// Byte length is an upper bound on rune count, so this avoids the rune
	// conversion for the common short/ASCII case.
	if len(s) <= maxLen {
		return s
	}

	runes := []rune(s)
	if len(runes) <= maxLen {
		return s
	}
	if maxLen <= 3 {
		return string(runes[:maxLen])
	}
	return string(runes[:maxLen-3]) + "..."
}
|
||||
|
||||
@@ -5,18 +5,22 @@ import (
|
||||
"context"
|
||||
"database/sql"
|
||||
"io"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"sync"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"git.eeqj.de/sneak/vaultik/internal/config"
|
||||
"git.eeqj.de/sneak/vaultik/internal/database"
|
||||
"git.eeqj.de/sneak/vaultik/internal/log"
|
||||
"git.eeqj.de/sneak/vaultik/internal/snapshot"
|
||||
"git.eeqj.de/sneak/vaultik/internal/storage"
|
||||
"github.com/spf13/afero"
|
||||
"github.com/stretchr/testify/assert"
|
||||
"github.com/stretchr/testify/require"
|
||||
"sneak.berlin/go/vaultik/internal/config"
|
||||
"sneak.berlin/go/vaultik/internal/database"
|
||||
"sneak.berlin/go/vaultik/internal/log"
|
||||
"sneak.berlin/go/vaultik/internal/snapshot"
|
||||
"sneak.berlin/go/vaultik/internal/storage"
|
||||
"sneak.berlin/go/vaultik/internal/types"
|
||||
"sneak.berlin/go/vaultik/internal/vaultik"
|
||||
)
|
||||
|
||||
// MockStorer implements storage.Storer for testing
|
||||
@@ -184,7 +188,11 @@ func TestEndToEndBackup(t *testing.T) {
|
||||
|
||||
// Create test configuration
|
||||
cfg := &config.Config{
|
||||
SourceDirs: []string{"/home/user"},
|
||||
Snapshots: map[string]config.SnapshotConfig{
|
||||
"test": {
|
||||
Paths: []string{"/home/user"},
|
||||
},
|
||||
},
|
||||
Exclude: []string{"*.tmp", "*.log"},
|
||||
ChunkSize: config.Size(16 * 1024), // 16KB chunks
|
||||
BlobSizeLimit: config.Size(100 * 1024), // 100KB blobs
|
||||
@@ -232,7 +240,7 @@ func TestEndToEndBackup(t *testing.T) {
|
||||
snapshotID := "test-snapshot-001"
|
||||
err = repos.WithTx(ctx, func(ctx context.Context, tx *sql.Tx) error {
|
||||
snapshot := &database.Snapshot{
|
||||
ID: snapshotID,
|
||||
ID: types.SnapshotID(snapshotID),
|
||||
Hostname: "test-host",
|
||||
VaultikVersion: "test-version",
|
||||
StartedAt: time.Now(),
|
||||
@@ -352,7 +360,7 @@ func TestBackupAndVerify(t *testing.T) {
|
||||
snapshotID := "test-snapshot-001"
|
||||
err = repos.WithTx(ctx, func(ctx context.Context, tx *sql.Tx) error {
|
||||
snapshot := &database.Snapshot{
|
||||
ID: snapshotID,
|
||||
ID: types.SnapshotID(snapshotID),
|
||||
Hostname: "test-host",
|
||||
VaultikVersion: "test-version",
|
||||
StartedAt: time.Now(),
|
||||
@@ -398,3 +406,309 @@ func TestBackupAndVerify(t *testing.T) {
|
||||
|
||||
t.Logf("Backup and verify test completed successfully")
|
||||
}
|
||||
|
||||
// TestBackupAndRestore tests the full backup and restore workflow
|
||||
// This test verifies that the restore code correctly handles the binary SQLite
|
||||
// database format that is exported by the snapshot manager.
|
||||
func TestBackupAndRestore(t *testing.T) {
|
||||
// Initialize logger
|
||||
log.Initialize(log.Config{})
|
||||
|
||||
// Create real temp directory for the database (SQLite needs real filesystem)
|
||||
realTempDir, err := os.MkdirTemp("", "vaultik-test-")
|
||||
require.NoError(t, err)
|
||||
defer func() { _ = os.RemoveAll(realTempDir) }()
|
||||
|
||||
// Use real OS filesystem for this test
|
||||
fs := afero.NewOsFs()
|
||||
|
||||
// Create test directory structure and files
|
||||
dataDir := filepath.Join(realTempDir, "data")
|
||||
testFiles := map[string]string{
|
||||
filepath.Join(dataDir, "file1.txt"): "This is file 1 content",
|
||||
filepath.Join(dataDir, "file2.txt"): "This is file 2 content with more data",
|
||||
filepath.Join(dataDir, "subdir", "file3.txt"): "This is file 3 in a subdirectory",
|
||||
}
|
||||
|
||||
// Create directories and files
|
||||
for path, content := range testFiles {
|
||||
dir := filepath.Dir(path)
|
||||
if err := fs.MkdirAll(dir, 0755); err != nil {
|
||||
t.Fatalf("failed to create directory %s: %v", dir, err)
|
||||
}
|
||||
if err := afero.WriteFile(fs, path, []byte(content), 0644); err != nil {
|
||||
t.Fatalf("failed to create test file %s: %v", path, err)
|
||||
}
|
||||
}
|
||||
|
||||
ctx := context.Background()
|
||||
|
||||
// Create mock storage
|
||||
mockStorage := NewMockStorer()
|
||||
|
||||
// Test keypair
|
||||
agePublicKey := "age1ezrjmfpwsc95svdg0y54mums3zevgzu0x0ecq2f7tp8a05gl0sjq9q9wjg"
|
||||
ageSecretKey := "AGE-SECRET-KEY-19CR5YSFW59HM4TLD6GXVEDMZFTVVF7PPHKUT68TXSFPK7APHXA2QS2NJA5"
|
||||
|
||||
// Create database file
|
||||
dbPath := filepath.Join(realTempDir, "test.db")
|
||||
db, err := database.New(ctx, dbPath)
|
||||
require.NoError(t, err)
|
||||
defer func() { _ = db.Close() }()
|
||||
|
||||
repos := database.NewRepositories(db)
|
||||
|
||||
// Create config for snapshot manager
|
||||
cfg := &config.Config{
|
||||
AgeSecretKey: ageSecretKey,
|
||||
AgeRecipients: []string{agePublicKey},
|
||||
CompressionLevel: 3,
|
||||
}
|
||||
|
||||
// Create snapshot manager
|
||||
sm := snapshot.NewSnapshotManager(snapshot.SnapshotManagerParams{
|
||||
Repos: repos,
|
||||
Storage: mockStorage,
|
||||
Config: cfg,
|
||||
})
|
||||
sm.SetFilesystem(fs)
|
||||
|
||||
// Create scanner
|
||||
scanner := snapshot.NewScanner(snapshot.ScannerConfig{
|
||||
FS: fs,
|
||||
Storage: mockStorage,
|
||||
ChunkSize: int64(16 * 1024),
|
||||
MaxBlobSize: int64(100 * 1024),
|
||||
CompressionLevel: 3,
|
||||
AgeRecipients: []string{agePublicKey},
|
||||
Repositories: repos,
|
||||
})
|
||||
|
||||
// Create a snapshot
|
||||
snapshotID, err := sm.CreateSnapshot(ctx, "test-host", "test-version", "test-git")
|
||||
require.NoError(t, err)
|
||||
t.Logf("Created snapshot: %s", snapshotID)
|
||||
|
||||
// Run the backup (scan)
|
||||
result, err := scanner.Scan(ctx, dataDir, snapshotID)
|
||||
require.NoError(t, err)
|
||||
t.Logf("Scan complete: %d files, %d blobs", result.FilesScanned, result.BlobsCreated)
|
||||
|
||||
// Complete the snapshot
|
||||
err = sm.CompleteSnapshot(ctx, snapshotID)
|
||||
require.NoError(t, err)
|
||||
|
||||
// Export snapshot metadata (this uploads db.zst.age and manifest.json.zst)
|
||||
err = sm.ExportSnapshotMetadata(ctx, dbPath, snapshotID)
|
||||
require.NoError(t, err)
|
||||
t.Logf("Exported snapshot metadata")
|
||||
|
||||
// Verify metadata was uploaded
|
||||
keys, err := mockStorage.List(ctx, "metadata/")
|
||||
require.NoError(t, err)
|
||||
t.Logf("Metadata keys: %v", keys)
|
||||
assert.GreaterOrEqual(t, len(keys), 2, "Should have at least db.zst.age and manifest.json.zst")
|
||||
|
||||
// Close the source database
|
||||
err = db.Close()
|
||||
require.NoError(t, err)
|
||||
|
||||
// Create Vaultik instance for restore
|
||||
vaultikApp := &vaultik.Vaultik{
|
||||
Config: cfg,
|
||||
Storage: mockStorage,
|
||||
Fs: fs,
|
||||
Stdout: io.Discard,
|
||||
Stderr: io.Discard,
|
||||
}
|
||||
vaultikApp.SetContext(ctx)
|
||||
|
||||
// Try to restore - this should work with binary SQLite format
|
||||
restoreDir := filepath.Join(realTempDir, "restored")
|
||||
err = vaultikApp.Restore(&vaultik.RestoreOptions{
|
||||
SnapshotID: snapshotID,
|
||||
TargetDir: restoreDir,
|
||||
})
|
||||
require.NoError(t, err, "Restore should succeed with binary SQLite database format")
|
||||
|
||||
// Verify restored files match originals
|
||||
for origPath, expectedContent := range testFiles {
|
||||
restoredPath := filepath.Join(restoreDir, origPath)
|
||||
restoredContent, err := afero.ReadFile(fs, restoredPath)
|
||||
require.NoError(t, err, "Should be able to read restored file: %s", restoredPath)
|
||||
assert.Equal(t, expectedContent, string(restoredContent), "Restored content should match original for: %s", origPath)
|
||||
}
|
||||
|
||||
t.Log("Backup and restore test completed successfully")
|
||||
}
|
||||
|
||||
// TestEndToEndFileStorage exercises the full backup → restore loop against the
|
||||
// real `file://` storage backend (FileStorer) on a real OS filesystem. This is
|
||||
// the closest local approximation of a production backup: encrypted blobs get
|
||||
// written to disk, the metadata SQLite database is exported through the same
|
||||
// blobgen pipeline as a real backup, and restoration reads them back through
|
||||
// the public Vaultik.Restore entrypoint. It is the canonical end-to-end smoke
|
||||
// test for 1.0.
|
||||
func TestEndToEndFileStorage(t *testing.T) {
|
||||
log.Initialize(log.Config{})
|
||||
|
||||
// Real OS filesystem (SQLite + FileStorer both need it).
|
||||
fs := afero.NewOsFs()
|
||||
tempDir, err := os.MkdirTemp("", "vaultik-e2e-")
|
||||
require.NoError(t, err)
|
||||
defer func() { _ = os.RemoveAll(tempDir) }()
|
||||
|
||||
dataDir := filepath.Join(tempDir, "source")
|
||||
storeDir := filepath.Join(tempDir, "remote")
|
||||
restoreDir := filepath.Join(tempDir, "restored")
|
||||
dbPath := filepath.Join(tempDir, "index.sqlite")
|
||||
|
||||
// Write a representative mix of file sizes:
|
||||
// - empty file
|
||||
// - tiny text file
|
||||
// - file just under chunk boundary
|
||||
// - file forcing multiple chunks
|
||||
// - nested subdirectories
|
||||
chunkSize := int64(64 * 1024)
|
||||
maxBlobSize := int64(512 * 1024)
|
||||
|
||||
testFiles := map[string][]byte{
|
||||
filepath.Join(dataDir, "empty.txt"): {},
|
||||
filepath.Join(dataDir, "small.txt"): []byte("hello vaultik"),
|
||||
filepath.Join(dataDir, "subdir", "medium.bin"): bytesPattern("medium-", int(chunkSize/2)),
|
||||
filepath.Join(dataDir, "subdir", "large.bin"): bytesPattern("large-", int(chunkSize*4)),
|
||||
filepath.Join(dataDir, "deep", "nest", "leaf.txt"): []byte("leaf"),
|
||||
}
|
||||
|
||||
for path, content := range testFiles {
|
||||
require.NoError(t, fs.MkdirAll(filepath.Dir(path), 0o755))
|
||||
require.NoError(t, afero.WriteFile(fs, path, content, 0o644))
|
||||
}
|
||||
|
||||
// Create a file with non-default permissions.
|
||||
restrictedPath := filepath.Join(dataDir, "restricted.txt")
|
||||
require.NoError(t, afero.WriteFile(fs, restrictedPath, []byte("secret"), 0o600))
|
||||
testFiles[restrictedPath] = []byte("secret")
|
||||
|
||||
// Create an empty directory (should survive round-trip).
|
||||
emptyDir := filepath.Join(dataDir, "emptydir")
|
||||
require.NoError(t, fs.MkdirAll(emptyDir, 0o755))
|
||||
|
||||
// Create a symlink.
|
||||
symlinkPath := filepath.Join(dataDir, "link-to-small")
|
||||
require.NoError(t, os.Symlink("small.txt", symlinkPath))
|
||||
|
||||
// FileStorer is the real-world local-disk backend.
|
||||
storer, err := storage.NewFileStorer(storeDir)
|
||||
require.NoError(t, err)
|
||||
|
||||
agePublicKey := "age1ezrjmfpwsc95svdg0y54mums3zevgzu0x0ecq2f7tp8a05gl0sjq9q9wjg"
|
||||
ageSecretKey := "AGE-SECRET-KEY-19CR5YSFW59HM4TLD6GXVEDMZFTVVF7PPHKUT68TXSFPK7APHXA2QS2NJA5"
|
||||
|
||||
cfg := &config.Config{
|
||||
AgeRecipients: []string{agePublicKey},
|
||||
AgeSecretKey: ageSecretKey,
|
||||
CompressionLevel: 3,
|
||||
Hostname: "test-host",
|
||||
}
|
||||
|
||||
ctx := context.Background()
|
||||
|
||||
db, err := database.New(ctx, dbPath)
|
||||
require.NoError(t, err)
|
||||
defer func() { _ = db.Close() }()
|
||||
|
||||
repos := database.NewRepositories(db)
|
||||
|
||||
sm := snapshot.NewSnapshotManager(snapshot.SnapshotManagerParams{
|
||||
Repos: repos,
|
||||
Storage: storer,
|
||||
Config: cfg,
|
||||
})
|
||||
sm.SetFilesystem(fs)
|
||||
|
||||
scanner := snapshot.NewScanner(snapshot.ScannerConfig{
|
||||
FS: fs,
|
||||
Storage: storer,
|
||||
ChunkSize: chunkSize,
|
||||
MaxBlobSize: maxBlobSize,
|
||||
CompressionLevel: cfg.CompressionLevel,
|
||||
AgeRecipients: cfg.AgeRecipients,
|
||||
Repositories: repos,
|
||||
})
|
||||
|
||||
snapshotID, err := sm.CreateSnapshotWithName(ctx, cfg.Hostname, "e2e", "test-version", "test-git")
|
||||
require.NoError(t, err)
|
||||
|
||||
scanResult, err := scanner.Scan(ctx, dataDir, snapshotID)
|
||||
require.NoError(t, err)
|
||||
require.Greater(t, scanResult.FilesScanned, 0)
|
||||
require.Greater(t, scanResult.BlobsCreated, 0)
|
||||
|
||||
require.NoError(t, sm.CompleteSnapshot(ctx, snapshotID))
|
||||
require.NoError(t, sm.ExportSnapshotMetadata(ctx, dbPath, snapshotID))
|
||||
|
||||
// Verify the backup actually landed on disk under blobs/ and metadata/.
|
||||
blobInfo, err := os.Stat(filepath.Join(storeDir, "blobs"))
|
||||
require.NoError(t, err)
|
||||
require.True(t, blobInfo.IsDir())
|
||||
metaInfo, err := os.Stat(filepath.Join(storeDir, "metadata", snapshotID))
|
||||
require.NoError(t, err)
|
||||
require.True(t, metaInfo.IsDir())
|
||||
|
||||
// Tear down the source DB before restore — restore must work using only
|
||||
// the remote bytes plus the secret key, with no help from the local index.
|
||||
require.NoError(t, db.Close())
|
||||
|
||||
restoreVaultik := &vaultik.Vaultik{
|
||||
Config: cfg,
|
||||
Storage: storer,
|
||||
Fs: fs,
|
||||
Stdout: io.Discard,
|
||||
Stderr: io.Discard,
|
||||
}
|
||||
restoreVaultik.SetContext(ctx)
|
||||
|
||||
require.NoError(t, restoreVaultik.Restore(&vaultik.RestoreOptions{
|
||||
SnapshotID: snapshotID,
|
||||
TargetDir: restoreDir,
|
||||
Verify: true,
|
||||
}))
|
||||
|
||||
// Byte-equality compare every original against its restored copy.
|
||||
for origPath, expected := range testFiles {
|
||||
restoredPath := filepath.Join(restoreDir, origPath)
|
||||
got, err := afero.ReadFile(fs, restoredPath)
|
||||
require.NoError(t, err, "restored file missing: %s", restoredPath)
|
||||
require.Equalf(t, expected, got, "byte-equality failed for %s", origPath)
|
||||
}
|
||||
|
||||
// Verify the restricted file kept its permissions.
|
||||
restoredRestricted := filepath.Join(restoreDir, restrictedPath)
|
||||
rInfo, err := os.Stat(restoredRestricted)
|
||||
require.NoError(t, err)
|
||||
assert.Equal(t, os.FileMode(0o600), rInfo.Mode().Perm(),
|
||||
"restricted file should preserve 0600 permissions")
|
||||
|
||||
// Verify the empty directory was restored.
|
||||
restoredEmptyDir := filepath.Join(restoreDir, emptyDir)
|
||||
dInfo, err := os.Stat(restoredEmptyDir)
|
||||
require.NoError(t, err, "empty directory should be restored")
|
||||
assert.True(t, dInfo.IsDir(), "emptydir should be a directory")
|
||||
|
||||
// Verify the symlink was restored with the correct target.
|
||||
restoredSymlink := filepath.Join(restoreDir, symlinkPath)
|
||||
target, err := os.Readlink(restoredSymlink)
|
||||
require.NoError(t, err, "symlink should be restored")
|
||||
assert.Equal(t, "small.txt", target, "symlink target should be preserved")
|
||||
}
|
||||
|
||||
// bytesPattern returns a deterministic byte slice of length n whose i-th byte
// is tag[i%len(tag)] XORed with the low 8 bits of i. It is useful for forcing
// chunker behavior with reproducible content.
//
// An empty tag contributes nothing to the pattern (byte i is then just the
// low 8 bits of i) instead of panicking with an integer divide by zero, and
// a non-positive n yields an empty, non-nil slice.
func bytesPattern(tag string, n int) []byte {
	if n <= 0 {
		return []byte{}
	}

	out := make([]byte, n)
	for i := range out {
		// Converting to byte keeps only the low 8 bits of the index.
		b := byte(i)
		if len(tag) > 0 {
			b ^= tag[i%len(tag)]
		}
		out[i] = b
	}
	return out
}
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user