1 Commits

Author SHA1 Message Date
clawbot
332ea26bce fix: use deleteSnapshotFromLocalDB in syncWithRemote
All checks were successful
check / check (pull_request) Successful in 2m30s
syncWithRemote was calling v.Repositories.Snapshots.Delete() directly,
which only removes the snapshots row. This leaves orphaned rows in
snapshot_files, snapshot_blobs, and uploads tables.

Replace with deleteSnapshotFromLocalDB() which properly cleans up all
related tables before deleting the snapshot record.

closes #10
2026-03-17 13:42:23 -07:00
58 changed files with 2704 additions and 3622 deletions

2
.gitignore vendored
View File

@@ -1,5 +1,5 @@
# Binary # Binary
/vaultik vaultik
# Test artifacts # Test artifacts
*.out *.out

View File

@@ -1,55 +0,0 @@
version: 2
project_name: vaultik
before:
hooks:
- go mod tidy
builds:
- id: vaultik
main: ./cmd/vaultik
binary: vaultik
env:
- CGO_ENABLED=0
goos:
- linux
- darwin
goarch:
- amd64
- arm64
ldflags:
- -s -w
- -X 'git.eeqj.de/sneak/vaultik/internal/globals.Version={{ .Version }}'
- -X 'git.eeqj.de/sneak/vaultik/internal/globals.Commit={{ .Commit }}'
archives:
- id: default
name_template: "{{ .ProjectName }}_{{ .Version }}_{{ .Os }}_{{ .Arch }}"
formats:
- tar.gz
files:
- LICENSE
- README.md
checksum:
name_template: "checksums.txt"
algorithm: sha256
snapshot:
version_template: "{{ incpatch .Version }}-next"
changelog:
sort: asc
use: git
filters:
exclude:
- "^docs:"
- "^test:"
- "^chore:"
- "Merge pull request"
- "Merge branch"
release:
draft: true
prerelease: auto

View File

@@ -38,9 +38,10 @@ Version: 2025-06-08
1. Before committing, tests must pass (`make test`), linting must pass 1. Before committing, tests must pass (`make test`), linting must pass
(`make lint`), and code must be formatted (`make fmt`). For go, those (`make lint`), and code must be formatted (`make fmt`). For go, those
makefile targets should use `go fmt` and `go test -v ./...` and makefile targets should use `go fmt` and `go test -v ./...` and
`golangci-lint run`. Each Makefile target does exactly one thing — to `golangci-lint run`. When you think your changes are complete, rather
run lint + fmt-check + test together (the standard pre-commit gate), than making three different tool calls to check, you can just run `make
use `make check`. test && make fmt && make lint` as a single tool call which will save
time.
2. Always write a `Makefile` with the default target being `test`, and with 2. Always write a `Makefile` with the default target being `test`, and with
a `fmt` target that formats the code. The `test` target should run all a `fmt` target that formats the code. The `test` target should run all
@@ -102,9 +103,3 @@ Version: 2025-06-08
build files are acceptable in the root, but source code and other files build files are acceptable in the root, but source code and other files
should be organized in appropriate subdirectories. should be organized in appropriate subdirectories.
13. Pre-1.0: NEVER write database migrations. There are no live databases
anywhere — every user's local index can be rebuilt from a fresh full
backup. When the schema changes, just change `schema.sql` (and any code
that touches the affected tables). The local index is disposable until
1.0 ships and is tagged.

View File

@@ -53,8 +53,8 @@ The database tracks five primary entities and their relationships:
### Entity Descriptions ### Entity Descriptions
#### File (`database.File`) #### File (`database.File`)
Represents a file, directory, or symlink in the backup system. Stores metadata needed for restoration: Represents a file or directory in the backup system. Stores metadata needed for restoration:
- Path, source_path (for restore path stripping), mtime - Path, timestamps (mtime, ctime)
- Size, mode, ownership (uid, gid) - Size, mode, ownership (uid, gid)
- Symlink target (if applicable) - Symlink target (if applicable)
@@ -95,7 +95,7 @@ Maps chunks to their position within blobs:
#### Snapshot (`database.Snapshot`) #### Snapshot (`database.Snapshot`)
Represents a point-in-time backup: Represents a point-in-time backup:
- `ID`: Format is `{hostname}_{snapshot-name}_{RFC3339}` (e.g. `server1_home_2025-06-01T12:00:00Z`) - `ID`: Format is `{hostname}-{YYYYMMDD}-{HHMMSS}Z`
- Tracks file count, chunk count, blob count, sizes, compression ratio - Tracks file count, chunk count, blob count, sizes, compression ratio
- `CompletedAt`: Null until snapshot finishes successfully - `CompletedAt`: Null until snapshot finishes successfully
@@ -127,7 +127,7 @@ fx.New(
config.Module, // 5. Config config.Module, // 5. Config
database.Module, // 6. Database + Repositories database.Module, // 6. Database + Repositories
log.Module, // 7. Logger initialization log.Module, // 7. Logger initialization
storage.Module, // 8. Storage backend (S3/file/rclone) s3.Module, // 8. S3 client
snapshot.Module, // 9. SnapshotManager + ScannerFactory snapshot.Module, // 9. SnapshotManager + ScannerFactory
fx.Provide(vaultik.New), // 10. Vaultik orchestrator fx.Provide(vaultik.New), // 10. Vaultik orchestrator
) )
@@ -161,7 +161,7 @@ type Vaultik struct {
Config *config.Config Config *config.Config
DB *database.DB DB *database.DB
Repositories *database.Repositories Repositories *database.Repositories
Storage storage.Storer S3Client *s3.Client
ScannerFactory snapshot.ScannerFactory ScannerFactory snapshot.ScannerFactory
SnapshotManager *snapshot.SnapshotManager SnapshotManager *snapshot.SnapshotManager
Shutdowner fx.Shutdowner Shutdowner fx.Shutdowner
@@ -341,11 +341,12 @@ CreateSnapshot(opts)
└─► SnapshotManager.ExportSnapshotMetadata() └─► SnapshotManager.ExportSnapshotMetadata()
├─► Copy database to temp file ├─► Copy database to temp file
├─► Clean to only current snapshot data (VACUUM) ├─► Clean to only current snapshot data
├─► Compress binary SQLite with zstd ├─► Dump to SQL
├─► Compress with zstd
├─► Encrypt with age ├─► Encrypt with age
├─► Upload db.zst.age to storage ├─► Upload db.zst.age to S3
└─► Upload manifest.json.zst to storage └─► Upload manifest.json.zst to S3
``` ```
## Deduplication Strategy ## Deduplication Strategy
@@ -367,8 +368,8 @@ bucket/
└── metadata/ └── metadata/
└── {snapshot-id}/ └── {snapshot-id}/
├── db.zst.age # Encrypted binary SQLite database ├── db.zst.age # Encrypted database dump
└── manifest.json.zst # Blob list (for pruning/verification) └── manifest.json.zst # Blob list (for verification)
``` ```
## Thread Safety ## Thread Safety

View File

@@ -1,7 +1,7 @@
.PHONY: all check test lint fmt fmt-check build clean deps test-coverage test-integration local install release release-snapshot docker hooks .PHONY: test fmt lint fmt-check check build clean all docker hooks
# Version number # Version number
VERSION := 1.0.0-rc.1 VERSION := 0.0.1
# Build variables # Build variables
GIT_REVISION := $(shell git rev-parse HEAD 2>/dev/null || echo "unknown") GIT_REVISION := $(shell git rev-parse HEAD 2>/dev/null || echo "unknown")
@@ -13,45 +13,37 @@ LDFLAGS := -X 'git.eeqj.de/sneak/vaultik/internal/globals.Version=$(VERSION)' \
# Default target # Default target
all: vaultik all: vaultik
# Combined pre-commit/CI gate: lint, format check, then tests. # Run tests
check: lint fmt-check test
# Run tests only.
test: test:
go test -race -timeout 30s ./... go test -race -timeout 30s ./...
# Check if code is formatted (read-only). # Check if code is formatted (read-only)
fmt-check: fmt-check:
@test -z "$$(gofmt -l .)" || (echo "Files not formatted:" && gofmt -l . && exit 1) @test -z "$$(gofmt -l .)" || (echo "Files not formatted:" && gofmt -l . && exit 1)
# Format code. # Format code
fmt: fmt:
go fmt ./... go fmt ./...
# Run linter only. # Run linter
lint: lint:
golangci-lint run ./... golangci-lint run ./...
# Build binary. # Build binary
vaultik: internal/*/*.go cmd/vaultik/*.go vaultik: internal/*/*.go cmd/vaultik/*.go
go build -ldflags "$(LDFLAGS)" -o $@ ./cmd/vaultik go build -ldflags "$(LDFLAGS)" -o $@ ./cmd/vaultik
# Clean build artifacts. # Clean build artifacts
clean: clean:
rm -f vaultik rm -f vaultik
go clean go clean
# Install dependencies. # Run tests with coverage
deps:
go mod download
go install github.com/golangci/golangci-lint/cmd/golangci-lint@latest
# Run tests with coverage.
test-coverage: test-coverage:
go test -v -coverprofile=coverage.out ./... go test -v -coverprofile=coverage.out ./...
go tool cover -html=coverage.out -o coverage.html go tool cover -html=coverage.out -o coverage.html
# Run integration tests. # Run integration tests
test-integration: test-integration:
go test -v -tags=integration ./... go test -v -tags=integration ./...
@@ -62,19 +54,14 @@ local:
install: vaultik install: vaultik
cp ./vaultik $(HOME)/bin/ cp ./vaultik $(HOME)/bin/
# Build and publish release artifacts (linux/darwin × amd64/arm64) via goreleaser. # Run all checks (formatting, linting, tests) without modifying files
release: check: fmt-check lint test
goreleaser release --clean
# Dry-run a release build without publishing or tagging. # Build Docker image
release-snapshot:
goreleaser release --clean --snapshot
# Build Docker image.
docker: docker:
docker build -t vaultik . docker build -t vaultik .
# Install pre-commit hook. # Install pre-commit hook
hooks: hooks:
@printf '#!/bin/sh\nset -e\n' > .git/hooks/pre-commit @printf '#!/bin/sh\nset -e\n' > .git/hooks/pre-commit
@printf 'go mod tidy\ngo fmt ./...\ngit diff --exit-code -- go.mod go.sum || { echo "go mod tidy changed files; please stage and retry"; exit 1; }\n' >> .git/hooks/pre-commit @printf 'go mod tidy\ngo fmt ./...\ngit diff --exit-code -- go.mod go.sum || { echo "go mod tidy changed files; please stage and retry"; exit 1; }\n' >> .git/hooks/pre-commit

556
PROCESS.md Normal file
View File

@@ -0,0 +1,556 @@
# Vaultik Snapshot Creation Process
This document describes the lifecycle of objects during snapshot creation, with a focus on database transactions and foreign key constraints.
## Database Schema Overview
### Tables and Foreign Key Dependencies
```
┌─────────────────────────────────────────────────────────────────────────┐
│ FOREIGN KEY GRAPH │
│ │
│ snapshots ◄────── snapshot_files ────────► files │
│ │ │ │
│ └───────── snapshot_blobs ────────► blobs │ │
│ │ │ │
│ │ ├──► file_chunks ◄── chunks│
│ │ │ ▲ │
│ │ └──► chunk_files ────┘ │
│ │ │
│ └──► blob_chunks ─────────────┘│
│ │
│ uploads ───────► blobs.blob_hash │
│ └──────────► snapshots.id │
└─────────────────────────────────────────────────────────────────────────┘
```
### Critical Constraint: `chunks` Must Exist First
These tables reference `chunks.chunk_hash` **without CASCADE**:
- `file_chunks.chunk_hash` → `chunks.chunk_hash`
- `chunk_files.chunk_hash` → `chunks.chunk_hash`
- `blob_chunks.chunk_hash` → `chunks.chunk_hash`
**Implication**: A chunk record MUST be committed to the database BEFORE any of these referencing records can be created.
### Order of Operations Required by Schema
```
1. snapshots (created first, before scan)
2. blobs (created when packer starts new blob)
3. chunks (created during file processing)
4. blob_chunks (created immediately after chunk added to packer)
5. files (created after file fully chunked)
6. file_chunks (created with file record)
7. chunk_files (created with file record)
8. snapshot_files (created with file record)
9. snapshot_blobs (created after blob uploaded)
10. uploads (created after blob uploaded)
```
---
## Snapshot Creation Phases
### Phase 0: Initialization
**Actions:**
1. Snapshot record created in database (Transaction T0)
2. Known files loaded into memory from `files` table
3. Known chunks loaded into memory from `chunks` table
**Transactions:**
```
T0: INSERT INTO snapshots (id, hostname, ...) VALUES (...)
COMMIT
```
---
### Phase 1: Scan Directory
**Actions:**
1. Walk filesystem directory tree
2. For each file, compare against in-memory `knownFiles` map
3. Classify files as: unchanged, new, or modified
4. Collect unchanged file IDs for later association
5. Collect new/modified files for processing
**Transactions:**
```
(None during scan - all in-memory)
```
---
### Phase 1b: Associate Unchanged Files
**Actions:**
1. For unchanged files, add entries to `snapshot_files` table
2. Done in batches of 1000
**Transactions:**
```
For each batch of 1000 file IDs:
T: BEGIN
INSERT INTO snapshot_files (snapshot_id, file_id) VALUES (?, ?)
... (up to 1000 inserts)
COMMIT
```
---
### Phase 2: Process Files
For each file that needs processing:
#### Step 2a: Open and Chunk File
**Location:** `processFileStreaming()`
For each chunk produced by content-defined chunking:
##### Step 2a-1: Check Chunk Existence
```go
chunkExists := s.chunkExists(chunk.Hash) // In-memory lookup
```
##### Step 2a-2: Create Chunk Record (if new)
```go
// TRANSACTION: Create chunk in database
err := s.repos.WithTx(ctx, func(txCtx context.Context, tx *sql.Tx) error {
dbChunk := &database.Chunk{ChunkHash: chunk.Hash, Size: chunk.Size}
return s.repos.Chunks.Create(txCtx, tx, dbChunk)
})
// COMMIT immediately after WithTx returns
// Update in-memory cache
s.addKnownChunk(chunk.Hash)
```
**Transaction:**
```
T_chunk: BEGIN
INSERT INTO chunks (chunk_hash, size) VALUES (?, ?)
COMMIT
```
##### Step 2a-3: Add Chunk to Packer
```go
s.packer.AddChunk(&blob.ChunkRef{Hash: chunk.Hash, Data: chunk.Data})
```
**Inside packer.AddChunk → addChunkToCurrentBlob():**
```go
// TRANSACTION: Create blob_chunks record IMMEDIATELY
if p.repos != nil {
blobChunk := &database.BlobChunk{
BlobID: p.currentBlob.id,
ChunkHash: chunk.Hash,
Offset: offset,
Length: chunkSize,
}
err := p.repos.WithTx(context.Background(), func(ctx context.Context, tx *sql.Tx) error {
return p.repos.BlobChunks.Create(ctx, tx, blobChunk)
})
// COMMIT immediately
}
```
**Transaction:**
```
T_blob_chunk: BEGIN
INSERT INTO blob_chunks (blob_id, chunk_hash, offset, length) VALUES (?, ?, ?, ?)
COMMIT
```
**⚠️ CRITICAL DEPENDENCY**: This transaction requires `chunks.chunk_hash` to exist (FK constraint).
The chunk MUST be committed in Step 2a-2 BEFORE this can succeed.
---
#### Step 2b: Blob Size Limit Handling
If adding a chunk would exceed blob size limit:
```go
if err == blob.ErrBlobSizeLimitExceeded {
if err := s.packer.FinalizeBlob(); err != nil { ... }
// Retry adding the chunk
if err := s.packer.AddChunk(...); err != nil { ... }
}
```
**FinalizeBlob() transactions:**
```
T_blob_finish: BEGIN
UPDATE blobs SET blob_hash=?, uncompressed_size=?, compressed_size=?, finished_ts=? WHERE id=?
COMMIT
```
Then blob handler is called (handleBlobReady):
```
(Upload to S3 - no transaction)
T_blob_uploaded: BEGIN
UPDATE blobs SET uploaded_ts=? WHERE id=?
INSERT INTO snapshot_blobs (snapshot_id, blob_id, blob_hash) VALUES (?, ?, ?)
INSERT INTO uploads (blob_hash, snapshot_id, uploaded_at, size, duration_ms) VALUES (?, ?, ?, ?, ?)
COMMIT
```
---
#### Step 2c: Queue File for Batch Insertion
After all chunks for a file are processed:
```go
// Build file data (in-memory, no DB)
fileChunks := make([]database.FileChunk, len(chunks))
chunkFiles := make([]database.ChunkFile, len(chunks))
// Queue for batch insertion
return s.addPendingFile(ctx, pendingFileData{
file: fileToProcess.File,
fileChunks: fileChunks,
chunkFiles: chunkFiles,
})
```
**No transaction yet** - just adds to `pendingFiles` slice.
If `len(pendingFiles) >= fileBatchSize` (where `fileBatchSize` is 100), this triggers `flushPendingFiles()`.
---
#### Step 2d: Flush Pending Files
**Location:** `flushPendingFiles()` - called when batch is full or at end of processing
```go
return s.repos.WithTx(ctx, func(txCtx context.Context, tx *sql.Tx) error {
for _, data := range files {
// 1. Create file record
s.repos.Files.Create(txCtx, tx, data.file) // INSERT OR REPLACE
// 2. Delete old associations
s.repos.FileChunks.DeleteByFileID(txCtx, tx, data.file.ID)
s.repos.ChunkFiles.DeleteByFileID(txCtx, tx, data.file.ID)
// 3. Create file_chunks records
for _, fc := range data.fileChunks {
s.repos.FileChunks.Create(txCtx, tx, &fc) // FK: chunks.chunk_hash
}
// 4. Create chunk_files records
for _, cf := range data.chunkFiles {
s.repos.ChunkFiles.Create(txCtx, tx, &cf) // FK: chunks.chunk_hash
}
// 5. Add file to snapshot
s.repos.Snapshots.AddFileByID(txCtx, tx, s.snapshotID, data.file.ID)
}
return nil
})
// COMMIT (all or nothing for the batch)
```
**Transaction:**
```
T_files_batch: BEGIN
-- For each file in batch:
INSERT OR REPLACE INTO files (...) VALUES (...)
DELETE FROM file_chunks WHERE file_id = ?
DELETE FROM chunk_files WHERE file_id = ?
INSERT INTO file_chunks (file_id, idx, chunk_hash) VALUES (?, ?, ?) -- FK: chunks
INSERT INTO chunk_files (chunk_hash, file_id, ...) VALUES (?, ?, ...) -- FK: chunks
INSERT INTO snapshot_files (snapshot_id, file_id) VALUES (?, ?)
-- Repeat for each file
COMMIT
```
**⚠️ CRITICAL DEPENDENCY**: `file_chunks` and `chunk_files` require `chunks.chunk_hash` to exist.
---
### Phase 2 End: Final Flush
```go
// Flush any remaining pending files
if err := s.flushAllPending(ctx); err != nil { ... }
// Final packer flush
s.packer.Flush()
```
---
## The Current Bug
### Problem
The current code attempts to batch file insertions, but `file_chunks` and `chunk_files` have foreign keys to `chunks.chunk_hash`. The batched file flush tries to insert these records, but if the chunks haven't been committed yet, the FK constraint fails.
### Why It's Happening
Looking at the sequence:
1. Process file A, chunk X
2. Create chunk X in DB (Transaction commits)
3. Add chunk X to packer
4. Packer creates blob_chunks for chunk X (needs chunk X - OK, committed in step 2)
5. Queue file A with chunk references
6. Process file B, chunk Y
7. Create chunk Y in DB (Transaction commits)
8. ... etc ...
9. At end: flushPendingFiles()
10. Insert file_chunks for file A referencing chunk X (chunk X committed - should work)
The chunks ARE being created individually. But something is going wrong.
### Actual Issue
Wait - let me re-read the code. The issue is:
In `processFileStreaming`, when we queue file data:
```go
fileChunks[i] = database.FileChunk{
FileID: fileToProcess.File.ID,
Idx: ci.fileChunk.Idx,
ChunkHash: ci.fileChunk.ChunkHash,
}
```
The `FileID` is set, but `fileToProcess.File.ID` might be empty at this point because the file record hasn't been created yet!
Looking at `checkFileInMemory`:
```go
// For new files:
if !exists {
return file, true // file.ID is empty string!
}
// For existing files:
file.ID = existingFile.ID // Reuse existing ID
```
**For NEW files, `file.ID` is empty!**
Then in `flushPendingFiles`:
```go
s.repos.Files.Create(txCtx, tx, data.file) // This generates/uses the ID
```
But `data.fileChunks` was built with the EMPTY ID!
### The Real Problem
For new files:
1. `checkFileInMemory` creates file record with empty ID
2. `processFileStreaming` queues file_chunks with empty `FileID`
3. `flushPendingFiles` creates file (generates ID), but file_chunks still have empty `FileID`
Wait, but `Files.Create` should be INSERT OR REPLACE by path, and the file struct should get updated... Let me check.
Actually, looking more carefully at the code path - the file IS created first in the flush, but the `fileChunks` slice was already built with the old (possibly empty) ID. The ID isn't updated after the file is created.
Hmm, but looking at the current code:
```go
fileChunks[i] = database.FileChunk{
FileID: fileToProcess.File.ID, // This uses the ID from the File struct
```
And in `checkFileInMemory` for new files, we create a file struct but don't set the ID. However, looking at the database repository, `Files.Create` should be doing `INSERT OR REPLACE` and the ID should be pre-generated...
Let me check if IDs are being generated. Looking at the File struct usage, it seems like UUIDs should be generated somewhere...
Actually, looking at the test failures again:
```
creating file chunk: inserting file_chunk: constraint failed: FOREIGN KEY constraint failed (787)
```
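Error 787 is SQLite's extended result code `SQLITE_CONSTRAINT_FOREIGNKEY`. SQLite does not report which foreign key failed, and the object lifecycle table in this document lists `file_chunks` as depending on both `files` and `chunks`, so the failing reference is either `file_chunks.chunk_hash → chunks.chunk_hash` or the `file_chunks.file_id` reference to the `files` table.
So either the chunks ARE NOT in the database when we try to insert file_chunks, or the referenced file row does not exist. Let me trace through more carefully...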
---
## Transaction Timing Issue
The problem is transaction visibility in SQLite.
Each `WithTx` creates a new transaction that commits at the end. But with batched file insertion:
1. Chunk transactions commit one at a time
2. File batch transaction runs later
If chunks are being inserted but something goes wrong with transaction isolation, the file batch might not see them.
But actually SQLite transactions are serializable in every journal mode (WAL included), so once a transaction has committed, its rows should be visible to any transaction that starts afterwards.
Let me check if the in-memory cache is masking a database problem...
Actually, wait. Let me re-check the current broken code more carefully. The issue might be simpler.
---
## Current Code Flow Analysis
Looking at `processFileStreaming` in the current broken state:
```go
// For each chunk:
if !chunkExists {
err := s.repos.WithTx(ctx, func(txCtx context.Context, tx *sql.Tx) error {
dbChunk := &database.Chunk{ChunkHash: chunk.Hash, Size: chunk.Size}
return s.repos.Chunks.Create(txCtx, tx, dbChunk)
})
// ... check error ...
s.addKnownChunk(chunk.Hash)
}
// ... add to packer (creates blob_chunks) ...
// Collect chunk info for file
chunks = append(chunks, chunkInfo{...})
```
Then at end of function:
```go
// Queue file for batch insertion
return s.addPendingFile(ctx, pendingFileData{
file: fileToProcess.File,
fileChunks: fileChunks,
chunkFiles: chunkFiles,
})
```
At end of `processPhase`:
```go
if err := s.flushAllPending(ctx); err != nil { ... }
```
The chunks are being created one-by-one with individual transactions. By the time `flushPendingFiles` runs, all chunk transactions should have committed.
Unless... there's a bug in how the chunks are being referenced. Let me check if the chunk_hash values are correct.
Or... maybe the test database is being recreated between operations somehow?
Actually, let me check the test setup. Maybe the issue is specific to the test environment.
---
## Summary of Object Lifecycle
| Object | When Created | Transaction | Dependencies |
|--------|--------------|-------------|--------------|
| snapshot | Before scan | Individual tx | None |
| blob | When packer needs new blob | Individual tx | None |
| chunk | During file chunking (each chunk) | Individual tx | None |
| blob_chunks | Immediately after adding chunk to packer | Individual tx | chunks, blobs |
| files | Batched at end of processing | Batch tx | None |
| file_chunks | With file (batched) | Batch tx | files, chunks |
| chunk_files | With file (batched) | Batch tx | files, chunks |
| snapshot_files | With file (batched) | Batch tx | snapshots, files |
| snapshot_blobs | After blob upload | Individual tx | snapshots, blobs |
| uploads | After blob upload | Same tx as snapshot_blobs | blobs, snapshots |
---
## Root Cause Analysis
After detailed analysis, I believe the issue is one of the following:
### Hypothesis 1: File ID Not Set
Looking at `checkFileInMemory()` for NEW files:
```go
if !exists {
return file, true // file.ID is empty string!
}
```
For new files, `file.ID` is empty. Then in `processFileStreaming`:
```go
fileChunks[i] = database.FileChunk{
FileID: fileToProcess.File.ID, // Empty for new files!
...
}
```
The `FileID` in the built `fileChunks` slice is empty.
Then in `flushPendingFiles`:
```go
s.repos.Files.Create(txCtx, tx, data.file) // This generates the ID
// But data.fileChunks still has empty FileID!
for i := range data.fileChunks {
s.repos.FileChunks.Create(...) // Uses empty FileID
}
```
**Solution**: Generate file IDs upfront in `checkFileInMemory()`:
```go
file := &database.File{
ID: uuid.New().String(), // Generate ID immediately
Path: path,
...
}
```
### Hypothesis 2: Transaction Isolation
SQLite with a single-connection pool (`SetMaxOpenConns(1)`) should serialize all transactions. Committed data should be visible to subsequent transactions.
However, there might be a subtle issue with how `context.Background()` is used in the packer vs the scanner's context.
## Recommended Fix
**Step 1: Generate file IDs upfront**
In `checkFileInMemory()`, generate the UUID for new files immediately:
```go
file := &database.File{
ID: uuid.New().String(), // Always generate ID
Path: path,
...
}
```
This ensures `file.ID` is set when building `fileChunks` and `chunkFiles` slices.
**Step 2: Verify by reverting to per-file transactions**
If Step 1 doesn't fix it, revert to non-batched file insertion to isolate the issue:
```go
// Instead of queuing:
// return s.addPendingFile(ctx, pendingFileData{...})
// Do immediate insertion:
return s.repos.WithTx(ctx, func(txCtx context.Context, tx *sql.Tx) error {
// Create file
s.repos.Files.Create(txCtx, tx, fileToProcess.File)
// Delete old associations
s.repos.FileChunks.DeleteByFileID(...)
s.repos.ChunkFiles.DeleteByFileID(...)
// Create new associations
for _, fc := range fileChunks {
s.repos.FileChunks.Create(...)
}
for _, cf := range chunkFiles {
s.repos.ChunkFiles.Create(...)
}
// Add to snapshot
s.repos.Snapshots.AddFileByID(...)
return nil
})
```
**Step 3: If batching is still desired**
After confirming per-file transactions work, re-implement batching with the ID fix in place, and add debug logging to trace exactly which chunk_hash is failing and why.

545
README.md
View File

@@ -1,35 +1,43 @@
# vaultik (ваултик) # vaultik (ваултик)
`vaultik` is an incremental backup tool written in Go. It encrypts data WIP: pre-1.0, some functions may not be fully implemented yet
`vaultik` is an incremental backup daemon written in Go. It encrypts data
using an `age` public key and uploads each encrypted blob directly to a using an `age` public key and uploads each encrypted blob directly to a
remote S3-compatible object store. It requires no private keys, secrets, or remote S3-compatible object store. It requires no private keys, secrets, or
credentials (other than those required to PUT to encrypted object storage, credentials (other than those required to PUT to encrypted object storage,
such as S3 API keys) stored on the backed-up system. such as S3 API keys) stored on the backed-up system.
Features: It includes table-stakes features such as:
* modern encryption ([age](https://age-encryption.org/), X25519 + XChaCha20-Poly1305) * modern encryption (the excellent `age`)
* content-defined chunking with deduplication (FastCDC) * deduplication
* incremental backups (only changed files are re-chunked) * incremental backups
* multithreaded zstd compression at configurable levels * modern multithreaded zstd compression with configurable levels
* content-addressed immutable storage * content-addressed immutable storage
* local state tracking in SQLite (enables write-only incremental backups) * local state tracking in standard SQLite database, enables write-only
incremental backups to destination
* no mutable remote metadata * no mutable remote metadata
* no plaintext file paths or metadata in remote storage * no plaintext file paths or metadata stored in remote
* packs small files into large blobs (keeps S3 operation counts down) * does not create huge numbers of small files (to keep S3 operation counts
* backs up regular files, symlinks, empty directories, and file permissions down) even if the source system has many small files
* pluggable storage backends: S3, local filesystem, rclone (70+ providers)
* pure Go (no CGO), cross-compiles to linux/darwin × amd64/arm64
## why ## why
Existing backup software fails under one or more of these conditions:
* Requires secrets (passwords, private keys) on the source system, which
compromises encrypted backups in the case of host system compromise
* Depends on symmetric encryption unsuitable for zero-trust environments
* Creates one-blob-per-file, which results in excessive S3 operation counts
* Is slow
Other backup tools like `restic`, `borg`, and `duplicity` are designed for Other backup tools like `restic`, `borg`, and `duplicity` are designed for
environments where the source host can store secrets and has access to environments where the source host can store secrets and has access to
decryption keys. `vaultik` is for environments where you don't want to decryption keys. I don't want to store backup decryption keys on my hosts,
store backup decryption keys on your hosts — only public keys for only public keys for encryption.
encryption.
Requirements that no existing tool meets: My requirements are:
* open source * open source
* no passphrases or private keys on the source host * no passphrases or private keys on the source host
@@ -38,42 +46,99 @@ Requirements that no existing tool meets:
* encrypted * encrypted
* s3 compatible without an intermediate step or tool * s3 compatible without an intermediate step or tool
## install Surprisingly, no existing tool meets these requirements, so I wrote `vaultik`.
```sh ## design goals
go install git.eeqj.de/sneak/vaultik@latest
```
## quick start 1. Backups must require only a public key on the source host.
1. No secrets or private keys may exist on the source system.
1. Restore must be possible using **only** the backup bucket and a private key.
1. Prune must be possible (requires private key, done on different hosts).
1. All encryption uses [`age`](https://age-encryption.org/) (X25519, XChaCha20-Poly1305).
1. Compression uses `zstd` at a configurable level.
1. Files are chunked, and multiple chunks are packed into encrypted blobs
to reduce object count for filesystems with many small files.
1. All metadata (snapshots) is stored remotely as encrypted SQLite DBs.
```sh ## what
# 1. Install
go install git.eeqj.de/sneak/vaultik@latest
# 2. Generate an age keypair (store the private key somewhere safe, offline) `vaultik` walks a set of configured directories and builds a
age-keygen -o key.txt content-addressable chunk map of changed files using deterministic chunking.
# the public key is printed to stdout and also in key.txt Each chunk is streamed into a blob packer. Blobs are compressed with `zstd`,
encrypted with `age`, and uploaded directly to remote storage under a
content-addressed S3 path. At the end, a pruned snapshot-specific sqlite
database of metadata is created, encrypted, and uploaded alongside the
blobs.
# 3. Create a default config file No plaintext file contents ever hit disk. No private key or secret
vaultik init passphrase is needed or stored locally.
# Writes to the platform config directory with commented defaults:
# macOS: ~/Library/Application Support/vaultik/config.yml
# Linux: ~/.config/vaultik/config.yml
# root: /etc/vaultik/config.yml
# 4. Edit the config: set age_recipients, snapshots, and storage_url ## how
# (init prints the path it wrote to)
# 5. Run your first backup 1. **install**
vaultik snapshot create
# 6. Verify it worked ```sh
vaultik snapshot list go install git.eeqj.de/sneak/vaultik@latest
vaultik snapshot verify <snapshot-id> ```
# 7. Set up a daily cron job (keeps last 4 weeks of snapshots) 1. **generate keypair**
# 0 3 * * * vaultik snapshot create --cron --prune --keep-newer-than 4w
``` ```sh
age-keygen -o agekey.txt
grep 'public key:' agekey.txt
```
1. **write config**
```yaml
# Named snapshots - each snapshot can contain multiple paths
snapshots:
system:
paths:
- /etc
- /var/lib
exclude:
- '*.cache' # Snapshot-specific exclusions
home:
paths:
- /home/user/documents
- /home/user/photos
# Global exclusions (apply to all snapshots)
exclude:
- '*.log'
- '*.tmp'
- '.git'
- 'node_modules'
age_recipients:
- age1278m9q7dp3chsh2dcy82qk27v047zywyvtxwnj4cvt0z65jw6a7q5dqhfj
s3:
endpoint: https://s3.example.com
bucket: vaultik-data
prefix: host1/
access_key_id: ...
secret_access_key: ...
region: us-east-1
backup_interval: 1h
full_scan_interval: 24h
min_time_between_run: 15m
chunk_size: 10MB
blob_size_limit: 1GB
```
1. **run**
```sh
# Create all configured snapshots
vaultik --config /etc/vaultik.yaml snapshot create
# Create specific snapshots by name
vaultik --config /etc/vaultik.yaml snapshot create home system
# Silent mode for cron
vaultik --config /etc/vaultik.yaml snapshot create --cron
```
--- ---
@@ -82,291 +147,253 @@ vaultik snapshot verify <snapshot-id>
### commands ### commands
```sh ```sh
vaultik [--config <path>] init vaultik [--config <path>] snapshot create [snapshot-names...] [--cron] [--daemon] [--prune]
vaultik [--config <path>] snapshot create [snapshot-names...] [--cron] [--prune] [--keep-newer-than <duration>] [--skip-errors]
vaultik [--config <path>] snapshot list [--json] vaultik [--config <path>] snapshot list [--json]
vaultik [--config <path>] snapshot verify <snapshot-id> [--deep] [--json] vaultik [--config <path>] snapshot verify <snapshot-id> [--deep]
vaultik [--config <path>] snapshot purge [--keep-latest | --older-than <duration>] [--snapshot <name>...] [--force] vaultik [--config <path>] snapshot purge [--keep-latest | --older-than <duration>] [--force]
vaultik [--config <path>] snapshot remove <snapshot-id|--all> [--dry-run] [--force] [--remote] [--json] vaultik [--config <path>] snapshot remove <snapshot-id> [--dry-run] [--force]
vaultik [--config <path>] snapshot prune vaultik [--config <path>] snapshot prune
vaultik [--config <path>] snapshot cleanup vaultik [--config <path>] restore <snapshot-id> <target-dir> [paths...]
vaultik [--config <path>] restore <snapshot-id> <target-dir> [paths...] [--verify] vaultik [--config <path>] prune [--dry-run] [--force]
vaultik [--config <path>] prune [--force] [--json]
vaultik [--config <path>] info vaultik [--config <path>] info
vaultik [--config <path>] remote info [--json]
vaultik [--config <path>] store info vaultik [--config <path>] store info
vaultik [--config <path>] database purge [--force]
vaultik version
``` ```
### global flags ### environment
* `--config <path>`: Path to config file (default: `$VAULTIK_CONFIG`, then platform config dir, then `/etc/vaultik/config.yml`) * `VAULTIK_AGE_SECRET_KEY`: Required for `restore` and deep `verify`. Contains the age private key for decryption.
* `--verbose`, `-v`: Enable verbose output * `VAULTIK_CONFIG`: Optional path to config file.
* `--debug`: Enable debug output
* `--quiet`, `-q`: Suppress non-error output
### environment variables
* `VAULTIK_AGE_SECRET_KEY`: Age private key for decryption (required for `restore` and `verify --deep`)
* `VAULTIK_CONFIG`: Path to config file (overridden by `--config`)
* `VAULTIK_INDEX_PATH`: Override local SQLite index path
### command details ### command details
**init**: Write a default config file with commented explanations for every **snapshot create**: Perform incremental backup of configured snapshots
setting. Writes to the path from `--config`, `$VAULTIK_CONFIG`, or the * Config is located at `/etc/vaultik/config.yml` by default
platform config directory (`~/Library/Application Support/vaultik/` on macOS,
`~/.config/vaultik/` on Linux, `/etc/vaultik/` as root). Refuses to overwrite an
existing file. Created with mode `0600` since it will contain credentials.
**snapshot create**: Perform incremental backup of configured snapshots.
* Optional snapshot names argument to create specific snapshots (default: all) * Optional snapshot names argument to create specific snapshots (default: all)
* `--cron`: Silent unless error (for crontab) * `--cron`: Silent unless error (for crontab)
* `--prune`: After backup, drop older snapshots of each backed-up name and * `--daemon`: Run continuously with inotify monitoring and periodic scans
remove orphaned blobs from remote storage. By default keeps only the latest * `--prune`: Delete old snapshots and orphaned blobs after backup
snapshot per name; use `--keep-newer-than` for a rolling window.
* `--keep-newer-than <duration>`: With `--prune`, keep snapshots newer than
this duration instead of only the latest (e.g. `4w`, `30d`, `6mo`, `1y`)
* `--skip-errors`: Skip file read errors (log them loudly but continue)
**snapshot list**: List all snapshots with their timestamps and sizes. **snapshot list**: List all snapshots with their timestamps and sizes
* `--json`: Output in JSON format * `--json`: Output in JSON format
**snapshot verify**: Verify snapshot integrity. **snapshot verify**: Verify snapshot integrity
* Default (shallow): checks that all blobs referenced in the manifest exist in storage * `--deep`: Download and verify blob contents (not just existence)
* `--deep`: Downloads and decrypts each blob, verifies chunk hashes against the
encrypted metadata database
* `--json`: Output results as JSON
**snapshot purge**: Remove old snapshots based on criteria. Retention is **snapshot purge**: Remove old snapshots based on criteria
per-snapshot-name (`--keep-latest` keeps the latest of each name, not the * `--keep-latest`: Keep only the most recent snapshot
latest globally). * `--older-than`: Remove snapshots older than duration (e.g., 30d, 6mo, 1y)
* `--keep-latest`: Keep only the most recent snapshot of each name
* `--older-than <duration>`: Remove snapshots older than duration (e.g. `30d`, `6mo`, `1y`)
* `--snapshot <name>`: Restrict to specific snapshot names (repeat for multiple)
* `--force`: Skip confirmation prompt * `--force`: Skip confirmation prompt
**snapshot remove**: Remove a specific snapshot from the local database. **snapshot remove**: Remove a specific snapshot
* `--remote`: Also remove snapshot metadata from remote storage
* `--all`: Remove all snapshots (requires `--force`)
* `--dry-run`: Show what would be deleted without deleting * `--dry-run`: Show what would be deleted without deleting
* `--force`: Skip confirmation prompt * `--force`: Skip confirmation prompt
* `--json`: Output result as JSON
**snapshot prune**: Clean orphaned data from the local database (files, **snapshot prune**: Clean orphaned data from local database
chunks, blobs not referenced by any snapshot).
**snapshot cleanup**: Remove stale local snapshot records that have no **restore**: Restore snapshot to target directory
corresponding metadata in remote storage. These are typically left behind * Requires `VAULTIK_AGE_SECRET_KEY` environment variable with age private key
by incomplete or interrupted backups. Does not touch remote storage.
**restore**: Restore files from a backup snapshot.
* Requires `VAULTIK_AGE_SECRET_KEY` environment variable
* Optional path arguments to restore specific files/directories (default: all) * Optional path arguments to restore specific files/directories (default: all)
* Preserves file permissions, timestamps, ownership (ownership requires root), * Downloads and decrypts metadata, fetches required blobs, reconstructs files
symlinks, and empty directories * Preserves file permissions, timestamps, and ownership (ownership requires root)
* `--verify`: After restoring, verify every file's chunk hashes match * Handles symlinks and directories
**prune**: Remove unreferenced blobs from remote storage. **prune**: Remove unreferenced blobs from remote storage
* Scans all snapshot manifests for referenced blobs, deletes any blob not referenced * Scans all snapshots for referenced blobs
* `--force`: Skip confirmation prompt * Deletes orphaned blobs
* `--json`: Output stats as JSON
**info**: Display system configuration, storage settings, encryption **info**: Display system and configuration information
recipients, and local database statistics.
**remote info**: Show detailed remote storage information including per-snapshot **store info**: Display S3 bucket configuration and storage statistics
metadata sizes, blob counts, and orphaned blob detection.
* `--json`: Output as JSON
**store info**: Display storage backend type and statistics.
**database purge**: Delete the local SQLite state database entirely. Remote
storage is unaffected; the next backup will do a full scan and re-deduplicate
against existing remote blobs.
* `--force`: Skip confirmation prompt
---
## storage backends
vaultik supports three storage backends, selected via the `storage_url` config field:
**S3** (`s3://bucket/prefix?endpoint=host&region=us-east-1`): Any S3-compatible
object store. Credentials are read from `s3.access_key_id` and
`s3.secret_access_key` in the config file.
**Local filesystem** (`file:///path/to/backup`): Stores blobs and metadata on
a local or mounted filesystem. Useful for testing or backing up to a NAS.
**Rclone** (`rclone://remote/path`): Uses rclone's 70+ supported cloud
providers. Requires rclone to be configured separately (`rclone config`).
Legacy S3 configuration via `s3.*` fields (endpoint, bucket, prefix, etc.) is
still supported for backward compatibility. `storage_url` takes precedence if
both are set.
--- ---
## architecture ## architecture
### remote storage layout ### s3 bucket layout
``` ```
<bucket>/<prefix>/ s3://<bucket>/<prefix>/
├── blobs/ ├── blobs/
│ └── <aa>/<bb>/<full_blob_hash> │ └── <aa>/<bb>/<full_blob_hash>
└── metadata/ └── metadata/
└── <snapshot_id>/ └── <snapshot_id>/
├── db.zst.age # Encrypted binary SQLite database ├── db.zst.age
└── manifest.json.zst # Unencrypted blob list (for pruning) └── manifest.json.zst
``` ```
* Blobs are two-level directory sharded using the first 4 hex chars of the blob hash * `blobs/<aa>/<bb>/...`: Two-level directory sharding using first 4 hex chars of blob hash
* `db.zst.age` is a binary SQLite database (zstd compressed, age encrypted) * `metadata/<snapshot_id>/db.zst.age`: Encrypted, compressed SQLite database
containing all file metadata, chunk mappings, and relationships for the snapshot * `metadata/<snapshot_id>/manifest.json.zst`: Unencrypted blob list for pruning
* `manifest.json.zst` is an unencrypted compressed JSON blob list, enabling
pruning without the private key
Snapshot IDs follow the format `<hostname>_<snapshot-name>_<RFC3339-timestamp>` ### blob manifest format
(e.g. `server1_home_2025-06-01T12:00:00Z`).
The `manifest.json.zst` file is unencrypted (compressed JSON) to enable pruning without decryption:
```json
{
"snapshot_id": "hostname_snapshotname_2025-01-01T12:00:00Z",
"blob_hashes": [
"aa1234567890abcdef...",
"bb2345678901bcdef0..."
]
}
```
Snapshot IDs follow the format `<hostname>_<snapshot-name>_<timestamp>` (e.g., `server1_home_2025-01-01T12:00:00Z`).
### local sqlite schema
```sql
CREATE TABLE files (
id TEXT PRIMARY KEY,
path TEXT NOT NULL UNIQUE,
mtime INTEGER NOT NULL,
size INTEGER NOT NULL,
mode INTEGER NOT NULL,
uid INTEGER NOT NULL,
gid INTEGER NOT NULL
);
CREATE TABLE file_chunks (
file_id TEXT NOT NULL,
idx INTEGER NOT NULL,
chunk_hash TEXT NOT NULL,
PRIMARY KEY (file_id, idx),
FOREIGN KEY (file_id) REFERENCES files(id) ON DELETE CASCADE
);
CREATE TABLE chunks (
chunk_hash TEXT PRIMARY KEY,
size INTEGER NOT NULL
);
CREATE TABLE blobs (
id TEXT PRIMARY KEY,
blob_hash TEXT NOT NULL UNIQUE,
uncompressed INTEGER NOT NULL,
compressed INTEGER NOT NULL,
uploaded_at INTEGER
);
CREATE TABLE blob_chunks (
blob_hash TEXT NOT NULL,
chunk_hash TEXT NOT NULL,
offset INTEGER NOT NULL,
length INTEGER NOT NULL,
PRIMARY KEY (blob_hash, chunk_hash)
);
CREATE TABLE chunk_files (
chunk_hash TEXT NOT NULL,
file_id TEXT NOT NULL,
file_offset INTEGER NOT NULL,
length INTEGER NOT NULL,
PRIMARY KEY (chunk_hash, file_id)
);
CREATE TABLE snapshots (
id TEXT PRIMARY KEY,
hostname TEXT NOT NULL,
vaultik_version TEXT NOT NULL,
started_at INTEGER NOT NULL,
completed_at INTEGER,
file_count INTEGER NOT NULL,
chunk_count INTEGER NOT NULL,
blob_count INTEGER NOT NULL,
total_size INTEGER NOT NULL,
blob_size INTEGER NOT NULL,
compression_ratio REAL NOT NULL
);
CREATE TABLE snapshot_files (
snapshot_id TEXT NOT NULL,
file_id TEXT NOT NULL,
PRIMARY KEY (snapshot_id, file_id)
);
CREATE TABLE snapshot_blobs (
snapshot_id TEXT NOT NULL,
blob_id TEXT NOT NULL,
blob_hash TEXT NOT NULL,
PRIMARY KEY (snapshot_id, blob_id)
);
```
### data flow ### data flow
**backup:** #### backup
1. Open local SQLite index, load known files and chunks into memory 1. Load config, open local SQLite index
2. Walk source directories, compare mtime/size/mode against index 1. Walk source directories, check mtime/size against index
3. For changed/new files: chunk using content-defined chunking (FastCDC) 1. For changed/new files: chunk using content-defined chunking
4. For symlinks and directories: record metadata (no chunking) 1. For each chunk: hash, check if already uploaded, add to blob packer
5. For each chunk: hash, check dedup, add to blob packer 1. When blob reaches threshold: compress, encrypt, upload to S3
6. When blob reaches size threshold: compress (zstd), encrypt (age), upload 1. Build snapshot metadata, compress, encrypt, upload
7. Build snapshot metadata database, compress, encrypt, upload 1. Create blob manifest (unencrypted) for pruning support
8. Create unencrypted blob manifest for pruning support
**restore:** #### restore
1. Download and decrypt `metadata/<snapshot_id>/db.zst.age` 1. Download `metadata/<snapshot_id>/db.zst.age`
2. Open the binary SQLite database 1. Decrypt and decompress SQLite database
3. Query files (optionally filtered by paths) 1. Query files table (optionally filtered by paths)
4. Download and decrypt required blobs 1. For each file, get ordered chunk list from file_chunks
5. Extract chunks, reconstruct files 1. Download required blobs, decrypt, decompress
6. Restore permissions, timestamps, ownership, symlinks 1. Extract chunks and reconstruct files
1. Restore permissions, mtime, uid/gid
**prune:** #### prune
1. List all snapshot manifests 1. List all snapshot manifests
2. Build set of all referenced blob hashes 1. Build set of all referenced blob hashes
3. List all blobs in storage 1. List all blobs in storage
4. Delete any blob not in the referenced set 1. Delete any blob not in referenced set
### chunking and deduplication ### chunking
* Content-defined chunking using the FastCDC algorithm * Content-defined chunking using FastCDC algorithm
* Average chunk size: configurable (default 10MB) * Average chunk size: configurable (default 10MB)
* Deduplication at file level (unchanged files skipped) and chunk level * Deduplication at chunk level
(identical chunks across files stored once) * Multiple chunks packed into blobs for efficiency
* Multiple chunks packed into blobs to reduce object count
### encryption ### encryption
* Asymmetric encryption using age (X25519 + ChaCha20-Poly1305) * Asymmetric encryption using age (X25519 + ChaCha20-Poly1305)
* Only the public key is needed on the source host * Only public key needed on source host
* Each blob and each metadata database is encrypted independently * Each blob encrypted independently
* Multiple recipients supported (encrypt to multiple keys) * Metadata databases also encrypted
### compression ### compression
* zstd compression at configurable level (1-19, default 3) * zstd compression at configurable level
* Applied before encryption at the blob level * Applied before encryption
* Blob-level compression for efficiency
--- ---
## configuration reference ## does not
See `config.example.yml` for a complete annotated example. Key fields: * Store any secrets on the backed-up machine
* Require mutable remote metadata
* Use tarballs, restic, rsync, or ssh
* Require a symmetric passphrase or password
* Trust the source system with anything
| Field | Default | Description | ## does
|-------|---------|-------------|
| `age_recipients` | (required) | Age public keys for encryption |
| `snapshots` | (required) | Named snapshot definitions with paths and excludes |
| `storage_url` | | Storage backend URL (`s3://`, `file://`, `rclone://`) |
| `s3.*` | | Legacy S3 configuration (endpoint, bucket, credentials) |
| `exclude` | | Global exclude patterns (applied to all snapshots) |
| `chunk_size` | `10MB` | Average chunk size for content-defined chunking |
| `blob_size_limit` | `10GB` | Maximum blob size before splitting |
| `compression_level` | `3` | zstd compression level (1-19) |
| `hostname` | system hostname | Hostname used in snapshot IDs |
| `index_path` | `~/.local/share/.../index.sqlite` | Local SQLite index path |
--- * Incremental deduplicated backup
* Blob-packed chunk encryption
## limitations * Content-addressed immutable blobs
* Public-key encryption only
* **No extended attributes (xattrs).** ACLs, macOS Finder metadata, * SQLite-based local and snapshot metadata
quarantine flags, SELinux labels, and other extended attributes are not * Fully stream-processed storage
backed up or restored.
* **No hard link detection.** Two hard links to the same inode are backed
up as independent files. Content deduplication means the data is stored
once, but the hard link relationship is lost on restore.
* **No sparse file support.** Sparse files are fully materialized during
backup. A 100 GB sparse VM disk that is mostly zeros will consume the
full (compressed) size in storage.
* **No bandwidth limiting.** Uploads and downloads use whatever bandwidth
is available. There is no `--bwlimit` flag yet.
* **No parallel blob downloads during restore.** Blobs are fetched
sequentially. Restore speed is bound by single-stream throughput.
* **Device nodes, named pipes, and sockets are silently skipped.** Only
regular files, directories, and symlinks are backed up.
* **No database migrations.** If the local SQLite schema changes between
versions, delete the local database (`vaultik database purge`) and run
a full backup. Remote storage is unaffected.
* **Files that change during backup may be inconsistent.** There is no
filesystem snapshot or freeze. If a file is modified between the scan
and chunk phases, the backed-up copy may reflect a partial write.
* **Ownership restoration requires root.** File uid/gid are recorded
and restored, but `chown` requires elevated privileges. Without root,
files are restored with the current user's ownership.
---
## roadmap
Items for future releases:
* Error-condition tests (network failures, disk full, corrupted/missing blobs)
* Parallel blob downloads during restore
* Bandwidth limiting (`--bwlimit`)
* Security audit of encryption implementation
* Man pages and richer `--help` examples
--- ---
## requirements ## requirements
* Go 1.26 or later * Go 1.24 or later
* S3-compatible object storage (or local filesystem, or rclone remote) * S3-compatible object storage
* Sufficient disk space for local index (typically <1GB)
## development workflow
All changes follow this workflow. No exceptions.
1. Create a feature branch off `main`.
2. Write tests.
3. Write the implementation.
4. Fix implementation errors until it compiles and tests pass.
5. Fix linting errors (`make lint`).
6. Update documentation and README as required by the change.
7. Format code (`make fmt`).
8. Run `make check` (lint + fmt-check + test). Fix any issues. Repeat until clean.
9. Commit on the branch.
10. Merge to `main`.
11. Push.
Do not commit directly to `main`. Do not skip steps.
Repository policies for AI agents are in [`AGENTS.md`](AGENTS.md).
## license ## license

128
TODO.md Normal file
View File

@@ -0,0 +1,128 @@
# Vaultik 1.0 TODO
Linear list of tasks to complete before 1.0 release.
## Rclone Storage Backend (Complete)
Add rclone as a storage backend via Go library import, allowing vaultik to use any of rclone's 70+ supported cloud storage providers.
**Configuration:**
```yaml
storage_url: "rclone://myremote/path/to/backups"
```
User must have rclone configured separately (via `rclone config`).
**Implementation Steps:**
1. [x] Add rclone dependency to go.mod
2. [x] Create `internal/storage/rclone.go` implementing `Storer` interface
- `NewRcloneStorer(remote, path)` - init with `configfile.Install()` and `fs.NewFs()`
- `Put` / `PutWithProgress` - use `operations.Rcat()`
- `Get` - use `fs.NewObject()` then `obj.Open()`
- `Stat` - use `fs.NewObject()` for size/metadata
- `Delete` - use `obj.Remove()`
- `List` / `ListStream` - use `operations.ListFn()`
- `Info` - return remote name
3. [x] Update `internal/storage/url.go` - parse `rclone://remote/path` URLs
4. [x] Update `internal/storage/module.go` - add rclone case to `storerFromURL()`
5. [x] Test with real rclone remote
**Error Mapping:**
- `fs.ErrorObjectNotFound` → `ErrNotFound`
- `fs.ErrorDirNotFound` → `ErrNotFound`
- `fs.ErrorNotFoundInConfigFile` → `ErrRemoteNotFound` (new)
---
## CLI Polish (Priority)
1. Improve error messages throughout
- Ensure all errors include actionable context
- Add suggestions for common issues (e.g., "did you set VAULTIK_AGE_SECRET_KEY?")
## Security (Priority)
1. Audit encryption implementation
- Verify age encryption is used correctly
- Ensure no plaintext leaks in logs or errors
- Verify blob hashes are computed correctly
1. Secure memory handling for secrets
- Clear S3 credentials from memory after client init
- Document that age_secret_key is env-var only (already implemented)
## Testing
1. Write integration tests for restore command
1. Write end-to-end integration test
- Create backup
- Verify backup
- Restore backup
- Compare restored files to originals
1. Add tests for edge cases
- Empty directories
- Symlinks
- Special characters in filenames
- Very large files (multi-GB)
- Many small files (100k+)
1. Add tests for error conditions
- Network failures during upload
- Disk full during restore
- Corrupted blobs
- Missing blobs
## Performance
1. Profile and optimize restore performance
- Parallel blob downloads
- Streaming decompression/decryption
- Efficient chunk reassembly
1. Add bandwidth limiting option
- `--bwlimit` flag for upload/download speed limiting
## Documentation
1. Add man page or --help improvements
- Detailed help for each command
- Examples in help output
## Final Polish
1. Ensure version is set correctly in releases
1. Create release process
- Binary releases for supported platforms
- Checksums for binaries
- Release notes template
1. Final code review
- Remove debug statements
- Ensure consistent code style
1. Tag and release v1.0.0
---
## Post-1.0 (Daemon Mode)
1. Implement inotify file watcher for Linux
- Watch source directories for changes
- Track dirty paths in memory
1. Implement FSEvents watcher for macOS
- Watch source directories for changes
- Track dirty paths in memory
1. Implement backup scheduler in daemon mode
- Respect backup_interval config
- Trigger backup when dirty paths exist and interval elapsed
- Implement full_scan_interval for periodic full scans
1. Add proper signal handling for daemon
- Graceful shutdown on SIGTERM/SIGINT
- Complete in-progress backup before exit
1. Write tests for daemon mode

View File

@@ -291,6 +291,21 @@ storage_url: "rclone://las1stor1//srv/pool.2024.04/backups/heraklion"
# # Default: 5MB # # Default: 5MB
# #part_size: 5MB # #part_size: 5MB
# How often to run backups in daemon mode
# Format: 1h, 30m, 24h, etc
# Default: 1h
#backup_interval: 1h
# How often to do a full filesystem scan in daemon mode
# Between full scans, inotify is used to detect changes
# Default: 24h
#full_scan_interval: 24h
# Minimum time between backup runs in daemon mode
# Prevents backups from running too frequently
# Default: 15m
#min_time_between_run: 15m
# Path to local SQLite index database # Path to local SQLite index database
# This database tracks file state for incremental backups # This database tracks file state for incremental backups
# Default: /var/lib/vaultik/index.sqlite # Default: /var/lib/vaultik/index.sqlite

View File

@@ -5,14 +5,8 @@
Vaultik uses a local SQLite database to track file metadata, chunk mappings, and blob associations during the backup process. This database serves as an index for incremental backups and enables efficient deduplication. Vaultik uses a local SQLite database to track file metadata, chunk mappings, and blob associations during the backup process. This database serves as an index for incremental backups and enables efficient deduplication.
**Important Notes:** **Important Notes:**
- **No Migration Support (pre-1.0)**: Vaultik does not support database schema - **No Migration Support**: Vaultik does not support database schema migrations. If the schema changes, the local database must be deleted and recreated by performing a full backup.
migrations. The local index is treated as disposable — if the schema changes, - **Version Compatibility**: In rare cases, you may need to use the same version of Vaultik to restore a backup as was used to create it. This ensures compatibility with the metadata format stored in S3.
delete the local SQLite database (`vaultik database purge`) and run a full
backup. The remote storage is unaffected; the new index will re-deduplicate
against existing remote blobs.
- **Version Compatibility**: In rare cases, you may need to use the same version
of Vaultik to restore a backup as was used to create it. This ensures
compatibility with the metadata format stored in S3.
## Database Tables ## Database Tables
@@ -23,6 +17,7 @@ Stores metadata about files in the filesystem being backed up.
- `id` (TEXT PRIMARY KEY) - UUID for the file record - `id` (TEXT PRIMARY KEY) - UUID for the file record
- `path` (TEXT NOT NULL UNIQUE) - Absolute file path - `path` (TEXT NOT NULL UNIQUE) - Absolute file path
- `mtime` (INTEGER NOT NULL) - Modification time as Unix timestamp - `mtime` (INTEGER NOT NULL) - Modification time as Unix timestamp
- `ctime` (INTEGER NOT NULL) - Change time as Unix timestamp
- `size` (INTEGER NOT NULL) - File size in bytes - `size` (INTEGER NOT NULL) - File size in bytes
- `mode` (INTEGER NOT NULL) - Unix file permissions and type - `mode` (INTEGER NOT NULL) - Unix file permissions and type
- `uid` (INTEGER NOT NULL) - User ID of file owner - `uid` (INTEGER NOT NULL) - User ID of file owner

View File

@@ -43,19 +43,18 @@ Blobs contain the actual file data from backups and must be encrypted for securi
Each snapshot has its own subdirectory named with the snapshot ID. Each snapshot has its own subdirectory named with the snapshot ID.
### Snapshot ID Format ### Snapshot ID Format
- **Format**: `<hostname>_<snapshot-name>_<RFC3339>` (or `<hostname>_<RFC3339>` if no - **Format**: `<hostname>-<YYYYMMDD>-<HHMMSSZ>`
name was specified) - **Example**: `laptop-20240115-143052Z`
- **Example**: `laptop_home_2024-01-15T14:30:52Z`
- **Components**: - **Components**:
- Short hostname (the FQDN with everything from the first dot onward stripped) - Hostname (may contain hyphens)
- Snapshot name from the configured `snapshots:` map (optional) - Date in YYYYMMDD format
- RFC3339 UTC timestamp - Time in HHMMSSZ format (Z indicates UTC)
### Files in Each Snapshot Directory ### Files in Each Snapshot Directory
#### `db.zst.age` - Encrypted Database #### `db.zst.age` - Encrypted Database Dump
- **What it contains**: Pruned binary SQLite database for this snapshot - **What it contains**: Complete SQLite database dump for this snapshot
- **Format**: Binary SQLite → Zstandard compressed → Age encrypted - **Format**: SQL dump → Zstandard compressed → Age encrypted
- **Encryption**: Encrypted with Age - **Encryption**: Encrypted with Age
- **Purpose**: Contains full file metadata, chunk mappings, and all relationships - **Purpose**: Contains full file metadata, chunk mappings, and all relationships
- **Why encrypted**: Contains sensitive metadata like file paths, permissions, and ownership - **Why encrypted**: Contains sensitive metadata like file paths, permissions, and ownership
@@ -68,7 +67,7 @@ Each snapshot has its own subdirectory named with the snapshot ID.
- **Structure**: - **Structure**:
```json ```json
{ {
"snapshot_id": "laptop_home_2024-01-15T14:30:52Z", "snapshot_id": "laptop-20240115-143052Z",
"timestamp": "2024-01-15T14:30:52Z", "timestamp": "2024-01-15T14:30:52Z",
"blob_count": 42, "blob_count": 42,
"blobs": [ "blobs": [

3
go.mod
View File

@@ -17,13 +17,13 @@ require (
github.com/google/uuid v1.6.0 github.com/google/uuid v1.6.0
github.com/johannesboyne/gofakes3 v0.0.0-20250603205740-ed9094be7668 github.com/johannesboyne/gofakes3 v0.0.0-20250603205740-ed9094be7668
github.com/klauspost/compress v1.18.1 github.com/klauspost/compress v1.18.1
github.com/mattn/go-sqlite3 v1.14.29
github.com/rclone/rclone v1.72.1 github.com/rclone/rclone v1.72.1
github.com/schollz/progressbar/v3 v3.19.0 github.com/schollz/progressbar/v3 v3.19.0
github.com/spf13/afero v1.15.0 github.com/spf13/afero v1.15.0
github.com/spf13/cobra v1.10.1 github.com/spf13/cobra v1.10.1
github.com/stretchr/testify v1.11.1 github.com/stretchr/testify v1.11.1
go.uber.org/fx v1.24.0 go.uber.org/fx v1.24.0
golang.org/x/sync v0.18.0
golang.org/x/term v0.37.0 golang.org/x/term v0.37.0
gopkg.in/yaml.v3 v3.0.1 gopkg.in/yaml.v3 v3.0.1
modernc.org/sqlite v1.38.0 modernc.org/sqlite v1.38.0
@@ -266,6 +266,7 @@ require (
golang.org/x/exp v0.0.0-20251023183803-a4bb9ffd2546 // indirect golang.org/x/exp v0.0.0-20251023183803-a4bb9ffd2546 // indirect
golang.org/x/net v0.47.0 // indirect golang.org/x/net v0.47.0 // indirect
golang.org/x/oauth2 v0.33.0 // indirect golang.org/x/oauth2 v0.33.0 // indirect
golang.org/x/sync v0.18.0 // indirect
golang.org/x/sys v0.38.0 // indirect golang.org/x/sys v0.38.0 // indirect
golang.org/x/text v0.31.0 // indirect golang.org/x/text v0.31.0 // indirect
golang.org/x/time v0.14.0 // indirect golang.org/x/time v0.14.0 // indirect

2
go.sum
View File

@@ -593,6 +593,8 @@ github.com/mattn/go-isatty v0.0.20/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D
github.com/mattn/go-runewidth v0.0.3/go.mod h1:LwmH8dsx7+W8Uxz3IHJYH5QSwggIsqBzpuz5H//U1FU= github.com/mattn/go-runewidth v0.0.3/go.mod h1:LwmH8dsx7+W8Uxz3IHJYH5QSwggIsqBzpuz5H//U1FU=
github.com/mattn/go-runewidth v0.0.19 h1:v++JhqYnZuu5jSKrk9RbgF5v4CGUjqRfBm05byFGLdw= github.com/mattn/go-runewidth v0.0.19 h1:v++JhqYnZuu5jSKrk9RbgF5v4CGUjqRfBm05byFGLdw=
github.com/mattn/go-runewidth v0.0.19/go.mod h1:XBkDxAl56ILZc9knddidhrOlY5R/pDhgLpndooCuJAs= github.com/mattn/go-runewidth v0.0.19/go.mod h1:XBkDxAl56ILZc9knddidhrOlY5R/pDhgLpndooCuJAs=
github.com/mattn/go-sqlite3 v1.14.29 h1:1O6nRLJKvsi1H2Sj0Hzdfojwt8GiGKm+LOfLaBFaouQ=
github.com/mattn/go-sqlite3 v1.14.29/go.mod h1:Uh1q+B4BYcTPb+yiD3kU8Ct7aC0hY9fxUwlHK0RXw+Y=
github.com/matttproud/golang_protobuf_extensions v1.0.1/go.mod h1:D8He9yQNgCq6Z5Ld7szi9bcBfOoFv/3dc6xSMkL2PC0= github.com/matttproud/golang_protobuf_extensions v1.0.1/go.mod h1:D8He9yQNgCq6Z5Ld7szi9bcBfOoFv/3dc6xSMkL2PC0=
github.com/miekg/dns v1.1.26/go.mod h1:bPDLeHnStXmXAq1m/Ch/hvfNHr14JKNPMBo3VZKjuso= github.com/miekg/dns v1.1.26/go.mod h1:bPDLeHnStXmXAq1m/Ch/hvfNHr14JKNPMBo3VZKjuso=
github.com/miekg/dns v1.1.41 h1:WMszZWJG0XmzbK9FEmzH2TVcqYzFesusSIB41b8KHxY= github.com/miekg/dns v1.1.41 h1:WMszZWJG0XmzbK9FEmzH2TVcqYzFesusSIB41b8KHxY=

View File

@@ -361,23 +361,101 @@ func (p *Packer) finalizeCurrentBlob() error {
return nil return nil
} }
blobHash, finalSize, err := p.closeBlobWriter() // Close blobgen writer to flush all data
if err := p.currentBlob.writer.Close(); err != nil {
p.cleanupTempFile()
return fmt.Errorf("closing blobgen writer: %w", err)
}
// Sync file to ensure all data is written
if err := p.currentBlob.tempFile.Sync(); err != nil {
p.cleanupTempFile()
return fmt.Errorf("syncing temp file: %w", err)
}
// Get the final size (encrypted if applicable)
finalSize, err := p.currentBlob.tempFile.Seek(0, io.SeekCurrent)
if err != nil { if err != nil {
return err p.cleanupTempFile()
return fmt.Errorf("getting file size: %w", err)
} }
chunkRefs := p.buildChunkRefs() // Reset to beginning for reading
if _, err := p.currentBlob.tempFile.Seek(0, io.SeekStart); err != nil {
p.cleanupTempFile()
return fmt.Errorf("seeking to start: %w", err)
}
// Get hash from blobgen writer (of final encrypted data)
finalHash := p.currentBlob.writer.Sum256()
blobHash := hex.EncodeToString(finalHash)
// Create chunk references with offsets
chunkRefs := make([]*BlobChunkRef, 0, len(p.currentBlob.chunks))
for _, chunk := range p.currentBlob.chunks {
chunkRefs = append(chunkRefs, &BlobChunkRef{
ChunkHash: chunk.Hash,
Offset: chunk.Offset,
Length: chunk.Size,
})
}
// Get pending chunks (will be inserted to DB and reported to handler)
chunksToInsert := p.pendingChunks chunksToInsert := p.pendingChunks
p.pendingChunks = nil p.pendingChunks = nil // Clear pending list
if err := p.commitBlobToDatabase(blobHash, finalSize, chunksToInsert); err != nil { // Insert pending chunks, blob_chunks, and update blob in a single transaction
return err if p.repos != nil {
blobIDTyped, parseErr := types.ParseBlobID(p.currentBlob.id)
if parseErr != nil {
p.cleanupTempFile()
return fmt.Errorf("parsing blob ID: %w", parseErr)
}
err := p.repos.WithTx(context.Background(), func(ctx context.Context, tx *sql.Tx) error {
// First insert all pending chunks (required for blob_chunks FK)
for _, chunk := range chunksToInsert {
dbChunk := &database.Chunk{
ChunkHash: types.ChunkHash(chunk.Hash),
Size: chunk.Size,
}
if err := p.repos.Chunks.Create(ctx, tx, dbChunk); err != nil {
return fmt.Errorf("creating chunk: %w", err)
}
} }
// Insert all blob_chunk records in batch
for _, chunk := range p.currentBlob.chunks {
blobChunk := &database.BlobChunk{
BlobID: blobIDTyped,
ChunkHash: types.ChunkHash(chunk.Hash),
Offset: chunk.Offset,
Length: chunk.Size,
}
if err := p.repos.BlobChunks.Create(ctx, tx, blobChunk); err != nil {
return fmt.Errorf("creating blob_chunk: %w", err)
}
}
// Update blob record with final hash and sizes
return p.repos.Blobs.UpdateFinished(ctx, tx, p.currentBlob.id, blobHash,
p.currentBlob.size, finalSize)
})
if err != nil {
p.cleanupTempFile()
return fmt.Errorf("finalizing blob transaction: %w", err)
}
log.Debug("Committed blob transaction",
"chunks_inserted", len(chunksToInsert),
"blob_chunks_inserted", len(p.currentBlob.chunks))
}
// Create finished blob
finished := &FinishedBlob{ finished := &FinishedBlob{
ID: p.currentBlob.id, ID: p.currentBlob.id,
Hash: blobHash, Hash: blobHash,
Data: nil, // We don't load data into memory anymore
Chunks: chunkRefs, Chunks: chunkRefs,
CreatedTS: p.currentBlob.startTime, CreatedTS: p.currentBlob.startTime,
Uncompressed: p.currentBlob.size, Uncompressed: p.currentBlob.size,
@@ -386,105 +464,28 @@ func (p *Packer) finalizeCurrentBlob() error {
compressionRatio := float64(finished.Compressed) / float64(finished.Uncompressed) compressionRatio := float64(finished.Compressed) / float64(finished.Uncompressed)
log.Info("Finalized blob (compressed and encrypted)", log.Info("Finalized blob (compressed and encrypted)",
"hash", blobHash, "chunks", len(chunkRefs), "hash", blobHash,
"uncompressed", finished.Uncompressed, "compressed", finished.Compressed, "chunks", len(chunkRefs),
"uncompressed", finished.Uncompressed,
"compressed", finished.Compressed,
"ratio", fmt.Sprintf("%.2f", compressionRatio), "ratio", fmt.Sprintf("%.2f", compressionRatio),
"duration", time.Since(p.currentBlob.startTime)) "duration", time.Since(p.currentBlob.startTime))
// Collect inserted chunk hashes for the scanner to track
var insertedChunkHashes []string var insertedChunkHashes []string
for _, chunk := range chunksToInsert { for _, chunk := range chunksToInsert {
insertedChunkHashes = append(insertedChunkHashes, chunk.Hash) insertedChunkHashes = append(insertedChunkHashes, chunk.Hash)
} }
return p.deliverFinishedBlob(finished, insertedChunkHashes) // Call blob handler if set
}
// closeBlobWriter finishes the current blob's temp file and reports its
// identity. In order, it closes the blobgen writer, syncs the temp file, takes
// the file's current offset as the final size, rewinds the file to its start,
// and hex-encodes the writer's Sum256. If any step fails, the temp file is
// cleaned up and a wrapped error is returned with an empty hash and zero size.
func (p *Packer) closeBlobWriter() (string, int64, error) {
	// abort cleans up the temp file and wraps cause with the failing step.
	abort := func(step string, cause error) (string, int64, error) {
		p.cleanupTempFile()
		return "", 0, fmt.Errorf("%s: %w", step, cause)
	}

	if closeErr := p.currentBlob.writer.Close(); closeErr != nil {
		return abort("closing blobgen writer", closeErr)
	}

	if syncErr := p.currentBlob.tempFile.Sync(); syncErr != nil {
		return abort("syncing temp file", syncErr)
	}

	// The current offset after all writes is the size of the finished blob.
	size, sizeErr := p.currentBlob.tempFile.Seek(0, io.SeekCurrent)
	if sizeErr != nil {
		return abort("getting file size", sizeErr)
	}

	// Rewind so the next consumer reads from the beginning.
	if _, rewindErr := p.currentBlob.tempFile.Seek(0, io.SeekStart); rewindErr != nil {
		return abort("seeking to start", rewindErr)
	}

	digest := p.currentBlob.writer.Sum256()
	return hex.EncodeToString(digest), size, nil
}
// buildChunkRefs returns one BlobChunkRef per chunk recorded on the current
// blob, in the same order, copying each chunk's hash, offset and size.
func (p *Packer) buildChunkRefs() []*BlobChunkRef {
	count := len(p.currentBlob.chunks)
	out := make([]*BlobChunkRef, count)
	for i := 0; i < count; i++ {
		src := p.currentBlob.chunks[i]
		ref := new(BlobChunkRef)
		ref.ChunkHash = src.Hash
		ref.Offset = src.Offset
		ref.Length = src.Size
		out[i] = ref
	}
	return out
}
// commitBlobToDatabase records the finished blob in the database inside a
// single transaction: first every pending chunk, then one blob_chunk row per
// chunk in the current blob, and finally the blob's hash and sizes. It is a
// no-op when no repositories are configured. If the blob ID cannot be parsed
// or the transaction fails, the temp file is cleaned up and a wrapped error
// is returned.
func (p *Packer) commitBlobToDatabase(blobHash string, finalSize int64, chunksToInsert []PendingChunk) error {
	if p.repos == nil {
		return nil
	}

	typedID, idErr := types.ParseBlobID(p.currentBlob.id)
	if idErr != nil {
		p.cleanupTempFile()
		return fmt.Errorf("parsing blob ID: %w", idErr)
	}

	// persist performs every write for this blob against one transaction.
	persist := func(ctx context.Context, tx *sql.Tx) error {
		// Chunks are created before the blob_chunk rows that reference them.
		for _, pending := range chunksToInsert {
			row := &database.Chunk{
				ChunkHash: types.ChunkHash(pending.Hash),
				Size:      pending.Size,
			}
			if createErr := p.repos.Chunks.Create(ctx, tx, row); createErr != nil {
				return fmt.Errorf("creating chunk: %w", createErr)
			}
		}

		for _, member := range p.currentBlob.chunks {
			link := &database.BlobChunk{
				BlobID:    typedID,
				ChunkHash: types.ChunkHash(member.Hash),
				Offset:    member.Offset,
				Length:    member.Size,
			}
			if createErr := p.repos.BlobChunks.Create(ctx, tx, link); createErr != nil {
				return fmt.Errorf("creating blob_chunk: %w", createErr)
			}
		}

		return p.repos.Blobs.UpdateFinished(ctx, tx, p.currentBlob.id, blobHash, p.currentBlob.size, finalSize)
	}

	if txErr := p.repos.WithTx(context.Background(), persist); txErr != nil {
		p.cleanupTempFile()
		return fmt.Errorf("finalizing blob transaction: %w", txErr)
	}

	log.Debug("Committed blob transaction",
		"chunks_inserted", len(chunksToInsert),
		"blob_chunks_inserted", len(p.currentBlob.chunks))
	return nil
}
// deliverFinishedBlob passes the blob to the handler or stores it internally
func (p *Packer) deliverFinishedBlob(finished *FinishedBlob, insertedChunkHashes []string) error {
if p.blobHandler != nil { if p.blobHandler != nil {
// Reset file position for handler
if _, err := p.currentBlob.tempFile.Seek(0, io.SeekStart); err != nil { if _, err := p.currentBlob.tempFile.Seek(0, io.SeekStart); err != nil {
p.cleanupTempFile() p.cleanupTempFile()
return fmt.Errorf("seeking for handler: %w", err) return fmt.Errorf("seeking for handler: %w", err)
} }
// Create a blob reader that includes the data stream
blobWithReader := &BlobWithReader{ blobWithReader := &BlobWithReader{
FinishedBlob: finished, FinishedBlob: finished,
Reader: p.currentBlob.tempFile, Reader: p.currentBlob.tempFile,
@@ -496,12 +497,11 @@ func (p *Packer) deliverFinishedBlob(finished *FinishedBlob, insertedChunkHashes
p.cleanupTempFile() p.cleanupTempFile()
return fmt.Errorf("blob handler failed: %w", err) return fmt.Errorf("blob handler failed: %w", err)
} }
// Note: blob handler is responsible for closing/cleaning up temp file
p.currentBlob = nil p.currentBlob = nil
return nil } else {
} log.Debug("No blob handler callback configured", "blob_hash", blobHash[:8]+"...")
// No handler, need to read data for legacy behavior
// No handler - read data for legacy behavior
log.Debug("No blob handler callback configured", "blob_hash", finished.Hash[:8]+"...")
if _, err := p.currentBlob.tempFile.Seek(0, io.SeekStart); err != nil { if _, err := p.currentBlob.tempFile.Seek(0, io.SeekStart); err != nil {
p.cleanupTempFile() p.cleanupTempFile()
return fmt.Errorf("seeking to read data: %w", err) return fmt.Errorf("seeking to read data: %w", err)
@@ -513,9 +513,14 @@ func (p *Packer) deliverFinishedBlob(finished *FinishedBlob, insertedChunkHashes
return fmt.Errorf("reading blob data: %w", err) return fmt.Errorf("reading blob data: %w", err)
} }
finished.Data = data finished.Data = data
p.finishedBlobs = append(p.finishedBlobs, finished) p.finishedBlobs = append(p.finishedBlobs, finished)
// Cleanup
p.cleanupTempFile() p.cleanupTempFile()
p.currentBlob = nil p.currentBlob = nil
}
return nil return nil
} }

View File

@@ -18,7 +18,7 @@ func TestCLIEntry(t *testing.T) {
} }
// Verify all subcommands are registered // Verify all subcommands are registered
expectedCommands := []string{"init", "snapshot", "store", "restore", "prune", "info", "version", "remote", "database"} expectedCommands := []string{"snapshot", "store", "restore", "prune", "verify", "info", "version"}
for _, expected := range expectedCommands { for _, expected := range expectedCommands {
found := false found := false
for _, cmd := range cmd.Commands() { for _, cmd := range cmd.Commands() {
@@ -38,7 +38,7 @@ func TestCLIEntry(t *testing.T) {
t.Errorf("Failed to find snapshot command: %v", err) t.Errorf("Failed to find snapshot command: %v", err)
} else { } else {
// Check snapshot subcommands // Check snapshot subcommands
expectedSubCommands := []string{"create", "list", "purge", "verify", "cleanup"} expectedSubCommands := []string{"create", "list", "purge", "verify"}
for _, expected := range expectedSubCommands { for _, expected := range expectedSubCommands {
found := false found := false
for _, subcmd := range snapshotCmd.Commands() { for _, subcmd := range snapshotCmd.Commands() {

View File

@@ -1,247 +0,0 @@
package cli
import (
"fmt"
"os"
"path/filepath"
"github.com/spf13/cobra"
)
// defaultConfigTemplate is the commented YAML configuration that the init
// command writes verbatim to a new config file. It ships a placeholder age
// recipient, two example snapshots ("home" and "apps") with exclude lists, an
// empty storage_url, and commented-out optional settings. The user is expected
// to edit the written file before use.
const defaultConfigTemplate = `# vaultik configuration
# Documentation: https://git.eeqj.de/sneak/vaultik
# ─── REQUIRED ────────────────────────────────────────────────────────────────
# Age recipient public keys for encryption.
# Backups are encrypted to ALL listed recipients. Any one of the corresponding
# private keys can decrypt. Generate a keypair with:
# age-keygen -o key.txt && grep 'public key' key.txt
age_recipients:
- age1REPLACE_WITH_YOUR_PUBLIC_KEY
# Named snapshots. Each snapshot backs up one or more paths and can have its
# own exclude patterns in addition to the global excludes below.
#
# Exclude pattern semantics:
# - Patterns starting with / are anchored to the snapshot path root
# (e.g. "/Library/Caches" matches only ~/Library/Caches in a ~ snapshot)
# - Patterns without a leading / match anywhere in the tree
# (e.g. ".cache" matches any directory named .cache at any depth)
# - Globs are supported: *, **, ?
snapshots:
home:
paths:
- "~"
exclude:
# Trash, temp, and filesystem metadata
- "/.Trash"
- "/.Trashes"
- "/.fseventsd"
- "/.Spotlight-V100"
- "/.TemporaryItems"
- "/tmp"
- "/.rnd"
- ".DS_Store"
# Caches and package manager state (rebuildable)
- ".cache"
- ".bundle"
- "/.cpan/build"
- "/.cpan/sources"
- "/.gradle/caches"
- "/.dropbox"
- "/.minikube/cache"
- "/.local/share/containers/podman/machine"
- "/.persepolis"
- "/Library/Caches"
- "/Library/Logs"
- "/Library/Cookies"
- "/Library/Metadata"
- "/Library/Suggestions"
- "/Library/PubSub"
- "/Library/Homebrew"
- "/Library/Developer"
- "/Library/Google/GoogleSoftwareUpdate"
- "/Library/Preferences/Macromedia/Flash Player"
- "/Library/Preferences/SDMHelpData"
- "/Library/VoiceTrigger/SAT"
# Language/toolchain package caches (rebuildable from registries)
- "/.npm"
- "/.cargo/registry"
- "/.cargo/git"
- "/.rustup/toolchains"
- "/go/pkg/mod"
- "/.m2/repository"
- "/.vagrant.d/boxes"
- "node_modules"
- "__pycache__"
- ".venv"
# Virtual machine disk images (huge; remove these lines to back them up)
- "/Parallels"
- "/Virtual Machines.localized"
- "/VirtualBox VMs"
- "/.orbstack"
- "/Library/Containers/com.utmapp.UTM"
# Downloaded LLM models (huge, re-downloadable)
- "/.ollama/models"
- "/.lmstudio/models"
# Cloud-synced storage. These are synced to a provider already, and on
# modern macOS may contain dataless placeholder files that the backup
# would force-download in full.
- "/Library/CloudStorage"
- "/Library/Mobile Documents"
# Android SDK and emulator images (re-downloadable)
- "/Library/Android/sdk"
- "/.android/avd"
# Cloud-synced or restorable-from-server data
- "/Library/Mail"
- "/Library/Mail Downloads"
- "/Library/Safari"
- "/Library/Application Support/Evernote"
- "/Library/Application Support/MobileSync"
- "/Library/Application Support/SyncServices"
- "/Library/Application Support/protonmail/bridge/cache"
- "/Library/Application Support/Syncthing/index-*"
- "/Library/Syncthing/folders"
- "/Documents/Dropbox/.dropbox.cache"
# Large rebuildable app data (games, media caches, device backups)
- "/Applications/Fortnite"
- "/Documents/Steam Content"
- "/Library/Application Support/Ableton"
- "/Library/Application Support/CrossOver Games"
- "/Library/Application Support/SecondLife/cache"
- "/Library/Application Support/Steam/SteamApps"
- "/Library/Containers/com.docker.docker"
- "/Library/Group Containers/group.com.apple.secure-control-center-preferences"
- "/Library/iTunes/iPad Software Updates"
- "/Library/iTunes/iPhone Software Updates"
- "/Movies/CacheClip"
- "/Movies/ProxyMedia"
- "/Music/iTunes/Album Artwork"
- "/Pictures/iPod Photo Cache"
# Third-party applications. OS-provided apps live in /System/Applications
# on modern macOS and are never in /Applications, but Apple-installed
# App Store apps (Safari, GarageBand, iWork, iMovie) are excluded since
# they are re-downloadable.
apps:
paths:
- /Applications
exclude:
- ".DS_Store"
- "/Safari.app"
- "/GarageBand.app"
- "/iMovie.app"
- "/Keynote.app"
- "/Numbers.app"
- "/Pages.app"
- "/Xcode.app"
- "/Spotify.app"
- "/Steam.app"
- "/VirtualBox.app"
- "/Utilities/Adobe Installers"
# Storage backend (pick ONE of the three forms below).
#
# S3-compatible:
# storage_url: "s3://mybucket/backups?endpoint=s3.example.com&region=us-east-1"
# (also set s3.access_key_id and s3.secret_access_key below)
#
# Local filesystem:
# storage_url: "file:///mnt/backups/vaultik"
#
# Rclone (requires rclone configured separately):
# storage_url: "rclone://myremote/path/to/backups"
storage_url: ""
# ─── S3 CREDENTIALS (required for s3:// storage_url) ────────────────────────
# s3:
# access_key_id: YOUR_ACCESS_KEY
# secret_access_key: YOUR_SECRET_KEY
# # region: us-east-1 # Default: us-east-1
# # use_ssl: true # Default: true
# # part_size: 5MB # Multipart upload part size. Default: 5MB
# ─── OPTIONAL ────────────────────────────────────────────────────────────────
# Global exclude patterns applied to ALL snapshots.
# Snapshot-specific excludes are additive.
# exclude:
# - "*.log"
# - "*.tmp"
# - ".git"
# - "node_modules"
# Average chunk size for content-defined chunking (FastCDC).
# Smaller = better deduplication but more metadata overhead.
# Accepts: 1MB, 10M, 64KB, etc.
# Default: 10MB
# chunk_size: 10MB
# Maximum blob size before splitting into a new blob.
# Accepts: 1GB, 10G, 500MB, etc.
# Default: 10GB
# blob_size_limit: 10GB
# Zstd compression level (1-19). Higher = better ratio but slower.
# Default: 3
# compression_level: 3
# Hostname used in snapshot IDs. Default: system hostname.
# hostname: myserver
# Path to the local SQLite index database.
# Default: ~/.local/share/berlin.sneak.app.vaultik/index.sqlite
# index_path: /path/to/index.sqlite
`
// NewInitCommand creates the init command, which writes defaultConfigTemplate
// to the path chosen by configPathForInit.
//
// The command takes no arguments. It creates the parent directory if needed
// (mode 0o755) and then creates the config file exclusively (mode 0o600), so
// an existing file is never overwritten, even if one appears between the
// directory creation and the write. It returns an error if the file already
// exists, if the directory cannot be created, or if the file cannot be written.
func NewInitCommand() *cobra.Command {
	cmd := &cobra.Command{
		Use:   "init",
		Short: "Write a default config file",
		Long: `Creates a default configuration file with commented explanations
for every setting. If a config file already exists at the target path,
the command refuses to overwrite it.
The config is written to the path from --config, $VAULTIK_CONFIG, or
the platform default config directory (e.g. ~/Library/Application Support/
on macOS, ~/.config/ on Linux, /etc/vaultik/ as root).`,
		Args: cobra.NoArgs,
		RunE: func(cmd *cobra.Command, args []string) error {
			path := configPathForInit()

			dir := filepath.Dir(path)
			if err := os.MkdirAll(dir, 0o755); err != nil {
				return fmt.Errorf("creating config directory %s: %w", dir, err)
			}

			// O_EXCL makes "create only if absent" a single atomic step, so a
			// file created by someone else after any earlier check can never
			// be clobbered.
			f, err := os.OpenFile(path, os.O_WRONLY|os.O_CREATE|os.O_EXCL, 0o600)
			if err != nil {
				if os.IsExist(err) {
					return fmt.Errorf("config file already exists: %s", path)
				}
				return fmt.Errorf("writing config file: %w", err)
			}

			if _, err := f.WriteString(defaultConfigTemplate); err != nil {
				// Best-effort cleanup of the file we just created; the write
				// error is the one worth reporting.
				_ = f.Close()
				_ = os.Remove(path)
				return fmt.Errorf("writing config file: %w", err)
			}
			if err := f.Close(); err != nil {
				// A failed close may mean the data never reached disk; do not
				// leave a possibly truncated config behind.
				_ = os.Remove(path)
				return fmt.Errorf("writing config file: %w", err)
			}

			fmt.Printf("Config written to %s\n", path)
			fmt.Println("Edit it to set your age_recipients, snapshots, and storage_url.")
			return nil
		},
	}
	return cmd
}
// configPathForInit picks the path the init command should write to. The
// --config flag wins when set; otherwise a non-empty VAULTIK_CONFIG
// environment variable is used; otherwise the result of DefaultConfigPath.
func configPathForInit() string {
	fromEnv := os.Getenv("VAULTIK_CONFIG")
	switch {
	case rootFlags.ConfigPath != "":
		return rootFlags.ConfigPath
	case fromEnv != "":
		return fromEnv
	default:
		return DefaultConfigPath()
	}
}

View File

@@ -1,43 +0,0 @@
package cli
import (
"testing"
"git.eeqj.de/sneak/vaultik/internal/config"
"gopkg.in/yaml.v3"
)
// TestDefaultConfigTemplateParses checks that defaultConfigTemplate is valid
// YAML for config.Config and that it declares exactly one age recipient, a
// "home" snapshot with paths and excludes, and an "apps" snapshot that backs
// up only /Applications and has excludes.
func TestDefaultConfigTemplateParses(t *testing.T) {
	var parsed config.Config
	unmarshalErr := yaml.Unmarshal([]byte(defaultConfigTemplate), &parsed)
	if unmarshalErr != nil {
		t.Fatalf("default config template is not valid YAML: %v", unmarshalErr)
	}

	if recipients := len(parsed.AgeRecipients); recipients != 1 {
		t.Errorf("expected 1 placeholder age recipient, got %d", recipients)
	}

	// The "home" snapshot must exist and be populated.
	homeSnap, haveHome := parsed.Snapshots["home"]
	if !haveHome {
		t.Fatal("expected 'home' snapshot in default config")
	}
	if len(homeSnap.Paths) == 0 {
		t.Error("home snapshot should have at least one path")
	}
	if len(homeSnap.Exclude) == 0 {
		t.Error("home snapshot should have exclude patterns")
	}

	// The "apps" snapshot must exist and target exactly /Applications.
	appsSnap, haveApps := parsed.Snapshots["apps"]
	if !haveApps {
		t.Fatal("expected 'apps' snapshot in default config")
	}
	appPaths := appsSnap.Paths
	if len(appPaths) != 1 || appPaths[0] != "/Applications" {
		t.Errorf("apps snapshot should back up /Applications, got %v", appPaths)
	}
	if len(appsSnap.Exclude) == 0 {
		t.Error("apps snapshot should have exclude patterns")
	}
}

100
internal/cli/purge.go Normal file
View File

@@ -0,0 +1,100 @@
package cli
import (
	"context"
	"errors"
	"fmt"
	"os"

	"git.eeqj.de/sneak/vaultik/internal/log"
	"git.eeqj.de/sneak/vaultik/internal/vaultik"
	"github.com/spf13/cobra"
	"go.uber.org/fx"
)
// PurgeOptions contains options for the purge command. The fields are bound
// to command-line flags by NewPurgeCommand, which requires exactly one of
// KeepLatest or OlderThan to be set.
type PurgeOptions struct {
// KeepLatest is set by --keep-latest ("Keep only the latest snapshot").
KeepLatest bool
// OlderThan is set by --older-than: a duration string such as 30d, 6m or 1y.
// Empty means the flag was not given.
OlderThan string
// Force is set by --force ("Skip confirmation prompts").
Force bool
}
// NewPurgeCommand creates the purge command, which removes snapshots by age
// or count.
//
// The command takes no arguments and requires exactly one of --keep-latest or
// --older-than; supplying neither or both is an error. It resolves the config
// path with ResolveConfigPath and runs through RunWithApp. On start, a
// goroutine calls Vaultik.PurgeSnapshots and then asks the app to shut down.
// A failure other than context cancellation is logged and the process exits
// with status 1. On stop, the Vaultik instance is cancelled.
func NewPurgeCommand() *cobra.Command {
	opts := &PurgeOptions{}

	cmd := &cobra.Command{
		Use:   "purge",
		Short: "Purge old snapshots",
		Long: `Removes snapshots based on age or count criteria.
This command allows you to:
- Keep only the latest snapshot (--keep-latest)
- Remove snapshots older than a specific duration (--older-than)
Config is located at /etc/vaultik/config.yml by default, but can be overridden by
specifying a path using --config or by setting VAULTIK_CONFIG to a path.`,
		Args: cobra.NoArgs,
		RunE: func(cmd *cobra.Command, args []string) error {
			// Exactly one selection criterion must be given.
			if !opts.KeepLatest && opts.OlderThan == "" {
				return fmt.Errorf("must specify either --keep-latest or --older-than")
			}
			if opts.KeepLatest && opts.OlderThan != "" {
				return fmt.Errorf("cannot specify both --keep-latest and --older-than")
			}

			// Use unified config resolution
			configPath, err := ResolveConfigPath()
			if err != nil {
				return err
			}

			// Use the app framework like other commands
			rootFlags := GetRootFlags()
			return RunWithApp(cmd.Context(), AppOptions{
				ConfigPath: configPath,
				LogOptions: log.LogOptions{
					Verbose: rootFlags.Verbose,
					Debug:   rootFlags.Debug,
					Quiet:   rootFlags.Quiet,
				},
				Modules: []fx.Option{},
				Invokes: []fx.Option{
					fx.Invoke(func(v *vaultik.Vaultik, lc fx.Lifecycle) {
						lc.Append(fx.Hook{
							OnStart: func(ctx context.Context) error {
								// Run the purge in the background so OnStart
								// returns immediately.
								go func() {
									if err := v.PurgeSnapshots(opts.KeepLatest, opts.OlderThan, opts.Force); err != nil {
										// errors.Is also recognises a wrapped
										// context.Canceled, so a cancelled
										// purge is not reported as a failure.
										if !errors.Is(err, context.Canceled) {
											log.Error("Purge operation failed", "error", err)
											os.Exit(1)
										}
									}

									// Shutdown the app when purge completes
									if err := v.Shutdowner.Shutdown(); err != nil {
										log.Error("Failed to shutdown", "error", err)
									}
								}()
								return nil
							},
							OnStop: func(ctx context.Context) error {
								log.Debug("Stopping purge operation")
								v.Cancel()
								return nil
							},
						})
					}),
				},
			})
		},
	}

	cmd.Flags().BoolVar(&opts.KeepLatest, "keep-latest", false, "Keep only the latest snapshot")
	cmd.Flags().StringVar(&opts.OlderThan, "older-than", "", "Remove snapshots older than duration (e.g. 30d, 6m, 1y)")
	cmd.Flags().BoolVar(&opts.Force, "force", false, "Skip confirmation prompts")

	return cmd
}

View File

@@ -2,7 +2,6 @@ package cli
import ( import (
"context" "context"
"os"
"git.eeqj.de/sneak/vaultik/internal/config" "git.eeqj.de/sneak/vaultik/internal/config"
"git.eeqj.de/sneak/vaultik/internal/globals" "git.eeqj.de/sneak/vaultik/internal/globals"
@@ -58,17 +57,6 @@ Examples:
vaultik restore --verify myhost_docs_2025-01-01T12:00:00Z /restore`, vaultik restore --verify myhost_docs_2025-01-01T12:00:00Z /restore`,
Args: cobra.MinimumNArgs(2), Args: cobra.MinimumNArgs(2),
RunE: func(cmd *cobra.Command, args []string) error { RunE: func(cmd *cobra.Command, args []string) error {
return runRestore(cmd, args, opts)
},
}
cmd.Flags().BoolVar(&opts.Verify, "verify", false, "Verify restored files by checking chunk hashes")
return cmd
}
// runRestore parses arguments and runs the restore operation through the app framework
func runRestore(cmd *cobra.Command, args []string, opts *RestoreOptions) error {
snapshotID := args[0] snapshotID := args[0]
opts.TargetDir = args[1] opts.TargetDir = args[1]
if len(args) > 2 { if len(args) > 2 {
@@ -90,14 +78,7 @@ func runRestore(cmd *cobra.Command, args []string, opts *RestoreOptions) error {
Debug: rootFlags.Debug, Debug: rootFlags.Debug,
Quiet: rootFlags.Quiet, Quiet: rootFlags.Quiet,
}, },
Modules: buildRestoreModules(), Modules: []fx.Option{
Invokes: buildRestoreInvokes(snapshotID, opts),
})
}
// buildRestoreModules returns the fx.Options for dependency injection in restore
func buildRestoreModules() []fx.Option {
return []fx.Option{
fx.Provide(fx.Annotate( fx.Provide(fx.Annotate(
func(g *globals.Globals, cfg *config.Config, func(g *globals.Globals, cfg *config.Config,
storer storage.Storer, v *vaultik.Vaultik, shutdowner fx.Shutdowner) *RestoreApp { storer storage.Storer, v *vaultik.Vaultik, shutdowner fx.Shutdowner) *RestoreApp {
@@ -110,12 +91,8 @@ func buildRestoreModules() []fx.Option {
} }
}, },
)), )),
} },
} Invokes: []fx.Option{
// buildRestoreInvokes returns the fx.Options that wire up the restore lifecycle
func buildRestoreInvokes(snapshotID string, opts *RestoreOptions) []fx.Option {
return []fx.Option{
fx.Invoke(func(app *RestoreApp, lc fx.Lifecycle) { fx.Invoke(func(app *RestoreApp, lc fx.Lifecycle) {
lc.Append(fx.Hook{ lc.Append(fx.Hook{
OnStart: func(ctx context.Context) error { OnStart: func(ctx context.Context) error {
@@ -131,7 +108,6 @@ func buildRestoreInvokes(snapshotID string, opts *RestoreOptions) []fx.Option {
if err := app.Vaultik.Restore(restoreOpts); err != nil { if err := app.Vaultik.Restore(restoreOpts); err != nil {
if err != context.Canceled { if err != context.Canceled {
log.Error("Restore operation failed", "error", err) log.Error("Restore operation failed", "error", err)
os.Exit(1)
} }
} }
@@ -149,5 +125,12 @@ func buildRestoreInvokes(snapshotID string, opts *RestoreOptions) []fx.Option {
}, },
}) })
}), }),
},
})
},
} }
cmd.Flags().BoolVar(&opts.Verify, "verify", false, "Verify restored files by checking chunk hashes")
return cmd
} }

View File

@@ -3,9 +3,7 @@ package cli
import ( import (
"fmt" "fmt"
"os" "os"
"path/filepath"
"github.com/adrg/xdg"
"github.com/spf13/cobra" "github.com/spf13/cobra"
) )
@@ -27,23 +25,23 @@ func NewRootCommand() *cobra.Command {
cmd := &cobra.Command{ cmd := &cobra.Command{
Use: "vaultik", Use: "vaultik",
Short: "Secure incremental backup tool with asymmetric encryption", Short: "Secure incremental backup tool with asymmetric encryption",
Long: `vaultik is a secure incremental backup tool that encrypts data using age Long: `vaultik is a secure incremental backup daemon that encrypts data using age
public keys and uploads to S3-compatible storage. No private keys are needed public keys and uploads to S3-compatible storage. No private keys are needed
on the source system.`, on the source system.`,
SilenceUsage: true, SilenceUsage: true,
} }
// Add global flags // Add global flags
cmd.PersistentFlags().StringVar(&rootFlags.ConfigPath, "config", "", "Path to config file (default: $VAULTIK_CONFIG or platform config dir)") cmd.PersistentFlags().StringVar(&rootFlags.ConfigPath, "config", "", "Path to config file (default: $VAULTIK_CONFIG or /etc/vaultik/config.yml)")
cmd.PersistentFlags().BoolVarP(&rootFlags.Verbose, "verbose", "v", false, "Enable verbose output") cmd.PersistentFlags().BoolVarP(&rootFlags.Verbose, "verbose", "v", false, "Enable verbose output")
cmd.PersistentFlags().BoolVar(&rootFlags.Debug, "debug", false, "Enable debug output") cmd.PersistentFlags().BoolVar(&rootFlags.Debug, "debug", false, "Enable debug output")
cmd.PersistentFlags().BoolVarP(&rootFlags.Quiet, "quiet", "q", false, "Suppress non-error output") cmd.PersistentFlags().BoolVarP(&rootFlags.Quiet, "quiet", "q", false, "Suppress non-error output")
// Add subcommands // Add subcommands
cmd.AddCommand( cmd.AddCommand(
NewInitCommand(),
NewRestoreCommand(), NewRestoreCommand(),
NewPruneCommand(), NewPruneCommand(),
NewVerifyCommand(),
NewStoreCommand(), NewStoreCommand(),
NewSnapshotCommand(), NewSnapshotCommand(),
NewInfoCommand(), NewInfoCommand(),
@@ -62,41 +60,25 @@ func GetRootFlags() RootFlags {
} }
// ResolveConfigPath resolves the config file path from flags, environment, or default. // ResolveConfigPath resolves the config file path from flags, environment, or default.
// Search order: --config flag, VAULTIK_CONFIG env, XDG config dir, /etc/vaultik/config.yml. // It checks in order: 1) --config flag, 2) VAULTIK_CONFIG environment variable,
// 3) default location /etc/vaultik/config.yml. Returns an error if no valid
// config file can be found through any of these methods.
func ResolveConfigPath() (string, error) { func ResolveConfigPath() (string, error) {
// First check global flag
if rootFlags.ConfigPath != "" { if rootFlags.ConfigPath != "" {
return rootFlags.ConfigPath, nil return rootFlags.ConfigPath, nil
} }
// Then check environment variable
if envPath := os.Getenv("VAULTIK_CONFIG"); envPath != "" { if envPath := os.Getenv("VAULTIK_CONFIG"); envPath != "" {
return envPath, nil return envPath, nil
} }
for _, path := range defaultConfigPaths() { // Finally check default location
if _, err := os.Stat(path); err == nil { defaultPath := "/etc/vaultik/config.yml"
return path, nil if _, err := os.Stat(defaultPath); err == nil {
} return defaultPath, nil
} }
return "", fmt.Errorf("no config file found; run 'vaultik init' to create one, or specify with --config") return "", fmt.Errorf("no config file specified, VAULTIK_CONFIG not set, and %s not found", defaultPath)
}
// defaultConfigPaths lists the config file locations to probe, in priority
// order: the per-user file under xdg.ConfigHome first, then the system-wide
// /etc/vaultik/config.yml as a fallback.
func defaultConfigPaths() []string {
	const systemConfig = "/etc/vaultik/config.yml"
	userConfig := filepath.Join(xdg.ConfigHome, "vaultik", "config.yml")

	candidates := make([]string, 0, 2)
	candidates = append(candidates, userConfig, systemConfig)
	return candidates
}
// DefaultConfigPath returns the platform-appropriate default config path.
// Used by the init command and in help text.
func DefaultConfigPath() string {
if os.Getuid() == 0 {
return "/etc/vaultik/config.yml"
}
return filepath.Join(xdg.ConfigHome, "vaultik", "config.yml")
} }

View File

@@ -3,7 +3,6 @@ package cli
import ( import (
"context" "context"
"fmt" "fmt"
"io"
"os" "os"
"git.eeqj.de/sneak/vaultik/internal/log" "git.eeqj.de/sneak/vaultik/internal/log"
@@ -27,7 +26,6 @@ func NewSnapshotCommand() *cobra.Command {
cmd.AddCommand(newSnapshotVerifyCommand()) cmd.AddCommand(newSnapshotVerifyCommand())
cmd.AddCommand(newSnapshotRemoveCommand()) cmd.AddCommand(newSnapshotRemoveCommand())
cmd.AddCommand(newSnapshotPruneCommand()) cmd.AddCommand(newSnapshotPruneCommand())
cmd.AddCommand(newSnapshotCleanupCommand())
return cmd return cmd
} }
@@ -73,13 +71,10 @@ specifying a path using --config or by setting VAULTIK_CONFIG to a path.`,
OnStart: func(ctx context.Context) error { OnStart: func(ctx context.Context) error {
// Start the snapshot creation in a goroutine // Start the snapshot creation in a goroutine
go func() { go func() {
if opts.Cron { // Run the snapshot creation
v.Stdout = io.Discard
}
if err := v.CreateSnapshot(opts); err != nil { if err := v.CreateSnapshot(opts); err != nil {
if err != context.Canceled { if err != context.Canceled {
log.Error("Snapshot creation failed", "error", err) log.Error("Snapshot creation failed", "error", err)
os.Exit(1)
} }
} }
@@ -103,9 +98,9 @@ specifying a path using --config or by setting VAULTIK_CONFIG to a path.`,
}, },
} }
cmd.Flags().BoolVar(&opts.Daemon, "daemon", false, "Run in daemon mode with inotify monitoring")
cmd.Flags().BoolVar(&opts.Cron, "cron", false, "Run in cron mode (silent unless error)") cmd.Flags().BoolVar(&opts.Cron, "cron", false, "Run in cron mode (silent unless error)")
cmd.Flags().BoolVar(&opts.Prune, "prune", false, "After backup, drop older snapshots of the same name and remove orphaned blobs") cmd.Flags().BoolVar(&opts.Prune, "prune", false, "Delete all previous snapshots and unreferenced blobs after backup")
cmd.Flags().StringVar(&opts.KeepNewerThan, "keep-newer-than", "", "With --prune: keep snapshots newer than this duration (e.g. 4w, 30d, 6mo) instead of only the latest")
cmd.Flags().BoolVar(&opts.SkipErrors, "skip-errors", false, "Skip file read errors (log them loudly but continue)") cmd.Flags().BoolVar(&opts.SkipErrors, "skip-errors", false, "Skip file read errors (log them loudly but continue)")
return cmd return cmd
@@ -172,23 +167,21 @@ func newSnapshotListCommand() *cobra.Command {
// newSnapshotPurgeCommand creates the 'snapshot purge' subcommand // newSnapshotPurgeCommand creates the 'snapshot purge' subcommand
func newSnapshotPurgeCommand() *cobra.Command { func newSnapshotPurgeCommand() *cobra.Command {
opts := &vaultik.SnapshotPurgeOptions{} var keepLatest bool
var olderThan string
var force bool
cmd := &cobra.Command{ cmd := &cobra.Command{
Use: "purge", Use: "purge",
Short: "Purge old snapshots", Short: "Purge old snapshots",
Long: `Removes snapshots based on age or count criteria. Long: "Removes snapshots based on age or count criteria",
Retention is per-snapshot-name: --keep-latest keeps the latest of each
configured snapshot name, not the latest globally. Use --snapshot to
restrict the operation to specific snapshot names.`,
Args: cobra.NoArgs, Args: cobra.NoArgs,
RunE: func(cmd *cobra.Command, args []string) error { RunE: func(cmd *cobra.Command, args []string) error {
// Validate flags // Validate flags
if !opts.KeepLatest && opts.OlderThan == "" { if !keepLatest && olderThan == "" {
return fmt.Errorf("must specify either --keep-latest or --older-than") return fmt.Errorf("must specify either --keep-latest or --older-than")
} }
if opts.KeepLatest && opts.OlderThan != "" { if keepLatest && olderThan != "" {
return fmt.Errorf("cannot specify both --keep-latest and --older-than") return fmt.Errorf("cannot specify both --keep-latest and --older-than")
} }
@@ -212,7 +205,7 @@ restrict the operation to specific snapshot names.`,
lc.Append(fx.Hook{ lc.Append(fx.Hook{
OnStart: func(ctx context.Context) error { OnStart: func(ctx context.Context) error {
go func() { go func() {
if err := v.PurgeSnapshotsWithOptions(opts); err != nil { if err := v.PurgeSnapshots(keepLatest, olderThan, force); err != nil {
if err != context.Canceled { if err != context.Canceled {
log.Error("Failed to purge snapshots", "error", err) log.Error("Failed to purge snapshots", "error", err)
os.Exit(1) os.Exit(1)
@@ -235,10 +228,9 @@ restrict the operation to specific snapshot names.`,
}, },
} }
cmd.Flags().BoolVar(&opts.KeepLatest, "keep-latest", false, "Keep only the latest snapshot of each name") cmd.Flags().BoolVar(&keepLatest, "keep-latest", false, "Keep only the latest snapshot")
cmd.Flags().StringVar(&opts.OlderThan, "older-than", "", "Remove snapshots older than duration (e.g., 30d, 6m, 1y)") cmd.Flags().StringVar(&olderThan, "older-than", "", "Remove snapshots older than duration (e.g., 30d, 6m, 1y)")
cmd.Flags().BoolVar(&opts.Force, "force", false, "Skip confirmation prompt") cmd.Flags().BoolVar(&force, "force", false, "Skip confirmation prompt")
cmd.Flags().StringArrayVar(&opts.Names, "snapshot", nil, "Restrict to snapshots with these names (repeat for multiple)")
return cmd return cmd
} }
@@ -284,7 +276,13 @@ func newSnapshotVerifyCommand() *cobra.Command {
lc.Append(fx.Hook{ lc.Append(fx.Hook{
OnStart: func(ctx context.Context) error { OnStart: func(ctx context.Context) error {
go func() { go func() {
if err := v.VerifySnapshotWithOptions(snapshotID, opts); err != nil { var err error
if opts.Deep {
err = v.RunDeepVerify(snapshotID, opts)
} else {
err = v.VerifySnapshotWithOptions(snapshotID, opts)
}
if err != nil {
if err != context.Canceled { if err != context.Canceled {
if !opts.JSON { if !opts.JSON {
log.Error("Verification failed", "error", err) log.Error("Verification failed", "error", err)
@@ -467,60 +465,3 @@ accumulate from incomplete backups or deleted snapshots.`,
return cmd return cmd
} }
// newSnapshotCleanupCommand builds the 'snapshot cleanup' subcommand.
//
// The command takes no positional arguments. When run, it resolves the config
// path, starts the application via RunWithApp, and registers an fx lifecycle
// hook that calls Vaultik.CleanupLocalSnapshots in a background goroutine and
// then asks the application to shut down.
func newSnapshotCleanupCommand() *cobra.Command {
	// cleanupAndShutdown is the body of the background goroutine: run the
	// cleanup, exit non-zero on any failure other than cancellation, and then
	// request application shutdown.
	cleanupAndShutdown := func(v *vaultik.Vaultik) {
		if cleanupErr := v.CleanupLocalSnapshots(); cleanupErr != nil && cleanupErr != context.Canceled {
			log.Error("Cleanup failed", "error", cleanupErr)
			os.Exit(1)
		}
		if shutdownErr := v.Shutdowner.Shutdown(); shutdownErr != nil {
			log.Error("Failed to shutdown", "error", shutdownErr)
		}
	}

	// registerHook wires the cleanup into the fx lifecycle.
	registerHook := func(v *vaultik.Vaultik, lc fx.Lifecycle) {
		lc.Append(fx.Hook{
			OnStart: func(ctx context.Context) error {
				// Run asynchronously so OnStart returns immediately.
				go cleanupAndShutdown(v)
				return nil
			},
			OnStop: func(ctx context.Context) error {
				v.Cancel()
				return nil
			},
		})
	}

	command := &cobra.Command{
		Use:   "cleanup",
		Short: "Remove stale local snapshot records not found in remote storage",
		Long: `Removes local database records for snapshots whose metadata no longer
exists in remote storage. These are typically left behind by incomplete
or interrupted backups.
This command does not delete anything from remote storage.`,
		Args: cobra.NoArgs,
	}

	command.RunE = func(cmd *cobra.Command, args []string) error {
		configPath, err := ResolveConfigPath()
		if err != nil {
			return err
		}

		rootFlags := GetRootFlags()
		appOpts := AppOptions{
			ConfigPath: configPath,
			LogOptions: log.LogOptions{
				Verbose: rootFlags.Verbose,
				Debug:   rootFlags.Debug,
				Quiet:   rootFlags.Quiet,
			},
			Modules: []fx.Option{},
			Invokes: []fx.Option{fx.Invoke(registerHook)},
		}
		return RunWithApp(cmd.Context(), appOpts)
	}

	return command
}

98
internal/cli/verify.go Normal file
View File

@@ -0,0 +1,98 @@
package cli
import (
"context"
"os"
"git.eeqj.de/sneak/vaultik/internal/log"
"git.eeqj.de/sneak/vaultik/internal/vaultik"
"github.com/spf13/cobra"
"go.uber.org/fx"
)
// NewVerifyCommand builds the "verify" command.
//
// The command requires exactly one argument, the snapshot ID. The --deep flag
// selects Vaultik.RunDeepVerify instead of Vaultik.VerifySnapshotWithOptions,
// and --json both suppresses log output and silences the failure log line.
// Any verification error other than context.Canceled terminates the process
// with exit status 1.
func NewVerifyCommand() *cobra.Command {
	verifyOpts := &vaultik.VerifyOptions{}

	command := &cobra.Command{
		Use:   "verify <snapshot-id>",
		Short: "Verify snapshot integrity",
		Long: `Verifies that all blobs referenced in a snapshot exist and optionally verifies their contents.
Shallow verification (default):
- Downloads and decompresses manifest
- Checks existence of all blobs in S3
- Reports missing blobs
Deep verification (--deep):
- Downloads and decrypts database
- Verifies blob lists match between manifest and database
- Downloads, decrypts, and decompresses each blob
- Verifies SHA256 hash of each chunk matches database
- Ensures chunks are ordered correctly
The command will fail immediately on any verification error and exit with non-zero status.`,
		Args: cobra.ExactArgs(1),
	}

	command.RunE = func(cmd *cobra.Command, args []string) error {
		snapshotID := args[0]

		configPath, err := ResolveConfigPath()
		if err != nil {
			return err
		}
		rootFlags := GetRootFlags()

		// verify dispatches to the deep or shallow implementation; the flag
		// is read when the goroutine runs, after cobra has parsed it.
		verify := func(v *vaultik.Vaultik) error {
			if verifyOpts.Deep {
				return v.RunDeepVerify(snapshotID, verifyOpts)
			}
			return v.VerifySnapshotWithOptions(snapshotID, verifyOpts)
		}

		// registerHook attaches the verification run to the fx lifecycle.
		registerHook := func(v *vaultik.Vaultik, lc fx.Lifecycle) {
			lc.Append(fx.Hook{
				OnStart: func(ctx context.Context) error {
					// Verification runs in the background so that OnStart
					// returns immediately.
					go func() {
						if verifyErr := verify(v); verifyErr != nil && verifyErr != context.Canceled {
							if !verifyOpts.JSON {
								log.Error("Verification failed", "error", verifyErr)
							}
							os.Exit(1)
						}
						if shutdownErr := v.Shutdowner.Shutdown(); shutdownErr != nil {
							log.Error("Failed to shutdown", "error", shutdownErr)
						}
					}()
					return nil
				},
				OnStop: func(ctx context.Context) error {
					log.Debug("Stopping verify operation")
					v.Cancel()
					return nil
				},
			})
		}

		appOpts := AppOptions{
			ConfigPath: configPath,
			LogOptions: log.LogOptions{
				Verbose: rootFlags.Verbose,
				Debug:   rootFlags.Debug,
				// JSON mode implies quiet logging.
				Quiet: rootFlags.Quiet || verifyOpts.JSON,
			},
			Modules: []fx.Option{},
			Invokes: []fx.Option{fx.Invoke(registerHook)},
		}
		return RunWithApp(cmd.Context(), appOpts)
	}

	flags := command.Flags()
	flags.BoolVar(&verifyOpts.Deep, "deep", false, "Perform deep verification by downloading and verifying all blob contents")
	flags.BoolVar(&verifyOpts.JSON, "json", false, "Output verification results as JSON")

	return command
}

View File

@@ -6,6 +6,7 @@ import (
"path/filepath" "path/filepath"
"sort" "sort"
"strings" "strings"
"time"
"filippo.io/age" "filippo.io/age"
"git.eeqj.de/sneak/smartconfig" "git.eeqj.de/sneak/smartconfig"
@@ -84,11 +85,14 @@ func (c *Config) SnapshotNames() []string {
type Config struct { type Config struct {
AgeRecipients []string `yaml:"age_recipients"` AgeRecipients []string `yaml:"age_recipients"`
AgeSecretKey string `yaml:"age_secret_key"` AgeSecretKey string `yaml:"age_secret_key"`
BackupInterval time.Duration `yaml:"backup_interval"`
BlobSizeLimit Size `yaml:"blob_size_limit"` BlobSizeLimit Size `yaml:"blob_size_limit"`
ChunkSize Size `yaml:"chunk_size"` ChunkSize Size `yaml:"chunk_size"`
Exclude []string `yaml:"exclude"` // Global excludes applied to all snapshots Exclude []string `yaml:"exclude"` // Global excludes applied to all snapshots
FullScanInterval time.Duration `yaml:"full_scan_interval"`
Hostname string `yaml:"hostname"` Hostname string `yaml:"hostname"`
IndexPath string `yaml:"index_path"` IndexPath string `yaml:"index_path"`
MinTimeBetweenRun time.Duration `yaml:"min_time_between_run"`
S3 S3Config `yaml:"s3"` S3 S3Config `yaml:"s3"`
Snapshots map[string]SnapshotConfig `yaml:"snapshots"` Snapshots map[string]SnapshotConfig `yaml:"snapshots"`
CompressionLevel int `yaml:"compression_level"` CompressionLevel int `yaml:"compression_level"`
@@ -153,6 +157,9 @@ func Load(path string) (*Config, error) {
// Set defaults // Set defaults
BlobSizeLimit: Size(10 * 1024 * 1024 * 1024), // 10GB BlobSizeLimit: Size(10 * 1024 * 1024 * 1024), // 10GB
ChunkSize: Size(10 * 1024 * 1024), // 10MB ChunkSize: Size(10 * 1024 * 1024), // 10MB
BackupInterval: 1 * time.Hour,
FullScanInterval: 24 * time.Hour,
MinTimeBetweenRun: 15 * time.Minute,
IndexPath: filepath.Join(xdg.DataHome, appName, "index.sqlite"), IndexPath: filepath.Join(xdg.DataHome, appName, "index.sqlite"),
CompressionLevel: 3, CompressionLevel: 3,
} }
@@ -236,11 +243,11 @@ func Load(path string) (*Config, error) {
// Returns an error describing the first validation failure encountered. // Returns an error describing the first validation failure encountered.
func (c *Config) Validate() error { func (c *Config) Validate() error {
if len(c.AgeRecipients) == 0 { if len(c.AgeRecipients) == 0 {
return fmt.Errorf("at least one age_recipient is required (generate with: age-keygen)") return fmt.Errorf("at least one age_recipient is required")
} }
if len(c.Snapshots) == 0 { if len(c.Snapshots) == 0 {
return fmt.Errorf("at least one snapshot must be configured (see config.example.yml)") return fmt.Errorf("at least one snapshot must be configured")
} }
for name, snap := range c.Snapshots { for name, snap := range c.Snapshots {
@@ -299,7 +306,7 @@ func (c *Config) validateStorage() error {
// Legacy S3 configuration // Legacy S3 configuration
if c.S3.Endpoint == "" { if c.S3.Endpoint == "" {
return fmt.Errorf("storage not configured; set storage_url or provide s3.endpoint + s3.bucket + credentials") return fmt.Errorf("s3.endpoint is required (or set storage_url)")
} }
if c.S3.Bucket == "" { if c.S3.Bucket == "" {

View File

@@ -29,6 +29,7 @@ func TestCascadeDeleteDebug(t *testing.T) {
file := &File{ file := &File{
Path: "/cascade-test.txt", Path: "/cascade-test.txt",
MTime: time.Now().Truncate(time.Second), MTime: time.Now().Truncate(time.Second),
CTime: time.Now().Truncate(time.Second),
Size: 1024, Size: 1024,
Mode: 0644, Mode: 0644,
UID: 1000, UID: 1000,

View File

@@ -22,6 +22,7 @@ func TestChunkFileRepository(t *testing.T) {
file1 := &File{ file1 := &File{
Path: "/file1.txt", Path: "/file1.txt",
MTime: testTime, MTime: testTime,
CTime: testTime,
Size: 1024, Size: 1024,
Mode: 0644, Mode: 0644,
UID: 1000, UID: 1000,
@@ -36,6 +37,7 @@ func TestChunkFileRepository(t *testing.T) {
file2 := &File{ file2 := &File{
Path: "/file2.txt", Path: "/file2.txt",
MTime: testTime, MTime: testTime,
CTime: testTime,
Size: 1024, Size: 1024,
Mode: 0644, Mode: 0644,
UID: 1000, UID: 1000,
@@ -136,9 +138,9 @@ func TestChunkFileRepositoryComplexDeduplication(t *testing.T) {
// Create test files // Create test files
testTime := time.Now().Truncate(time.Second) testTime := time.Now().Truncate(time.Second)
file1 := &File{Path: "/file1.txt", MTime: testTime, Size: 3072, Mode: 0644, UID: 1000, GID: 1000} file1 := &File{Path: "/file1.txt", MTime: testTime, CTime: testTime, Size: 3072, Mode: 0644, UID: 1000, GID: 1000}
file2 := &File{Path: "/file2.txt", MTime: testTime, Size: 3072, Mode: 0644, UID: 1000, GID: 1000} file2 := &File{Path: "/file2.txt", MTime: testTime, CTime: testTime, Size: 3072, Mode: 0644, UID: 1000, GID: 1000}
file3 := &File{Path: "/file3.txt", MTime: testTime, Size: 2048, Mode: 0644, UID: 1000, GID: 1000} file3 := &File{Path: "/file3.txt", MTime: testTime, CTime: testTime, Size: 2048, Mode: 0644, UID: 1000, GID: 1000}
if err := fileRepo.Create(ctx, nil, file1); err != nil { if err := fileRepo.Create(ctx, nil, file1); err != nil {
t.Fatalf("failed to create file1: %v", err) t.Fatalf("failed to create file1: %v", err)

View File

@@ -6,32 +6,24 @@
// multiple source files. Blobs are content-addressed, meaning their filename // multiple source files. Blobs are content-addressed, meaning their filename
// is derived from their SHA256 hash after compression and encryption. // is derived from their SHA256 hash after compression and encryption.
// //
// Schema is managed via numbered SQL migrations embedded in the schema/ // The database does not support migrations. If the schema changes, delete
// directory. Migration 000.sql bootstraps the schema_migrations tracking // the local database and perform a full backup to recreate it.
// table; subsequent migrations (001, 002, …) are applied in order.
package database package database
import ( import (
"context" "context"
"database/sql" "database/sql"
"embed" _ "embed"
"fmt" "fmt"
"os" "os"
"path/filepath"
"sort"
"strconv"
"strings" "strings"
"git.eeqj.de/sneak/vaultik/internal/log" "git.eeqj.de/sneak/vaultik/internal/log"
_ "modernc.org/sqlite" _ "modernc.org/sqlite"
) )
//go:embed schema/*.sql //go:embed schema.sql
var schemaFS embed.FS var schemaSQL string
// bootstrapVersion is the migration that creates the schema_migrations
// table itself. It is applied before the normal migration loop.
const bootstrapVersion = 0
// DB represents the Vaultik local index database connection. // DB represents the Vaultik local index database connection.
// It uses SQLite to track file metadata, content-defined chunks, and blob associations. // It uses SQLite to track file metadata, content-defined chunks, and blob associations.
@@ -43,46 +35,6 @@ type DB struct {
path string path string
} }
// ParseMigrationVersion returns the integer version encoded at the start of
// a migration filename.
//
// Accepted shapes are "<version>.sql" and "<version>_<description>.sql",
// where <version> consists solely of ASCII digits (leading zeros allowed,
// e.g. "001"). The file extension is stripped first; everything up to the
// first underscore of what remains is treated as the version. An error is
// returned when the stem is empty, the version prefix is empty, the prefix
// contains a non-digit character, or the number cannot be converted to int.
func ParseMigrationVersion(filename string) (int, error) {
	stem := strings.TrimSuffix(filename, filepath.Ext(filename))
	if stem == "" {
		return 0, fmt.Errorf("invalid migration filename %q: empty name", filename)
	}

	// The version is whatever precedes the first underscore; with no
	// underscore present the whole stem is the version.
	prefix := strings.SplitN(stem, "_", 2)[0]
	if prefix == "" {
		return 0, fmt.Errorf("invalid migration filename %q: empty version prefix", filename)
	}

	// Report the first character that is not an ASCII digit.
	for _, r := range prefix {
		if '0' <= r && r <= '9' {
			continue
		}
		return 0, fmt.Errorf(
			"invalid migration filename %q: version %q contains non-numeric character %q",
			filename, prefix, string(r),
		)
	}

	parsed, err := strconv.Atoi(prefix)
	if err != nil {
		return 0, fmt.Errorf("invalid migration filename %q: %w", filename, err)
	}
	return parsed, nil
}
// New creates a new database connection at the specified path. // New creates a new database connection at the specified path.
// It creates the schema if needed and configures SQLite with WAL mode for // It creates the schema if needed and configures SQLite with WAL mode for
// better concurrency. SQLite handles crash recovery automatically when // better concurrency. SQLite handles crash recovery automatically when
@@ -120,9 +72,9 @@ func New(ctx context.Context, path string) (*DB, error) {
} }
db := &DB{conn: conn, path: path} db := &DB{conn: conn, path: path}
if err := applyMigrations(ctx, conn); err != nil { if err := db.createSchema(ctx); err != nil {
_ = conn.Close() _ = conn.Close()
return nil, fmt.Errorf("applying migrations: %w", err) return nil, fmt.Errorf("creating schema: %w", err)
} }
return db, nil return db, nil
} }
@@ -173,9 +125,9 @@ func New(ctx context.Context, path string) (*DB, error) {
} }
db := &DB{conn: conn, path: path} db := &DB{conn: conn, path: path}
if err := applyMigrations(ctx, conn); err != nil { if err := db.createSchema(ctx); err != nil {
_ = conn.Close() _ = conn.Close()
return nil, fmt.Errorf("applying migrations: %w", err) return nil, fmt.Errorf("creating schema: %w", err)
} }
log.Debug("Database connection established successfully", "path", path) log.Debug("Database connection established successfully", "path", path)
@@ -246,120 +198,9 @@ func (db *DB) QueryRowWithLog(
return db.conn.QueryRowContext(ctx, query, args...) return db.conn.QueryRowContext(ctx, query, args...)
} }
// collectMigrations reads the embedded schema directory and returns func (db *DB) createSchema(ctx context.Context) error {
// migration filenames sorted lexicographically. _, err := db.conn.ExecContext(ctx, schemaSQL)
func collectMigrations() ([]string, error) {
entries, err := schemaFS.ReadDir("schema")
if err != nil {
return nil, fmt.Errorf("failed to read schema directory: %w", err)
}
var migrations []string
for _, entry := range entries {
if !entry.IsDir() && strings.HasSuffix(entry.Name(), ".sql") {
migrations = append(migrations, entry.Name())
}
}
sort.Strings(migrations)
return migrations, nil
}
// bootstrapMigrationsTable makes sure the schema_migrations tracking table is
// present. It looks the table up in sqlite_master and, only when it is
// absent, executes the embedded schema/000.sql script against db.
func bootstrapMigrationsTable(ctx context.Context, db *sql.DB) error {
	const probe = "SELECT COUNT(*) FROM sqlite_master WHERE type='table' AND name='schema_migrations'"

	var found int
	if err := db.QueryRowContext(ctx, probe).Scan(&found); err != nil {
		return fmt.Errorf("failed to check for migrations table: %w", err)
	}
	if found > 0 {
		// Table already exists; nothing to bootstrap.
		return nil
	}

	script, err := schemaFS.ReadFile("schema/000.sql")
	if err != nil {
		return fmt.Errorf("failed to read bootstrap migration 000.sql: %w", err)
	}

	log.Info("applying bootstrap migration", "version", bootstrapVersion)
	if _, err := db.ExecContext(ctx, string(script)); err != nil {
		return fmt.Errorf("failed to apply bootstrap migration: %w", err)
	}
	return nil
}
// applyMigrations applies all pending migrations to db. It first bootstraps
// the schema_migrations table via 000.sql, then iterates through remaining
// migration files in order.
func applyMigrations(ctx context.Context, db *sql.DB) error {
if err := bootstrapMigrationsTable(ctx, db); err != nil {
return err return err
}
migrations, err := collectMigrations()
if err != nil {
return err
}
for _, migration := range migrations {
version, parseErr := ParseMigrationVersion(migration)
if parseErr != nil {
return parseErr
}
// Check if already applied.
var count int
err := db.QueryRowContext(ctx,
"SELECT COUNT(*) FROM schema_migrations WHERE version = ?",
version,
).Scan(&count)
if err != nil {
return fmt.Errorf("failed to check migration status: %w", err)
}
if count > 0 {
log.Debug("migration already applied", "version", version)
continue
}
// Read and apply migration.
content, readErr := schemaFS.ReadFile(filepath.Join("schema", migration))
if readErr != nil {
return fmt.Errorf("failed to read migration %s: %w", migration, readErr)
}
log.Info("applying migration", "version", version)
_, execErr := db.ExecContext(ctx, string(content))
if execErr != nil {
return fmt.Errorf("failed to apply migration %s: %w", migration, execErr)
}
// Record migration as applied.
_, recErr := db.ExecContext(ctx,
"INSERT INTO schema_migrations (version) VALUES (?)",
version,
)
if recErr != nil {
return fmt.Errorf("failed to record migration %s: %w", migration, recErr)
}
log.Info("migration applied successfully", "version", version)
}
return nil
} }
// NewTestDB creates an in-memory SQLite database for testing purposes. // NewTestDB creates an in-memory SQLite database for testing purposes.

View File

@@ -2,7 +2,6 @@ package database
import ( import (
"context" "context"
"database/sql"
"fmt" "fmt"
"path/filepath" "path/filepath"
"testing" "testing"
@@ -27,10 +26,9 @@ func TestDatabase(t *testing.T) {
t.Fatal("database connection is nil") t.Fatal("database connection is nil")
} }
// Test schema creation (already done in New via migrations) // Test schema creation (already done in New)
// Verify tables exist // Verify tables exist
tables := []string{ tables := []string{
"schema_migrations",
"files", "file_chunks", "chunks", "blobs", "files", "file_chunks", "chunks", "blobs",
"blob_chunks", "chunk_files", "snapshots", "blob_chunks", "chunk_files", "snapshots",
} }
@@ -101,139 +99,3 @@ func TestDatabaseConcurrentAccess(t *testing.T) {
t.Errorf("expected 10 chunks, got %d", count) t.Errorf("expected 10 chunks, got %d", count)
} }
} }
// TestParseMigrationVersion is a table-driven check of ParseMigrationVersion
// covering bare numeric names, names with a description suffix, and several
// malformed filenames that must be rejected.
func TestParseMigrationVersion(t *testing.T) {
	type testCase struct {
		name      string
		filename  string
		wantVer   int
		wantError bool
	}

	cases := []testCase{
		{name: "valid 000.sql", filename: "000.sql", wantVer: 0, wantError: false},
		{name: "valid 001.sql", filename: "001.sql", wantVer: 1, wantError: false},
		{name: "valid 099.sql", filename: "099.sql", wantVer: 99, wantError: false},
		{name: "valid with description", filename: "001_initial_schema.sql", wantVer: 1, wantError: false},
		{name: "valid large version", filename: "123_big_migration.sql", wantVer: 123, wantError: false},
		{name: "invalid alpha version", filename: "abc.sql", wantVer: 0, wantError: true},
		{name: "invalid mixed chars", filename: "12a.sql", wantVer: 0, wantError: true},
		{name: "invalid no extension", filename: "schema.sql", wantVer: 0, wantError: true},
		{name: "empty string", filename: "", wantVer: 0, wantError: true},
	}

	for _, c := range cases {
		t.Run(c.name, func(t *testing.T) {
			ver, err := ParseMigrationVersion(c.filename)
			switch {
			case c.wantError && err == nil:
				t.Errorf("ParseMigrationVersion(%q) = %d, nil; want error", c.filename, ver)
			case c.wantError:
				// Failure was expected and observed; the value is not checked.
			case err != nil:
				t.Errorf("ParseMigrationVersion(%q) unexpected error: %v", c.filename, err)
			case ver != c.wantVer:
				t.Errorf("ParseMigrationVersion(%q) = %d; want %d", c.filename, ver, c.wantVer)
			}
		})
	}
}
// TestApplyMigrations_Idempotent runs applyMigrations twice against the same
// in-memory database and asserts that the second run leaves the number of
// rows in schema_migrations unchanged.
func TestApplyMigrations_Idempotent(t *testing.T) {
	ctx := context.Background()

	db, openErr := sql.Open("sqlite", ":memory:?_foreign_keys=ON")
	if openErr != nil {
		t.Fatalf("failed to open database: %v", openErr)
	}
	defer func() {
		if closeErr := db.Close(); closeErr != nil {
			t.Errorf("failed to close database: %v", closeErr)
		}
	}()
	db.SetMaxOpenConns(1)
	db.SetMaxIdleConns(1)

	// recordedMigrations returns the current row count of schema_migrations,
	// aborting the test if the query fails. stage names the run for the
	// failure message.
	recordedMigrations := func(stage string) int {
		var n int
		if err := db.QueryRowContext(ctx, "SELECT COUNT(*) FROM schema_migrations").Scan(&n); err != nil {
			t.Fatalf("failed to count schema_migrations after %s run: %v", stage, err)
		}
		return n
	}

	// Initial run applies every migration.
	if err := applyMigrations(ctx, db); err != nil {
		t.Fatalf("first applyMigrations failed: %v", err)
	}
	before := recordedMigrations("first")

	// Repeating the run must not apply or record anything new.
	if err := applyMigrations(ctx, db); err != nil {
		t.Fatalf("second applyMigrations failed: %v", err)
	}
	after := recordedMigrations("second")

	if before != after {
		t.Errorf("schema_migrations row count changed: before=%d, after=%d", before, after)
	}
}
// TestBootstrapMigrationsTable_FreshDatabase checks that, on an empty
// in-memory database, bootstrapMigrationsTable creates the schema_migrations
// table and that a row with version 0 can be read back from it.
func TestBootstrapMigrationsTable_FreshDatabase(t *testing.T) {
	ctx := context.Background()

	db, openErr := sql.Open("sqlite", ":memory:?_foreign_keys=ON")
	if openErr != nil {
		t.Fatalf("failed to open database: %v", openErr)
	}
	defer func() {
		if closeErr := db.Close(); closeErr != nil {
			t.Errorf("failed to close database: %v", closeErr)
		}
	}()
	db.SetMaxOpenConns(1)
	db.SetMaxIdleConns(1)

	// trackingTables reports how many tables named schema_migrations are
	// listed in sqlite_master. stage ("before"/"after") labels the failure.
	trackingTables := func(stage string) int {
		var n int
		err := db.QueryRowContext(ctx,
			"SELECT COUNT(*) FROM sqlite_master WHERE type='table' AND name='schema_migrations'",
		).Scan(&n)
		if err != nil {
			t.Fatalf("failed to check for table %s bootstrap: %v", stage, err)
		}
		return n
	}

	// Precondition: a fresh database has no tracking table.
	if trackingTables("before") != 0 {
		t.Fatal("schema_migrations table should not exist before bootstrap")
	}

	if err := bootstrapMigrationsTable(ctx, db); err != nil {
		t.Fatalf("bootstrapMigrationsTable failed: %v", err)
	}

	// Postcondition: exactly one tracking table now exists.
	if got := trackingTables("after"); got != 1 {
		t.Fatalf("schema_migrations table should exist after bootstrap, got count=%d", got)
	}

	// The version 0 row must be readable from the new table.
	var recorded int
	if err := db.QueryRowContext(ctx,
		"SELECT version FROM schema_migrations WHERE version = 0",
	).Scan(&recorded); err != nil {
		t.Fatalf("version 0 row not found in schema_migrations: %v", err)
	}
	if recorded != 0 {
		t.Errorf("expected version 0, got %d", recorded)
	}
}

View File

@@ -22,6 +22,7 @@ func TestFileChunkRepository(t *testing.T) {
file := &File{ file := &File{
Path: "/test/file.txt", Path: "/test/file.txt",
MTime: testTime, MTime: testTime,
CTime: testTime,
Size: 3072, Size: 3072,
Mode: 0644, Mode: 0644,
UID: 1000, UID: 1000,
@@ -134,6 +135,7 @@ func TestFileChunkRepositoryMultipleFiles(t *testing.T) {
file := &File{ file := &File{
Path: types.FilePath(path), Path: types.FilePath(path),
MTime: testTime, MTime: testTime,
CTime: testTime,
Size: 2048, Size: 2048,
Mode: 0644, Mode: 0644,
UID: 1000, UID: 1000,

View File

@@ -25,11 +25,12 @@ func (r *FileRepository) Create(ctx context.Context, tx *sql.Tx, file *File) err
} }
query := ` query := `
INSERT INTO files (id, path, source_path, mtime, size, mode, uid, gid, link_target) INSERT INTO files (id, path, source_path, mtime, ctime, size, mode, uid, gid, link_target)
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
ON CONFLICT(path) DO UPDATE SET ON CONFLICT(path) DO UPDATE SET
source_path = excluded.source_path, source_path = excluded.source_path,
mtime = excluded.mtime, mtime = excluded.mtime,
ctime = excluded.ctime,
size = excluded.size, size = excluded.size,
mode = excluded.mode, mode = excluded.mode,
uid = excluded.uid, uid = excluded.uid,
@@ -41,10 +42,10 @@ func (r *FileRepository) Create(ctx context.Context, tx *sql.Tx, file *File) err
var idStr string var idStr string
var err error var err error
if tx != nil { if tx != nil {
LogSQL("Execute", query, file.ID.String(), file.Path.String(), file.SourcePath.String(), file.MTime.Unix(), file.Size, file.Mode, file.UID, file.GID, file.LinkTarget.String()) LogSQL("Execute", query, file.ID.String(), file.Path.String(), file.SourcePath.String(), file.MTime.Unix(), file.CTime.Unix(), file.Size, file.Mode, file.UID, file.GID, file.LinkTarget.String())
err = tx.QueryRowContext(ctx, query, file.ID.String(), file.Path.String(), file.SourcePath.String(), file.MTime.Unix(), file.Size, file.Mode, file.UID, file.GID, file.LinkTarget.String()).Scan(&idStr) err = tx.QueryRowContext(ctx, query, file.ID.String(), file.Path.String(), file.SourcePath.String(), file.MTime.Unix(), file.CTime.Unix(), file.Size, file.Mode, file.UID, file.GID, file.LinkTarget.String()).Scan(&idStr)
} else { } else {
err = r.db.QueryRowWithLog(ctx, query, file.ID.String(), file.Path.String(), file.SourcePath.String(), file.MTime.Unix(), file.Size, file.Mode, file.UID, file.GID, file.LinkTarget.String()).Scan(&idStr) err = r.db.QueryRowWithLog(ctx, query, file.ID.String(), file.Path.String(), file.SourcePath.String(), file.MTime.Unix(), file.CTime.Unix(), file.Size, file.Mode, file.UID, file.GID, file.LinkTarget.String()).Scan(&idStr)
} }
if err != nil { if err != nil {
@@ -62,7 +63,7 @@ func (r *FileRepository) Create(ctx context.Context, tx *sql.Tx, file *File) err
func (r *FileRepository) GetByPath(ctx context.Context, path string) (*File, error) { func (r *FileRepository) GetByPath(ctx context.Context, path string) (*File, error) {
query := ` query := `
SELECT id, path, source_path, mtime, size, mode, uid, gid, link_target SELECT id, path, source_path, mtime, ctime, size, mode, uid, gid, link_target
FROM files FROM files
WHERE path = ? WHERE path = ?
` `
@@ -81,7 +82,7 @@ func (r *FileRepository) GetByPath(ctx context.Context, path string) (*File, err
// GetByID retrieves a file by its UUID // GetByID retrieves a file by its UUID
func (r *FileRepository) GetByID(ctx context.Context, id types.FileID) (*File, error) { func (r *FileRepository) GetByID(ctx context.Context, id types.FileID) (*File, error) {
query := ` query := `
SELECT id, path, source_path, mtime, size, mode, uid, gid, link_target SELECT id, path, source_path, mtime, ctime, size, mode, uid, gid, link_target
FROM files FROM files
WHERE id = ? WHERE id = ?
` `
@@ -99,7 +100,7 @@ func (r *FileRepository) GetByID(ctx context.Context, id types.FileID) (*File, e
func (r *FileRepository) GetByPathTx(ctx context.Context, tx *sql.Tx, path string) (*File, error) { func (r *FileRepository) GetByPathTx(ctx context.Context, tx *sql.Tx, path string) (*File, error) {
query := ` query := `
SELECT id, path, source_path, mtime, size, mode, uid, gid, link_target SELECT id, path, source_path, mtime, ctime, size, mode, uid, gid, link_target
FROM files FROM files
WHERE path = ? WHERE path = ?
` `
@@ -122,7 +123,7 @@ func (r *FileRepository) GetByPathTx(ctx context.Context, tx *sql.Tx, path strin
func (r *FileRepository) scanFile(row *sql.Row) (*File, error) { func (r *FileRepository) scanFile(row *sql.Row) (*File, error) {
var file File var file File
var idStr, pathStr, sourcePathStr string var idStr, pathStr, sourcePathStr string
var mtimeUnix int64 var mtimeUnix, ctimeUnix int64
var linkTarget sql.NullString var linkTarget sql.NullString
err := row.Scan( err := row.Scan(
@@ -130,6 +131,7 @@ func (r *FileRepository) scanFile(row *sql.Row) (*File, error) {
&pathStr, &pathStr,
&sourcePathStr, &sourcePathStr,
&mtimeUnix, &mtimeUnix,
&ctimeUnix,
&file.Size, &file.Size,
&file.Mode, &file.Mode,
&file.UID, &file.UID,
@@ -147,6 +149,7 @@ func (r *FileRepository) scanFile(row *sql.Row) (*File, error) {
file.Path = types.FilePath(pathStr) file.Path = types.FilePath(pathStr)
file.SourcePath = types.SourcePath(sourcePathStr) file.SourcePath = types.SourcePath(sourcePathStr)
file.MTime = time.Unix(mtimeUnix, 0).UTC() file.MTime = time.Unix(mtimeUnix, 0).UTC()
file.CTime = time.Unix(ctimeUnix, 0).UTC()
if linkTarget.Valid { if linkTarget.Valid {
file.LinkTarget = types.FilePath(linkTarget.String) file.LinkTarget = types.FilePath(linkTarget.String)
} }
@@ -158,7 +161,7 @@ func (r *FileRepository) scanFile(row *sql.Row) (*File, error) {
func (r *FileRepository) scanFileRows(rows *sql.Rows) (*File, error) { func (r *FileRepository) scanFileRows(rows *sql.Rows) (*File, error) {
var file File var file File
var idStr, pathStr, sourcePathStr string var idStr, pathStr, sourcePathStr string
var mtimeUnix int64 var mtimeUnix, ctimeUnix int64
var linkTarget sql.NullString var linkTarget sql.NullString
err := rows.Scan( err := rows.Scan(
@@ -166,6 +169,7 @@ func (r *FileRepository) scanFileRows(rows *sql.Rows) (*File, error) {
&pathStr, &pathStr,
&sourcePathStr, &sourcePathStr,
&mtimeUnix, &mtimeUnix,
&ctimeUnix,
&file.Size, &file.Size,
&file.Mode, &file.Mode,
&file.UID, &file.UID,
@@ -183,6 +187,7 @@ func (r *FileRepository) scanFileRows(rows *sql.Rows) (*File, error) {
file.Path = types.FilePath(pathStr) file.Path = types.FilePath(pathStr)
file.SourcePath = types.SourcePath(sourcePathStr) file.SourcePath = types.SourcePath(sourcePathStr)
file.MTime = time.Unix(mtimeUnix, 0).UTC() file.MTime = time.Unix(mtimeUnix, 0).UTC()
file.CTime = time.Unix(ctimeUnix, 0).UTC()
if linkTarget.Valid { if linkTarget.Valid {
file.LinkTarget = types.FilePath(linkTarget.String) file.LinkTarget = types.FilePath(linkTarget.String)
} }
@@ -192,7 +197,7 @@ func (r *FileRepository) scanFileRows(rows *sql.Rows) (*File, error) {
func (r *FileRepository) ListModifiedSince(ctx context.Context, since time.Time) ([]*File, error) { func (r *FileRepository) ListModifiedSince(ctx context.Context, since time.Time) ([]*File, error) {
query := ` query := `
SELECT id, path, source_path, mtime, size, mode, uid, gid, link_target SELECT id, path, source_path, mtime, ctime, size, mode, uid, gid, link_target
FROM files FROM files
WHERE mtime >= ? WHERE mtime >= ?
ORDER BY path ORDER BY path
@@ -253,7 +258,7 @@ func (r *FileRepository) DeleteByID(ctx context.Context, tx *sql.Tx, id types.Fi
func (r *FileRepository) ListByPrefix(ctx context.Context, prefix string) ([]*File, error) { func (r *FileRepository) ListByPrefix(ctx context.Context, prefix string) ([]*File, error) {
query := ` query := `
SELECT id, path, source_path, mtime, size, mode, uid, gid, link_target SELECT id, path, source_path, mtime, ctime, size, mode, uid, gid, link_target
FROM files FROM files
WHERE path LIKE ? || '%' WHERE path LIKE ? || '%'
ORDER BY path ORDER BY path
@@ -280,7 +285,7 @@ func (r *FileRepository) ListByPrefix(ctx context.Context, prefix string) ([]*Fi
// ListAll returns all files in the database // ListAll returns all files in the database
func (r *FileRepository) ListAll(ctx context.Context) ([]*File, error) { func (r *FileRepository) ListAll(ctx context.Context) ([]*File, error) {
query := ` query := `
SELECT id, path, source_path, mtime, size, mode, uid, gid, link_target SELECT id, path, source_path, mtime, ctime, size, mode, uid, gid, link_target
FROM files FROM files
ORDER BY path ORDER BY path
` `
@@ -310,7 +315,7 @@ func (r *FileRepository) CreateBatch(ctx context.Context, tx *sql.Tx, files []*F
return nil return nil
} }
// Each File has 9 values, so batch at 100 to be safe with SQLite's variable limit // Each File has 10 values, so batch at 100 to be safe with SQLite's variable limit
const batchSize = 100 const batchSize = 100
for i := 0; i < len(files); i += batchSize { for i := 0; i < len(files); i += batchSize {
@@ -320,18 +325,19 @@ func (r *FileRepository) CreateBatch(ctx context.Context, tx *sql.Tx, files []*F
} }
batch := files[i:end] batch := files[i:end]
query := `INSERT INTO files (id, path, source_path, mtime, size, mode, uid, gid, link_target) VALUES ` query := `INSERT INTO files (id, path, source_path, mtime, ctime, size, mode, uid, gid, link_target) VALUES `
args := make([]interface{}, 0, len(batch)*9) args := make([]interface{}, 0, len(batch)*10)
for j, f := range batch { for j, f := range batch {
if j > 0 { if j > 0 {
query += ", " query += ", "
} }
query += "(?, ?, ?, ?, ?, ?, ?, ?, ?)" query += "(?, ?, ?, ?, ?, ?, ?, ?, ?, ?)"
args = append(args, f.ID.String(), f.Path.String(), f.SourcePath.String(), f.MTime.Unix(), f.Size, f.Mode, f.UID, f.GID, f.LinkTarget.String()) args = append(args, f.ID.String(), f.Path.String(), f.SourcePath.String(), f.MTime.Unix(), f.CTime.Unix(), f.Size, f.Mode, f.UID, f.GID, f.LinkTarget.String())
} }
query += ` ON CONFLICT(path) DO UPDATE SET query += ` ON CONFLICT(path) DO UPDATE SET
source_path = excluded.source_path, source_path = excluded.source_path,
mtime = excluded.mtime, mtime = excluded.mtime,
ctime = excluded.ctime,
size = excluded.size, size = excluded.size,
mode = excluded.mode, mode = excluded.mode,
uid = excluded.uid, uid = excluded.uid,

View File

@@ -39,6 +39,7 @@ func TestFileRepository(t *testing.T) {
file := &File{ file := &File{
Path: "/test/file.txt", Path: "/test/file.txt",
MTime: time.Now().Truncate(time.Second), MTime: time.Now().Truncate(time.Second),
CTime: time.Now().Truncate(time.Second),
Size: 1024, Size: 1024,
Mode: 0644, Mode: 0644,
UID: 1000, UID: 1000,
@@ -123,6 +124,7 @@ func TestFileRepositorySymlink(t *testing.T) {
symlink := &File{ symlink := &File{
Path: "/test/link", Path: "/test/link",
MTime: time.Now().Truncate(time.Second), MTime: time.Now().Truncate(time.Second),
CTime: time.Now().Truncate(time.Second),
Size: 0, Size: 0,
Mode: uint32(0777 | os.ModeSymlink), Mode: uint32(0777 | os.ModeSymlink),
UID: 1000, UID: 1000,
@@ -159,6 +161,7 @@ func TestFileRepositoryTransaction(t *testing.T) {
file := &File{ file := &File{
Path: "/test/tx_file.txt", Path: "/test/tx_file.txt",
MTime: time.Now().Truncate(time.Second), MTime: time.Now().Truncate(time.Second),
CTime: time.Now().Truncate(time.Second),
Size: 1024, Size: 1024,
Mode: 0644, Mode: 0644,
UID: 1000, UID: 1000,

View File

@@ -17,6 +17,7 @@ type File struct {
Path types.FilePath // Absolute path of the file Path types.FilePath // Absolute path of the file
SourcePath types.SourcePath // The source directory this file came from (for restore path stripping) SourcePath types.SourcePath // The source directory this file came from (for restore path stripping)
MTime time.Time MTime time.Time
CTime time.Time
Size int64 Size int64
Mode uint32 Mode uint32
UID uint32 UID uint32

View File

@@ -23,6 +23,7 @@ func TestRepositoriesTransaction(t *testing.T) {
file := &File{ file := &File{
Path: "/test/tx_file.txt", Path: "/test/tx_file.txt",
MTime: time.Now().Truncate(time.Second), MTime: time.Now().Truncate(time.Second),
CTime: time.Now().Truncate(time.Second),
Size: 1024, Size: 1024,
Mode: 0644, Mode: 0644,
UID: 1000, UID: 1000,
@@ -145,6 +146,7 @@ func TestRepositoriesTransactionRollback(t *testing.T) {
file := &File{ file := &File{
Path: "/test/rollback_file.txt", Path: "/test/rollback_file.txt",
MTime: time.Now().Truncate(time.Second), MTime: time.Now().Truncate(time.Second),
CTime: time.Now().Truncate(time.Second),
Size: 1024, Size: 1024,
Mode: 0644, Mode: 0644,
UID: 1000, UID: 1000,
@@ -200,6 +202,7 @@ func TestRepositoriesReadTransaction(t *testing.T) {
file := &File{ file := &File{
Path: "/test/read_file.txt", Path: "/test/read_file.txt",
MTime: time.Now().Truncate(time.Second), MTime: time.Now().Truncate(time.Second),
CTime: time.Now().Truncate(time.Second),
Size: 1024, Size: 1024,
Mode: 0644, Mode: 0644,
UID: 1000, UID: 1000,
@@ -223,6 +226,7 @@ func TestRepositoriesReadTransaction(t *testing.T) {
_ = repos.Files.Create(ctx, tx, &File{ _ = repos.Files.Create(ctx, tx, &File{
Path: "/test/should_fail.txt", Path: "/test/should_fail.txt",
MTime: time.Now(), MTime: time.Now(),
CTime: time.Now(),
Size: 0, Size: 0,
Mode: 0644, Mode: 0644,
UID: 1000, UID: 1000,

View File

@@ -23,6 +23,7 @@ func TestFileRepositoryUUIDGeneration(t *testing.T) {
{ {
Path: "/file1.txt", Path: "/file1.txt",
MTime: time.Now().Truncate(time.Second), MTime: time.Now().Truncate(time.Second),
CTime: time.Now().Truncate(time.Second),
Size: 1024, Size: 1024,
Mode: 0644, Mode: 0644,
UID: 1000, UID: 1000,
@@ -31,6 +32,7 @@ func TestFileRepositoryUUIDGeneration(t *testing.T) {
{ {
Path: "/file2.txt", Path: "/file2.txt",
MTime: time.Now().Truncate(time.Second), MTime: time.Now().Truncate(time.Second),
CTime: time.Now().Truncate(time.Second),
Size: 2048, Size: 2048,
Mode: 0644, Mode: 0644,
UID: 1000, UID: 1000,
@@ -70,6 +72,7 @@ func TestFileRepositoryGetByID(t *testing.T) {
file := &File{ file := &File{
Path: "/test.txt", Path: "/test.txt",
MTime: time.Now().Truncate(time.Second), MTime: time.Now().Truncate(time.Second),
CTime: time.Now().Truncate(time.Second),
Size: 1024, Size: 1024,
Mode: 0644, Mode: 0644,
UID: 1000, UID: 1000,
@@ -117,6 +120,7 @@ func TestOrphanedFileCleanup(t *testing.T) {
file1 := &File{ file1 := &File{
Path: "/orphaned.txt", Path: "/orphaned.txt",
MTime: time.Now().Truncate(time.Second), MTime: time.Now().Truncate(time.Second),
CTime: time.Now().Truncate(time.Second),
Size: 1024, Size: 1024,
Mode: 0644, Mode: 0644,
UID: 1000, UID: 1000,
@@ -125,6 +129,7 @@ func TestOrphanedFileCleanup(t *testing.T) {
file2 := &File{ file2 := &File{
Path: "/referenced.txt", Path: "/referenced.txt",
MTime: time.Now().Truncate(time.Second), MTime: time.Now().Truncate(time.Second),
CTime: time.Now().Truncate(time.Second),
Size: 2048, Size: 2048,
Mode: 0644, Mode: 0644,
UID: 1000, UID: 1000,
@@ -213,6 +218,7 @@ func TestOrphanedChunkCleanup(t *testing.T) {
file := &File{ file := &File{
Path: "/test.txt", Path: "/test.txt",
MTime: time.Now().Truncate(time.Second), MTime: time.Now().Truncate(time.Second),
CTime: time.Now().Truncate(time.Second),
Size: 1024, Size: 1024,
Mode: 0644, Mode: 0644,
UID: 1000, UID: 1000,
@@ -342,6 +348,7 @@ func TestFileChunkRepositoryWithUUIDs(t *testing.T) {
file := &File{ file := &File{
Path: "/test.txt", Path: "/test.txt",
MTime: time.Now().Truncate(time.Second), MTime: time.Now().Truncate(time.Second),
CTime: time.Now().Truncate(time.Second),
Size: 3072, Size: 3072,
Mode: 0644, Mode: 0644,
UID: 1000, UID: 1000,
@@ -412,6 +419,7 @@ func TestChunkFileRepositoryWithUUIDs(t *testing.T) {
file1 := &File{ file1 := &File{
Path: "/file1.txt", Path: "/file1.txt",
MTime: time.Now().Truncate(time.Second), MTime: time.Now().Truncate(time.Second),
CTime: time.Now().Truncate(time.Second),
Size: 1024, Size: 1024,
Mode: 0644, Mode: 0644,
UID: 1000, UID: 1000,
@@ -420,6 +428,7 @@ func TestChunkFileRepositoryWithUUIDs(t *testing.T) {
file2 := &File{ file2 := &File{
Path: "/file2.txt", Path: "/file2.txt",
MTime: time.Now().Truncate(time.Second), MTime: time.Now().Truncate(time.Second),
CTime: time.Now().Truncate(time.Second),
Size: 1024, Size: 1024,
Mode: 0644, Mode: 0644,
UID: 1000, UID: 1000,
@@ -577,6 +586,7 @@ func TestComplexOrphanedDataScenario(t *testing.T) {
files[i] = &File{ files[i] = &File{
Path: types.FilePath(fmt.Sprintf("/file%d.txt", i)), Path: types.FilePath(fmt.Sprintf("/file%d.txt", i)),
MTime: time.Now().Truncate(time.Second), MTime: time.Now().Truncate(time.Second),
CTime: time.Now().Truncate(time.Second),
Size: 1024, Size: 1024,
Mode: 0644, Mode: 0644,
UID: 1000, UID: 1000,
@@ -668,6 +678,7 @@ func TestCascadeDelete(t *testing.T) {
file := &File{ file := &File{
Path: "/cascade-test.txt", Path: "/cascade-test.txt",
MTime: time.Now().Truncate(time.Second), MTime: time.Now().Truncate(time.Second),
CTime: time.Now().Truncate(time.Second),
Size: 1024, Size: 1024,
Mode: 0644, Mode: 0644,
UID: 1000, UID: 1000,
@@ -739,6 +750,7 @@ func TestTransactionIsolation(t *testing.T) {
file := &File{ file := &File{
Path: "/tx-test.txt", Path: "/tx-test.txt",
MTime: time.Now().Truncate(time.Second), MTime: time.Now().Truncate(time.Second),
CTime: time.Now().Truncate(time.Second),
Size: 1024, Size: 1024,
Mode: 0644, Mode: 0644,
UID: 1000, UID: 1000,
@@ -800,6 +812,7 @@ func TestConcurrentOrphanedCleanup(t *testing.T) {
file := &File{ file := &File{
Path: types.FilePath(fmt.Sprintf("/concurrent-%d.txt", i)), Path: types.FilePath(fmt.Sprintf("/concurrent-%d.txt", i)),
MTime: time.Now().Truncate(time.Second), MTime: time.Now().Truncate(time.Second),
CTime: time.Now().Truncate(time.Second),
Size: 1024, Size: 1024,
Mode: 0644, Mode: 0644,
UID: 1000, UID: 1000,

View File

@@ -18,6 +18,7 @@ func TestOrphanedFileCleanupDebug(t *testing.T) {
file1 := &File{ file1 := &File{
Path: "/orphaned.txt", Path: "/orphaned.txt",
MTime: time.Now().Truncate(time.Second), MTime: time.Now().Truncate(time.Second),
CTime: time.Now().Truncate(time.Second),
Size: 1024, Size: 1024,
Mode: 0644, Mode: 0644,
UID: 1000, UID: 1000,
@@ -26,6 +27,7 @@ func TestOrphanedFileCleanupDebug(t *testing.T) {
file2 := &File{ file2 := &File{
Path: "/referenced.txt", Path: "/referenced.txt",
MTime: time.Now().Truncate(time.Second), MTime: time.Now().Truncate(time.Second),
CTime: time.Now().Truncate(time.Second),
Size: 2048, Size: 2048,
Mode: 0644, Mode: 0644,
UID: 1000, UID: 1000,

View File

@@ -29,6 +29,7 @@ func TestFileRepositoryEdgeCases(t *testing.T) {
file: &File{ file: &File{
Path: "", Path: "",
MTime: time.Now(), MTime: time.Now(),
CTime: time.Now(),
Size: 1024, Size: 1024,
Mode: 0644, Mode: 0644,
UID: 1000, UID: 1000,
@@ -41,6 +42,7 @@ func TestFileRepositoryEdgeCases(t *testing.T) {
file: &File{ file: &File{
Path: types.FilePath("/" + strings.Repeat("a", 4096)), Path: types.FilePath("/" + strings.Repeat("a", 4096)),
MTime: time.Now(), MTime: time.Now(),
CTime: time.Now(),
Size: 1024, Size: 1024,
Mode: 0644, Mode: 0644,
UID: 1000, UID: 1000,
@@ -53,6 +55,7 @@ func TestFileRepositoryEdgeCases(t *testing.T) {
file: &File{ file: &File{
Path: "/test/file with spaces and 特殊文字.txt", Path: "/test/file with spaces and 特殊文字.txt",
MTime: time.Now(), MTime: time.Now(),
CTime: time.Now(),
Size: 1024, Size: 1024,
Mode: 0644, Mode: 0644,
UID: 1000, UID: 1000,
@@ -65,6 +68,7 @@ func TestFileRepositoryEdgeCases(t *testing.T) {
file: &File{ file: &File{
Path: "/empty.txt", Path: "/empty.txt",
MTime: time.Now(), MTime: time.Now(),
CTime: time.Now(),
Size: 0, Size: 0,
Mode: 0644, Mode: 0644,
UID: 1000, UID: 1000,
@@ -77,6 +81,7 @@ func TestFileRepositoryEdgeCases(t *testing.T) {
file: &File{ file: &File{
Path: "/link", Path: "/link",
MTime: time.Now(), MTime: time.Now(),
CTime: time.Now(),
Size: 0, Size: 0,
Mode: 0777 | 0120000, // symlink mode Mode: 0777 | 0120000, // symlink mode
UID: 1000, UID: 1000,
@@ -118,6 +123,7 @@ func TestDuplicateHandling(t *testing.T) {
file1 := &File{ file1 := &File{
Path: "/duplicate.txt", Path: "/duplicate.txt",
MTime: time.Now(), MTime: time.Now(),
CTime: time.Now(),
Size: 1024, Size: 1024,
Mode: 0644, Mode: 0644,
UID: 1000, UID: 1000,
@@ -126,6 +132,7 @@ func TestDuplicateHandling(t *testing.T) {
file2 := &File{ file2 := &File{
Path: "/duplicate.txt", // Same path Path: "/duplicate.txt", // Same path
MTime: time.Now().Add(time.Hour), MTime: time.Now().Add(time.Hour),
CTime: time.Now().Add(time.Hour),
Size: 2048, Size: 2048,
Mode: 0644, Mode: 0644,
UID: 1000, UID: 1000,
@@ -185,6 +192,7 @@ func TestDuplicateHandling(t *testing.T) {
file := &File{ file := &File{
Path: "/test-dup-fc.txt", Path: "/test-dup-fc.txt",
MTime: time.Now(), MTime: time.Now(),
CTime: time.Now(),
Size: 1024, Size: 1024,
Mode: 0644, Mode: 0644,
UID: 1000, UID: 1000,
@@ -236,6 +244,7 @@ func TestNullHandling(t *testing.T) {
file := &File{ file := &File{
Path: "/regular.txt", Path: "/regular.txt",
MTime: time.Now(), MTime: time.Now(),
CTime: time.Now(),
Size: 1024, Size: 1024,
Mode: 0644, Mode: 0644,
UID: 1000, UID: 1000,
@@ -340,6 +349,7 @@ func TestLargeDatasets(t *testing.T) {
file := &File{ file := &File{
Path: types.FilePath(fmt.Sprintf("/large/file%05d.txt", i)), Path: types.FilePath(fmt.Sprintf("/large/file%05d.txt", i)),
MTime: time.Now(), MTime: time.Now(),
CTime: time.Now(),
Size: int64(i * 1024), Size: int64(i * 1024),
Mode: 0644, Mode: 0644,
UID: uint32(1000 + (i % 10)), UID: uint32(1000 + (i % 10)),
@@ -464,6 +474,7 @@ func TestQueryInjection(t *testing.T) {
file := &File{ file := &File{
Path: types.FilePath(injection), Path: types.FilePath(injection),
MTime: time.Now(), MTime: time.Now(),
CTime: time.Now(),
Size: 1024, Size: 1024,
Mode: 0644, Mode: 0644,
UID: 1000, UID: 1000,
@@ -502,6 +513,7 @@ func TestTimezoneHandling(t *testing.T) {
file := &File{ file := &File{
Path: "/timezone-test.txt", Path: "/timezone-test.txt",
MTime: nyTime, MTime: nyTime,
CTime: nyTime,
Size: 1024, Size: 1024,
Mode: 0644, Mode: 0644,
UID: 1000, UID: 1000,

View File

@@ -1,5 +1,6 @@
-- Migration 001: Initial Vaultik schema -- Vaultik Database Schema
-- All core tables for tracking files, chunks, blobs, snapshots, and uploads. -- Note: This database does not support migrations. If the schema changes,
-- delete the local database and perform a full backup to recreate it.
-- Files table: stores metadata about files in the filesystem -- Files table: stores metadata about files in the filesystem
CREATE TABLE IF NOT EXISTS files ( CREATE TABLE IF NOT EXISTS files (
@@ -7,6 +8,7 @@ CREATE TABLE IF NOT EXISTS files (
path TEXT NOT NULL UNIQUE, path TEXT NOT NULL UNIQUE,
source_path TEXT NOT NULL DEFAULT '', -- The source directory this file came from (for restore path stripping) source_path TEXT NOT NULL DEFAULT '', -- The source directory this file came from (for restore path stripping)
mtime INTEGER NOT NULL, mtime INTEGER NOT NULL,
ctime INTEGER NOT NULL,
size INTEGER NOT NULL, size INTEGER NOT NULL,
mode INTEGER NOT NULL, mode INTEGER NOT NULL,
uid INTEGER NOT NULL, uid INTEGER NOT NULL,
@@ -101,7 +103,7 @@ CREATE TABLE IF NOT EXISTS snapshot_files (
file_id TEXT NOT NULL, file_id TEXT NOT NULL,
PRIMARY KEY (snapshot_id, file_id), PRIMARY KEY (snapshot_id, file_id),
FOREIGN KEY (snapshot_id) REFERENCES snapshots(id) ON DELETE CASCADE, FOREIGN KEY (snapshot_id) REFERENCES snapshots(id) ON DELETE CASCADE,
FOREIGN KEY (file_id) REFERENCES files(id) ON DELETE CASCADE FOREIGN KEY (file_id) REFERENCES files(id)
); );
-- Index for efficient file lookups (used in orphan detection) -- Index for efficient file lookups (used in orphan detection)
@@ -114,7 +116,7 @@ CREATE TABLE IF NOT EXISTS snapshot_blobs (
blob_hash TEXT NOT NULL, blob_hash TEXT NOT NULL,
PRIMARY KEY (snapshot_id, blob_id), PRIMARY KEY (snapshot_id, blob_id),
FOREIGN KEY (snapshot_id) REFERENCES snapshots(id) ON DELETE CASCADE, FOREIGN KEY (snapshot_id) REFERENCES snapshots(id) ON DELETE CASCADE,
FOREIGN KEY (blob_id) REFERENCES blobs(id) ON DELETE CASCADE FOREIGN KEY (blob_id) REFERENCES blobs(id)
); );
-- Index for efficient blob lookups (used in orphan detection) -- Index for efficient blob lookups (used in orphan detection)
@@ -128,7 +130,7 @@ CREATE TABLE IF NOT EXISTS uploads (
size INTEGER NOT NULL, size INTEGER NOT NULL,
duration_ms INTEGER NOT NULL, duration_ms INTEGER NOT NULL,
FOREIGN KEY (blob_hash) REFERENCES blobs(blob_hash), FOREIGN KEY (blob_hash) REFERENCES blobs(blob_hash),
FOREIGN KEY (snapshot_id) REFERENCES snapshots(id) ON DELETE CASCADE FOREIGN KEY (snapshot_id) REFERENCES snapshots(id)
); );
-- Index for efficient snapshot lookups -- Index for efficient snapshot lookups

View File

@@ -1,9 +0,0 @@
-- Migration 000: Schema migrations tracking table
-- Applied as a bootstrap step before the normal migration loop.
-- Both statements are written to be safe to run repeatedly: the table is only
-- created when absent, and the seed row is skipped if version 0 already exists.
CREATE TABLE IF NOT EXISTS schema_migrations (
-- Migration number; primary key, so each version can be recorded only once.
version INTEGER PRIMARY KEY,
-- Filled in by SQLite with the current timestamp when the row is inserted.
applied_at DATETIME DEFAULT CURRENT_TIMESTAMP
);
-- Record the bootstrap migration itself as version 0.
INSERT OR IGNORE INTO schema_migrations (version) VALUES (0);

View File

@@ -0,0 +1,11 @@
-- Track blob upload metrics
-- One row per uploaded blob, keyed by the blob's hash.
-- Every statement uses IF NOT EXISTS so the script can be re-applied to a
-- database that already contains the table and its indexes.
CREATE TABLE IF NOT EXISTS uploads (
blob_hash TEXT PRIMARY KEY,
uploaded_at TIMESTAMP NOT NULL,
size INTEGER NOT NULL,
duration_ms INTEGER NOT NULL,
FOREIGN KEY (blob_hash) REFERENCES blobs(blob_hash)
);
-- Index for looking up uploads by time.
CREATE INDEX IF NOT EXISTS idx_uploads_uploaded_at ON uploads(uploaded_at);
-- Index for looking up uploads by how long they took.
CREATE INDEX IF NOT EXISTS idx_uploads_duration ON uploads(duration_ms);

View File

@@ -63,3 +63,10 @@ type Chunk struct {
Offset int64 Offset int64
Length int64 Length int64
} }
// DirtyPath represents a path marked for backup by inotify.
type DirtyPath struct {
// Path is the path that was marked.
// NOTE(review): presumably an absolute filesystem path — confirm against the producer.
Path string
// MarkedAt is when the path was marked.
MarkedAt time.Time
// EventType names the kind of change that marked the path.
EventType string // "create", "modify", "delete"
}

View File

@@ -2,7 +2,6 @@ package s3
import ( import (
"context" "context"
"errors"
"io" "io"
"sync/atomic" "sync/atomic"
@@ -11,7 +10,6 @@ import (
"github.com/aws/aws-sdk-go-v2/credentials" "github.com/aws/aws-sdk-go-v2/credentials"
"github.com/aws/aws-sdk-go-v2/feature/s3/manager" "github.com/aws/aws-sdk-go-v2/feature/s3/manager"
"github.com/aws/aws-sdk-go-v2/service/s3" "github.com/aws/aws-sdk-go-v2/service/s3"
s3types "github.com/aws/aws-sdk-go-v2/service/s3/types"
"github.com/aws/smithy-go/logging" "github.com/aws/smithy-go/logging"
) )
@@ -205,13 +203,10 @@ func (c *Client) HeadObject(ctx context.Context, key string) (bool, error) {
Key: aws.String(fullKey), Key: aws.String(fullKey),
}) })
if err != nil { if err != nil {
var notFound *s3types.NotFound // Check if it's a not found error
var noSuchKey *s3types.NoSuchKey // TODO: Add proper error type checking
if errors.As(err, &notFound) || errors.As(err, &noSuchKey) {
return false, nil return false, nil
} }
return false, err
}
return true, nil return true, nil
} }

View File

@@ -345,6 +345,7 @@ func (b *BackupEngine) Backup(ctx context.Context, fsys fs.FS, root string) (str
Size: info.Size(), Size: info.Size(),
Mode: uint32(info.Mode()), Mode: uint32(info.Mode()),
MTime: info.ModTime(), MTime: info.ModTime(),
CTime: info.ModTime(), // Use mtime as ctime for test
UID: 1000, // Default UID for test UID: 1000, // Default UID for test
GID: 1000, // Default GID for test GID: 1000, // Default GID for test
} }

File diff suppressed because it is too large Load Diff

View File

@@ -110,15 +110,15 @@ func TestScannerSimpleDirectory(t *testing.T) {
t.Errorf("expected at least 97 bytes scanned, got %d", result.BytesScanned) t.Errorf("expected at least 97 bytes scanned, got %d", result.BytesScanned)
} }
// Verify files in database - includes regular files and directories // Verify files in database - only regular files are stored
files, err := repos.Files.ListByPrefix(ctx, "/source") files, err := repos.Files.ListByPrefix(ctx, "/source")
if err != nil { if err != nil {
t.Fatalf("failed to list files: %v", err) t.Fatalf("failed to list files: %v", err)
} }
// 6 regular files + 3 directories (/source, /source/subdir, /source/subdir2) // We should have 6 files (directories are not stored)
if len(files) != 9 { if len(files) != 6 {
t.Errorf("expected 9 entries in database (6 files + 3 dirs), got %d", len(files)) t.Errorf("expected 6 files in database, got %d", len(files))
} }
// Verify specific file // Verify specific file

View File

@@ -227,39 +227,12 @@ func (sm *SnapshotManager) ExportSnapshotMetadata(ctx context.Context, dbPath st
} }
}() }()
// Steps 1-5: Copy, clean, vacuum, compress, and read the database
finalData, tempDBPath, err := sm.prepareExportDB(ctx, dbPath, snapshotID, tempDir)
if err != nil {
return err
}
// Step 6: Generate blob manifest (before closing temp DB)
blobManifest, err := sm.generateBlobManifest(ctx, tempDBPath, snapshotID)
if err != nil {
return fmt.Errorf("generating blob manifest: %w", err)
}
// Step 7: Upload to S3 in snapshot subdirectory
if err := sm.uploadSnapshotArtifacts(ctx, snapshotID, finalData, blobManifest); err != nil {
return err
}
log.Info("Uploaded snapshot metadata",
"snapshot_id", snapshotID,
"db_size", len(finalData),
"manifest_size", len(blobManifest))
return nil
}
// prepareExportDB copies, cleans, vacuums, and compresses the snapshot database for export.
// Returns the compressed data and the path to the temporary database (needed for manifest generation).
func (sm *SnapshotManager) prepareExportDB(ctx context.Context, dbPath, snapshotID, tempDir string) ([]byte, string, error) {
// Step 1: Copy database to temp file // Step 1: Copy database to temp file
// The main database should be closed at this point // The main database should be closed at this point
tempDBPath := filepath.Join(tempDir, "snapshot.db") tempDBPath := filepath.Join(tempDir, "snapshot.db")
log.Debug("Copying database to temporary location", "source", dbPath, "destination", tempDBPath) log.Debug("Copying database to temporary location", "source", dbPath, "destination", tempDBPath)
if err := sm.copyFile(dbPath, tempDBPath); err != nil { if err := sm.copyFile(dbPath, tempDBPath); err != nil {
return nil, "", fmt.Errorf("copying database: %w", err) return fmt.Errorf("copying database: %w", err)
} }
log.Debug("Database copy complete", "size", sm.getFileSize(tempDBPath)) log.Debug("Database copy complete", "size", sm.getFileSize(tempDBPath))
@@ -267,7 +240,7 @@ func (sm *SnapshotManager) prepareExportDB(ctx context.Context, dbPath, snapshot
log.Debug("Cleaning temporary database", "snapshot_id", snapshotID) log.Debug("Cleaning temporary database", "snapshot_id", snapshotID)
stats, err := sm.cleanSnapshotDB(ctx, tempDBPath, snapshotID) stats, err := sm.cleanSnapshotDB(ctx, tempDBPath, snapshotID)
if err != nil { if err != nil {
return nil, "", fmt.Errorf("cleaning snapshot database: %w", err) return fmt.Errorf("cleaning snapshot database: %w", err)
} }
log.Info("Temporary database cleanup complete", log.Info("Temporary database cleanup complete",
"db_path", tempDBPath, "db_path", tempDBPath,
@@ -282,14 +255,14 @@ func (sm *SnapshotManager) prepareExportDB(ctx context.Context, dbPath, snapshot
// Step 3: VACUUM the database to remove deleted data and compact // Step 3: VACUUM the database to remove deleted data and compact
// This is critical for security - ensures no stale/deleted data is uploaded // This is critical for security - ensures no stale/deleted data is uploaded
if err := sm.vacuumDatabase(tempDBPath); err != nil { if err := sm.vacuumDatabase(tempDBPath); err != nil {
return nil, "", fmt.Errorf("vacuuming database: %w", err) return fmt.Errorf("vacuuming database: %w", err)
} }
log.Debug("Database vacuumed", "size", humanize.Bytes(uint64(sm.getFileSize(tempDBPath)))) log.Debug("Database vacuumed", "size", humanize.Bytes(uint64(sm.getFileSize(tempDBPath))))
// Step 4: Compress and encrypt the binary database file // Step 4: Compress and encrypt the binary database file
compressedPath := filepath.Join(tempDir, "db.zst.age") compressedPath := filepath.Join(tempDir, "db.zst.age")
if err := sm.compressFile(tempDBPath, compressedPath); err != nil { if err := sm.compressFile(tempDBPath, compressedPath); err != nil {
return nil, "", fmt.Errorf("compressing database: %w", err) return fmt.Errorf("compressing database: %w", err)
} }
log.Debug("Compression complete", log.Debug("Compression complete",
"original_size", humanize.Bytes(uint64(sm.getFileSize(tempDBPath))), "original_size", humanize.Bytes(uint64(sm.getFileSize(tempDBPath))),
@@ -298,43 +271,49 @@ func (sm *SnapshotManager) prepareExportDB(ctx context.Context, dbPath, snapshot
// Step 5: Read compressed and encrypted data for upload // Step 5: Read compressed and encrypted data for upload
finalData, err := afero.ReadFile(sm.fs, compressedPath) finalData, err := afero.ReadFile(sm.fs, compressedPath)
if err != nil { if err != nil {
return nil, "", fmt.Errorf("reading compressed dump: %w", err) return fmt.Errorf("reading compressed dump: %w", err)
} }
return finalData, tempDBPath, nil // Step 6: Generate blob manifest (before closing temp DB)
} blobManifest, err := sm.generateBlobManifest(ctx, tempDBPath, snapshotID)
if err != nil {
return fmt.Errorf("generating blob manifest: %w", err)
}
// uploadSnapshotArtifacts uploads the database backup and blob manifest to S3 // Step 7: Upload to S3 in snapshot subdirectory
func (sm *SnapshotManager) uploadSnapshotArtifacts(ctx context.Context, snapshotID string, dbData, manifestData []byte) error {
// Upload database backup (compressed and encrypted) // Upload database backup (compressed and encrypted)
dbKey := fmt.Sprintf("metadata/%s/db.zst.age", snapshotID) dbKey := fmt.Sprintf("metadata/%s/db.zst.age", snapshotID)
dbUploadStart := time.Now() dbUploadStart := time.Now()
if err := sm.storage.Put(ctx, dbKey, bytes.NewReader(dbData)); err != nil { if err := sm.storage.Put(ctx, dbKey, bytes.NewReader(finalData)); err != nil {
return fmt.Errorf("uploading snapshot database: %w", err) return fmt.Errorf("uploading snapshot database: %w", err)
} }
dbUploadDuration := time.Since(dbUploadStart) dbUploadDuration := time.Since(dbUploadStart)
dbUploadSpeed := float64(len(dbData)) * 8 / dbUploadDuration.Seconds() // bits per second dbUploadSpeed := float64(len(finalData)) * 8 / dbUploadDuration.Seconds() // bits per second
log.Info("Uploaded snapshot database", log.Info("Uploaded snapshot database",
"path", dbKey, "path", dbKey,
"size", humanize.Bytes(uint64(len(dbData))), "size", humanize.Bytes(uint64(len(finalData))),
"duration", dbUploadDuration, "duration", dbUploadDuration,
"speed", humanize.SI(dbUploadSpeed, "bps")) "speed", humanize.SI(dbUploadSpeed, "bps"))
// Upload blob manifest (compressed only, not encrypted) // Upload blob manifest (compressed only, not encrypted)
manifestKey := fmt.Sprintf("metadata/%s/manifest.json.zst", snapshotID) manifestKey := fmt.Sprintf("metadata/%s/manifest.json.zst", snapshotID)
manifestUploadStart := time.Now() manifestUploadStart := time.Now()
if err := sm.storage.Put(ctx, manifestKey, bytes.NewReader(manifestData)); err != nil { if err := sm.storage.Put(ctx, manifestKey, bytes.NewReader(blobManifest)); err != nil {
return fmt.Errorf("uploading blob manifest: %w", err) return fmt.Errorf("uploading blob manifest: %w", err)
} }
manifestUploadDuration := time.Since(manifestUploadStart) manifestUploadDuration := time.Since(manifestUploadStart)
manifestUploadSpeed := float64(len(manifestData)) * 8 / manifestUploadDuration.Seconds() // bits per second manifestUploadSpeed := float64(len(blobManifest)) * 8 / manifestUploadDuration.Seconds() // bits per second
log.Info("Uploaded blob manifest", log.Info("Uploaded blob manifest",
"path", manifestKey, "path", manifestKey,
"size", humanize.Bytes(uint64(len(manifestData))), "size", humanize.Bytes(uint64(len(blobManifest))),
"duration", manifestUploadDuration, "duration", manifestUploadDuration,
"speed", humanize.SI(manifestUploadSpeed, "bps")) "speed", humanize.SI(manifestUploadSpeed, "bps"))
log.Info("Uploaded snapshot metadata",
"snapshot_id", snapshotID,
"db_size", len(finalData),
"manifest_size", len(blobManifest))
return nil return nil
} }

View File

@@ -1,93 +0,0 @@
package vaultik
import (
"context"
"crypto/sha256"
"encoding/hex"
"fmt"
"io"
"filippo.io/age"
"git.eeqj.de/sneak/vaultik/internal/blobgen"
)
// hashVerifyReader wraps a blobgen.Reader and verifies the double-SHA-256 hash
// of decrypted plaintext when Close is called. It reuses the hash that
// blobgen.Reader already computes internally via its TeeReader, avoiding
// redundant SHA-256 computation.
//
// Verification only happens if Read has observed io.EOF; a stream that is
// closed before being fully read is not checked.
type hashVerifyReader struct {
reader *blobgen.Reader // underlying decrypted blob reader (has internal hasher)
fetcher io.ReadCloser // raw fetched stream (closed on Close)
blobHash string // expected double-SHA-256 hex
done bool // EOF reached; set by Read, consulted by Close
}
// Read forwards to the decrypted blob reader and remembers when the stream
// reports io.EOF, so that Close knows the whole plaintext was consumed.
func (h *hashVerifyReader) Read(p []byte) (int, error) {
	count, readErr := h.reader.Read(p)
	// Once EOF has been seen the flag stays set.
	h.done = h.done || readErr == io.EOF
	return count, readErr
}
// Close releases the decrypted reader and the raw fetched stream. If the
// stream was read through to EOF, it also compares the double-SHA-256 of the
// plaintext with the expected blob hash. A hash mismatch is reported in
// preference to any close error; otherwise the reader's close error wins over
// the fetcher's.
func (h *hashVerifyReader) Close() error {
	closeReaderErr := h.reader.Close()
	closeFetcherErr := h.fetcher.Close()

	if h.done {
		// The reader supplies the first digest; hashing it again yields the blob hash.
		doubled := sha256.Sum256(h.reader.Sum256())
		if got := hex.EncodeToString(doubled[:]); got != h.blobHash {
			return fmt.Errorf("blob hash mismatch: expected %s, got %s", h.blobHash[:16], got[:16])
		}
	}

	if closeReaderErr != nil {
		return closeReaderErr
	}
	return closeFetcherErr
}
// FetchAndDecryptBlob fetches a blob and wraps it in a streaming reader that
// yields the decrypted, decompressed plaintext without buffering the whole
// blob in memory. The double-SHA-256 hash is checked against blobHash when the
// returned reader is closed after having been read to EOF, so callers must
// inspect the error returned by Close.
func (v *Vaultik) FetchAndDecryptBlob(ctx context.Context, blobHash string, expectedSize int64, identity age.Identity) (io.ReadCloser, error) {
	encrypted, _, fetchErr := v.FetchBlob(ctx, blobHash, expectedSize)
	if fetchErr != nil {
		return nil, fetchErr
	}

	decrypted, readerErr := blobgen.NewReader(encrypted, identity)
	if readerErr != nil {
		// Nothing will ever read the raw stream, so release it here.
		_ = encrypted.Close()
		return nil, fmt.Errorf("creating blob reader: %w", readerErr)
	}

	verifying := &hashVerifyReader{
		reader:   decrypted,
		fetcher:  encrypted,
		blobHash: blobHash,
	}
	return verifying, nil
}
// FetchBlob downloads a blob and returns a reader for the encrypted data
// together with the object's size as reported by Storage.Stat.
//
// The storage key is sharded by the first two character pairs of blobHash
// (blobs/aa/bb/<hash>), so blobHash must be at least four characters long;
// shorter values are rejected with an error instead of panicking on the slice
// expressions. expectedSize is not consulted by this function. The caller owns
// the returned reader and must close it.
func (v *Vaultik) FetchBlob(ctx context.Context, blobHash string, expectedSize int64) (io.ReadCloser, int64, error) {
	if len(blobHash) < 4 {
		return nil, 0, fmt.Errorf("invalid blob hash %q: too short", blobHash)
	}

	// Abbreviate the hash for error messages without assuming its length.
	shortHash := blobHash
	if len(shortHash) > 16 {
		shortHash = shortHash[:16]
	}

	blobPath := fmt.Sprintf("blobs/%s/%s/%s", blobHash[:2], blobHash[2:4], blobHash)

	rc, err := v.Storage.Get(ctx, blobPath)
	if err != nil {
		return nil, 0, fmt.Errorf("downloading blob %s: %w", shortHash, err)
	}

	info, err := v.Storage.Stat(ctx, blobPath)
	if err != nil {
		// The stream is already open; release it before reporting the failure.
		_ = rc.Close()
		return nil, 0, fmt.Errorf("stat blob %s: %w", shortHash, err)
	}

	return rc, info.Size, nil
}

View File

@@ -1,100 +0,0 @@
package vaultik_test
import (
"bytes"
"context"
"crypto/sha256"
"encoding/hex"
"io"
"strings"
"testing"
"filippo.io/age"
"git.eeqj.de/sneak/vaultik/internal/blobgen"
"git.eeqj.de/sneak/vaultik/internal/vaultik"
)
// TestFetchAndDecryptBlobVerifiesHash verifies that FetchAndDecryptBlob checks
// the double-SHA-256 hash of the decrypted plaintext against the expected blob hash.
//
// The same encrypted payload is stored under two keys: one derived from its
// real hash and one derived from a fabricated hash. Reading via the real hash
// must succeed; reading via the fabricated hash must fail on Close.
func TestFetchAndDecryptBlobVerifiesHash(t *testing.T) {
// A fresh identity serves as both the encryption recipient and the decryption key.
identity, err := age.GenerateX25519Identity()
if err != nil {
t.Fatalf("generating identity: %v", err)
}
// Create test data and encrypt it using blobgen.Writer
plaintext := []byte("hello world test data for blob hash verification")
var encBuf bytes.Buffer
writer, err := blobgen.NewWriter(&encBuf, 1, []string{identity.Recipient().String()})
if err != nil {
t.Fatalf("creating blobgen writer: %v", err)
}
if _, err := writer.Write(plaintext); err != nil {
t.Fatalf("writing plaintext: %v", err)
}
if err := writer.Close(); err != nil {
t.Fatalf("closing writer: %v", err)
}
encryptedData := encBuf.Bytes()
// Compute correct double-SHA-256 hash of the plaintext (matches blobgen.Writer.Sum256)
firstHash := sha256.Sum256(plaintext)
secondHash := sha256.Sum256(firstHash[:])
correctHash := hex.EncodeToString(secondHash[:])
// Verify our hash matches what blobgen.Writer produces
writerHash := hex.EncodeToString(writer.Sum256())
if correctHash != writerHash {
t.Fatalf("hash computation mismatch: manual=%s, writer=%s", correctHash, writerHash)
}
// Set up mock storage with the blob at the correct path
mockStorage := NewMockStorer()
blobPath := "blobs/" + correctHash[:2] + "/" + correctHash[2:4] + "/" + correctHash
mockStorage.mu.Lock()
mockStorage.data[blobPath] = encryptedData
mockStorage.mu.Unlock()
tv := vaultik.NewForTesting(mockStorage)
ctx := context.Background()
t.Run("correct hash succeeds", func(t *testing.T) {
rc, err := tv.FetchAndDecryptBlob(ctx, correctHash, int64(len(encryptedData)), identity)
if err != nil {
t.Fatalf("expected success, got error: %v", err)
}
data, err := io.ReadAll(rc)
if err != nil {
t.Fatalf("reading stream: %v", err)
}
// Close is where the hash check runs, so its error must be inspected.
if err := rc.Close(); err != nil {
t.Fatalf("close (hash verification) failed: %v", err)
}
if !bytes.Equal(data, plaintext) {
t.Fatalf("decrypted data mismatch: got %q, want %q", data, plaintext)
}
})
t.Run("wrong hash fails", func(t *testing.T) {
// Use a fake hash that doesn't match the actual plaintext
fakeHash := strings.Repeat("ab", 32) // 64 hex chars
fakePath := "blobs/" + fakeHash[:2] + "/" + fakeHash[2:4] + "/" + fakeHash
// Store the valid ciphertext under the fake key so the fetch itself succeeds.
mockStorage.mu.Lock()
mockStorage.data[fakePath] = encryptedData
mockStorage.mu.Unlock()
rc, err := tv.FetchAndDecryptBlob(ctx, fakeHash, int64(len(encryptedData)), identity)
if err != nil {
t.Fatalf("unexpected error opening stream: %v", err)
}
// Read all data — hash is verified on Close
_, _ = io.ReadAll(rc)
err = rc.Close()
if err == nil {
t.Fatal("expected error for mismatched hash, got nil")
}
if !strings.Contains(err.Error(), "hash mismatch") {
t.Fatalf("expected hash mismatch error, got: %v", err)
}
})
}

View File

@@ -0,0 +1,55 @@
package vaultik
import (
"context"
"fmt"
"io"
"filippo.io/age"
"git.eeqj.de/sneak/vaultik/internal/blobgen"
)
// FetchAndDecryptBlobResult holds the result of fetching and decrypting a blob.
// It is the value FetchAndDecryptBlob returns on success.
type FetchAndDecryptBlobResult struct {
// Data is everything FetchAndDecryptBlob read from the blobgen reader,
// buffered fully in memory.
Data []byte
}
// FetchAndDecryptBlob retrieves the blob named by blobHash, decrypts it with
// identity, and returns the complete plaintext buffered in memory.
//
// blobHash and expectedSize are handed to FetchBlob as-is, and an error from
// FetchBlob is returned unwrapped. Failures while constructing the blobgen
// reader or while reading from it are wrapped with a short description.
func (v *Vaultik) FetchAndDecryptBlob(ctx context.Context, blobHash string, expectedSize int64, identity age.Identity) (*FetchAndDecryptBlobResult, error) {
	encrypted, _, fetchErr := v.FetchBlob(ctx, blobHash, expectedSize)
	if fetchErr != nil {
		return nil, fetchErr
	}
	// Close errors on the raw stream are deliberately dropped: by then the
	// data has either been read in full or an error is already being returned.
	defer func() { _ = encrypted.Close() }()

	decrypted, openErr := blobgen.NewReader(encrypted, identity)
	if openErr != nil {
		return nil, fmt.Errorf("creating blob reader: %w", openErr)
	}
	defer func() { _ = decrypted.Close() }()

	result := new(FetchAndDecryptBlobResult)
	var readErr error
	if result.Data, readErr = io.ReadAll(decrypted); readErr != nil {
		return nil, fmt.Errorf("reading blob data: %w", readErr)
	}
	return result, nil
}
// FetchBlob downloads a blob and returns a reader for the encrypted data
// together with the object's size as reported by storage.
//
// The object key is blobs/<hash[0:2]>/<hash[2:4]>/<hash>. A blobHash shorter
// than four characters cannot form that key, so it is rejected with an error
// instead of triggering a slice-bounds panic. expectedSize is accepted for
// interface compatibility and is not consulted here.
//
// On success the caller owns the returned reader and must close it. On any
// error the reader (if one was opened) has already been closed.
func (v *Vaultik) FetchBlob(ctx context.Context, blobHash string, expectedSize int64) (io.ReadCloser, int64, error) {
	if len(blobHash) < 4 {
		return nil, 0, fmt.Errorf("invalid blob hash %q: need at least 4 characters", blobHash)
	}

	// Error messages carry only a short prefix of the hash. Clamp the prefix
	// so a hash shorter than 16 characters cannot panic on the error path.
	shortHash := blobHash
	if len(shortHash) > 16 {
		shortHash = shortHash[:16]
	}

	blobPath := fmt.Sprintf("blobs/%s/%s/%s", blobHash[:2], blobHash[2:4], blobHash)

	rc, err := v.Storage.Get(ctx, blobPath)
	if err != nil {
		return nil, 0, fmt.Errorf("downloading blob %s: %w", shortHash, err)
	}

	info, err := v.Storage.Stat(ctx, blobPath)
	if err != nil {
		// Do not leak the open stream when the size lookup fails.
		_ = rc.Close()
		return nil, 0, fmt.Errorf("stat blob %s: %w", shortHash, err)
	}

	return rc, info.Size, nil
}

View File

@@ -2,7 +2,6 @@ package vaultik
import ( import (
"fmt" "fmt"
"regexp"
"strconv" "strconv"
"strings" "strings"
"time" "time"
@@ -80,55 +79,18 @@ func parseSnapshotTimestamp(snapshotID string) (time.Time, error) {
return timestamp.UTC(), nil return timestamp.UTC(), nil
} }
// parseSnapshotName extracts the snapshot name from a snapshot ID. // parseDuration parses a duration string with support for days
// Format: hostname_snapshotname_timestamp — the middle part(s) between hostname
// and the RFC3339 timestamp are the snapshot name (may contain underscores).
// Returns the snapshot name, or empty string if the ID is malformed.
func parseSnapshotName(snapshotID string) string {
	// The name is whatever sits between the first underscore (end of the
	// hostname) and the last underscore (start of the timestamp).
	first := strings.Index(snapshotID, "_")
	last := strings.LastIndex(snapshotID, "_")

	// Zero or one underscore means there is no middle section at all
	// (e.g. hostname_timestamp), so there is no name to report.
	if first < 0 || first == last {
		return ""
	}

	// Underscores inside the name itself are preserved because only the
	// outermost two separators are consumed.
	return snapshotID[first+1 : last]
}
// parseDuration parses a duration string with support for human-friendly units:
// d/day/days, w/week/weeks, mo/month/months, y/year/years, plus standard Go
// duration units (h, m, s).
func parseDuration(s string) (time.Duration, error) { func parseDuration(s string) (time.Duration, error) {
if d, err := time.ParseDuration(s); err == nil { // Check for days suffix
return d, nil if strings.HasSuffix(s, "d") {
} daysStr := strings.TrimSuffix(s, "d")
days, err := strconv.Atoi(daysStr)
re := regexp.MustCompile(`(\d+)\s*([a-zA-Z]+)`)
matches := re.FindAllStringSubmatch(s, -1)
if len(matches) == 0 {
return 0, fmt.Errorf("invalid duration: %q", s)
}
var total time.Duration
for _, match := range matches {
n, err := strconv.Atoi(match[1])
if err != nil { if err != nil {
return 0, fmt.Errorf("invalid number %q: %w", match[1], err) return 0, fmt.Errorf("invalid days value: %w", err)
} }
unit := strings.ToLower(match[2]) return time.Duration(days) * 24 * time.Hour, nil
switch unit {
case "d", "day", "days":
total += time.Duration(n) * 24 * time.Hour
case "w", "week", "weeks":
total += time.Duration(n) * 7 * 24 * time.Hour
case "mo", "month", "months":
total += time.Duration(n) * 30 * 24 * time.Hour
case "y", "year", "years":
total += time.Duration(n) * 365 * 24 * time.Hour
default:
return 0, fmt.Errorf("unknown time unit %q", unit)
} }
}
return total, nil // Otherwise use standard Go duration parsing
return time.ParseDuration(s)
} }

View File

@@ -1,112 +0,0 @@
package vaultik
import (
"testing"
"time"
)
// TestParseSnapshotName checks that parseSnapshotName returns the middle
// section of hostname_name_timestamp snapshot IDs, including names that
// themselves contain underscores.
func TestParseSnapshotName(t *testing.T) {
	type testCase struct {
		name       string
		snapshotID string
		want       string
	}

	cases := []testCase{
		{name: "standard format with name", snapshotID: "myhost_home_2026-01-12T14:41:15Z", want: "home"},
		{name: "standard format with different name", snapshotID: "server1_system_2026-02-15T09:30:00Z", want: "system"},
		{name: "name with underscores", snapshotID: "myhost_my_special_backup_2026-03-01T00:00:00Z", want: "my_special_backup"},
	}

	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			if got := parseSnapshotName(tc.snapshotID); got != tc.want {
				t.Errorf("parseSnapshotName(%q) = %q, want %q", tc.snapshotID, got, tc.want)
			}
		})
	}
}
// TestParseDuration feeds parseDuration day/week/month/year suffixes, a
// combined value, plain Go durations, and one unparseable input, and checks
// the resulting duration or the presence of an error.
func TestParseDuration(t *testing.T) {
	const (
		day  = 24 * time.Hour
		week = 7 * day
	)

	cases := []struct {
		in      string
		want    time.Duration
		wantErr bool
	}{
		{in: "30d", want: 30 * day},
		{in: "4w", want: 4 * week},
		{in: "6mo", want: 6 * 30 * day},
		{in: "1y", want: 365 * day},
		{in: "2w3d", want: 2*week + 3*day},
		{in: "1h", want: time.Hour},
		{in: "30s", want: 30 * time.Second},
		{in: "garbage", want: 0, wantErr: true},
	}

	for _, tc := range cases {
		t.Run(tc.in, func(t *testing.T) {
			got, err := parseDuration(tc.in)
			switch {
			case tc.wantErr && err == nil:
				t.Fatalf("expected error for %q, got %v", tc.in, got)
			case tc.wantErr:
				// An error was expected and received; the value is not inspected.
				return
			case err != nil:
				t.Fatalf("unexpected error for %q: %v", tc.in, err)
			case got != tc.want:
				t.Errorf("parseDuration(%q) = %v, want %v", tc.in, got, tc.want)
			}
		})
	}
}
// TestParseSnapshotTimestamp checks only whether parseSnapshotTimestamp
// accepts or rejects a snapshot ID; the parsed time itself is not compared.
func TestParseSnapshotTimestamp(t *testing.T) {
	type testCase struct {
		name       string
		snapshotID string
		wantErr    bool
	}

	cases := []testCase{
		{name: "valid with name", snapshotID: "myhost_home_2026-01-12T14:41:15Z", wantErr: false},
		{name: "valid without name", snapshotID: "myhost_2026-01-12T14:41:15Z", wantErr: false},
		{name: "invalid - single part", snapshotID: "nounderscore", wantErr: true},
		{name: "invalid - bad timestamp", snapshotID: "myhost_home_notadate", wantErr: true},
	}

	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			_, err := parseSnapshotTimestamp(tc.snapshotID)
			gotErr := err != nil
			if gotErr != tc.wantErr {
				t.Errorf("parseSnapshotTimestamp(%q) error = %v, wantErr %v", tc.snapshotID, err, tc.wantErr)
			}
		})
	}
}

View File

@@ -66,6 +66,18 @@ func (v *Vaultik) ShowInfo() error {
} }
v.printlnStdout() v.printlnStdout()
// Daemon Settings (if applicable)
if v.Config.BackupInterval > 0 || v.Config.MinTimeBetweenRun > 0 {
v.printfStdout("=== Daemon Settings ===\n")
if v.Config.BackupInterval > 0 {
v.printfStdout("Backup Interval: %s\n", v.Config.BackupInterval)
}
if v.Config.MinTimeBetweenRun > 0 {
v.printfStdout("Minimum Time: %s\n", v.Config.MinTimeBetweenRun)
}
v.printlnStdout()
}
// Local Database // Local Database
v.printfStdout("=== Local Database ===\n") v.printfStdout("=== Local Database ===\n")
v.printfStdout("Index Path: %s\n", v.Config.IndexPath) v.printfStdout("Index Path: %s\n", v.Config.IndexPath)
@@ -137,9 +149,9 @@ type RemoteInfoResult struct {
// RemoteInfo displays information about remote storage // RemoteInfo displays information about remote storage
func (v *Vaultik) RemoteInfo(jsonOutput bool) error { func (v *Vaultik) RemoteInfo(jsonOutput bool) error {
log.Info("Starting remote storage info gathering")
result := &RemoteInfoResult{} result := &RemoteInfoResult{}
// Get storage info
storageInfo := v.Storage.Info() storageInfo := v.Storage.Info()
result.StorageType = storageInfo.Type result.StorageType = storageInfo.Type
result.StorageLocation = storageInfo.Location result.StorageLocation = storageInfo.Location
@@ -149,52 +161,23 @@ func (v *Vaultik) RemoteInfo(jsonOutput bool) error {
v.printfStdout("Type: %s\n", storageInfo.Type) v.printfStdout("Type: %s\n", storageInfo.Type)
v.printfStdout("Location: %s\n", storageInfo.Location) v.printfStdout("Location: %s\n", storageInfo.Location)
v.printlnStdout() v.printlnStdout()
}
// List all snapshot metadata
if !jsonOutput {
v.printfStdout("Scanning snapshot metadata...\n") v.printfStdout("Scanning snapshot metadata...\n")
} }
snapshotMetadata, snapshotIDs, err := v.collectSnapshotMetadata()
if err != nil {
return err
}
if !jsonOutput {
v.printfStdout("Downloading %d manifest(s)...\n", len(snapshotIDs))
}
referencedBlobs := v.collectReferencedBlobsFromManifests(snapshotIDs, snapshotMetadata)
v.populateRemoteInfoResult(result, snapshotMetadata, snapshotIDs, referencedBlobs)
if err := v.scanRemoteBlobStorage(result, referencedBlobs, jsonOutput); err != nil {
return err
}
log.Info("Remote info complete",
"snapshots", result.TotalMetadataCount,
"total_blobs", result.TotalBlobCount,
"referenced_blobs", result.ReferencedBlobCount,
"orphaned_blobs", result.OrphanedBlobCount)
if jsonOutput {
enc := json.NewEncoder(v.Stdout)
enc.SetIndent("", " ")
return enc.Encode(result)
}
v.printRemoteInfoTable(result)
return nil
}
// collectSnapshotMetadata scans remote metadata and returns per-snapshot info and sorted IDs
func (v *Vaultik) collectSnapshotMetadata() (map[string]*SnapshotMetadataInfo, []string, error) {
snapshotMetadata := make(map[string]*SnapshotMetadataInfo) snapshotMetadata := make(map[string]*SnapshotMetadataInfo)
// Collect metadata files
metadataCh := v.Storage.ListStream(v.ctx, "metadata/") metadataCh := v.Storage.ListStream(v.ctx, "metadata/")
for obj := range metadataCh { for obj := range metadataCh {
if obj.Err != nil { if obj.Err != nil {
return nil, nil, fmt.Errorf("listing metadata: %w", obj.Err) return fmt.Errorf("listing metadata: %w", obj.Err)
} }
// Parse key: metadata/<snapshot-id>/<filename>
parts := strings.Split(obj.Key, "/") parts := strings.Split(obj.Key, "/")
if len(parts) < 3 { if len(parts) < 3 {
continue continue
@@ -202,11 +185,14 @@ func (v *Vaultik) collectSnapshotMetadata() (map[string]*SnapshotMetadataInfo, [
snapshotID := parts[1] snapshotID := parts[1]
if _, exists := snapshotMetadata[snapshotID]; !exists { if _, exists := snapshotMetadata[snapshotID]; !exists {
snapshotMetadata[snapshotID] = &SnapshotMetadataInfo{SnapshotID: snapshotID} snapshotMetadata[snapshotID] = &SnapshotMetadataInfo{
SnapshotID: snapshotID,
}
} }
info := snapshotMetadata[snapshotID] info := snapshotMetadata[snapshotID]
filename := parts[2] filename := parts[2]
if strings.HasPrefix(filename, "manifest") { if strings.HasPrefix(filename, "manifest") {
info.ManifestSize = obj.Size info.ManifestSize = obj.Size
} else if strings.HasPrefix(filename, "db") { } else if strings.HasPrefix(filename, "db") {
@@ -215,18 +201,19 @@ func (v *Vaultik) collectSnapshotMetadata() (map[string]*SnapshotMetadataInfo, [
info.TotalSize = info.ManifestSize + info.DatabaseSize info.TotalSize = info.ManifestSize + info.DatabaseSize
} }
// Sort snapshots by ID for consistent output
var snapshotIDs []string var snapshotIDs []string
for id := range snapshotMetadata { for id := range snapshotMetadata {
snapshotIDs = append(snapshotIDs, id) snapshotIDs = append(snapshotIDs, id)
} }
sort.Strings(snapshotIDs) sort.Strings(snapshotIDs)
return snapshotMetadata, snapshotIDs, nil // Download and parse all manifests to get referenced blobs
} if !jsonOutput {
v.printfStdout("Downloading %d manifest(s)...\n", len(snapshotIDs))
}
// collectReferencedBlobsFromManifests downloads manifests and returns referenced blob hashes with sizes referencedBlobs := make(map[string]int64) // hash -> compressed size
func (v *Vaultik) collectReferencedBlobsFromManifests(snapshotIDs []string, snapshotMetadata map[string]*SnapshotMetadataInfo) map[string]int64 {
referencedBlobs := make(map[string]int64)
for _, snapshotID := range snapshotIDs { for _, snapshotID := range snapshotIDs {
manifestKey := fmt.Sprintf("metadata/%s/manifest.json.zst", snapshotID) manifestKey := fmt.Sprintf("metadata/%s/manifest.json.zst", snapshotID)
@@ -243,8 +230,10 @@ func (v *Vaultik) collectReferencedBlobsFromManifests(snapshotIDs []string, snap
continue continue
} }
// Record blob info from manifest
info := snapshotMetadata[snapshotID] info := snapshotMetadata[snapshotID]
info.BlobCount = manifest.BlobCount info.BlobCount = manifest.BlobCount
var blobsSize int64 var blobsSize int64
for _, blob := range manifest.Blobs { for _, blob := range manifest.Blobs {
referencedBlobs[blob.Hash] = blob.CompressedSize referencedBlobs[blob.Hash] = blob.CompressedSize
@@ -253,11 +242,7 @@ func (v *Vaultik) collectReferencedBlobsFromManifests(snapshotIDs []string, snap
info.BlobsSize = blobsSize info.BlobsSize = blobsSize
} }
return referencedBlobs // Build result snapshots
}
// populateRemoteInfoResult fills in the result's snapshot and referenced blob stats
func (v *Vaultik) populateRemoteInfoResult(result *RemoteInfoResult, snapshotMetadata map[string]*SnapshotMetadataInfo, snapshotIDs []string, referencedBlobs map[string]int64) {
var totalMetadataSize int64 var totalMetadataSize int64
for _, id := range snapshotIDs { for _, id := range snapshotIDs {
info := snapshotMetadata[id] info := snapshotMetadata[id]
@@ -267,25 +252,26 @@ func (v *Vaultik) populateRemoteInfoResult(result *RemoteInfoResult, snapshotMet
result.TotalMetadataSize = totalMetadataSize result.TotalMetadataSize = totalMetadataSize
result.TotalMetadataCount = len(snapshotIDs) result.TotalMetadataCount = len(snapshotIDs)
// Calculate referenced blob stats
for _, size := range referencedBlobs { for _, size := range referencedBlobs {
result.ReferencedBlobCount++ result.ReferencedBlobCount++
result.ReferencedBlobSize += size result.ReferencedBlobSize += size
} }
}
// scanRemoteBlobStorage lists all blobs on remote and computes orphan stats // List all blobs on remote
func (v *Vaultik) scanRemoteBlobStorage(result *RemoteInfoResult, referencedBlobs map[string]int64, jsonOutput bool) error {
if !jsonOutput { if !jsonOutput {
v.printfStdout("Scanning blobs...\n") v.printfStdout("Scanning blobs...\n")
} }
blobCh := v.Storage.ListStream(v.ctx, "blobs/") allBlobs := make(map[string]int64) // hash -> size from storage
allBlobs := make(map[string]int64)
blobCh := v.Storage.ListStream(v.ctx, "blobs/")
for obj := range blobCh { for obj := range blobCh {
if obj.Err != nil { if obj.Err != nil {
return fmt.Errorf("listing blobs: %w", obj.Err) return fmt.Errorf("listing blobs: %w", obj.Err)
} }
// Extract hash from key: blobs/xx/yy/hash
parts := strings.Split(obj.Key, "/") parts := strings.Split(obj.Key, "/")
if len(parts) < 4 { if len(parts) < 4 {
continue continue
@@ -296,6 +282,7 @@ func (v *Vaultik) scanRemoteBlobStorage(result *RemoteInfoResult, referencedBlob
result.TotalBlobSize += obj.Size result.TotalBlobSize += obj.Size
} }
// Calculate orphaned blobs
for hash, size := range allBlobs { for hash, size := range allBlobs {
if _, referenced := referencedBlobs[hash]; !referenced { if _, referenced := referencedBlobs[hash]; !referenced {
result.OrphanedBlobCount++ result.OrphanedBlobCount++
@@ -303,11 +290,14 @@ func (v *Vaultik) scanRemoteBlobStorage(result *RemoteInfoResult, referencedBlob
} }
} }
return nil // Output results
} if jsonOutput {
enc := json.NewEncoder(v.Stdout)
enc.SetIndent("", " ")
return enc.Encode(result)
}
// printRemoteInfoTable renders the human-readable remote info output // Human-readable output
func (v *Vaultik) printRemoteInfoTable(result *RemoteInfoResult) {
v.printfStdout("\n=== Snapshot Metadata ===\n") v.printfStdout("\n=== Snapshot Metadata ===\n")
if len(result.Snapshots) == 0 { if len(result.Snapshots) == 0 {
v.printfStdout("No snapshots found\n") v.printfStdout("No snapshots found\n")
@@ -330,15 +320,20 @@ func (v *Vaultik) printRemoteInfoTable(result *RemoteInfoResult) {
v.printfStdout("\n=== Blob Storage ===\n") v.printfStdout("\n=== Blob Storage ===\n")
v.printfStdout("Total blobs on remote: %s (%s)\n", v.printfStdout("Total blobs on remote: %s (%s)\n",
humanize.Comma(int64(result.TotalBlobCount)), humanize.Bytes(uint64(result.TotalBlobSize))) humanize.Comma(int64(result.TotalBlobCount)),
humanize.Bytes(uint64(result.TotalBlobSize)))
v.printfStdout("Referenced by snapshots: %s (%s)\n", v.printfStdout("Referenced by snapshots: %s (%s)\n",
humanize.Comma(int64(result.ReferencedBlobCount)), humanize.Bytes(uint64(result.ReferencedBlobSize))) humanize.Comma(int64(result.ReferencedBlobCount)),
humanize.Bytes(uint64(result.ReferencedBlobSize)))
v.printfStdout("Orphaned (unreferenced): %s (%s)\n", v.printfStdout("Orphaned (unreferenced): %s (%s)\n",
humanize.Comma(int64(result.OrphanedBlobCount)), humanize.Bytes(uint64(result.OrphanedBlobSize))) humanize.Comma(int64(result.OrphanedBlobCount)),
humanize.Bytes(uint64(result.OrphanedBlobSize)))
if result.OrphanedBlobCount > 0 { if result.OrphanedBlobCount > 0 {
v.printfStdout("\nRun 'vaultik prune --remote' to remove orphaned blobs.\n") v.printfStdout("\nRun 'vaultik prune --remote' to remove orphaned blobs.\n")
} }
return nil
} }
// truncateString truncates a string to maxLen, adding "..." if truncated // truncateString truncates a string to maxLen, adding "..." if truncated

View File

@@ -541,174 +541,3 @@ func TestBackupAndRestore(t *testing.T) {
t.Log("Backup and restore test completed successfully") t.Log("Backup and restore test completed successfully")
} }
// TestEndToEndFileStorage exercises the full backup → restore loop against the
// real `file://` storage backend (FileStorer) on a real OS filesystem. This is
// the closest local approximation of a production backup: encrypted blobs get
// written to disk, the metadata SQLite database is exported through the same
// blobgen pipeline as a real backup, and restoration reads them back through
// the public Vaultik.Restore entrypoint. It is the canonical end-to-end smoke
// test for 1.0.
//
// The test proceeds in four phases: build a source tree under a temp
// directory, back it up into a FileStorer, restore it into a separate
// directory, and compare the restored tree against the originals.
func TestEndToEndFileStorage(t *testing.T) {
log.Initialize(log.Config{})
// Real OS filesystem (SQLite + FileStorer both need it).
fs := afero.NewOsFs()
tempDir, err := os.MkdirTemp("", "vaultik-e2e-")
require.NoError(t, err)
defer func() { _ = os.RemoveAll(tempDir) }()
// Source tree, storage root, restore target and local index all live
// side by side under tempDir, so the deferred RemoveAll cleans up everything.
dataDir := filepath.Join(tempDir, "source")
storeDir := filepath.Join(tempDir, "remote")
restoreDir := filepath.Join(tempDir, "restored")
dbPath := filepath.Join(tempDir, "index.sqlite")
// Write a representative mix of file sizes:
// - empty file
// - tiny text file
// - file just under chunk boundary
// - file forcing multiple chunks
// - nested subdirectories
chunkSize := int64(64 * 1024)
maxBlobSize := int64(512 * 1024)
testFiles := map[string][]byte{
filepath.Join(dataDir, "empty.txt"): {},
filepath.Join(dataDir, "small.txt"): []byte("hello vaultik"),
filepath.Join(dataDir, "subdir", "medium.bin"): bytesPattern("medium-", int(chunkSize/2)),
filepath.Join(dataDir, "subdir", "large.bin"): bytesPattern("large-", int(chunkSize*4)),
filepath.Join(dataDir, "deep", "nest", "leaf.txt"): []byte("leaf"),
}
for path, content := range testFiles {
require.NoError(t, fs.MkdirAll(filepath.Dir(path), 0o755))
require.NoError(t, afero.WriteFile(fs, path, content, 0o644))
}
// Create a file with non-default permissions.
restrictedPath := filepath.Join(dataDir, "restricted.txt")
require.NoError(t, afero.WriteFile(fs, restrictedPath, []byte("secret"), 0o600))
// Registered after the write loop above so the loop does not rewrite it
// with 0o644; it still takes part in the byte-equality comparison below.
testFiles[restrictedPath] = []byte("secret")
// Create an empty directory (should survive round-trip).
emptyDir := filepath.Join(dataDir, "emptydir")
require.NoError(t, fs.MkdirAll(emptyDir, 0o755))
// Create a symlink.
symlinkPath := filepath.Join(dataDir, "link-to-small")
require.NoError(t, os.Symlink("small.txt", symlinkPath))
// FileStorer is the real-world local-disk backend.
storer, err := storage.NewFileStorer(storeDir)
require.NoError(t, err)
// Fixed age key pair used only by this test. The recipient is handed to the
// scanner via cfg.AgeRecipients; the secret key travels in cfg to the restore
// side (presumably read by Restore — confirm against its implementation).
agePublicKey := "age1ezrjmfpwsc95svdg0y54mums3zevgzu0x0ecq2f7tp8a05gl0sjq9q9wjg"
ageSecretKey := "AGE-SECRET-KEY-19CR5YSFW59HM4TLD6GXVEDMZFTVVF7PPHKUT68TXSFPK7APHXA2QS2NJA5"
cfg := &config.Config{
AgeRecipients: []string{agePublicKey},
AgeSecretKey: ageSecretKey,
CompressionLevel: 3,
Hostname: "test-host",
}
ctx := context.Background()
// Open the local index database at dbPath and wire the snapshot manager
// and scanner to it and to the FileStorer.
db, err := database.New(ctx, dbPath)
require.NoError(t, err)
defer func() { _ = db.Close() }()
repos := database.NewRepositories(db)
sm := snapshot.NewSnapshotManager(snapshot.SnapshotManagerParams{
Repos: repos,
Storage: storer,
Config: cfg,
})
sm.SetFilesystem(fs)
scanner := snapshot.NewScanner(snapshot.ScannerConfig{
FS: fs,
Storage: storer,
ChunkSize: chunkSize,
MaxBlobSize: maxBlobSize,
CompressionLevel: cfg.CompressionLevel,
AgeRecipients: cfg.AgeRecipients,
Repositories: repos,
})
// Backup phase: create the snapshot record, scan dataDir into it, mark it
// complete, then export its metadata.
snapshotID, err := sm.CreateSnapshotWithName(ctx, cfg.Hostname, "e2e", "test-version", "test-git")
require.NoError(t, err)
scanResult, err := scanner.Scan(ctx, dataDir, snapshotID)
require.NoError(t, err)
// The scan must have seen at least one file and produced at least one blob.
require.Greater(t, scanResult.FilesScanned, 0)
require.Greater(t, scanResult.BlobsCreated, 0)
require.NoError(t, sm.CompleteSnapshot(ctx, snapshotID))
require.NoError(t, sm.ExportSnapshotMetadata(ctx, dbPath, snapshotID))
// Verify the backup actually landed on disk under blobs/ and metadata/.
blobInfo, err := os.Stat(filepath.Join(storeDir, "blobs"))
require.NoError(t, err)
require.True(t, blobInfo.IsDir())
metaInfo, err := os.Stat(filepath.Join(storeDir, "metadata", snapshotID))
require.NoError(t, err)
require.True(t, metaInfo.IsDir())
// Tear down the source DB before restore — restore must work using only
// the remote bytes plus the secret key, with no help from the local index.
require.NoError(t, db.Close())
// Restore phase: a fresh Vaultik with no database attached, output discarded.
restoreVaultik := &vaultik.Vaultik{
Config: cfg,
Storage: storer,
Fs: fs,
Stdout: io.Discard,
Stderr: io.Discard,
}
restoreVaultik.SetContext(ctx)
require.NoError(t, restoreVaultik.Restore(&vaultik.RestoreOptions{
SnapshotID: snapshotID,
TargetDir: restoreDir,
Verify: true,
}))
// Byte-equality compare every original against its restored copy.
for origPath, expected := range testFiles {
// origPath is the full source path under tempDir, so the restored copy is
// expected at restoreDir followed by that entire original path.
restoredPath := filepath.Join(restoreDir, origPath)
got, err := afero.ReadFile(fs, restoredPath)
require.NoError(t, err, "restored file missing: %s", restoredPath)
require.Equalf(t, expected, got, "byte-equality failed for %s", origPath)
}
// Verify the restricted file kept its permissions.
restoredRestricted := filepath.Join(restoreDir, restrictedPath)
rInfo, err := os.Stat(restoredRestricted)
require.NoError(t, err)
assert.Equal(t, os.FileMode(0o600), rInfo.Mode().Perm(),
"restricted file should preserve 0600 permissions")
// Verify the empty directory was restored.
restoredEmptyDir := filepath.Join(restoreDir, emptyDir)
dInfo, err := os.Stat(restoredEmptyDir)
require.NoError(t, err, "empty directory should be restored")
assert.True(t, dInfo.IsDir(), "emptydir should be a directory")
// Verify the symlink was restored with the correct target.
restoredSymlink := filepath.Join(restoreDir, symlinkPath)
// Readlink (not Stat) so the link itself is inspected rather than followed.
target, err := os.Readlink(restoredSymlink)
require.NoError(t, err, "symlink should be restored")
assert.Equal(t, "small.txt", target, "symlink target should be preserved")
}
// bytesPattern builds a reproducible n-byte buffer derived from tag. Byte i
// is the tag character at position i modulo len(tag), XORed with the low
// eight bits of i, so the same (tag, n) pair always yields the same content.
// tag must be non-empty whenever n is greater than zero.
func bytesPattern(tag string, n int) []byte {
	buf := make([]byte, n)
	tagLen := len(tag)
	for pos := 0; pos < n; pos++ {
		// Converting pos to byte keeps only its low eight bits.
		buf[pos] = tag[pos%tagLen] ^ byte(pos)
	}
	return buf
}

View File

@@ -27,19 +27,95 @@ type PruneBlobsResult struct {
func (v *Vaultik) PruneBlobs(opts *PruneOptions) error { func (v *Vaultik) PruneBlobs(opts *PruneOptions) error {
log.Info("Starting prune operation") log.Info("Starting prune operation")
allBlobsReferenced, err := v.collectReferencedBlobs() // Get all remote snapshots and their manifests
if err != nil { allBlobsReferenced := make(map[string]bool)
return err manifestCount := 0
// List all snapshots in storage
log.Info("Listing remote snapshots")
objectCh := v.Storage.ListStream(v.ctx, "metadata/")
var snapshotIDs []string
for object := range objectCh {
if object.Err != nil {
return fmt.Errorf("listing remote snapshots: %w", object.Err)
} }
allBlobs, err := v.listAllRemoteBlobs() // Extract snapshot ID from paths like metadata/hostname-20240115-143052Z/
if err != nil { parts := strings.Split(object.Key, "/")
return err if len(parts) >= 2 && parts[0] == "metadata" && parts[1] != "" {
// Check if this is a directory by looking for trailing slash
if strings.HasSuffix(object.Key, "/") || strings.Contains(object.Key, "/manifest.json.zst") {
snapshotID := parts[1]
// Only add unique snapshot IDs
found := false
for _, id := range snapshotIDs {
if id == snapshotID {
found = true
break
}
}
if !found {
snapshotIDs = append(snapshotIDs, snapshotID)
}
}
}
} }
unreferencedBlobs, totalSize := v.findUnreferencedBlobs(allBlobs, allBlobsReferenced) log.Info("Found manifests in remote storage", "count", len(snapshotIDs))
result := &PruneBlobsResult{BlobsFound: len(unreferencedBlobs)} // Download and parse each manifest to get referenced blobs
for _, snapshotID := range snapshotIDs {
log.Debug("Processing manifest", "snapshot_id", snapshotID)
manifest, err := v.downloadManifest(snapshotID)
if err != nil {
log.Error("Failed to download manifest", "snapshot_id", snapshotID, "error", err)
continue
}
// Add all blobs from this manifest to our referenced set
for _, blob := range manifest.Blobs {
allBlobsReferenced[blob.Hash] = true
}
manifestCount++
}
log.Info("Processed manifests", "count", manifestCount, "unique_blobs_referenced", len(allBlobsReferenced))
// List all blobs in storage
log.Info("Listing all blobs in storage")
allBlobs := make(map[string]int64) // hash -> size
blobObjectCh := v.Storage.ListStream(v.ctx, "blobs/")
for object := range blobObjectCh {
if object.Err != nil {
return fmt.Errorf("listing blobs: %w", object.Err)
}
// Extract hash from path like blobs/ab/cd/abcdef123456...
parts := strings.Split(object.Key, "/")
if len(parts) == 4 && parts[0] == "blobs" {
hash := parts[3]
allBlobs[hash] = object.Size
}
}
log.Info("Found blobs in storage", "count", len(allBlobs))
// Find unreferenced blobs
var unreferencedBlobs []string
var totalSize int64
for hash, size := range allBlobs {
if !allBlobsReferenced[hash] {
unreferencedBlobs = append(unreferencedBlobs, hash)
totalSize += size
}
}
result := &PruneBlobsResult{
BlobsFound: len(unreferencedBlobs),
}
if len(unreferencedBlobs) == 0 { if len(unreferencedBlobs) == 0 {
log.Info("No unreferenced blobs found") log.Info("No unreferenced blobs found")
@@ -50,15 +126,18 @@ func (v *Vaultik) PruneBlobs(opts *PruneOptions) error {
return nil return nil
} }
// Show what will be deleted
log.Info("Found unreferenced blobs", "count", len(unreferencedBlobs), "total_size", humanize.Bytes(uint64(totalSize))) log.Info("Found unreferenced blobs", "count", len(unreferencedBlobs), "total_size", humanize.Bytes(uint64(totalSize)))
if !opts.JSON { if !opts.JSON {
v.printfStdout("Found %d unreferenced blob(s) totaling %s\n", len(unreferencedBlobs), humanize.Bytes(uint64(totalSize))) v.printfStdout("Found %d unreferenced blob(s) totaling %s\n", len(unreferencedBlobs), humanize.Bytes(uint64(totalSize)))
} }
// Confirm unless --force is used (skip in JSON mode - require --force)
if !opts.Force && !opts.JSON { if !opts.Force && !opts.JSON {
v.printfStdout("\nDelete %d unreferenced blob(s)? [y/N] ", len(unreferencedBlobs)) v.printfStdout("\nDelete %d unreferenced blob(s)? [y/N] ", len(unreferencedBlobs))
var confirm string var confirm string
if _, err := v.scanStdin(&confirm); err != nil { if _, err := v.scanStdin(&confirm); err != nil {
// Treat EOF or error as "no"
v.printlnStdout("Cancelled") v.printlnStdout("Cancelled")
return nil return nil
} }
@@ -68,109 +147,10 @@ func (v *Vaultik) PruneBlobs(opts *PruneOptions) error {
} }
} }
v.deleteUnreferencedBlobs(unreferencedBlobs, allBlobs, result) // Delete unreferenced blobs
if opts.JSON {
return v.outputPruneBlobsJSON(result)
}
v.printfStdout("\nDeleted %d blob(s) totaling %s\n", result.BlobsDeleted, humanize.Bytes(uint64(result.BytesFreed)))
if result.BlobsFailed > 0 {
v.printfStdout("Failed to delete %d blob(s)\n", result.BlobsFailed)
}
return nil
}
// collectReferencedBlobs downloads all manifests and returns the set of referenced blob hashes
func (v *Vaultik) collectReferencedBlobs() (map[string]bool, error) {
log.Info("Listing remote snapshots")
snapshotIDs, err := v.listUniqueSnapshotIDs()
if err != nil {
return nil, fmt.Errorf("listing snapshot IDs: %w", err)
}
log.Info("Found manifests in remote storage", "count", len(snapshotIDs))
allBlobsReferenced := make(map[string]bool)
manifestCount := 0
for _, snapshotID := range snapshotIDs {
log.Debug("Processing manifest", "snapshot_id", snapshotID)
manifest, err := v.downloadManifest(snapshotID)
if err != nil {
log.Error("Failed to download manifest", "snapshot_id", snapshotID, "error", err)
continue
}
for _, blob := range manifest.Blobs {
allBlobsReferenced[blob.Hash] = true
}
manifestCount++
}
log.Info("Processed manifests", "count", manifestCount, "unique_blobs_referenced", len(allBlobsReferenced))
return allBlobsReferenced, nil
}
// listUniqueSnapshotIDs returns deduplicated snapshot IDs from remote metadata
func (v *Vaultik) listUniqueSnapshotIDs() ([]string, error) {
objectCh := v.Storage.ListStream(v.ctx, "metadata/")
seen := make(map[string]bool)
var snapshotIDs []string
for object := range objectCh {
if object.Err != nil {
return nil, fmt.Errorf("listing metadata objects: %w", object.Err)
}
parts := strings.Split(object.Key, "/")
if len(parts) >= 2 && parts[0] == "metadata" && parts[1] != "" {
if strings.HasSuffix(object.Key, "/") || strings.Contains(object.Key, "/manifest.json.zst") {
snapshotID := parts[1]
if !seen[snapshotID] {
seen[snapshotID] = true
snapshotIDs = append(snapshotIDs, snapshotID)
}
}
}
}
return snapshotIDs, nil
}
// listAllRemoteBlobs returns a map of all blob hashes to their sizes in remote storage
func (v *Vaultik) listAllRemoteBlobs() (map[string]int64, error) {
log.Info("Listing all blobs in storage")
allBlobs := make(map[string]int64)
blobObjectCh := v.Storage.ListStream(v.ctx, "blobs/")
for object := range blobObjectCh {
if object.Err != nil {
return nil, fmt.Errorf("listing blobs: %w", object.Err)
}
parts := strings.Split(object.Key, "/")
if len(parts) == 4 && parts[0] == "blobs" {
allBlobs[parts[3]] = object.Size
}
}
log.Info("Found blobs in storage", "count", len(allBlobs))
return allBlobs, nil
}
// findUnreferencedBlobs returns blob hashes not referenced by any manifest and their total size
func (v *Vaultik) findUnreferencedBlobs(allBlobs map[string]int64, referenced map[string]bool) ([]string, int64) {
var unreferenced []string
var totalSize int64
for hash, size := range allBlobs {
if !referenced[hash] {
unreferenced = append(unreferenced, hash)
totalSize += size
}
}
return unreferenced, totalSize
}
// deleteUnreferencedBlobs deletes the given blobs from storage and populates the result
func (v *Vaultik) deleteUnreferencedBlobs(unreferencedBlobs []string, allBlobs map[string]int64, result *PruneBlobsResult) {
log.Info("Deleting unreferenced blobs") log.Info("Deleting unreferenced blobs")
deletedCount := 0
deletedSize := int64(0)
for i, hash := range unreferencedBlobs { for i, hash := range unreferencedBlobs {
blobPath := fmt.Sprintf("blobs/%s/%s/%s", hash[:2], hash[2:4], hash) blobPath := fmt.Sprintf("blobs/%s/%s/%s", hash[:2], hash[2:4], hash)
@@ -180,9 +160,10 @@ func (v *Vaultik) deleteUnreferencedBlobs(unreferencedBlobs []string, allBlobs m
continue continue
} }
result.BlobsDeleted++ deletedCount++
result.BytesFreed += allBlobs[hash] deletedSize += allBlobs[hash]
// Progress update every 100 blobs
if (i+1)%100 == 0 || i == len(unreferencedBlobs)-1 { if (i+1)%100 == 0 || i == len(unreferencedBlobs)-1 {
log.Info("Deletion progress", log.Info("Deletion progress",
"deleted", i+1, "deleted", i+1,
@@ -192,13 +173,26 @@ func (v *Vaultik) deleteUnreferencedBlobs(unreferencedBlobs []string, allBlobs m
} }
} }
result.BlobsFailed = len(unreferencedBlobs) - result.BlobsDeleted result.BlobsDeleted = deletedCount
result.BlobsFailed = len(unreferencedBlobs) - deletedCount
result.BytesFreed = deletedSize
log.Info("Prune complete", log.Info("Prune complete",
"deleted_count", result.BlobsDeleted, "deleted_count", deletedCount,
"deleted_size", humanize.Bytes(uint64(result.BytesFreed)), "deleted_size", humanize.Bytes(uint64(deletedSize)),
"failed", result.BlobsFailed, "failed", len(unreferencedBlobs)-deletedCount,
) )
if opts.JSON {
return v.outputPruneBlobsJSON(result)
}
v.printfStdout("\nDeleted %d blob(s) totaling %s\n", deletedCount, humanize.Bytes(uint64(deletedSize)))
if deletedCount < len(unreferencedBlobs) {
v.printfStdout("Failed to delete %d blob(s)\n", len(unreferencedBlobs)-deletedCount)
}
return nil
} }
// outputPruneBlobsJSON outputs the prune result as JSON // outputPruneBlobsJSON outputs the prune result as JSON

View File

@@ -1,256 +0,0 @@
package vaultik_test
import (
"bytes"
"context"
"database/sql"
"strings"
"testing"
"time"
"git.eeqj.de/sneak/vaultik/internal/database"
"git.eeqj.de/sneak/vaultik/internal/log"
"git.eeqj.de/sneak/vaultik/internal/types"
"git.eeqj.de/sneak/vaultik/internal/vaultik"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)
// setupPurgeTest builds a Vaultik backed by an in-memory database and a mock
// storer. Each ID in snapshotIDs is inserted as a completed snapshot
// (CompletedAt is five minutes after StartedAt), and a stub manifest object is
// stored under "metadata/<id>/manifest.json.zst" so that syncWithRemote keeps
// the snapshot.
//
// The text after the last "_" of every ID must be an RFC3339 timestamp; it is
// used as the snapshot start time.
func setupPurgeTest(t *testing.T, snapshotIDs []string) *vaultik.Vaultik {
	t.Helper()
	log.Initialize(log.Config{})

	ctx := context.Background()

	db, err := database.New(ctx, ":memory:")
	require.NoError(t, err)
	t.Cleanup(func() { _ = db.Close() })

	repos := database.NewRepositories(db)
	storer := NewMockStorer()

	for _, id := range snapshotIDs {
		// The trailing "_"-separated field of the ID carries the start time.
		stamp := id[strings.LastIndex(id, "_")+1:]
		began, err := time.Parse(time.RFC3339, stamp)
		require.NoError(t, err, "parsing timestamp from snapshot ID %q", id)
		finished := began.Add(5 * time.Minute)

		record := &database.Snapshot{
			ID:             types.SnapshotID(id),
			Hostname:       "testhost",
			VaultikVersion: "test",
			StartedAt:      began,
			CompletedAt:    &finished,
		}
		err = repos.WithTx(ctx, func(ctx context.Context, tx *sql.Tx) error {
			return repos.Snapshots.Create(ctx, tx, record)
		})
		require.NoError(t, err, "creating snapshot %s", id)

		// Remote manifest stub: its presence is what keeps the snapshot
		// alive through syncWithRemote.
		err = storer.Put(ctx, "metadata/"+id+"/manifest.json.zst", strings.NewReader("stub"))
		require.NoError(t, err)
	}

	v := &vaultik.Vaultik{
		Storage:      storer,
		Repositories: repos,
		DB:           db,
		Stdout:       &bytes.Buffer{},
		Stderr:       &bytes.Buffer{},
		Stdin:        &bytes.Buffer{},
	}
	v.SetContext(ctx)
	return v
}
// listRemainingSnapshots fetches up to 10000 recent snapshots from the
// database and returns the IDs of those that have a completion time set.
func listRemainingSnapshots(t *testing.T, v *vaultik.Vaultik) []string {
	t.Helper()

	recent, err := v.Repositories.Snapshots.ListRecent(context.Background(), 10000)
	require.NoError(t, err)

	var completed []string
	for _, snap := range recent {
		if snap.CompletedAt == nil {
			// Still in progress (or never finished); not counted.
			continue
		}
		completed = append(completed, snap.ID.String())
	}
	return completed
}
// TestPurgeKeepLatest_PerName interleaves snapshots of two names ("home" and
// "system") and checks that --keep-latest retains the newest snapshot of each
// name rather than only the single newest overall.
func TestPurgeKeepLatest_PerName(t *testing.T) {
	v := setupPurgeTest(t, []string{
		"testhost_system_2026-01-01T00:00:00Z",
		"testhost_home_2026-01-01T01:00:00Z",
		"testhost_system_2026-01-01T02:00:00Z",
		"testhost_home_2026-01-01T03:00:00Z",
		"testhost_system_2026-01-01T04:00:00Z",
	})

	purgeOpts := &vaultik.SnapshotPurgeOptions{
		KeepLatest: true,
		Force:      true,
	}
	require.NoError(t, v.PurgeSnapshotsWithOptions(purgeOpts))

	// One survivor per name: the newest "system" and the newest "home".
	survivors := listRemainingSnapshots(t, v)
	assert.Len(t, survivors, 2, "should keep exactly 2 snapshots (one per name)")
	assert.Contains(t, survivors, "testhost_system_2026-01-01T04:00:00Z", "should keep latest system")
	assert.Contains(t, survivors, "testhost_home_2026-01-01T03:00:00Z", "should keep latest home")
}
// TestPurgeKeepLatest_SingleName checks that when every snapshot shares one
// name, --keep-latest leaves exactly the newest snapshot behind.
func TestPurgeKeepLatest_SingleName(t *testing.T) {
	v := setupPurgeTest(t, []string{
		"testhost_home_2026-01-01T00:00:00Z",
		"testhost_home_2026-01-01T01:00:00Z",
		"testhost_home_2026-01-01T02:00:00Z",
	})

	purgeOpts := &vaultik.SnapshotPurgeOptions{
		KeepLatest: true,
		Force:      true,
	}
	require.NoError(t, v.PurgeSnapshotsWithOptions(purgeOpts))

	survivors := listRemainingSnapshots(t, v)
	assert.Len(t, survivors, 1)
	assert.Contains(t, survivors, "testhost_home_2026-01-01T02:00:00Z", "should keep the newest")
}
// TestPurgeKeepLatest_WithNameFilter restricts the purge to the "home" name
// and checks that only "home" snapshots are pruned while every "system"
// snapshot is left alone.
func TestPurgeKeepLatest_WithNameFilter(t *testing.T) {
	v := setupPurgeTest(t, []string{
		"testhost_system_2026-01-01T00:00:00Z",
		"testhost_home_2026-01-01T01:00:00Z",
		"testhost_system_2026-01-01T02:00:00Z",
		"testhost_home_2026-01-01T03:00:00Z",
		"testhost_home_2026-01-01T04:00:00Z",
	})

	purgeOpts := &vaultik.SnapshotPurgeOptions{
		KeepLatest: true,
		Force:      true,
		Names:      []string{"home"},
	}
	require.NoError(t, v.PurgeSnapshotsWithOptions(purgeOpts))

	// Expected survivors: both "system" snapshots plus the newest "home".
	survivors := listRemainingSnapshots(t, v)
	assert.Len(t, survivors, 3)
	assert.Contains(t, survivors, "testhost_system_2026-01-01T00:00:00Z")
	assert.Contains(t, survivors, "testhost_system_2026-01-01T02:00:00Z")
	assert.Contains(t, survivors, "testhost_home_2026-01-01T04:00:00Z")
}
// TestPurgeKeepLatest_NoSnapshots checks that purging with --keep-latest
// against an empty snapshot set returns no error.
func TestPurgeKeepLatest_NoSnapshots(t *testing.T) {
	v := setupPurgeTest(t, nil)

	purgeOpts := &vaultik.SnapshotPurgeOptions{
		KeepLatest: true,
		Force:      true,
	}
	require.NoError(t, v.PurgeSnapshotsWithOptions(purgeOpts))
}
// TestPurgeKeepLatest_NameFilterNoMatch checks that a name filter matching no
// snapshot causes nothing to be purged.
func TestPurgeKeepLatest_NameFilterNoMatch(t *testing.T) {
	v := setupPurgeTest(t, []string{
		"testhost_system_2026-01-01T00:00:00Z",
		"testhost_system_2026-01-01T01:00:00Z",
	})

	purgeOpts := &vaultik.SnapshotPurgeOptions{
		KeepLatest: true,
		Force:      true,
		Names:      []string{"nonexistent"},
	}
	require.NoError(t, v.PurgeSnapshotsWithOptions(purgeOpts))

	// The filter selected nothing, so both snapshots must still be present.
	survivors := listRemainingSnapshots(t, v)
	assert.Len(t, survivors, 2)
}
// TestPurgeOlderThan_WithNameFilter combines --older-than with a name filter
// and checks that the age cutoff is applied only to snapshots of the selected
// name.
func TestPurgeOlderThan_WithNameFilter(t *testing.T) {
	v := setupPurgeTest(t, []string{
		"testhost_system_2020-01-01T00:00:00Z",
		"testhost_home_2020-01-01T00:00:00Z",
		"testhost_system_2026-01-01T00:00:00Z",
		"testhost_home_2026-01-01T00:00:00Z",
	})

	// Only "home" snapshots older than 365 days are eligible for removal.
	purgeOpts := &vaultik.SnapshotPurgeOptions{
		OlderThan: "365d",
		Force:     true,
		Names:     []string{"home"},
	}
	require.NoError(t, v.PurgeSnapshotsWithOptions(purgeOpts))

	// The old "system" snapshot is outside the name filter and stays; the old
	// "home" snapshot is removed; both recent snapshots stay.
	survivors := listRemainingSnapshots(t, v)
	assert.Len(t, survivors, 3)
	assert.Contains(t, survivors, "testhost_system_2020-01-01T00:00:00Z")
	assert.Contains(t, survivors, "testhost_system_2026-01-01T00:00:00Z")
	assert.Contains(t, survivors, "testhost_home_2026-01-01T00:00:00Z")
}
// TestPurgeKeepLatest_ThreeNames uses three snapshot names with several
// snapshots each and checks that --keep-latest leaves the newest of every
// name.
func TestPurgeKeepLatest_ThreeNames(t *testing.T) {
	v := setupPurgeTest(t, []string{
		"testhost_home_2026-01-01T00:00:00Z",
		"testhost_system_2026-01-01T01:00:00Z",
		"testhost_media_2026-01-01T02:00:00Z",
		"testhost_home_2026-01-01T03:00:00Z",
		"testhost_system_2026-01-01T04:00:00Z",
		"testhost_media_2026-01-01T05:00:00Z",
		"testhost_home_2026-01-01T06:00:00Z",
	})

	purgeOpts := &vaultik.SnapshotPurgeOptions{
		KeepLatest: true,
		Force:      true,
	}
	require.NoError(t, v.PurgeSnapshotsWithOptions(purgeOpts))

	survivors := listRemainingSnapshots(t, v)
	assert.Len(t, survivors, 3, "should keep one per name")
	assert.Contains(t, survivors, "testhost_home_2026-01-01T06:00:00Z")
	assert.Contains(t, survivors, "testhost_system_2026-01-01T04:00:00Z")
	assert.Contains(t, survivors, "testhost_media_2026-01-01T05:00:00Z")
}

View File

@@ -55,9 +55,15 @@ type RestoreResult struct {
func (v *Vaultik) Restore(opts *RestoreOptions) error { func (v *Vaultik) Restore(opts *RestoreOptions) error {
startTime := time.Now() startTime := time.Now()
identity, err := v.prepareRestoreIdentity() // Check for age_secret_key
if v.Config.AgeSecretKey == "" {
return fmt.Errorf("decryption key required for restore\n\nSet the VAULTIK_AGE_SECRET_KEY environment variable to your age private key:\n export VAULTIK_AGE_SECRET_KEY='AGE-SECRET-KEY-...'")
}
// Parse the age identity
identity, err := age.ParseX25519Identity(v.Config.AgeSecretKey)
if err != nil { if err != nil {
return err return fmt.Errorf("parsing age secret key: %w", err)
} }
log.Info("Starting restore operation", log.Info("Starting restore operation",
@@ -109,73 +115,10 @@ func (v *Vaultik) Restore(opts *RestoreOptions) error {
} }
// Step 5: Restore files // Step 5: Restore files
result, err := v.restoreAllFiles(files, repos, opts, identity, chunkToBlobMap)
if err != nil {
return err
}
result.Duration = time.Since(startTime)
log.Info("Restore complete",
"files_restored", result.FilesRestored,
"bytes_restored", humanize.Bytes(uint64(result.BytesRestored)),
"blobs_downloaded", result.BlobsDownloaded,
"bytes_downloaded", humanize.Bytes(uint64(result.BytesDownloaded)),
"duration", result.Duration,
)
v.printfStdout("Restored %d files (%s) in %s\n",
result.FilesRestored,
humanize.Bytes(uint64(result.BytesRestored)),
result.Duration.Round(time.Second),
)
if result.FilesFailed > 0 {
_, _ = fmt.Fprintf(v.Stdout, "\nWARNING: %d file(s) failed to restore:\n", result.FilesFailed)
for _, path := range result.FailedFiles {
_, _ = fmt.Fprintf(v.Stdout, " - %s\n", path)
}
}
// Run verification if requested
if opts.Verify {
if err := v.handleRestoreVerification(repos, files, opts, result); err != nil {
return err
}
}
if result.FilesFailed > 0 {
return fmt.Errorf("%d file(s) failed to restore", result.FilesFailed)
}
return nil
}
// prepareRestoreIdentity returns the age identity used to decrypt during a
// restore. It fails with a user-facing hint when v.Config.AgeSecretKey is
// empty, and with a wrapped parse error when the key is not a valid X25519
// identity.
func (v *Vaultik) prepareRestoreIdentity() (age.Identity, error) {
	secretKey := v.Config.AgeSecretKey
	if secretKey == "" {
		return nil, fmt.Errorf("decryption key required for restore\n\nSet the VAULTIK_AGE_SECRET_KEY environment variable to your age private key:\n export VAULTIK_AGE_SECRET_KEY='AGE-SECRET-KEY-...'")
	}

	parsed, parseErr := age.ParseX25519Identity(secretKey)
	if parseErr != nil {
		return nil, fmt.Errorf("parsing age secret key: %w", parseErr)
	}
	return parsed, nil
}
// restoreAllFiles iterates over files and restores each one, tracking progress and failures
func (v *Vaultik) restoreAllFiles(
files []*database.File,
repos *database.Repositories,
opts *RestoreOptions,
identity age.Identity,
chunkToBlobMap map[string]*database.BlobChunk,
) (*RestoreResult, error) {
result := &RestoreResult{} result := &RestoreResult{}
blobCache, err := newBlobDiskCache(4 * v.Config.BlobSizeLimit.Int64()) blobCache, err := newBlobDiskCache(4 * v.Config.BlobSizeLimit.Int64())
if err != nil { if err != nil {
return nil, fmt.Errorf("creating blob cache: %w", err) return fmt.Errorf("creating blob cache: %w", err)
} }
defer func() { _ = blobCache.Close() }() defer func() { _ = blobCache.Close() }()
@@ -190,7 +133,7 @@ func (v *Vaultik) restoreAllFiles(
for i, file := range files { for i, file := range files {
if v.ctx.Err() != nil { if v.ctx.Err() != nil {
return nil, v.ctx.Err() return v.ctx.Err()
} }
if err := v.restoreFile(v.ctx, repos, file, opts.TargetDir, identity, chunkToBlobMap, blobCache, result); err != nil { if err := v.restoreFile(v.ctx, repos, file, opts.TargetDir, identity, chunkToBlobMap, blobCache, result); err != nil {
@@ -222,16 +165,31 @@ func (v *Vaultik) restoreAllFiles(
_ = bar.Finish() _ = bar.Finish()
} }
return result, nil result.Duration = time.Since(startTime)
}
// handleRestoreVerification runs post-restore verification if requested log.Info("Restore complete",
func (v *Vaultik) handleRestoreVerification( "files_restored", result.FilesRestored,
repos *database.Repositories, "bytes_restored", humanize.Bytes(uint64(result.BytesRestored)),
files []*database.File, "blobs_downloaded", result.BlobsDownloaded,
opts *RestoreOptions, "bytes_downloaded", humanize.Bytes(uint64(result.BytesDownloaded)),
result *RestoreResult, "duration", result.Duration,
) error { )
v.printfStdout("Restored %d files (%s) in %s\n",
result.FilesRestored,
humanize.Bytes(uint64(result.BytesRestored)),
result.Duration.Round(time.Second),
)
if result.FilesFailed > 0 {
_, _ = fmt.Fprintf(v.Stdout, "\nWARNING: %d file(s) failed to restore:\n", result.FilesFailed)
for _, path := range result.FailedFiles {
_, _ = fmt.Fprintf(v.Stdout, " - %s\n", path)
}
}
// Run verification if requested
if opts.Verify {
if err := v.verifyRestoredFiles(v.ctx, repos, files, opts.TargetDir, result); err != nil { if err := v.verifyRestoredFiles(v.ctx, repos, files, opts.TargetDir, result); err != nil {
return fmt.Errorf("verification failed: %w", err) return fmt.Errorf("verification failed: %w", err)
} }
@@ -248,6 +206,12 @@ func (v *Vaultik) handleRestoreVerification(
result.FilesVerified, result.FilesVerified,
humanize.Bytes(uint64(result.BytesVerified)), humanize.Bytes(uint64(result.BytesVerified)),
) )
}
if result.FilesFailed > 0 {
return fmt.Errorf("%d file(s) failed to restore", result.FilesFailed)
}
return nil return nil
} }
@@ -558,23 +522,11 @@ func (v *Vaultik) restoreRegularFile(
// downloadBlob downloads and decrypts a blob // downloadBlob downloads and decrypts a blob
func (v *Vaultik) downloadBlob(ctx context.Context, blobHash string, expectedSize int64, identity age.Identity) ([]byte, error) { func (v *Vaultik) downloadBlob(ctx context.Context, blobHash string, expectedSize int64, identity age.Identity) ([]byte, error) {
rc, err := v.FetchAndDecryptBlob(ctx, blobHash, expectedSize, identity) result, err := v.FetchAndDecryptBlob(ctx, blobHash, expectedSize, identity)
if err != nil { if err != nil {
return nil, err return nil, err
} }
return result.Data, nil
data, err := io.ReadAll(rc)
if err != nil {
_ = rc.Close()
return nil, fmt.Errorf("reading blob data: %w", err)
}
// Close triggers hash verification
if err := rc.Close(); err != nil {
return nil, err
}
return data, nil
} }
// verifyRestoredFiles verifies that all restored files match their expected chunk hashes // verifyRestoredFiles verifies that all restored files match their expected chunk hashes

File diff suppressed because it is too large Load Diff

View File

@@ -5,7 +5,6 @@ import (
"database/sql" "database/sql"
"encoding/hex" "encoding/hex"
"fmt" "fmt"
"hash"
"io" "io"
"os" "os"
"time" "time"
@@ -14,7 +13,7 @@ import (
"git.eeqj.de/sneak/vaultik/internal/snapshot" "git.eeqj.de/sneak/vaultik/internal/snapshot"
"github.com/dustin/go-humanize" "github.com/dustin/go-humanize"
"github.com/klauspost/compress/zstd" "github.com/klauspost/compress/zstd"
_ "modernc.org/sqlite" _ "github.com/mattn/go-sqlite3"
) )
// VerifyOptions contains options for the verify command // VerifyOptions contains options for the verify command
@@ -36,19 +35,6 @@ type VerifyResult struct {
ErrorMessage string `json:"error,omitempty"` ErrorMessage string `json:"error,omitempty"`
} }
// deepVerifyFailure marks result as failed with msg and chooses what the
// caller should return:
//   - in JSON mode, whatever outputVerifyJSON returns for the failed result;
//   - otherwise err when it is non-nil;
//   - otherwise a new error whose text is msg.
func (v *Vaultik) deepVerifyFailure(result *VerifyResult, opts *VerifyOptions, msg string, err error) error {
	result.Status, result.ErrorMessage = "failed", msg

	switch {
	case opts.JSON:
		return v.outputVerifyJSON(result)
	case err != nil:
		return err
	default:
		return fmt.Errorf("%s", msg)
	}
}
// RunDeepVerify executes deep verification operation // RunDeepVerify executes deep verification operation
func (v *Vaultik) RunDeepVerify(snapshotID string, opts *VerifyOptions) error { func (v *Vaultik) RunDeepVerify(snapshotID string, opts *VerifyOptions) error {
result := &VerifyResult{ result := &VerifyResult{
@@ -56,19 +42,89 @@ func (v *Vaultik) RunDeepVerify(snapshotID string, opts *VerifyOptions) error {
Mode: "deep", Mode: "deep",
} }
// Check for decryption capability
if !v.CanDecrypt() { if !v.CanDecrypt() {
msg := "VAULTIK_AGE_SECRET_KEY not set; required for deep verification" result.Status = "failed"
return v.deepVerifyFailure(result, opts, msg, fmt.Errorf("%s", msg)) result.ErrorMessage = "VAULTIK_AGE_SECRET_KEY environment variable not set - required for deep verification"
if opts.JSON {
return v.outputVerifyJSON(result)
}
return fmt.Errorf("VAULTIK_AGE_SECRET_KEY environment variable not set - required for deep verification")
} }
log.Info("Starting snapshot verification", "snapshot_id", snapshotID, "mode", "deep") log.Info("Starting snapshot verification",
"snapshot_id", snapshotID,
"mode", "deep",
)
if !opts.JSON { if !opts.JSON {
v.printfStdout("Deep verification of snapshot: %s\n\n", snapshotID) v.printfStdout("Deep verification of snapshot: %s\n\n", snapshotID)
} }
manifest, tempDB, dbBlobs, err := v.loadVerificationData(snapshotID, opts, result) // Step 1: Download manifest
manifestPath := fmt.Sprintf("metadata/%s/manifest.json.zst", snapshotID)
log.Info("Downloading manifest", "path", manifestPath)
if !opts.JSON {
v.printfStdout("Downloading manifest...\n")
}
manifestReader, err := v.Storage.Get(v.ctx, manifestPath)
if err != nil { if err != nil {
return err result.Status = "failed"
result.ErrorMessage = fmt.Sprintf("failed to download manifest: %v", err)
if opts.JSON {
return v.outputVerifyJSON(result)
}
return fmt.Errorf("failed to download manifest: %w", err)
}
defer func() { _ = manifestReader.Close() }()
// Decompress manifest
manifest, err := snapshot.DecodeManifest(manifestReader)
if err != nil {
result.Status = "failed"
result.ErrorMessage = fmt.Sprintf("failed to decode manifest: %v", err)
if opts.JSON {
return v.outputVerifyJSON(result)
}
return fmt.Errorf("failed to decode manifest: %w", err)
}
log.Info("Manifest loaded",
"manifest_blob_count", manifest.BlobCount,
"manifest_total_size", humanize.Bytes(uint64(manifest.TotalCompressedSize)),
)
if !opts.JSON {
v.printfStdout("Manifest loaded: %d blobs (%s)\n", manifest.BlobCount, humanize.Bytes(uint64(manifest.TotalCompressedSize)))
}
// Step 2: Download and decrypt database (authoritative source)
dbPath := fmt.Sprintf("metadata/%s/db.zst.age", snapshotID)
log.Info("Downloading encrypted database", "path", dbPath)
if !opts.JSON {
v.printfStdout("Downloading and decrypting database...\n")
}
dbReader, err := v.Storage.Get(v.ctx, dbPath)
if err != nil {
result.Status = "failed"
result.ErrorMessage = fmt.Sprintf("failed to download database: %v", err)
if opts.JSON {
return v.outputVerifyJSON(result)
}
return fmt.Errorf("failed to download database: %w", err)
}
defer func() { _ = dbReader.Close() }()
// Decrypt and decompress database
tempDB, err := v.decryptAndLoadDatabase(dbReader, v.Config.AgeSecretKey)
if err != nil {
result.Status = "failed"
result.ErrorMessage = fmt.Sprintf("failed to decrypt database: %v", err)
if opts.JSON {
return v.outputVerifyJSON(result)
}
return fmt.Errorf("failed to decrypt database: %w", err)
} }
defer func() { defer func() {
if tempDB != nil { if tempDB != nil {
@@ -76,6 +132,17 @@ func (v *Vaultik) RunDeepVerify(snapshotID string, opts *VerifyOptions) error {
} }
}() }()
// Step 3: Get authoritative blob list from database
dbBlobs, err := v.getBlobsFromDatabase(snapshotID, tempDB.DB)
if err != nil {
result.Status = "failed"
result.ErrorMessage = fmt.Sprintf("failed to get blobs from database: %v", err)
if opts.JSON {
return v.outputVerifyJSON(result)
}
return fmt.Errorf("failed to get blobs from database: %w", err)
}
result.BlobCount = len(dbBlobs) result.BlobCount = len(dbBlobs)
var totalSize int64 var totalSize int64
for _, blob := range dbBlobs { for _, blob := range dbBlobs {
@@ -83,10 +150,54 @@ func (v *Vaultik) RunDeepVerify(snapshotID string, opts *VerifyOptions) error {
} }
result.TotalSize = totalSize result.TotalSize = totalSize
if err := v.runVerificationSteps(manifest, dbBlobs, tempDB, opts, result, totalSize); err != nil { log.Info("Database loaded",
"db_blob_count", len(dbBlobs),
"db_total_size", humanize.Bytes(uint64(totalSize)),
)
if !opts.JSON {
v.printfStdout("Database loaded: %d blobs (%s)\n", len(dbBlobs), humanize.Bytes(uint64(totalSize)))
v.printfStdout("Verifying manifest against database...\n")
}
// Step 4: Verify manifest matches database
if err := v.verifyManifestAgainstDatabase(manifest, dbBlobs); err != nil {
result.Status = "failed"
result.ErrorMessage = err.Error()
if opts.JSON {
return v.outputVerifyJSON(result)
}
return err return err
} }
// Step 5: Verify all blobs exist in S3 (using database as source)
if !opts.JSON {
v.printfStdout("Manifest verified.\n")
v.printfStdout("Checking blob existence in remote storage...\n")
}
if err := v.verifyBlobExistenceFromDB(dbBlobs); err != nil {
result.Status = "failed"
result.ErrorMessage = err.Error()
if opts.JSON {
return v.outputVerifyJSON(result)
}
return err
}
// Step 6: Deep verification - download and verify blob contents
if !opts.JSON {
v.printfStdout("All blobs exist.\n")
v.printfStdout("Downloading and verifying blob contents (%d blobs, %s)...\n", len(dbBlobs), humanize.Bytes(uint64(totalSize)))
}
if err := v.performDeepVerificationFromDB(dbBlobs, tempDB.DB, opts); err != nil {
result.Status = "failed"
result.ErrorMessage = err.Error()
if opts.JSON {
return v.outputVerifyJSON(result)
}
return err
}
// Success
result.Status = "ok" result.Status = "ok"
result.Verified = len(dbBlobs) result.Verified = len(dbBlobs)
@@ -95,7 +206,11 @@ func (v *Vaultik) RunDeepVerify(snapshotID string, opts *VerifyOptions) error {
} }
log.Info("✓ Verification completed successfully", log.Info("✓ Verification completed successfully",
"snapshot_id", snapshotID, "mode", "deep", "blobs_verified", len(dbBlobs)) "snapshot_id", snapshotID,
"mode", "deep",
"blobs_verified", len(dbBlobs),
)
v.printfStdout("\n✓ Verification completed successfully\n") v.printfStdout("\n✓ Verification completed successfully\n")
v.printfStdout(" Snapshot: %s\n", snapshotID) v.printfStdout(" Snapshot: %s\n", snapshotID)
v.printfStdout(" Blobs verified: %d\n", len(dbBlobs)) v.printfStdout(" Blobs verified: %d\n", len(dbBlobs))
@@ -104,106 +219,6 @@ func (v *Vaultik) RunDeepVerify(snapshotID string, opts *VerifyOptions) error {
return nil return nil
} }
// loadVerificationData fetches everything deep verification needs for one
// snapshot: the manifest, the decrypted snapshot database, and the blob list
// read from that database.
//
// Steps, in order:
//  1. Download and decode metadata/<snapshotID>/manifest.json.zst.
//  2. Download metadata/<snapshotID>/db.zst.age and load it via
//     decryptAndLoadDatabase using v.Config.AgeSecretKey.
//  3. Read the snapshot's blob list with getBlobsFromDatabase.
//
// Progress lines are printed to stdout unless opts.JSON is set; log entries
// are emitted either way. Every failure is routed through deepVerifyFailure,
// which records it on result, so the returned error is whatever that helper
// returns.
//
// On success the returned *tempDB is still open. This function closes it only
// on the getBlobsFromDatabase failure path.
func (v *Vaultik) loadVerificationData(snapshotID string, opts *VerifyOptions, result *VerifyResult) (*snapshot.Manifest, *tempDB, []snapshot.BlobInfo, error) {
	// Download manifest
	manifestPath := fmt.Sprintf("metadata/%s/manifest.json.zst", snapshotID)
	log.Info("Downloading manifest", "path", manifestPath)
	if !opts.JSON {
		v.printfStdout("Downloading manifest...\n")
	}

	manifestReader, err := v.Storage.Get(v.ctx, manifestPath)
	if err != nil {
		// The message string goes into result; the %w-wrapped error is what a
		// non-JSON caller receives.
		return nil, nil, nil, v.deepVerifyFailure(result, opts,
			fmt.Sprintf("failed to download manifest: %v", err),
			fmt.Errorf("failed to download manifest: %w", err))
	}
	// Best-effort close; the reader is fully consumed by DecodeManifest below.
	defer func() { _ = manifestReader.Close() }()

	manifest, err := snapshot.DecodeManifest(manifestReader)
	if err != nil {
		return nil, nil, nil, v.deepVerifyFailure(result, opts,
			fmt.Sprintf("failed to decode manifest: %v", err),
			fmt.Errorf("failed to decode manifest: %w", err))
	}

	log.Info("Manifest loaded",
		"manifest_blob_count", manifest.BlobCount,
		"manifest_total_size", humanize.Bytes(uint64(manifest.TotalCompressedSize)))
	if !opts.JSON {
		v.printfStdout("Manifest loaded: %d blobs (%s)\n", manifest.BlobCount, humanize.Bytes(uint64(manifest.TotalCompressedSize)))
		v.printfStdout("Downloading and decrypting database...\n")
	}

	// Download and decrypt database
	dbPath := fmt.Sprintf("metadata/%s/db.zst.age", snapshotID)
	log.Info("Downloading encrypted database", "path", dbPath)
	dbReader, err := v.Storage.Get(v.ctx, dbPath)
	if err != nil {
		return nil, nil, nil, v.deepVerifyFailure(result, opts,
			fmt.Sprintf("failed to download database: %v", err),
			fmt.Errorf("failed to download database: %w", err))
	}
	defer func() { _ = dbReader.Close() }()

	tdb, err := v.decryptAndLoadDatabase(dbReader, v.Config.AgeSecretKey)
	if err != nil {
		return nil, nil, nil, v.deepVerifyFailure(result, opts,
			fmt.Sprintf("failed to decrypt database: %v", err),
			fmt.Errorf("failed to decrypt database: %w", err))
	}

	dbBlobs, err := v.getBlobsFromDatabase(snapshotID, tdb.DB)
	if err != nil {
		// tdb is not handed back on this path, so it must be closed here.
		_ = tdb.Close()
		return nil, nil, nil, v.deepVerifyFailure(result, opts,
			fmt.Sprintf("failed to get blobs from database: %v", err),
			fmt.Errorf("failed to get blobs from database: %w", err))
	}

	// Sum of compressed sizes; used only for the log and progress output below.
	var dbTotalSize int64
	for _, b := range dbBlobs {
		dbTotalSize += b.CompressedSize
	}

	log.Info("Database loaded",
		"db_blob_count", len(dbBlobs),
		"db_total_size", humanize.Bytes(uint64(dbTotalSize)))
	if !opts.JSON {
		v.printfStdout("Database loaded: %d blobs (%s)\n", len(dbBlobs), humanize.Bytes(uint64(dbTotalSize)))
	}

	return manifest, tdb, dbBlobs, nil
}
// runVerificationSteps runs the three verification stages in order and stops
// at the first failure:
//  1. manifest contents against the database blob list,
//  2. existence of every database blob in remote storage,
//  3. download and content verification of every blob.
//
// Progress lines are printed to stdout unless opts.JSON is set. A failing
// stage is reported through deepVerifyFailure, with the stage error's text as
// the recorded message. Nil is returned only when all three stages pass.
func (v *Vaultik) runVerificationSteps(manifest *snapshot.Manifest, dbBlobs []snapshot.BlobInfo, tdb *tempDB, opts *VerifyOptions, result *VerifyResult, totalSize int64) error {
	// Read once up front: every progress print happens before opts is handed
	// to the deep-verification stage.
	humanOutput := !opts.JSON

	// Stage 1: manifest vs. database.
	if humanOutput {
		v.printfStdout("Verifying manifest against database...\n")
	}
	manifestErr := v.verifyManifestAgainstDatabase(manifest, dbBlobs)
	if manifestErr != nil {
		return v.deepVerifyFailure(result, opts, manifestErr.Error(), manifestErr)
	}

	// Stage 2: blob existence in remote storage.
	if humanOutput {
		v.printfStdout("Manifest verified.\n")
		v.printfStdout("Checking blob existence in remote storage...\n")
	}
	existenceErr := v.verifyBlobExistenceFromDB(dbBlobs)
	if existenceErr != nil {
		return v.deepVerifyFailure(result, opts, existenceErr.Error(), existenceErr)
	}

	// Stage 3: download and verify blob contents.
	if humanOutput {
		v.printfStdout("All blobs exist.\n")
		v.printfStdout("Downloading and verifying blob contents (%d blobs, %s)...\n", len(dbBlobs), humanize.Bytes(uint64(totalSize)))
	}
	contentErr := v.performDeepVerificationFromDB(dbBlobs, tdb.DB, opts)
	if contentErr != nil {
		return v.deepVerifyFailure(result, opts, contentErr.Error(), contentErr)
	}

	return nil
}
// tempDB wraps sql.DB with cleanup // tempDB wraps sql.DB with cleanup
type tempDB struct { type tempDB struct {
*sql.DB *sql.DB
@@ -257,7 +272,7 @@ func (v *Vaultik) decryptAndLoadDatabase(reader io.ReadCloser, secretKey string)
log.Info("Database decompressed", "size", humanize.Bytes(uint64(written))) log.Info("Database decompressed", "size", humanize.Bytes(uint64(written)))
// Open the database // Open the database
db, err := sql.Open("sqlite", tempPath) db, err := sql.Open("sqlite3", tempPath)
if err != nil { if err != nil {
_ = os.Remove(tempPath) _ = os.Remove(tempPath)
return nil, fmt.Errorf("failed to open database: %w", err) return nil, fmt.Errorf("failed to open database: %w", err)
@@ -301,27 +316,7 @@ func (v *Vaultik) verifyBlob(blobInfo snapshot.BlobInfo, db *sql.DB) error {
} }
defer decompressor.Close() defer decompressor.Close()
chunkCount, err := v.verifyBlobChunks(db, blobInfo.Hash, decompressor) // Query blob chunks from database to get offsets and lengths
if err != nil {
return err
}
if err := v.verifyBlobFinalIntegrity(decompressor, blobHasher, blobInfo.Hash); err != nil {
return err
}
log.Info("Blob verified",
"hash", blobInfo.Hash[:16]+"...",
"chunks", chunkCount,
"size", humanize.Bytes(uint64(blobInfo.CompressedSize)),
)
return nil
}
// verifyBlobChunks queries blob chunks from the database and verifies each chunk's hash
// against the decompressed blob stream
func (v *Vaultik) verifyBlobChunks(db *sql.DB, blobHash string, decompressor io.Reader) (int, error) {
query := ` query := `
SELECT bc.chunk_hash, bc.offset, bc.length SELECT bc.chunk_hash, bc.offset, bc.length
FROM blob_chunks bc FROM blob_chunks bc
@@ -329,9 +324,9 @@ func (v *Vaultik) verifyBlobChunks(db *sql.DB, blobHash string, decompressor io.
WHERE b.blob_hash = ? WHERE b.blob_hash = ?
ORDER BY bc.offset ORDER BY bc.offset
` `
rows, err := db.QueryContext(v.ctx, query, blobHash) rows, err := db.QueryContext(v.ctx, query, blobInfo.Hash)
if err != nil { if err != nil {
return 0, fmt.Errorf("failed to query blob chunks: %w", err) return fmt.Errorf("failed to query blob chunks: %w", err)
} }
defer func() { _ = rows.Close() }() defer func() { _ = rows.Close() }()
@@ -344,12 +339,12 @@ func (v *Vaultik) verifyBlobChunks(db *sql.DB, blobHash string, decompressor io.
var chunkHash string var chunkHash string
var offset, length int64 var offset, length int64
if err := rows.Scan(&chunkHash, &offset, &length); err != nil { if err := rows.Scan(&chunkHash, &offset, &length); err != nil {
return 0, fmt.Errorf("failed to scan chunk row: %w", err) return fmt.Errorf("failed to scan chunk row: %w", err)
} }
// Verify chunk ordering // Verify chunk ordering
if offset <= lastOffset { if offset <= lastOffset {
return 0, fmt.Errorf("chunks out of order: offset %d after %d", offset, lastOffset) return fmt.Errorf("chunks out of order: offset %d after %d", offset, lastOffset)
} }
lastOffset = offset lastOffset = offset
@@ -358,7 +353,7 @@ func (v *Vaultik) verifyBlobChunks(db *sql.DB, blobHash string, decompressor io.
// Skip to the correct offset // Skip to the correct offset
skipBytes := offset - totalRead skipBytes := offset - totalRead
if _, err := io.CopyN(io.Discard, decompressor, skipBytes); err != nil { if _, err := io.CopyN(io.Discard, decompressor, skipBytes); err != nil {
return 0, fmt.Errorf("failed to skip to offset %d: %w", offset, err) return fmt.Errorf("failed to skip to offset %d: %w", offset, err)
} }
totalRead = offset totalRead = offset
} }
@@ -366,7 +361,7 @@ func (v *Vaultik) verifyBlobChunks(db *sql.DB, blobHash string, decompressor io.
// Read chunk data // Read chunk data
chunkData := make([]byte, length) chunkData := make([]byte, length)
if _, err := io.ReadFull(decompressor, chunkData); err != nil { if _, err := io.ReadFull(decompressor, chunkData); err != nil {
return 0, fmt.Errorf("failed to read chunk at offset %d: %w", offset, err) return fmt.Errorf("failed to read chunk at offset %d: %w", offset, err)
} }
totalRead += length totalRead += length
@@ -376,7 +371,7 @@ func (v *Vaultik) verifyBlobChunks(db *sql.DB, blobHash string, decompressor io.
calculatedHash := hex.EncodeToString(hasher.Sum(nil)) calculatedHash := hex.EncodeToString(hasher.Sum(nil))
if calculatedHash != chunkHash { if calculatedHash != chunkHash {
return 0, fmt.Errorf("chunk hash mismatch at offset %d: calculated %s, expected %s", return fmt.Errorf("chunk hash mismatch at offset %d: calculated %s, expected %s",
offset, calculatedHash, chunkHash) offset, calculatedHash, chunkHash)
} }
@@ -384,15 +379,9 @@ func (v *Vaultik) verifyBlobChunks(db *sql.DB, blobHash string, decompressor io.
} }
if err := rows.Err(); err != nil { if err := rows.Err(); err != nil {
return 0, fmt.Errorf("error iterating blob chunks: %w", err) return fmt.Errorf("error iterating blob chunks: %w", err)
} }
return chunkCount, nil
}
// verifyBlobFinalIntegrity checks that no trailing data exists in the decompressed stream
// and that the encrypted blob hash matches the expected value
func (v *Vaultik) verifyBlobFinalIntegrity(decompressor io.Reader, blobHasher hash.Hash, expectedHash string) error {
// Verify no remaining data in blob - if chunk list is accurate, blob should be fully consumed // Verify no remaining data in blob - if chunk list is accurate, blob should be fully consumed
remaining, err := io.Copy(io.Discard, decompressor) remaining, err := io.Copy(io.Discard, decompressor)
if err != nil { if err != nil {
@@ -404,11 +393,17 @@ func (v *Vaultik) verifyBlobFinalIntegrity(decompressor io.Reader, blobHasher ha
// Verify blob hash matches the encrypted data we downloaded // Verify blob hash matches the encrypted data we downloaded
calculatedBlobHash := hex.EncodeToString(blobHasher.Sum(nil)) calculatedBlobHash := hex.EncodeToString(blobHasher.Sum(nil))
if calculatedBlobHash != expectedHash { if calculatedBlobHash != blobInfo.Hash {
return fmt.Errorf("blob hash mismatch: calculated %s, expected %s", return fmt.Errorf("blob hash mismatch: calculated %s, expected %s",
calculatedBlobHash, expectedHash) calculatedBlobHash, blobInfo.Hash)
} }
log.Info("Blob verified",
"hash", blobInfo.Hash[:16]+"...",
"chunks", chunkCount,
"size", humanize.Bytes(uint64(blobInfo.CompressedSize)),
)
return nil return nil
} }

View File

@@ -20,6 +20,9 @@ s3:
region: us-east-1 region: us-east-1
use_ssl: true use_ssl: true
part_size: 5242880 # 5MB part_size: 5242880 # 5MB
backup_interval: 1h
full_scan_interval: 24h
min_time_between_run: 15m
index_path: /tmp/vaultik-test.sqlite index_path: /tmp/vaultik-test.sqlite
chunk_size: 10MB chunk_size: 10MB
blob_size_limit: 10GB blob_size_limit: 10GB

View File

@@ -17,6 +17,9 @@ s3:
region: us-east-1 region: us-east-1
use_ssl: false use_ssl: false
part_size: 5242880 # 5MB part_size: 5242880 # 5MB
backup_interval: 1h
full_scan_interval: 24h
min_time_between_run: 15m
index_path: /tmp/vaultik-integration-test.sqlite index_path: /tmp/vaultik-integration-test.sqlite
chunk_size: 10MB chunk_size: 10MB
blob_size_limit: 10GB blob_size_limit: 10GB