Compare commits
6 Commits
18c14d1507
...
20d3a9ac8c
| Author | SHA1 | Date | |
|---|---|---|---|
| 20d3a9ac8c | |||
| 0889cf2804 | |||
| f9ebb4bf25 | |||
| 9f2d722734 | |||
| 6821215b0e | |||
| f97a1dc2eb |
55
.goreleaser.yaml
Normal file
55
.goreleaser.yaml
Normal file
@@ -0,0 +1,55 @@
|
||||
version: 2
|
||||
|
||||
project_name: vaultik
|
||||
|
||||
before:
|
||||
hooks:
|
||||
- go mod tidy
|
||||
|
||||
builds:
|
||||
- id: vaultik
|
||||
main: ./cmd/vaultik
|
||||
binary: vaultik
|
||||
env:
|
||||
- CGO_ENABLED=0
|
||||
goos:
|
||||
- linux
|
||||
- darwin
|
||||
goarch:
|
||||
- amd64
|
||||
- arm64
|
||||
ldflags:
|
||||
- -s -w
|
||||
- -X 'git.eeqj.de/sneak/vaultik/internal/globals.Version={{ .Version }}'
|
||||
- -X 'git.eeqj.de/sneak/vaultik/internal/globals.Commit={{ .Commit }}'
|
||||
|
||||
archives:
|
||||
- id: default
|
||||
name_template: "{{ .ProjectName }}_{{ .Version }}_{{ .Os }}_{{ .Arch }}"
|
||||
formats:
|
||||
- tar.gz
|
||||
files:
|
||||
- LICENSE
|
||||
- README.md
|
||||
|
||||
checksum:
|
||||
name_template: "checksums.txt"
|
||||
algorithm: sha256
|
||||
|
||||
snapshot:
|
||||
version_template: "{{ incpatch .Version }}-next"
|
||||
|
||||
changelog:
|
||||
sort: asc
|
||||
use: git
|
||||
filters:
|
||||
exclude:
|
||||
- "^docs:"
|
||||
- "^test:"
|
||||
- "^chore:"
|
||||
- "Merge pull request"
|
||||
- "Merge branch"
|
||||
|
||||
release:
|
||||
draft: true
|
||||
prerelease: auto
|
||||
13
AGENTS.md
13
AGENTS.md
@@ -38,10 +38,9 @@ Version: 2025-06-08
|
||||
1. Before committing, tests must pass (`make test`), linting must pass
|
||||
(`make lint`), and code must be formatted (`make fmt`). For Go, those
|
||||
makefile targets should use `go fmt` and `go test -v ./...` and
|
||||
`golangci-lint run`. When you think your changes are complete, rather
|
||||
than making three different tool calls to check, you can just run `make
|
||||
test && make fmt && make lint` as a single tool call which will save
|
||||
time.
|
||||
`golangci-lint run`. Each Makefile target does exactly one thing — to
|
||||
run lint + fmt-check + test together (the standard pre-commit gate),
|
||||
use `make check`.
|
||||
|
||||
2. Always write a `Makefile` with the default target being `test`, and with
|
||||
a `fmt` target that formats the code. The `test` target should run all
|
||||
@@ -103,3 +102,9 @@ Version: 2025-06-08
|
||||
build files are acceptable in the root, but source code and other files
|
||||
should be organized in appropriate subdirectories.
|
||||
|
||||
13. Pre-1.0: NEVER write database migrations. There are no live databases
|
||||
anywhere — every user's local index can be rebuilt from a fresh full
|
||||
backup. When the schema changes, just change `schema.sql` (and any code
|
||||
that touches the affected tables). The local index is disposable until
|
||||
1.0 ships and is tagged.
|
||||
|
||||
|
||||
41
Makefile
41
Makefile
@@ -1,7 +1,7 @@
|
||||
.PHONY: test fmt lint fmt-check check build clean all docker hooks
|
||||
.PHONY: all check test lint fmt fmt-check build clean deps test-coverage test-integration local install release release-snapshot docker hooks
|
||||
|
||||
# Version number
|
||||
VERSION := 0.0.1
|
||||
VERSION := 1.0.0-rc.1
|
||||
|
||||
# Build variables
|
||||
GIT_REVISION := $(shell git rev-parse HEAD 2>/dev/null || echo "unknown")
|
||||
@@ -13,37 +13,45 @@ LDFLAGS := -X 'git.eeqj.de/sneak/vaultik/internal/globals.Version=$(VERSION)' \
|
||||
# Default target
|
||||
all: vaultik
|
||||
|
||||
# Run tests
|
||||
# Combined pre-commit/CI gate: lint, format check, then tests.
|
||||
check: lint fmt-check test
|
||||
|
||||
# Run tests only.
|
||||
test:
|
||||
go test -race -timeout 30s ./...
|
||||
|
||||
# Check if code is formatted (read-only)
|
||||
# Check if code is formatted (read-only).
|
||||
fmt-check:
|
||||
@test -z "$$(gofmt -l .)" || (echo "Files not formatted:" && gofmt -l . && exit 1)
|
||||
|
||||
# Format code
|
||||
# Format code.
|
||||
fmt:
|
||||
go fmt ./...
|
||||
|
||||
# Run linter
|
||||
# Run linter only.
|
||||
lint:
|
||||
golangci-lint run ./...
|
||||
|
||||
# Build binary
|
||||
# Build binary.
|
||||
vaultik: internal/*/*.go cmd/vaultik/*.go
|
||||
go build -ldflags "$(LDFLAGS)" -o $@ ./cmd/vaultik
|
||||
|
||||
# Clean build artifacts
|
||||
# Clean build artifacts.
|
||||
clean:
|
||||
rm -f vaultik
|
||||
go clean
|
||||
|
||||
# Run tests with coverage
|
||||
# Install dependencies.
|
||||
deps:
|
||||
go mod download
|
||||
go install github.com/golangci/golangci-lint/cmd/golangci-lint@latest
|
||||
|
||||
# Run tests with coverage.
|
||||
test-coverage:
|
||||
go test -v -coverprofile=coverage.out ./...
|
||||
go tool cover -html=coverage.out -o coverage.html
|
||||
|
||||
# Run integration tests
|
||||
# Run integration tests.
|
||||
test-integration:
|
||||
go test -v -tags=integration ./...
|
||||
|
||||
@@ -54,14 +62,19 @@ local:
|
||||
install: vaultik
|
||||
cp ./vaultik $(HOME)/bin/
|
||||
|
||||
# Run all checks (formatting, linting, tests) without modifying files
|
||||
check: fmt-check lint test
|
||||
# Build and publish release artifacts (linux/darwin × amd64/arm64) via goreleaser.
|
||||
release:
|
||||
goreleaser release --clean
|
||||
|
||||
# Build Docker image
|
||||
# Dry-run a release build without publishing or tagging.
|
||||
release-snapshot:
|
||||
goreleaser release --clean --snapshot
|
||||
|
||||
# Build Docker image.
|
||||
docker:
|
||||
docker build -t vaultik .
|
||||
|
||||
# Install pre-commit hook
|
||||
# Install pre-commit hook.
|
||||
hooks:
|
||||
@printf '#!/bin/sh\nset -e\n' > .git/hooks/pre-commit
|
||||
@printf 'go mod tidy\ngo fmt ./...\ngit diff --exit-code -- go.mod go.sum || { echo "go mod tidy changed files; please stage and retry"; exit 1; }\n' >> .git/hooks/pre-commit
|
||||
|
||||
556
PROCESS.md
556
PROCESS.md
@@ -1,556 +0,0 @@
|
||||
# Vaultik Snapshot Creation Process
|
||||
|
||||
This document describes the lifecycle of objects during snapshot creation, with a focus on database transactions and foreign key constraints.
|
||||
|
||||
## Database Schema Overview
|
||||
|
||||
### Tables and Foreign Key Dependencies
|
||||
|
||||
```
|
||||
┌─────────────────────────────────────────────────────────────────────────┐
|
||||
│ FOREIGN KEY GRAPH │
|
||||
│ │
|
||||
│ snapshots ◄────── snapshot_files ────────► files │
|
||||
│ │ │ │
|
||||
│ └───────── snapshot_blobs ────────► blobs │ │
|
||||
│ │ │ │
|
||||
│ │ ├──► file_chunks ◄── chunks│
|
||||
│ │ │ ▲ │
|
||||
│ │ └──► chunk_files ────┘ │
|
||||
│ │ │
|
||||
│ └──► blob_chunks ─────────────┘│
|
||||
│ │
|
||||
│ uploads ───────► blobs.blob_hash │
|
||||
│ └──────────► snapshots.id │
|
||||
└─────────────────────────────────────────────────────────────────────────┘
|
||||
```
|
||||
|
||||
### Critical Constraint: `chunks` Must Exist First
|
||||
|
||||
These tables reference `chunks.chunk_hash` **without CASCADE**:
|
||||
- `file_chunks.chunk_hash` → `chunks.chunk_hash`
|
||||
- `chunk_files.chunk_hash` → `chunks.chunk_hash`
|
||||
- `blob_chunks.chunk_hash` → `chunks.chunk_hash`
|
||||
|
||||
**Implication**: A chunk record MUST be committed to the database BEFORE any of these referencing records can be created.
|
||||
|
||||
### Order of Operations Required by Schema
|
||||
|
||||
```
|
||||
1. snapshots (created first, before scan)
|
||||
2. blobs (created when packer starts new blob)
|
||||
3. chunks (created during file processing)
|
||||
4. blob_chunks (created immediately after chunk added to packer)
|
||||
5. files (created after file fully chunked)
|
||||
6. file_chunks (created with file record)
|
||||
7. chunk_files (created with file record)
|
||||
8. snapshot_files (created with file record)
|
||||
9. snapshot_blobs (created after blob uploaded)
|
||||
10. uploads (created after blob uploaded)
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Snapshot Creation Phases
|
||||
|
||||
### Phase 0: Initialization
|
||||
|
||||
**Actions:**
|
||||
1. Snapshot record created in database (Transaction T0)
|
||||
2. Known files loaded into memory from `files` table
|
||||
3. Known chunks loaded into memory from `chunks` table
|
||||
|
||||
**Transactions:**
|
||||
```
|
||||
T0: INSERT INTO snapshots (id, hostname, ...) VALUES (...)
|
||||
COMMIT
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### Phase 1: Scan Directory
|
||||
|
||||
**Actions:**
|
||||
1. Walk filesystem directory tree
|
||||
2. For each file, compare against in-memory `knownFiles` map
|
||||
3. Classify files as: unchanged, new, or modified
|
||||
4. Collect unchanged file IDs for later association
|
||||
5. Collect new/modified files for processing
|
||||
|
||||
**Transactions:**
|
||||
```
|
||||
(None during scan - all in-memory)
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### Phase 1b: Associate Unchanged Files
|
||||
|
||||
**Actions:**
|
||||
1. For unchanged files, add entries to `snapshot_files` table
|
||||
2. Done in batches of 1000
|
||||
|
||||
**Transactions:**
|
||||
```
|
||||
For each batch of 1000 file IDs:
|
||||
T: BEGIN
|
||||
INSERT INTO snapshot_files (snapshot_id, file_id) VALUES (?, ?)
|
||||
... (up to 1000 inserts)
|
||||
COMMIT
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
### Phase 2: Process Files
|
||||
|
||||
For each file that needs processing:
|
||||
|
||||
#### Step 2a: Open and Chunk File
|
||||
|
||||
**Location:** `processFileStreaming()`
|
||||
|
||||
For each chunk produced by content-defined chunking:
|
||||
|
||||
##### Step 2a-1: Check Chunk Existence
|
||||
```go
|
||||
chunkExists := s.chunkExists(chunk.Hash) // In-memory lookup
|
||||
```
|
||||
|
||||
##### Step 2a-2: Create Chunk Record (if new)
|
||||
```go
|
||||
// TRANSACTION: Create chunk in database
|
||||
err := s.repos.WithTx(ctx, func(txCtx context.Context, tx *sql.Tx) error {
|
||||
dbChunk := &database.Chunk{ChunkHash: chunk.Hash, Size: chunk.Size}
|
||||
return s.repos.Chunks.Create(txCtx, tx, dbChunk)
|
||||
})
|
||||
// COMMIT immediately after WithTx returns
|
||||
|
||||
// Update in-memory cache
|
||||
s.addKnownChunk(chunk.Hash)
|
||||
```
|
||||
|
||||
**Transaction:**
|
||||
```
|
||||
T_chunk: BEGIN
|
||||
INSERT INTO chunks (chunk_hash, size) VALUES (?, ?)
|
||||
COMMIT
|
||||
```
|
||||
|
||||
##### Step 2a-3: Add Chunk to Packer
|
||||
|
||||
```go
|
||||
s.packer.AddChunk(&blob.ChunkRef{Hash: chunk.Hash, Data: chunk.Data})
|
||||
```
|
||||
|
||||
**Inside packer.AddChunk → addChunkToCurrentBlob():**
|
||||
|
||||
```go
|
||||
// TRANSACTION: Create blob_chunks record IMMEDIATELY
|
||||
if p.repos != nil {
|
||||
blobChunk := &database.BlobChunk{
|
||||
BlobID: p.currentBlob.id,
|
||||
ChunkHash: chunk.Hash,
|
||||
Offset: offset,
|
||||
Length: chunkSize,
|
||||
}
|
||||
err := p.repos.WithTx(context.Background(), func(ctx context.Context, tx *sql.Tx) error {
|
||||
return p.repos.BlobChunks.Create(ctx, tx, blobChunk)
|
||||
})
|
||||
// COMMIT immediately
|
||||
}
|
||||
```
|
||||
|
||||
**Transaction:**
|
||||
```
|
||||
T_blob_chunk: BEGIN
|
||||
INSERT INTO blob_chunks (blob_id, chunk_hash, offset, length) VALUES (?, ?, ?, ?)
|
||||
COMMIT
|
||||
```
|
||||
|
||||
**⚠️ CRITICAL DEPENDENCY**: This transaction requires `chunks.chunk_hash` to exist (FK constraint).
|
||||
The chunk MUST be committed in Step 2a-2 BEFORE this can succeed.
|
||||
|
||||
---
|
||||
|
||||
#### Step 2b: Blob Size Limit Handling
|
||||
|
||||
If adding a chunk would exceed blob size limit:
|
||||
|
||||
```go
|
||||
if err == blob.ErrBlobSizeLimitExceeded {
|
||||
if err := s.packer.FinalizeBlob(); err != nil { ... }
|
||||
// Retry adding the chunk
|
||||
if err := s.packer.AddChunk(...); err != nil { ... }
|
||||
}
|
||||
```
|
||||
|
||||
**FinalizeBlob() transactions:**
|
||||
```
|
||||
T_blob_finish: BEGIN
|
||||
UPDATE blobs SET blob_hash=?, uncompressed_size=?, compressed_size=?, finished_ts=? WHERE id=?
|
||||
COMMIT
|
||||
```
|
||||
|
||||
Then blob handler is called (handleBlobReady):
|
||||
```
|
||||
(Upload to S3 - no transaction)
|
||||
|
||||
T_blob_uploaded: BEGIN
|
||||
UPDATE blobs SET uploaded_ts=? WHERE id=?
|
||||
INSERT INTO snapshot_blobs (snapshot_id, blob_id, blob_hash) VALUES (?, ?, ?)
|
||||
INSERT INTO uploads (blob_hash, snapshot_id, uploaded_at, size, duration_ms) VALUES (?, ?, ?, ?, ?)
|
||||
COMMIT
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
#### Step 2c: Queue File for Batch Insertion
|
||||
|
||||
After all chunks for a file are processed:
|
||||
|
||||
```go
|
||||
// Build file data (in-memory, no DB)
|
||||
fileChunks := make([]database.FileChunk, len(chunks))
|
||||
chunkFiles := make([]database.ChunkFile, len(chunks))
|
||||
|
||||
// Queue for batch insertion
|
||||
return s.addPendingFile(ctx, pendingFileData{
|
||||
file: fileToProcess.File,
|
||||
fileChunks: fileChunks,
|
||||
chunkFiles: chunkFiles,
|
||||
})
|
||||
```
|
||||
|
||||
**No transaction yet** - just adds to `pendingFiles` slice.
|
||||
|
||||
If `len(pendingFiles) >= fileBatchSize (100)`, triggers `flushPendingFiles()`.
|
||||
|
||||
---
|
||||
|
||||
#### Step 2d: Flush Pending Files
|
||||
|
||||
**Location:** `flushPendingFiles()` - called when batch is full or at end of processing
|
||||
|
||||
```go
|
||||
return s.repos.WithTx(ctx, func(txCtx context.Context, tx *sql.Tx) error {
|
||||
for _, data := range files {
|
||||
// 1. Create file record
|
||||
s.repos.Files.Create(txCtx, tx, data.file) // INSERT OR REPLACE
|
||||
|
||||
// 2. Delete old associations
|
||||
s.repos.FileChunks.DeleteByFileID(txCtx, tx, data.file.ID)
|
||||
s.repos.ChunkFiles.DeleteByFileID(txCtx, tx, data.file.ID)
|
||||
|
||||
// 3. Create file_chunks records
|
||||
for _, fc := range data.fileChunks {
|
||||
s.repos.FileChunks.Create(txCtx, tx, &fc) // FK: chunks.chunk_hash
|
||||
}
|
||||
|
||||
// 4. Create chunk_files records
|
||||
for _, cf := range data.chunkFiles {
|
||||
s.repos.ChunkFiles.Create(txCtx, tx, &cf) // FK: chunks.chunk_hash
|
||||
}
|
||||
|
||||
// 5. Add file to snapshot
|
||||
s.repos.Snapshots.AddFileByID(txCtx, tx, s.snapshotID, data.file.ID)
|
||||
}
|
||||
return nil
|
||||
})
|
||||
// COMMIT (all or nothing for the batch)
|
||||
```
|
||||
|
||||
**Transaction:**
|
||||
```
|
||||
T_files_batch: BEGIN
|
||||
-- For each file in batch:
|
||||
INSERT OR REPLACE INTO files (...) VALUES (...)
|
||||
DELETE FROM file_chunks WHERE file_id = ?
|
||||
DELETE FROM chunk_files WHERE file_id = ?
|
||||
INSERT INTO file_chunks (file_id, idx, chunk_hash) VALUES (?, ?, ?) -- FK: chunks
|
||||
INSERT INTO chunk_files (chunk_hash, file_id, ...) VALUES (?, ?, ...) -- FK: chunks
|
||||
INSERT INTO snapshot_files (snapshot_id, file_id) VALUES (?, ?)
|
||||
-- Repeat for each file
|
||||
COMMIT
|
||||
```
|
||||
|
||||
**⚠️ CRITICAL DEPENDENCY**: `file_chunks` and `chunk_files` require `chunks.chunk_hash` to exist.
|
||||
|
||||
---
|
||||
|
||||
### Phase 2 End: Final Flush
|
||||
|
||||
```go
|
||||
// Flush any remaining pending files
|
||||
if err := s.flushAllPending(ctx); err != nil { ... }
|
||||
|
||||
// Final packer flush
|
||||
s.packer.Flush()
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## The Current Bug
|
||||
|
||||
### Problem
|
||||
|
||||
The current code attempts to batch file insertions, but `file_chunks` and `chunk_files` have foreign keys to `chunks.chunk_hash`. The batched file flush tries to insert these records, but if the chunks haven't been committed yet, the FK constraint fails.
|
||||
|
||||
### Why It's Happening
|
||||
|
||||
Looking at the sequence:
|
||||
|
||||
1. Process file A, chunk X
|
||||
2. Create chunk X in DB (Transaction commits)
|
||||
3. Add chunk X to packer
|
||||
4. Packer creates blob_chunks for chunk X (needs chunk X - OK, committed in step 2)
|
||||
5. Queue file A with chunk references
|
||||
6. Process file B, chunk Y
|
||||
7. Create chunk Y in DB (Transaction commits)
|
||||
8. ... etc ...
|
||||
9. At end: flushPendingFiles()
|
||||
10. Insert file_chunks for file A referencing chunk X (chunk X committed - should work)
|
||||
|
||||
The chunks ARE being created individually. But something is going wrong.
|
||||
|
||||
### Actual Issue
|
||||
|
||||
Wait - let me re-read the code. The issue is:
|
||||
|
||||
In `processFileStreaming`, when we queue file data:
|
||||
```go
|
||||
fileChunks[i] = database.FileChunk{
|
||||
FileID: fileToProcess.File.ID,
|
||||
Idx: ci.fileChunk.Idx,
|
||||
ChunkHash: ci.fileChunk.ChunkHash,
|
||||
}
|
||||
```
|
||||
|
||||
The `FileID` is set, but `fileToProcess.File.ID` might be empty at this point because the file record hasn't been created yet!
|
||||
|
||||
Looking at `checkFileInMemory`:
|
||||
```go
|
||||
// For new files:
|
||||
if !exists {
|
||||
return file, true // file.ID is empty string!
|
||||
}
|
||||
|
||||
// For existing files:
|
||||
file.ID = existingFile.ID // Reuse existing ID
|
||||
```
|
||||
|
||||
**For NEW files, `file.ID` is empty!**
|
||||
|
||||
Then in `flushPendingFiles`:
|
||||
```go
|
||||
s.repos.Files.Create(txCtx, tx, data.file) // This generates/uses the ID
|
||||
```
|
||||
|
||||
But `data.fileChunks` was built with the EMPTY ID!
|
||||
|
||||
### The Real Problem
|
||||
|
||||
For new files:
|
||||
1. `checkFileInMemory` creates file record with empty ID
|
||||
2. `processFileStreaming` queues file_chunks with empty `FileID`
|
||||
3. `flushPendingFiles` creates file (generates ID), but file_chunks still have empty `FileID`
|
||||
|
||||
Wait, but `Files.Create` should be INSERT OR REPLACE by path, and the file struct should get updated... Let me check.
|
||||
|
||||
Actually, looking more carefully at the code path - the file IS created first in the flush, but the `fileChunks` slice was already built with the old (possibly empty) ID. The ID isn't updated after the file is created.
|
||||
|
||||
Hmm, but looking at the current code:
|
||||
```go
|
||||
fileChunks[i] = database.FileChunk{
|
||||
FileID: fileToProcess.File.ID, // This uses the ID from the File struct
|
||||
```
|
||||
|
||||
And in `checkFileInMemory` for new files, we create a file struct but don't set the ID. However, looking at the database repository, `Files.Create` should be doing `INSERT OR REPLACE` and the ID should be pre-generated...
|
||||
|
||||
Let me check if IDs are being generated. Looking at the File struct usage, it seems like UUIDs should be generated somewhere...
|
||||
|
||||
Actually, looking at the test failures again:
|
||||
```
|
||||
creating file chunk: inserting file_chunk: constraint failed: FOREIGN KEY constraint failed (787)
|
||||
```
|
||||
|
||||
Error 787 is SQLite's foreign key constraint error. The failing FK is on `file_chunks.chunk_hash → chunks.chunk_hash`.
|
||||
|
||||
So the chunks ARE NOT in the database when we try to insert file_chunks. Let me trace through more carefully...
|
||||
|
||||
---
|
||||
|
||||
## Transaction Timing Issue
|
||||
|
||||
The problem is transaction visibility in SQLite.
|
||||
|
||||
Each `WithTx` creates a new transaction that commits at the end. But with batched file insertion:
|
||||
|
||||
1. Chunk transactions commit one at a time
|
||||
2. File batch transaction runs later
|
||||
|
||||
If chunks are being inserted but something goes wrong with transaction isolation, the file batch might not see them.
|
||||
|
||||
But actually SQLite in WAL mode should have SERIALIZABLE isolation by default, so committed transactions should be visible.
|
||||
|
||||
Let me check if the in-memory cache is masking a database problem...
|
||||
|
||||
Actually, wait. Let me re-check the current broken code more carefully. The issue might be simpler.
|
||||
|
||||
---
|
||||
|
||||
## Current Code Flow Analysis
|
||||
|
||||
Looking at `processFileStreaming` in the current broken state:
|
||||
|
||||
```go
|
||||
// For each chunk:
|
||||
if !chunkExists {
|
||||
err := s.repos.WithTx(ctx, func(txCtx context.Context, tx *sql.Tx) error {
|
||||
dbChunk := &database.Chunk{ChunkHash: chunk.Hash, Size: chunk.Size}
|
||||
return s.repos.Chunks.Create(txCtx, tx, dbChunk)
|
||||
})
|
||||
// ... check error ...
|
||||
s.addKnownChunk(chunk.Hash)
|
||||
}
|
||||
|
||||
// ... add to packer (creates blob_chunks) ...
|
||||
|
||||
// Collect chunk info for file
|
||||
chunks = append(chunks, chunkInfo{...})
|
||||
```
|
||||
|
||||
Then at end of function:
|
||||
```go
|
||||
// Queue file for batch insertion
|
||||
return s.addPendingFile(ctx, pendingFileData{
|
||||
file: fileToProcess.File,
|
||||
fileChunks: fileChunks,
|
||||
chunkFiles: chunkFiles,
|
||||
})
|
||||
```
|
||||
|
||||
At end of `processPhase`:
|
||||
```go
|
||||
if err := s.flushAllPending(ctx); err != nil { ... }
|
||||
```
|
||||
|
||||
The chunks are being created one-by-one with individual transactions. By the time `flushPendingFiles` runs, all chunk transactions should have committed.
|
||||
|
||||
Unless... there's a bug in how the chunks are being referenced. Let me check if the chunk_hash values are correct.
|
||||
|
||||
Or... maybe the test database is being recreated between operations somehow?
|
||||
|
||||
Actually, let me check the test setup. Maybe the issue is specific to the test environment.
|
||||
|
||||
---
|
||||
|
||||
## Summary of Object Lifecycle
|
||||
|
||||
| Object | When Created | Transaction | Dependencies |
|
||||
|--------|--------------|-------------|--------------|
|
||||
| snapshot | Before scan | Individual tx | None |
|
||||
| blob | When packer needs new blob | Individual tx | None |
|
||||
| chunk | During file chunking (each chunk) | Individual tx | None |
|
||||
| blob_chunks | Immediately after adding chunk to packer | Individual tx | chunks, blobs |
|
||||
| files | Batched at end of processing | Batch tx | None |
|
||||
| file_chunks | With file (batched) | Batch tx | files, chunks |
|
||||
| chunk_files | With file (batched) | Batch tx | files, chunks |
|
||||
| snapshot_files | With file (batched) | Batch tx | snapshots, files |
|
||||
| snapshot_blobs | After blob upload | Individual tx | snapshots, blobs |
|
||||
| uploads | After blob upload | Same tx as snapshot_blobs | blobs, snapshots |
|
||||
|
||||
---
|
||||
|
||||
## Root Cause Analysis
|
||||
|
||||
After detailed analysis, I believe the issue is one of the following:
|
||||
|
||||
### Hypothesis 1: File ID Not Set
|
||||
|
||||
Looking at `checkFileInMemory()` for NEW files:
|
||||
```go
|
||||
if !exists {
|
||||
return file, true // file.ID is empty string!
|
||||
}
|
||||
```
|
||||
|
||||
For new files, `file.ID` is empty. Then in `processFileStreaming`:
|
||||
```go
|
||||
fileChunks[i] = database.FileChunk{
|
||||
FileID: fileToProcess.File.ID, // Empty for new files!
|
||||
...
|
||||
}
|
||||
```
|
||||
|
||||
The `FileID` in the built `fileChunks` slice is empty.
|
||||
|
||||
Then in `flushPendingFiles`:
|
||||
```go
|
||||
s.repos.Files.Create(txCtx, tx, data.file) // This generates the ID
|
||||
// But data.fileChunks still has empty FileID!
|
||||
for i := range data.fileChunks {
|
||||
s.repos.FileChunks.Create(...) // Uses empty FileID
|
||||
}
|
||||
```
|
||||
|
||||
**Solution**: Generate file IDs upfront in `checkFileInMemory()`:
|
||||
```go
|
||||
file := &database.File{
|
||||
ID: uuid.New().String(), // Generate ID immediately
|
||||
Path: path,
|
||||
...
|
||||
}
|
||||
```
|
||||
|
||||
### Hypothesis 2: Transaction Isolation
|
||||
|
||||
SQLite with a single-connection pool (`SetMaxOpenConns(1)`) should serialize all transactions. Committed data should be visible to subsequent transactions.
|
||||
|
||||
However, there might be a subtle issue with how `context.Background()` is used in the packer vs the scanner's context.
|
||||
|
||||
## Recommended Fix
|
||||
|
||||
**Step 1: Generate file IDs upfront**
|
||||
|
||||
In `checkFileInMemory()`, generate the UUID for new files immediately:
|
||||
```go
|
||||
file := &database.File{
|
||||
ID: uuid.New().String(), // Always generate ID
|
||||
Path: path,
|
||||
...
|
||||
}
|
||||
```
|
||||
|
||||
This ensures `file.ID` is set when building `fileChunks` and `chunkFiles` slices.
|
||||
|
||||
**Step 2: Verify by reverting to per-file transactions**
|
||||
|
||||
If Step 1 doesn't fix it, revert to non-batched file insertion to isolate the issue:
|
||||
|
||||
```go
|
||||
// Instead of queuing:
|
||||
// return s.addPendingFile(ctx, pendingFileData{...})
|
||||
|
||||
// Do immediate insertion:
|
||||
return s.repos.WithTx(ctx, func(txCtx context.Context, tx *sql.Tx) error {
|
||||
// Create file
|
||||
s.repos.Files.Create(txCtx, tx, fileToProcess.File)
|
||||
// Delete old associations
|
||||
s.repos.FileChunks.DeleteByFileID(...)
|
||||
s.repos.ChunkFiles.DeleteByFileID(...)
|
||||
// Create new associations
|
||||
for _, fc := range fileChunks {
|
||||
s.repos.FileChunks.Create(...)
|
||||
}
|
||||
for _, cf := range chunkFiles {
|
||||
s.repos.ChunkFiles.Create(...)
|
||||
}
|
||||
// Add to snapshot
|
||||
s.repos.Snapshots.AddFileByID(...)
|
||||
return nil
|
||||
})
|
||||
```
|
||||
|
||||
**Step 3: If batching is still desired**
|
||||
|
||||
After confirming per-file transactions work, re-implement batching with the ID fix in place, and add debug logging to trace exactly which chunk_hash is failing and why.
|
||||
33
README.md
33
README.md
@@ -2,7 +2,7 @@
|
||||
|
||||
WIP: pre-1.0, some functions may not be fully implemented yet
|
||||
|
||||
`vaultik` is an incremental backup daemon written in Go. It encrypts data
|
||||
`vaultik` is an incremental backup tool written in Go. It encrypts data
|
||||
using an `age` public key and uploads each encrypted blob directly to a
|
||||
remote S3-compatible object store. It requires no private keys, secrets, or
|
||||
credentials (other than those required to PUT to encrypted object storage,
|
||||
@@ -120,9 +120,6 @@ passphrase is needed or stored locally.
|
||||
access_key_id: ...
|
||||
secret_access_key: ...
|
||||
region: us-east-1
|
||||
backup_interval: 1h
|
||||
full_scan_interval: 24h
|
||||
min_time_between_run: 15m
|
||||
chunk_size: 10MB
|
||||
blob_size_limit: 1GB
|
||||
```
|
||||
@@ -147,16 +144,19 @@ passphrase is needed or stored locally.
|
||||
### commands
|
||||
|
||||
```sh
|
||||
vaultik [--config <path>] snapshot create [snapshot-names...] [--cron] [--daemon] [--prune]
|
||||
vaultik [--config <path>] snapshot create [snapshot-names...] [--cron] [--prune] [--skip-errors]
|
||||
vaultik [--config <path>] snapshot list [--json]
|
||||
vaultik [--config <path>] snapshot verify <snapshot-id> [--deep]
|
||||
vaultik [--config <path>] snapshot purge [--keep-latest | --older-than <duration>] [--name <name>] [--force]
|
||||
vaultik [--config <path>] snapshot remove <snapshot-id> [--dry-run] [--force]
|
||||
vaultik [--config <path>] snapshot verify <snapshot-id> [--deep] [--json]
|
||||
vaultik [--config <path>] snapshot purge [--keep-latest | --older-than <duration>] [--snapshot <name>...] [--force]
|
||||
vaultik [--config <path>] snapshot remove <snapshot-id|--all> [--dry-run] [--force] [--remote] [--json]
|
||||
vaultik [--config <path>] snapshot prune
|
||||
vaultik [--config <path>] restore <snapshot-id> <target-dir> [paths...]
|
||||
vaultik [--config <path>] prune [--dry-run] [--force]
|
||||
vaultik [--config <path>] restore <snapshot-id> <target-dir> [paths...] [--verify]
|
||||
vaultik [--config <path>] prune [--force] [--json]
|
||||
vaultik [--config <path>] info
|
||||
vaultik [--config <path>] remote info [--json]
|
||||
vaultik [--config <path>] store info
|
||||
vaultik [--config <path>] database purge [--force]
|
||||
vaultik version
|
||||
```
|
||||
|
||||
### environment
|
||||
@@ -170,8 +170,9 @@ vaultik [--config <path>] store info
|
||||
* Config is located at `/etc/vaultik/config.yml` by default
|
||||
* Optional snapshot names argument to create specific snapshots (default: all)
|
||||
* `--cron`: Silent unless error (for crontab)
|
||||
* `--daemon`: Run continuously with inotify monitoring and periodic scans
|
||||
* `--prune`: Delete old snapshots and orphaned blobs after backup
|
||||
* `--prune`: After backup, drop older snapshots of each backed-up name (keeping
|
||||
only the latest) and remove orphaned blobs from remote storage
|
||||
* `--skip-errors`: Skip file read errors (log them loudly but continue)
|
||||
|
||||
**snapshot list**: List all snapshots with their timestamps and sizes
|
||||
* `--json`: Output in JSON format
|
||||
@@ -179,10 +180,12 @@ vaultik [--config <path>] store info
|
||||
**snapshot verify**: Verify snapshot integrity
|
||||
* `--deep`: Download and verify blob contents (not just existence)
|
||||
|
||||
**snapshot purge**: Remove old snapshots based on criteria
|
||||
* `--keep-latest`: Keep the most recent snapshot per snapshot name
|
||||
**snapshot purge**: Remove old snapshots based on criteria. Retention is
|
||||
applied per-snapshot-name (e.g. `--keep-latest` keeps the latest of each
|
||||
configured name, not the latest globally).
|
||||
* `--keep-latest`: Keep only the most recent snapshot of each name
|
||||
* `--older-than`: Remove snapshots older than duration (e.g., 30d, 6mo, 1y)
|
||||
* `--name`: Filter purge to a specific snapshot name
|
||||
* `--snapshot <name>`: Restrict to specific snapshot names (repeat for multiple)
|
||||
* `--force`: Skip confirmation prompt
|
||||
|
||||
**snapshot remove**: Remove a specific snapshot
|
||||
|
||||
23
TODO.md
23
TODO.md
@@ -103,26 +103,3 @@ User must have rclone configured separately (via `rclone config`).
|
||||
- Ensure consistent code style
|
||||
|
||||
1. Tag and release v1.0.0
|
||||
|
||||
---
|
||||
|
||||
## Post-1.0 (Daemon Mode)
|
||||
|
||||
1. Implement inotify file watcher for Linux
|
||||
- Watch source directories for changes
|
||||
- Track dirty paths in memory
|
||||
|
||||
1. Implement FSEvents watcher for macOS
|
||||
- Watch source directories for changes
|
||||
- Track dirty paths in memory
|
||||
|
||||
1. Implement backup scheduler in daemon mode
|
||||
- Respect backup_interval config
|
||||
- Trigger backup when dirty paths exist and interval elapsed
|
||||
- Implement full_scan_interval for periodic full scans
|
||||
|
||||
1. Add proper signal handling for daemon
|
||||
- Graceful shutdown on SIGTERM/SIGINT
|
||||
- Complete in-progress backup before exit
|
||||
|
||||
1. Write tests for daemon mode
|
||||
|
||||
@@ -291,21 +291,6 @@ storage_url: "rclone://las1stor1//srv/pool.2024.04/backups/heraklion"
|
||||
# # Default: 5MB
|
||||
# #part_size: 5MB
|
||||
|
||||
# How often to run backups in daemon mode
|
||||
# Format: 1h, 30m, 24h, etc
|
||||
# Default: 1h
|
||||
#backup_interval: 1h
|
||||
|
||||
# How often to do a full filesystem scan in daemon mode
|
||||
# Between full scans, inotify is used to detect changes
|
||||
# Default: 24h
|
||||
#full_scan_interval: 24h
|
||||
|
||||
# Minimum time between backup runs in daemon mode
|
||||
# Prevents backups from running too frequently
|
||||
# Default: 15m
|
||||
#min_time_between_run: 15m
|
||||
|
||||
# Path to local SQLite index database
|
||||
# This database tracks file state for incremental backups
|
||||
# Default: /var/lib/vaultik/index.sqlite
|
||||
|
||||
@@ -5,8 +5,14 @@
|
||||
Vaultik uses a local SQLite database to track file metadata, chunk mappings, and blob associations during the backup process. This database serves as an index for incremental backups and enables efficient deduplication.
|
||||
|
||||
**Important Notes:**
|
||||
- **No Migration Support**: Vaultik does not support database schema migrations. If the schema changes, the local database must be deleted and recreated by performing a full backup.
|
||||
- **Version Compatibility**: In rare cases, you may need to use the same version of Vaultik to restore a backup as was used to create it. This ensures compatibility with the metadata format stored in S3.
|
||||
- **No Migration Support (pre-1.0)**: Vaultik does not support database schema
|
||||
migrations. The local index is treated as disposable — if the schema changes,
|
||||
delete the local SQLite database (`vaultik database purge`) and run a full
|
||||
backup. The remote storage is unaffected; the new index will re-deduplicate
|
||||
against existing remote blobs.
|
||||
- **Version Compatibility**: In rare cases, you may need to use the same version
|
||||
of Vaultik to restore a backup as was used to create it. This ensures
|
||||
compatibility with the metadata format stored in S3.
|
||||
|
||||
## Database Tables
|
||||
|
||||
|
||||
@@ -43,18 +43,19 @@ Blobs contain the actual file data from backups and must be encrypted for securi
|
||||
Each snapshot has its own subdirectory named with the snapshot ID.
|
||||
|
||||
### Snapshot ID Format
|
||||
- **Format**: `<hostname>-<YYYYMMDD>-<HHMMSSZ>`
|
||||
- **Example**: `laptop-20240115-143052Z`
|
||||
- **Format**: `<hostname>_<snapshot-name>_<RFC3339>` (or `<hostname>_<RFC3339>` if no
|
||||
name was specified)
|
||||
- **Example**: `laptop_home_2024-01-15T14:30:52Z`
|
||||
- **Components**:
|
||||
- Hostname (may contain hyphens)
|
||||
- Date in YYYYMMDD format
|
||||
- Time in HHMMSSZ format (Z indicates UTC)
|
||||
- Short hostname (the part of the FQDN before the first dot; the domain suffix is stripped)
|
||||
- Snapshot name from the configured `snapshots:` map (optional)
|
||||
- RFC3339 UTC timestamp
|
||||
|
||||
### Files in Each Snapshot Directory
|
||||
|
||||
#### `db.zst.age` - Encrypted Database Dump
|
||||
- **What it contains**: Complete SQLite database dump for this snapshot
|
||||
- **Format**: SQL dump → Zstandard compressed → Age encrypted
|
||||
#### `db.zst.age` - Encrypted Database
|
||||
- **What it contains**: Pruned binary SQLite database for this snapshot
|
||||
- **Format**: Binary SQLite → Zstandard compressed → Age encrypted
|
||||
- **Encryption**: Encrypted with Age
|
||||
- **Purpose**: Contains full file metadata, chunk mappings, and all relationships
|
||||
- **Why encrypted**: Contains sensitive metadata like file paths, permissions, and ownership
|
||||
@@ -67,7 +68,7 @@ Each snapshot has its own subdirectory named with the snapshot ID.
|
||||
- **Structure**:
|
||||
```json
|
||||
{
|
||||
"snapshot_id": "laptop-20240115-143052Z",
|
||||
"snapshot_id": "laptop_home_2024-01-15T14:30:52Z",
|
||||
"timestamp": "2024-01-15T14:30:52Z",
|
||||
"blob_count": 42,
|
||||
"blobs": [
|
||||
|
||||
1
go.mod
1
go.mod
@@ -17,7 +17,6 @@ require (
|
||||
github.com/google/uuid v1.6.0
|
||||
github.com/johannesboyne/gofakes3 v0.0.0-20250603205740-ed9094be7668
|
||||
github.com/klauspost/compress v1.18.1
|
||||
github.com/mattn/go-sqlite3 v1.14.29
|
||||
github.com/rclone/rclone v1.72.1
|
||||
github.com/schollz/progressbar/v3 v3.19.0
|
||||
github.com/spf13/afero v1.15.0
|
||||
|
||||
2
go.sum
2
go.sum
@@ -593,8 +593,6 @@ github.com/mattn/go-isatty v0.0.20/go.mod h1:W+V8PltTTMOvKvAeJH7IuucS94S2C6jfK/D
|
||||
github.com/mattn/go-runewidth v0.0.3/go.mod h1:LwmH8dsx7+W8Uxz3IHJYH5QSwggIsqBzpuz5H//U1FU=
|
||||
github.com/mattn/go-runewidth v0.0.19 h1:v++JhqYnZuu5jSKrk9RbgF5v4CGUjqRfBm05byFGLdw=
|
||||
github.com/mattn/go-runewidth v0.0.19/go.mod h1:XBkDxAl56ILZc9knddidhrOlY5R/pDhgLpndooCuJAs=
|
||||
github.com/mattn/go-sqlite3 v1.14.29 h1:1O6nRLJKvsi1H2Sj0Hzdfojwt8GiGKm+LOfLaBFaouQ=
|
||||
github.com/mattn/go-sqlite3 v1.14.29/go.mod h1:Uh1q+B4BYcTPb+yiD3kU8Ct7aC0hY9fxUwlHK0RXw+Y=
|
||||
github.com/matttproud/golang_protobuf_extensions v1.0.1/go.mod h1:D8He9yQNgCq6Z5Ld7szi9bcBfOoFv/3dc6xSMkL2PC0=
|
||||
github.com/miekg/dns v1.1.26/go.mod h1:bPDLeHnStXmXAq1m/Ch/hvfNHr14JKNPMBo3VZKjuso=
|
||||
github.com/miekg/dns v1.1.41 h1:WMszZWJG0XmzbK9FEmzH2TVcqYzFesusSIB41b8KHxY=
|
||||
|
||||
@@ -18,7 +18,7 @@ func TestCLIEntry(t *testing.T) {
|
||||
}
|
||||
|
||||
// Verify all subcommands are registered
|
||||
expectedCommands := []string{"snapshot", "store", "restore", "prune", "verify", "info", "version"}
|
||||
expectedCommands := []string{"snapshot", "store", "restore", "prune", "info", "version", "remote", "database"}
|
||||
for _, expected := range expectedCommands {
|
||||
found := false
|
||||
for _, cmd := range cmd.Commands() {
|
||||
|
||||
@@ -1,101 +0,0 @@
|
||||
package cli
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"os"
|
||||
|
||||
"git.eeqj.de/sneak/vaultik/internal/log"
|
||||
"git.eeqj.de/sneak/vaultik/internal/vaultik"
|
||||
"github.com/spf13/cobra"
|
||||
"go.uber.org/fx"
|
||||
)
|
||||
|
||||
// NewPurgeCommand creates the purge command
|
||||
func NewPurgeCommand() *cobra.Command {
|
||||
opts := &vaultik.SnapshotPurgeOptions{}
|
||||
|
||||
cmd := &cobra.Command{
|
||||
Use: "purge",
|
||||
Short: "Purge old snapshots",
|
||||
Long: `Removes snapshots based on age or count criteria.
|
||||
|
||||
This command allows you to:
|
||||
- Keep only the latest snapshot per name (--keep-latest)
|
||||
- Remove snapshots older than a specific duration (--older-than)
|
||||
- Filter to a specific snapshot name (--name)
|
||||
|
||||
When --keep-latest is used, retention is applied per snapshot name. For example,
|
||||
if you have snapshots named "home" and "system", --keep-latest keeps the most
|
||||
recent of each.
|
||||
|
||||
Use --name to restrict the purge to a single snapshot name.
|
||||
|
||||
Config is located at /etc/vaultik/config.yml by default, but can be overridden by
|
||||
specifying a path using --config or by setting VAULTIK_CONFIG to a path.`,
|
||||
Args: cobra.NoArgs,
|
||||
RunE: func(cmd *cobra.Command, args []string) error {
|
||||
// Validate flags
|
||||
if !opts.KeepLatest && opts.OlderThan == "" {
|
||||
return fmt.Errorf("must specify either --keep-latest or --older-than")
|
||||
}
|
||||
if opts.KeepLatest && opts.OlderThan != "" {
|
||||
return fmt.Errorf("cannot specify both --keep-latest and --older-than")
|
||||
}
|
||||
|
||||
// Use unified config resolution
|
||||
configPath, err := ResolveConfigPath()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// Use the app framework like other commands
|
||||
rootFlags := GetRootFlags()
|
||||
return RunWithApp(cmd.Context(), AppOptions{
|
||||
ConfigPath: configPath,
|
||||
LogOptions: log.LogOptions{
|
||||
Verbose: rootFlags.Verbose,
|
||||
Debug: rootFlags.Debug,
|
||||
Quiet: rootFlags.Quiet,
|
||||
},
|
||||
Modules: []fx.Option{},
|
||||
Invokes: []fx.Option{
|
||||
fx.Invoke(func(v *vaultik.Vaultik, lc fx.Lifecycle) {
|
||||
lc.Append(fx.Hook{
|
||||
OnStart: func(ctx context.Context) error {
|
||||
// Start the purge operation in a goroutine
|
||||
go func() {
|
||||
// Run the purge operation
|
||||
if err := v.PurgeSnapshotsWithOptions(opts); err != nil {
|
||||
if err != context.Canceled {
|
||||
log.Error("Purge operation failed", "error", err)
|
||||
os.Exit(1)
|
||||
}
|
||||
}
|
||||
|
||||
// Shutdown the app when purge completes
|
||||
if err := v.Shutdowner.Shutdown(); err != nil {
|
||||
log.Error("Failed to shutdown", "error", err)
|
||||
}
|
||||
}()
|
||||
return nil
|
||||
},
|
||||
OnStop: func(ctx context.Context) error {
|
||||
log.Debug("Stopping purge operation")
|
||||
v.Cancel()
|
||||
return nil
|
||||
},
|
||||
})
|
||||
}),
|
||||
},
|
||||
})
|
||||
},
|
||||
}
|
||||
|
||||
cmd.Flags().BoolVar(&opts.KeepLatest, "keep-latest", false, "Keep only the latest snapshot per name")
|
||||
cmd.Flags().StringVar(&opts.OlderThan, "older-than", "", "Remove snapshots older than duration (e.g. 30d, 6m, 1y)")
|
||||
cmd.Flags().BoolVar(&opts.Force, "force", false, "Skip confirmation prompts")
|
||||
cmd.Flags().StringVar(&opts.Name, "name", "", "Filter purge to a specific snapshot name")
|
||||
|
||||
return cmd
|
||||
}
|
||||
@@ -2,6 +2,7 @@ package cli
|
||||
|
||||
import (
|
||||
"context"
|
||||
"os"
|
||||
|
||||
"git.eeqj.de/sneak/vaultik/internal/config"
|
||||
"git.eeqj.de/sneak/vaultik/internal/globals"
|
||||
@@ -130,6 +131,7 @@ func buildRestoreInvokes(snapshotID string, opts *RestoreOptions) []fx.Option {
|
||||
if err := app.Vaultik.Restore(restoreOpts); err != nil {
|
||||
if err != context.Canceled {
|
||||
log.Error("Restore operation failed", "error", err)
|
||||
os.Exit(1)
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -25,7 +25,7 @@ func NewRootCommand() *cobra.Command {
|
||||
cmd := &cobra.Command{
|
||||
Use: "vaultik",
|
||||
Short: "Secure incremental backup tool with asymmetric encryption",
|
||||
Long: `vaultik is a secure incremental backup daemon that encrypts data using age
|
||||
Long: `vaultik is a secure incremental backup tool that encrypts data using age
|
||||
public keys and uploads to S3-compatible storage. No private keys are needed
|
||||
on the source system.`,
|
||||
SilenceUsage: true,
|
||||
@@ -41,7 +41,6 @@ on the source system.`,
|
||||
cmd.AddCommand(
|
||||
NewRestoreCommand(),
|
||||
NewPruneCommand(),
|
||||
NewVerifyCommand(),
|
||||
NewStoreCommand(),
|
||||
NewSnapshotCommand(),
|
||||
NewInfoCommand(),
|
||||
|
||||
@@ -75,6 +75,7 @@ specifying a path using --config or by setting VAULTIK_CONFIG to a path.`,
|
||||
if err := v.CreateSnapshot(opts); err != nil {
|
||||
if err != context.Canceled {
|
||||
log.Error("Snapshot creation failed", "error", err)
|
||||
os.Exit(1)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -98,9 +99,8 @@ specifying a path using --config or by setting VAULTIK_CONFIG to a path.`,
|
||||
},
|
||||
}
|
||||
|
||||
cmd.Flags().BoolVar(&opts.Daemon, "daemon", false, "Run in daemon mode with inotify monitoring")
|
||||
cmd.Flags().BoolVar(&opts.Cron, "cron", false, "Run in cron mode (silent unless error)")
|
||||
cmd.Flags().BoolVar(&opts.Prune, "prune", false, "Delete all previous snapshots and unreferenced blobs after backup")
|
||||
cmd.Flags().BoolVar(&opts.Prune, "prune", false, "After backup, drop older snapshots of the same name and remove orphaned blobs")
|
||||
cmd.Flags().BoolVar(&opts.SkipErrors, "skip-errors", false, "Skip file read errors (log them loudly but continue)")
|
||||
|
||||
return cmd
|
||||
@@ -174,11 +174,9 @@ func newSnapshotPurgeCommand() *cobra.Command {
|
||||
Short: "Purge old snapshots",
|
||||
Long: `Removes snapshots based on age or count criteria.
|
||||
|
||||
When --keep-latest is used, retention is applied per snapshot name. For example,
|
||||
if you have snapshots named "home" and "system", --keep-latest keeps the most
|
||||
recent of each.
|
||||
|
||||
Use --name to restrict the purge to a single snapshot name.`,
|
||||
Retention is per-snapshot-name: --keep-latest keeps the latest of each
|
||||
configured snapshot name, not the latest globally. Use --snapshot to
|
||||
restrict the operation to specific snapshot names.`,
|
||||
Args: cobra.NoArgs,
|
||||
RunE: func(cmd *cobra.Command, args []string) error {
|
||||
// Validate flags
|
||||
@@ -232,10 +230,10 @@ Use --name to restrict the purge to a single snapshot name.`,
|
||||
},
|
||||
}
|
||||
|
||||
cmd.Flags().BoolVar(&opts.KeepLatest, "keep-latest", false, "Keep only the latest snapshot per name")
|
||||
cmd.Flags().BoolVar(&opts.KeepLatest, "keep-latest", false, "Keep only the latest snapshot of each name")
|
||||
cmd.Flags().StringVar(&opts.OlderThan, "older-than", "", "Remove snapshots older than duration (e.g., 30d, 6m, 1y)")
|
||||
cmd.Flags().BoolVar(&opts.Force, "force", false, "Skip confirmation prompt")
|
||||
cmd.Flags().StringVar(&opts.Name, "name", "", "Filter purge to a specific snapshot name")
|
||||
cmd.Flags().StringArrayVar(&opts.Names, "snapshot", nil, "Restrict to snapshots with these names (repeat for multiple)")
|
||||
|
||||
return cmd
|
||||
}
|
||||
@@ -281,13 +279,7 @@ func newSnapshotVerifyCommand() *cobra.Command {
|
||||
lc.Append(fx.Hook{
|
||||
OnStart: func(ctx context.Context) error {
|
||||
go func() {
|
||||
var err error
|
||||
if opts.Deep {
|
||||
err = v.RunDeepVerify(snapshotID, opts)
|
||||
} else {
|
||||
err = v.VerifySnapshotWithOptions(snapshotID, opts)
|
||||
}
|
||||
if err != nil {
|
||||
if err := v.VerifySnapshotWithOptions(snapshotID, opts); err != nil {
|
||||
if err != context.Canceled {
|
||||
if !opts.JSON {
|
||||
log.Error("Verification failed", "error", err)
|
||||
|
||||
@@ -1,98 +0,0 @@
|
||||
package cli
|
||||
|
||||
import (
|
||||
"context"
|
||||
"os"
|
||||
|
||||
"git.eeqj.de/sneak/vaultik/internal/log"
|
||||
"git.eeqj.de/sneak/vaultik/internal/vaultik"
|
||||
"github.com/spf13/cobra"
|
||||
"go.uber.org/fx"
|
||||
)
|
||||
|
||||
// NewVerifyCommand creates the verify command
|
||||
func NewVerifyCommand() *cobra.Command {
|
||||
opts := &vaultik.VerifyOptions{}
|
||||
|
||||
cmd := &cobra.Command{
|
||||
Use: "verify <snapshot-id>",
|
||||
Short: "Verify snapshot integrity",
|
||||
Long: `Verifies that all blobs referenced in a snapshot exist and optionally verifies their contents.
|
||||
|
||||
Shallow verification (default):
|
||||
- Downloads and decompresses manifest
|
||||
- Checks existence of all blobs in S3
|
||||
- Reports missing blobs
|
||||
|
||||
Deep verification (--deep):
|
||||
- Downloads and decrypts database
|
||||
- Verifies blob lists match between manifest and database
|
||||
- Downloads, decrypts, and decompresses each blob
|
||||
- Verifies SHA256 hash of each chunk matches database
|
||||
- Ensures chunks are ordered correctly
|
||||
|
||||
The command will fail immediately on any verification error and exit with non-zero status.`,
|
||||
Args: cobra.ExactArgs(1),
|
||||
RunE: func(cmd *cobra.Command, args []string) error {
|
||||
snapshotID := args[0]
|
||||
|
||||
// Use unified config resolution
|
||||
configPath, err := ResolveConfigPath()
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// Use the app framework for all verification
|
||||
rootFlags := GetRootFlags()
|
||||
return RunWithApp(cmd.Context(), AppOptions{
|
||||
ConfigPath: configPath,
|
||||
LogOptions: log.LogOptions{
|
||||
Verbose: rootFlags.Verbose,
|
||||
Debug: rootFlags.Debug,
|
||||
Quiet: rootFlags.Quiet || opts.JSON, // Suppress log output in JSON mode
|
||||
},
|
||||
Modules: []fx.Option{},
|
||||
Invokes: []fx.Option{
|
||||
fx.Invoke(func(v *vaultik.Vaultik, lc fx.Lifecycle) {
|
||||
lc.Append(fx.Hook{
|
||||
OnStart: func(ctx context.Context) error {
|
||||
// Run the verify operation directly
|
||||
go func() {
|
||||
var err error
|
||||
if opts.Deep {
|
||||
err = v.RunDeepVerify(snapshotID, opts)
|
||||
} else {
|
||||
err = v.VerifySnapshotWithOptions(snapshotID, opts)
|
||||
}
|
||||
|
||||
if err != nil {
|
||||
if err != context.Canceled {
|
||||
if !opts.JSON {
|
||||
log.Error("Verification failed", "error", err)
|
||||
}
|
||||
os.Exit(1)
|
||||
}
|
||||
}
|
||||
if err := v.Shutdowner.Shutdown(); err != nil {
|
||||
log.Error("Failed to shutdown", "error", err)
|
||||
}
|
||||
}()
|
||||
return nil
|
||||
},
|
||||
OnStop: func(ctx context.Context) error {
|
||||
log.Debug("Stopping verify operation")
|
||||
v.Cancel()
|
||||
return nil
|
||||
},
|
||||
})
|
||||
}),
|
||||
},
|
||||
})
|
||||
},
|
||||
}
|
||||
|
||||
cmd.Flags().BoolVar(&opts.Deep, "deep", false, "Perform deep verification by downloading and verifying all blob contents")
|
||||
cmd.Flags().BoolVar(&opts.JSON, "json", false, "Output verification results as JSON")
|
||||
|
||||
return cmd
|
||||
}
|
||||
@@ -6,7 +6,6 @@ import (
|
||||
"path/filepath"
|
||||
"sort"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"filippo.io/age"
|
||||
"git.eeqj.de/sneak/smartconfig"
|
||||
@@ -83,19 +82,16 @@ func (c *Config) SnapshotNames() []string {
|
||||
// encryption recipients, storage configuration, and performance tuning parameters.
|
||||
// Configuration is typically loaded from a YAML file.
|
||||
type Config struct {
|
||||
AgeRecipients []string `yaml:"age_recipients"`
|
||||
AgeSecretKey string `yaml:"age_secret_key"`
|
||||
BackupInterval time.Duration `yaml:"backup_interval"`
|
||||
BlobSizeLimit Size `yaml:"blob_size_limit"`
|
||||
ChunkSize Size `yaml:"chunk_size"`
|
||||
Exclude []string `yaml:"exclude"` // Global excludes applied to all snapshots
|
||||
FullScanInterval time.Duration `yaml:"full_scan_interval"`
|
||||
Hostname string `yaml:"hostname"`
|
||||
IndexPath string `yaml:"index_path"`
|
||||
MinTimeBetweenRun time.Duration `yaml:"min_time_between_run"`
|
||||
S3 S3Config `yaml:"s3"`
|
||||
Snapshots map[string]SnapshotConfig `yaml:"snapshots"`
|
||||
CompressionLevel int `yaml:"compression_level"`
|
||||
AgeRecipients []string `yaml:"age_recipients"`
|
||||
AgeSecretKey string `yaml:"age_secret_key"`
|
||||
BlobSizeLimit Size `yaml:"blob_size_limit"`
|
||||
ChunkSize Size `yaml:"chunk_size"`
|
||||
Exclude []string `yaml:"exclude"` // Global excludes applied to all snapshots
|
||||
Hostname string `yaml:"hostname"`
|
||||
IndexPath string `yaml:"index_path"`
|
||||
S3 S3Config `yaml:"s3"`
|
||||
Snapshots map[string]SnapshotConfig `yaml:"snapshots"`
|
||||
CompressionLevel int `yaml:"compression_level"`
|
||||
|
||||
// StorageURL specifies the storage backend using a URL format.
|
||||
// Takes precedence over S3Config if set.
|
||||
@@ -155,13 +151,10 @@ func Load(path string) (*Config, error) {
|
||||
|
||||
cfg := &Config{
|
||||
// Set defaults
|
||||
BlobSizeLimit: Size(10 * 1024 * 1024 * 1024), // 10GB
|
||||
ChunkSize: Size(10 * 1024 * 1024), // 10MB
|
||||
BackupInterval: 1 * time.Hour,
|
||||
FullScanInterval: 24 * time.Hour,
|
||||
MinTimeBetweenRun: 15 * time.Minute,
|
||||
IndexPath: filepath.Join(xdg.DataHome, appName, "index.sqlite"),
|
||||
CompressionLevel: 3,
|
||||
BlobSizeLimit: Size(10 * 1024 * 1024 * 1024), // 10GB
|
||||
ChunkSize: Size(10 * 1024 * 1024), // 10MB
|
||||
IndexPath: filepath.Join(xdg.DataHome, appName, "index.sqlite"),
|
||||
CompressionLevel: 3,
|
||||
}
|
||||
|
||||
// Convert smartconfig data to YAML then unmarshal
|
||||
|
||||
@@ -63,10 +63,3 @@ type Chunk struct {
|
||||
Offset int64
|
||||
Length int64
|
||||
}
|
||||
|
||||
// DirtyPath represents a path marked for backup by inotify
|
||||
type DirtyPath struct {
|
||||
Path string
|
||||
MarkedAt time.Time
|
||||
EventType string // "create", "modify", "delete"
|
||||
}
|
||||
|
||||
@@ -66,18 +66,6 @@ func (v *Vaultik) ShowInfo() error {
|
||||
}
|
||||
v.printlnStdout()
|
||||
|
||||
// Daemon Settings (if applicable)
|
||||
if v.Config.BackupInterval > 0 || v.Config.MinTimeBetweenRun > 0 {
|
||||
v.printfStdout("=== Daemon Settings ===\n")
|
||||
if v.Config.BackupInterval > 0 {
|
||||
v.printfStdout("Backup Interval: %s\n", v.Config.BackupInterval)
|
||||
}
|
||||
if v.Config.MinTimeBetweenRun > 0 {
|
||||
v.printfStdout("Minimum Time: %s\n", v.Config.MinTimeBetweenRun)
|
||||
}
|
||||
v.printlnStdout()
|
||||
}
|
||||
|
||||
// Local Database
|
||||
v.printfStdout("=== Local Database ===\n")
|
||||
v.printfStdout("Index Path: %s\n", v.Config.IndexPath)
|
||||
|
||||
@@ -541,3 +541,142 @@ func TestBackupAndRestore(t *testing.T) {
|
||||
|
||||
t.Log("Backup and restore test completed successfully")
|
||||
}
|
||||
|
||||
// TestEndToEndFileStorage exercises the full backup → restore loop against the
|
||||
// real `file://` storage backend (FileStorer) on a real OS filesystem. This is
|
||||
// the closest local approximation of a production backup: encrypted blobs get
|
||||
// written to disk, the metadata SQLite database is exported through the same
|
||||
// blobgen pipeline as a real backup, and restoration reads them back through
|
||||
// the public Vaultik.Restore entrypoint. It is the canonical end-to-end smoke
|
||||
// test for 1.0.
|
||||
func TestEndToEndFileStorage(t *testing.T) {
|
||||
log.Initialize(log.Config{})
|
||||
|
||||
// Real OS filesystem (SQLite + FileStorer both need it).
|
||||
fs := afero.NewOsFs()
|
||||
tempDir, err := os.MkdirTemp("", "vaultik-e2e-")
|
||||
require.NoError(t, err)
|
||||
// Best-effort cleanup of the scratch tree; a removal error is deliberately ignored.
defer func() { _ = os.RemoveAll(tempDir) }()
|
||||
|
||||
// Source data, remote store, restore target and local index all live under tempDir.
dataDir := filepath.Join(tempDir, "source")
|
||||
storeDir := filepath.Join(tempDir, "remote")
|
||||
restoreDir := filepath.Join(tempDir, "restored")
|
||||
dbPath := filepath.Join(tempDir, "index.sqlite")
|
||||
|
||||
// Write a representative mix of file sizes:
|
||||
// - empty file
|
||||
// - tiny text file
|
||||
// - file just under chunk boundary
|
||||
// - file forcing multiple chunks
|
||||
// - nested subdirectories
|
||||
// medium.bin is chunkSize/2 bytes and large.bin is 4*chunkSize bytes (see below).
chunkSize := int64(64 * 1024)
|
||||
maxBlobSize := int64(512 * 1024)
|
||||
|
||||
// Keys are full paths under dataDir; values are the exact bytes expected back after restore.
testFiles := map[string][]byte{
|
||||
filepath.Join(dataDir, "empty.txt"): {},
|
||||
filepath.Join(dataDir, "small.txt"): []byte("hello vaultik"),
|
||||
filepath.Join(dataDir, "subdir", "medium.bin"): bytesPattern("medium-", int(chunkSize/2)),
|
||||
filepath.Join(dataDir, "subdir", "large.bin"): bytesPattern("large-", int(chunkSize*4)),
|
||||
filepath.Join(dataDir, "deep", "nest", "leaf.txt"): []byte("leaf"),
|
||||
}
|
||||
|
||||
for path, content := range testFiles {
|
||||
require.NoError(t, fs.MkdirAll(filepath.Dir(path), 0o755))
|
||||
require.NoError(t, afero.WriteFile(fs, path, content, 0o644))
|
||||
}
|
||||
|
||||
// FileStorer is the real-world local-disk backend.
|
||||
storer, err := storage.NewFileStorer(storeDir)
|
||||
require.NoError(t, err)
|
||||
|
||||
// Hard-coded, test-only age keypair: both halves go into cfg, and the
// recipient is additionally handed to the scanner via cfg.AgeRecipients.
agePublicKey := "age1ezrjmfpwsc95svdg0y54mums3zevgzu0x0ecq2f7tp8a05gl0sjq9q9wjg"
|
||||
ageSecretKey := "AGE-SECRET-KEY-19CR5YSFW59HM4TLD6GXVEDMZFTVVF7PPHKUT68TXSFPK7APHXA2QS2NJA5"
|
||||
|
||||
cfg := &config.Config{
|
||||
AgeRecipients: []string{agePublicKey},
|
||||
AgeSecretKey: ageSecretKey,
|
||||
CompressionLevel: 3,
|
||||
Hostname: "test-host",
|
||||
}
|
||||
|
||||
ctx := context.Background()
|
||||
|
||||
db, err := database.New(ctx, dbPath)
|
||||
require.NoError(t, err)
|
||||
// Safety net for an early test failure. The DB is also closed explicitly
// before the restore step below, so the result of this second Close is ignored.
defer func() { _ = db.Close() }()
|
||||
|
||||
repos := database.NewRepositories(db)
|
||||
|
||||
sm := snapshot.NewSnapshotManager(snapshot.SnapshotManagerParams{
|
||||
Repos: repos,
|
||||
Storage: storer,
|
||||
Config: cfg,
|
||||
})
|
||||
sm.SetFilesystem(fs)
|
||||
|
||||
scanner := snapshot.NewScanner(snapshot.ScannerConfig{
|
||||
FS: fs,
|
||||
Storage: storer,
|
||||
ChunkSize: chunkSize,
|
||||
MaxBlobSize: maxBlobSize,
|
||||
CompressionLevel: cfg.CompressionLevel,
|
||||
AgeRecipients: cfg.AgeRecipients,
|
||||
Repositories: repos,
|
||||
})
|
||||
|
||||
// Backup phase: create the snapshot record, scan the source tree, mark the
// snapshot complete, then export its metadata to the store.
snapshotID, err := sm.CreateSnapshotWithName(ctx, cfg.Hostname, "e2e", "test-version", "test-git")
|
||||
require.NoError(t, err)
|
||||
|
||||
scanResult, err := scanner.Scan(ctx, dataDir, snapshotID)
|
||||
require.NoError(t, err)
|
||||
require.Greater(t, scanResult.FilesScanned, 0)
|
||||
require.Greater(t, scanResult.BlobsCreated, 0)
|
||||
|
||||
require.NoError(t, sm.CompleteSnapshot(ctx, snapshotID))
|
||||
require.NoError(t, sm.ExportSnapshotMetadata(ctx, dbPath, snapshotID))
|
||||
|
||||
// Verify the backup actually landed on disk under blobs/ and metadata/.
|
||||
blobInfo, err := os.Stat(filepath.Join(storeDir, "blobs"))
|
||||
require.NoError(t, err)
|
||||
require.True(t, blobInfo.IsDir())
|
||||
metaInfo, err := os.Stat(filepath.Join(storeDir, "metadata", snapshotID))
|
||||
require.NoError(t, err)
|
||||
require.True(t, metaInfo.IsDir())
|
||||
|
||||
// Tear down the source DB before restore — restore must work using only
|
||||
// the remote bytes plus the secret key, with no help from the local index.
|
||||
require.NoError(t, db.Close())
|
||||
|
||||
// Restore through a freshly built Vaultik value; its output streams are
// discarded, so only the returned error and the restored bytes are checked.
restoreVaultik := &vaultik.Vaultik{
|
||||
Config: cfg,
|
||||
Storage: storer,
|
||||
Fs: fs,
|
||||
Stdout: io.Discard,
|
||||
Stderr: io.Discard,
|
||||
}
|
||||
restoreVaultik.SetContext(ctx)
|
||||
|
||||
require.NoError(t, restoreVaultik.Restore(&vaultik.RestoreOptions{
|
||||
SnapshotID: snapshotID,
|
||||
TargetDir: restoreDir,
|
||||
Verify: true,
|
||||
}))
|
||||
|
||||
// Byte-equality compare every original against its restored copy.
|
||||
for origPath, expected := range testFiles {
|
||||
// origPath is the full source path (under tempDir), so the restored copy
// is looked up at restoreDir joined with that entire path.
restoredPath := filepath.Join(restoreDir, origPath)
|
||||
got, err := afero.ReadFile(fs, restoredPath)
|
||||
require.NoError(t, err, "restored file missing: %s", restoredPath)
|
||||
require.Equalf(t, expected, got, "byte-equality failed for %s", origPath)
|
||||
}
|
||||
}
|
||||
|
||||
// bytesPattern returns a deterministic byte slice of length n with a tag
// prefix, useful for forcing chunker behavior with reproducible content.
//
// Byte i is tag[i%len(tag)] XORed with the low 8 bits of i, so the same
// (tag, n) pair always yields the same bytes. An empty tag contributes
// nothing to the XOR (byte i is just the low 8 bits of i) rather than
// panicking with an integer divide by zero in i%len(tag).
func bytesPattern(tag string, n int) []byte {
	out := make([]byte, n)
	for i := range out {
		// Converting the index to byte keeps only its low 8 bits, i.e. i&0xff.
		b := byte(i)
		if len(tag) > 0 {
			b ^= tag[i%len(tag)]
		}
		out[i] = b
	}
	return out
}
|
||||
|
||||
@@ -156,7 +156,7 @@ func TestPurgeKeepLatest_WithNameFilter(t *testing.T) {
|
||||
err := v.PurgeSnapshotsWithOptions(&vaultik.SnapshotPurgeOptions{
|
||||
KeepLatest: true,
|
||||
Force: true,
|
||||
Name: "home",
|
||||
Names: []string{"home"},
|
||||
})
|
||||
require.NoError(t, err)
|
||||
|
||||
@@ -190,7 +190,7 @@ func TestPurgeKeepLatest_NameFilterNoMatch(t *testing.T) {
|
||||
err := v.PurgeSnapshotsWithOptions(&vaultik.SnapshotPurgeOptions{
|
||||
KeepLatest: true,
|
||||
Force: true,
|
||||
Name: "nonexistent",
|
||||
Names: []string{"nonexistent"},
|
||||
})
|
||||
require.NoError(t, err)
|
||||
|
||||
@@ -215,7 +215,7 @@ func TestPurgeOlderThan_WithNameFilter(t *testing.T) {
|
||||
err := v.PurgeSnapshotsWithOptions(&vaultik.SnapshotPurgeOptions{
|
||||
OlderThan: "365d",
|
||||
Force: true,
|
||||
Name: "home",
|
||||
Names: []string{"home"},
|
||||
})
|
||||
require.NoError(t, err)
|
||||
|
||||
|
||||
@@ -22,7 +22,6 @@ import (
|
||||
|
||||
// SnapshotCreateOptions contains options for the snapshot create command
|
||||
type SnapshotCreateOptions struct {
|
||||
Daemon bool
|
||||
Cron bool
|
||||
Prune bool
|
||||
SkipErrors bool // Skip file read errors (log them loudly but continue)
|
||||
@@ -57,12 +56,6 @@ func (v *Vaultik) CreateSnapshot(opts *SnapshotCreateOptions) error {
|
||||
return fmt.Errorf("prune database: %w", err)
|
||||
}
|
||||
|
||||
if opts.Daemon {
|
||||
log.Info("Running in daemon mode")
|
||||
// TODO: Implement daemon mode with inotify
|
||||
return fmt.Errorf("daemon mode not yet implemented")
|
||||
}
|
||||
|
||||
// Determine which snapshots to process
|
||||
snapshotNames := opts.Snapshots
|
||||
if len(snapshotNames) == 0 {
|
||||
@@ -92,25 +85,34 @@ func (v *Vaultik) CreateSnapshot(opts *SnapshotCreateOptions) error {
|
||||
v.printfStdout("\nAll %d snapshots completed in %s\n", len(snapshotNames), time.Since(overallStartTime).Round(time.Second))
|
||||
}
|
||||
|
||||
// Prune old snapshots and unreferenced blobs if --prune was specified
|
||||
if opts.Prune {
|
||||
log.Info("Pruning enabled - deleting old snapshots and unreferenced blobs")
|
||||
v.printlnStdout("\nPruning old snapshots (keeping latest)...")
|
||||
|
||||
if err := v.PurgeSnapshotsWithOptions(&SnapshotPurgeOptions{
|
||||
KeepLatest: true,
|
||||
Force: true,
|
||||
}); err != nil {
|
||||
return fmt.Errorf("prune: purging old snapshots: %w", err)
|
||||
if err := v.runPostBackupPrune(snapshotNames); err != nil {
|
||||
return fmt.Errorf("post-backup prune: %w", err)
|
||||
}
|
||||
}
|
||||
|
||||
v.printlnStdout("Pruning unreferenced blobs...")
|
||||
return nil
|
||||
}
|
||||
|
||||
if err := v.PruneBlobs(&PruneOptions{Force: true}); err != nil {
|
||||
return fmt.Errorf("prune: removing unreferenced blobs: %w", err)
|
||||
}
|
||||
// runPostBackupPrune drops older snapshots of the given names (keeping only
|
||||
// the latest of each) and removes orphan blobs from remote storage. Invoked
|
||||
// when `snapshot create --prune` is used.
|
||||
func (v *Vaultik) runPostBackupPrune(snapshotNames []string) error {
|
||||
log.Info("Running post-backup prune", "snapshots", snapshotNames)
|
||||
v.printlnStdout("\n=== Post-backup prune ===")
|
||||
|
||||
log.Info("Pruning complete")
|
||||
purgeOpts := &SnapshotPurgeOptions{
|
||||
KeepLatest: true,
|
||||
Force: true,
|
||||
Names: snapshotNames,
|
||||
Quiet: true,
|
||||
}
|
||||
if err := v.PurgeSnapshotsWithOptions(purgeOpts); err != nil {
|
||||
return fmt.Errorf("purging old snapshots: %w", err)
|
||||
}
|
||||
|
||||
if err := v.PruneBlobs(&PruneOptions{Force: true}); err != nil {
|
||||
return fmt.Errorf("pruning orphaned blobs: %w", err)
|
||||
}
|
||||
|
||||
return nil
|
||||
@@ -585,18 +587,19 @@ func (v *Vaultik) printSnapshotTable(snapshots []SnapshotInfo) error {
|
||||
return w.Flush()
|
||||
}
|
||||
|
||||
// SnapshotPurgeOptions contains options for the snapshot purge command
|
||||
// SnapshotPurgeOptions contains options for the snapshot purge command.
|
||||
type SnapshotPurgeOptions struct {
|
||||
KeepLatest bool
|
||||
OlderThan string
|
||||
Force bool
|
||||
Name string // Filter purge to a specific snapshot name
|
||||
KeepLatest bool // Keep only the most recent snapshot per name
|
||||
OlderThan string // Drop snapshots older than this duration (e.g. "30d", "6m", "1y")
|
||||
Force bool // Skip confirmation prompt
|
||||
Names []string // If non-empty, only operate on snapshots with one of these names
|
||||
Quiet bool // Suppress informational output (used by --prune flag)
|
||||
}
|
||||
|
||||
// PurgeSnapshotsWithOptions removes old snapshots based on criteria.
|
||||
// When KeepLatest is true, retention is applied per snapshot name — the latest
|
||||
// snapshot for each distinct name is kept. If Name is non-empty, only snapshots
|
||||
// matching that name are considered for purge.
|
||||
// Retention is per-snapshot-name: KeepLatest keeps the latest of EACH distinct
|
||||
// snapshot name, not the latest globally. This prevents `home` and `system`
|
||||
// snapshots from cannibalizing each other.
|
||||
func (v *Vaultik) PurgeSnapshotsWithOptions(opts *SnapshotPurgeOptions) error {
|
||||
// Sync with remote first
|
||||
if err := v.syncWithRemote(); err != nil {
|
||||
@@ -609,27 +612,28 @@ func (v *Vaultik) PurgeSnapshotsWithOptions(opts *SnapshotPurgeOptions) error {
|
||||
return fmt.Errorf("listing snapshots: %w", err)
|
||||
}
|
||||
|
||||
// Convert to SnapshotInfo format, only including completed snapshots
|
||||
snapshots := make([]SnapshotInfo, 0, len(dbSnapshots))
|
||||
for _, s := range dbSnapshots {
|
||||
if s.CompletedAt != nil {
|
||||
snapshots = append(snapshots, SnapshotInfo{
|
||||
ID: s.ID,
|
||||
Timestamp: s.StartedAt,
|
||||
CompressedSize: s.BlobSize,
|
||||
})
|
||||
}
|
||||
// Build name filter set if --snapshot was specified.
|
||||
nameFilter := make(map[string]struct{}, len(opts.Names))
|
||||
for _, n := range opts.Names {
|
||||
nameFilter[n] = struct{}{}
|
||||
}
|
||||
|
||||
// If --name is specified, filter to only snapshots matching that name
|
||||
if opts.Name != "" {
|
||||
filtered := make([]SnapshotInfo, 0, len(snapshots))
|
||||
for _, snap := range snapshots {
|
||||
if parseSnapshotName(snap.ID.String()) == opts.Name {
|
||||
filtered = append(filtered, snap)
|
||||
// Collect completed snapshots, applying the name filter.
|
||||
snapshots := make([]SnapshotInfo, 0, len(dbSnapshots))
|
||||
for _, s := range dbSnapshots {
|
||||
if s.CompletedAt == nil {
|
||||
continue
|
||||
}
|
||||
if len(nameFilter) > 0 {
|
||||
if _, ok := nameFilter[parseSnapshotName(s.ID.String())]; !ok {
|
||||
continue
|
||||
}
|
||||
}
|
||||
snapshots = filtered
|
||||
snapshots = append(snapshots, SnapshotInfo{
|
||||
ID: s.ID,
|
||||
Timestamp: s.StartedAt,
|
||||
CompressedSize: s.BlobSize,
|
||||
})
|
||||
}
|
||||
|
||||
// Sort by timestamp (newest first)
|
||||
@@ -640,21 +644,18 @@ func (v *Vaultik) PurgeSnapshotsWithOptions(opts *SnapshotPurgeOptions) error {
|
||||
var toDelete []SnapshotInfo
|
||||
|
||||
if opts.KeepLatest {
|
||||
// Keep the latest snapshot per snapshot name
|
||||
// Group snapshots by name, then mark all but the newest in each group
|
||||
latestByName := make(map[string]bool) // tracks whether we've seen the latest for each name
|
||||
// Keep the latest snapshot per snapshot name. Snapshots are sorted
|
||||
// newest-first, so the first occurrence of each name is kept.
|
||||
seen := make(map[string]bool)
|
||||
for _, snap := range snapshots {
|
||||
name := parseSnapshotName(snap.ID.String())
|
||||
if latestByName[name] {
|
||||
// Already kept the latest for this name — delete this one
|
||||
if seen[name] {
|
||||
toDelete = append(toDelete, snap)
|
||||
} else {
|
||||
// This is the latest (sorted newest-first) — keep it
|
||||
latestByName[name] = true
|
||||
continue
|
||||
}
|
||||
seen[name] = true
|
||||
}
|
||||
} else if opts.OlderThan != "" {
|
||||
// Parse duration
|
||||
duration, err := parseDuration(opts.OlderThan)
|
||||
if err != nil {
|
||||
return fmt.Errorf("invalid duration: %w", err)
|
||||
@@ -669,22 +670,25 @@ func (v *Vaultik) PurgeSnapshotsWithOptions(opts *SnapshotPurgeOptions) error {
|
||||
}
|
||||
|
||||
if len(toDelete) == 0 {
|
||||
v.printlnStdout("No snapshots to delete")
|
||||
if !opts.Quiet {
|
||||
v.printlnStdout("No snapshots to delete")
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
return v.confirmAndExecutePurge(toDelete, opts.Force)
|
||||
return v.confirmAndExecutePurge(toDelete, opts.Force, opts.Quiet)
|
||||
}
|
||||
|
||||
// confirmAndExecutePurge shows deletion candidates, confirms with user, and deletes snapshots
|
||||
func (v *Vaultik) confirmAndExecutePurge(toDelete []SnapshotInfo, force bool) error {
|
||||
// Show what will be deleted
|
||||
v.printfStdout("The following snapshots will be deleted:\n\n")
|
||||
for _, snap := range toDelete {
|
||||
v.printfStdout(" %s (%s, %s)\n",
|
||||
snap.ID,
|
||||
snap.Timestamp.Format("2006-01-02 15:04:05"),
|
||||
formatBytes(snap.CompressedSize))
|
||||
func (v *Vaultik) confirmAndExecutePurge(toDelete []SnapshotInfo, force, quiet bool) error {
|
||||
if !quiet {
|
||||
v.printfStdout("The following snapshots will be deleted:\n\n")
|
||||
for _, snap := range toDelete {
|
||||
v.printfStdout(" %s (%s, %s)\n",
|
||||
snap.ID,
|
||||
snap.Timestamp.Format("2006-01-02 15:04:05"),
|
||||
formatBytes(snap.CompressedSize))
|
||||
}
|
||||
}
|
||||
|
||||
// Confirm unless --force is used
|
||||
@@ -700,7 +704,7 @@ func (v *Vaultik) confirmAndExecutePurge(toDelete []SnapshotInfo, force bool) er
|
||||
v.printlnStdout("Cancelled")
|
||||
return nil
|
||||
}
|
||||
} else {
|
||||
} else if !quiet {
|
||||
v.printfStdout("\nDeleting %d snapshot(s) (--force specified)\n", len(toDelete))
|
||||
}
|
||||
|
||||
@@ -716,10 +720,10 @@ func (v *Vaultik) confirmAndExecutePurge(toDelete []SnapshotInfo, force bool) er
|
||||
}
|
||||
}
|
||||
|
||||
v.printfStdout("Deleted %d snapshot(s)\n", len(toDelete))
|
||||
|
||||
// Note: Run 'vaultik prune' separately to clean up unreferenced blobs
|
||||
v.printlnStdout("\nNote: Run 'vaultik prune' to clean up unreferenced blobs.")
|
||||
if !quiet {
|
||||
v.printfStdout("Deleted %d snapshot(s)\n", len(toDelete))
|
||||
v.printlnStdout("\nNote: Run 'vaultik prune' to clean up unreferenced blobs.")
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
@@ -733,15 +737,17 @@ func (v *Vaultik) VerifySnapshot(snapshotID string, deep bool) error {
|
||||
return v.VerifySnapshotWithOptions(snapshotID, opts)
|
||||
}
|
||||
|
||||
// VerifySnapshotWithOptions checks snapshot integrity with full options
|
||||
// VerifySnapshotWithOptions checks snapshot integrity with full options.
|
||||
// Deep verification is delegated to RunDeepVerify so this function only
|
||||
// implements the shallow (existence-only) path.
|
||||
func (v *Vaultik) VerifySnapshotWithOptions(snapshotID string, opts *VerifyOptions) error {
|
||||
if opts.Deep {
|
||||
return v.RunDeepVerify(snapshotID, opts)
|
||||
}
|
||||
result := &VerifyResult{
|
||||
SnapshotID: snapshotID,
|
||||
Mode: "shallow",
|
||||
}
|
||||
if opts.Deep {
|
||||
result.Mode = "deep"
|
||||
}
|
||||
|
||||
v.printVerifyHeader(snapshotID, opts)
|
||||
|
||||
@@ -779,22 +785,12 @@ func (v *Vaultik) VerifySnapshotWithOptions(snapshotID string, opts *VerifyOptio
|
||||
return v.formatVerifyResult(result, manifest, opts)
|
||||
}
|
||||
|
||||
// printVerifyHeader prints the snapshot ID and parsed timestamp for verification output
|
||||
// printVerifyHeader prints the snapshot ID and parsed timestamp for verification output.
|
||||
// Snapshot ID format: hostname[_name]_<RFC3339>
|
||||
func (v *Vaultik) printVerifyHeader(snapshotID string, opts *VerifyOptions) {
|
||||
// Parse snapshot ID to extract timestamp
|
||||
parts := strings.Split(snapshotID, "-")
|
||||
var snapshotTime time.Time
|
||||
if len(parts) >= 3 {
|
||||
// Format: hostname-YYYYMMDD-HHMMSSZ
|
||||
dateStr := parts[len(parts)-2]
|
||||
timeStr := parts[len(parts)-1]
|
||||
if len(dateStr) == 8 && len(timeStr) == 7 && strings.HasSuffix(timeStr, "Z") {
|
||||
timeStr = timeStr[:6] // Remove Z
|
||||
timestamp, err := time.Parse("20060102150405", dateStr+timeStr)
|
||||
if err == nil {
|
||||
snapshotTime = timestamp
|
||||
}
|
||||
}
|
||||
if t, err := parseSnapshotTimestamp(snapshotID); err == nil {
|
||||
snapshotTime = t
|
||||
}
|
||||
|
||||
if !opts.JSON {
|
||||
@@ -811,7 +807,7 @@ func (v *Vaultik) verifyManifestBlobsExist(manifest *snapshot.Manifest, opts *Ve
|
||||
for _, blob := range manifest.Blobs {
|
||||
blobPath := fmt.Sprintf("blobs/%s/%s/%s", blob.Hash[:2], blob.Hash[2:4], blob.Hash)
|
||||
|
||||
// Just check existence (deep verification is handled by RunDeepVerify)
|
||||
// Shallow: just check existence (deep verification is handled by RunDeepVerify)
|
||||
_, err := v.Storage.Stat(v.ctx, blobPath)
|
||||
if err != nil {
|
||||
if !opts.JSON {
|
||||
|
||||
@@ -14,7 +14,7 @@ import (
|
||||
"git.eeqj.de/sneak/vaultik/internal/snapshot"
|
||||
"github.com/dustin/go-humanize"
|
||||
"github.com/klauspost/compress/zstd"
|
||||
_ "github.com/mattn/go-sqlite3"
|
||||
_ "modernc.org/sqlite"
|
||||
)
|
||||
|
||||
// VerifyOptions contains options for the verify command
|
||||
@@ -258,7 +258,7 @@ func (v *Vaultik) decryptAndLoadDatabase(reader io.ReadCloser, secretKey string)
|
||||
log.Info("Database decompressed", "size", humanize.Bytes(uint64(written)))
|
||||
|
||||
// Open the database
|
||||
db, err := sql.Open("sqlite3", tempPath)
|
||||
db, err := sql.Open("sqlite", tempPath)
|
||||
if err != nil {
|
||||
_ = os.Remove(tempPath)
|
||||
return nil, fmt.Errorf("failed to open database: %w", err)
|
||||
|
||||
@@ -20,9 +20,6 @@ s3:
|
||||
region: us-east-1
|
||||
use_ssl: true
|
||||
part_size: 5242880 # 5MB
|
||||
backup_interval: 1h
|
||||
full_scan_interval: 24h
|
||||
min_time_between_run: 15m
|
||||
index_path: /tmp/vaultik-test.sqlite
|
||||
chunk_size: 10MB
|
||||
blob_size_limit: 10GB
|
||||
|
||||
@@ -17,9 +17,6 @@ s3:
|
||||
region: us-east-1
|
||||
use_ssl: false
|
||||
part_size: 5242880 # 5MB
|
||||
backup_interval: 1h
|
||||
full_scan_interval: 24h
|
||||
min_time_between_run: 15m
|
||||
index_path: /tmp/vaultik-integration-test.sqlite
|
||||
chunk_size: 10MB
|
||||
blob_size_limit: 10GB
|
||||
|
||||
Reference in New Issue
Block a user