Compare commits: 899448e1da ... 2afd54d693 (5 commits)

| SHA1 |
|---|
| 2afd54d693 |
| 05286bed01 |
| f2c120f026 |
| bbe09ec5b5 |
| 43a69c2cfb |

CLAUDE.md (10 changed lines)
@@ -10,6 +10,9 @@ Read the rules in AGENTS.md and follow them.
 corporate advertising for Anthropic and is therefore completely
 unacceptable in commit messages.
 
+* NEVER use `git add -A`. Always add only the files you intentionally
+changed.
+
 * Tests should always be run before committing code. No commits should be
 made that do not pass tests.
 
@@ -33,6 +36,9 @@ Read the rules in AGENTS.md and follow them.
 * When testing on a 2.5Gbit/s ethernet to an s3 server backed by 2000MB/sec SSD,
 estimate about 4 seconds per gigabyte of backup time.
 
-* When running tests, don't run individual tests, or grep the output. run the entire test suite every time and read the full output.
+* When running tests, don't run individual tests, or grep the output. run
+the entire test suite every time and read the full output.
 
+* When running tests, don't run individual tests, or try to grep the output.
+never run "go test". only ever run "make test" to run the full test
+suite, and examine the full output.
4
Makefile
4
Makefile
@ -60,3 +60,7 @@ test-coverage:
|
|||||||
# Run integration tests
|
# Run integration tests
|
||||||
test-integration:
|
test-integration:
|
||||||
go test -v -tags=integration ./...
|
go test -v -tags=integration ./...
|
||||||
|
|
||||||
|
local:
|
||||||
|
VAULTIK_CONFIG=$(HOME)/etc/vaultik/config.yml ./vaultik snapshot --debug list 2>&1
|
||||||
|
VAULTIK_CONFIG=$(HOME)/etc/vaultik/config.yml ./vaultik snapshot --debug create 2>&1
|
||||||
|
|||||||
PROCESS.md (new file, 556 lines)
@@ -0,0 +1,556 @@

# Vaultik Snapshot Creation Process

This document describes the lifecycle of objects during snapshot creation, with a focus on database transactions and foreign key constraints.

## Database Schema Overview

### Tables and Foreign Key Dependencies

```
FOREIGN KEY GRAPH (referencing table ──► referenced table)

snapshot_files ──► snapshots, files
snapshot_blobs ──► snapshots, blobs
file_chunks    ──► files, chunks
chunk_files    ──► files, chunks
blob_chunks    ──► blobs, chunks
uploads        ──► blobs.blob_hash, snapshots.id
```

### Critical Constraint: `chunks` Must Exist First

These tables reference `chunks.chunk_hash` **without CASCADE**:

- `file_chunks.chunk_hash` → `chunks.chunk_hash`
- `chunk_files.chunk_hash` → `chunks.chunk_hash`
- `blob_chunks.chunk_hash` → `chunks.chunk_hash`

**Implication**: A chunk record MUST be committed to the database BEFORE any of these referencing records can be created.
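As an illustration of why insert order matters, here is a minimal, hypothetical sketch of the shape of constraint described above. This is not the actual vaultik schema (which lives in the project's migrations); table columns beyond those named in this document, and the SQLite driver choice, are assumptions. Note that SQLite only enforces foreign keys on connections where `PRAGMA foreign_keys = ON` has been set:

```go
// Sketch only - illustrative schema, not vaultik's real migrations.
package main

import (
    "database/sql"

    _ "modernc.org/sqlite" // assumption: any database/sql SQLite driver works here
)

func main() {
    db, err := sql.Open("sqlite", ":memory:")
    if err != nil {
        panic(err)
    }
    defer func() { _ = db.Close() }()

    // FK enforcement is off by default in SQLite; without this pragma the
    // REFERENCES clause below is silently ignored.
    if _, err := db.Exec(`PRAGMA foreign_keys = ON`); err != nil {
        panic(err)
    }

    ddl := `
    CREATE TABLE chunks (chunk_hash TEXT PRIMARY KEY, size INTEGER NOT NULL);
    CREATE TABLE file_chunks (
        file_id    TEXT NOT NULL,
        idx        INTEGER NOT NULL,
        chunk_hash TEXT NOT NULL REFERENCES chunks(chunk_hash) -- no ON DELETE CASCADE
    );`
    if _, err := db.Exec(ddl); err != nil {
        panic(err)
    }

    // Inserting the child row before its parent chunk fails with a FK error,
    // which is exactly the ordering constraint this document describes.
    if _, err := db.Exec(
        `INSERT INTO file_chunks (file_id, idx, chunk_hash) VALUES ('f1', 0, 'deadbeef')`,
    ); err == nil {
        panic("expected FOREIGN KEY constraint failure")
    }
}
```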
### Order of Operations Required by Schema

```
1. snapshots      (created first, before scan)
2. blobs          (created when packer starts new blob)
3. chunks         (created during file processing)
4. blob_chunks    (created immediately after chunk added to packer)
5. files          (created after file fully chunked)
6. file_chunks    (created with file record)
7. chunk_files    (created with file record)
8. snapshot_files (created with file record)
9. snapshot_blobs (created after blob uploaded)
10. uploads       (created after blob uploaded)
```

---

## Snapshot Creation Phases

### Phase 0: Initialization

**Actions:**
1. Snapshot record created in database (Transaction T0)
2. Known files loaded into memory from `files` table
3. Known chunks loaded into memory from `chunks` table

**Transactions:**
```
T0: INSERT INTO snapshots (id, hostname, ...) VALUES (...)
    COMMIT
```

---

### Phase 1: Scan Directory

**Actions:**
1. Walk filesystem directory tree
2. For each file, compare against in-memory `knownFiles` map
3. Classify files as: unchanged, new, or modified
4. Collect unchanged file IDs for later association
5. Collect new/modified files for processing

**Transactions:**
```
(None during scan - all in-memory)
```
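The classification itself is not spelled out in this document; the following is a rough, assumed sketch of how a walked file could be compared against the in-memory `knownFiles` map. The field names and the size/mtime rule are illustrative assumptions, not the actual vaultik types:

```go
// Sketch only - assumed classification rule (size + mtime comparison).
type knownFile struct {
    ID    string
    Size  int64
    MTime time.Time
}

type fileClass int

const (
    fileUnchanged fileClass = iota
    fileNew
    fileModified
)

func classify(path string, info os.FileInfo, knownFiles map[string]knownFile) fileClass {
    prev, ok := knownFiles[path]
    if !ok {
        return fileNew
    }
    // Treat a file as unchanged when size and mtime match the stored record.
    if prev.Size == info.Size() && prev.MTime.Equal(info.ModTime()) {
        return fileUnchanged
    }
    return fileModified
}
```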
---

### Phase 1b: Associate Unchanged Files

**Actions:**
1. For unchanged files, add entries to `snapshot_files` table
2. Done in batches of 1000

**Transactions:**
```
For each batch of 1000 file IDs:
T: BEGIN
   INSERT INTO snapshot_files (snapshot_id, file_id) VALUES (?, ?)
   ... (up to 1000 inserts)
   COMMIT
```

---

### Phase 2: Process Files

For each file that needs processing:

#### Step 2a: Open and Chunk File

**Location:** `processFileStreaming()`

For each chunk produced by content-defined chunking:

##### Step 2a-1: Check Chunk Existence
```go
chunkExists := s.chunkExists(chunk.Hash) // In-memory lookup
```

##### Step 2a-2: Create Chunk Record (if new)
```go
// TRANSACTION: Create chunk in database
err := s.repos.WithTx(ctx, func(txCtx context.Context, tx *sql.Tx) error {
    dbChunk := &database.Chunk{ChunkHash: chunk.Hash, Size: chunk.Size}
    return s.repos.Chunks.Create(txCtx, tx, dbChunk)
})
// COMMIT immediately after WithTx returns

// Update in-memory cache
s.addKnownChunk(chunk.Hash)
```

**Transaction:**
```
T_chunk: BEGIN
         INSERT INTO chunks (chunk_hash, size) VALUES (?, ?)
         COMMIT
```

##### Step 2a-3: Add Chunk to Packer

```go
s.packer.AddChunk(&blob.ChunkRef{Hash: chunk.Hash, Data: chunk.Data})
```

**Inside packer.AddChunk → addChunkToCurrentBlob():**

```go
// TRANSACTION: Create blob_chunks record IMMEDIATELY
if p.repos != nil {
    blobChunk := &database.BlobChunk{
        BlobID:    p.currentBlob.id,
        ChunkHash: chunk.Hash,
        Offset:    offset,
        Length:    chunkSize,
    }
    err := p.repos.WithTx(context.Background(), func(ctx context.Context, tx *sql.Tx) error {
        return p.repos.BlobChunks.Create(ctx, tx, blobChunk)
    })
    // COMMIT immediately
}
```

**Transaction:**
```
T_blob_chunk: BEGIN
              INSERT INTO blob_chunks (blob_id, chunk_hash, offset, length) VALUES (?, ?, ?, ?)
              COMMIT
```

**⚠️ CRITICAL DEPENDENCY**: This transaction requires `chunks.chunk_hash` to exist (FK constraint).
The chunk MUST be committed in Step 2a-2 BEFORE this can succeed.
---

#### Step 2b: Blob Size Limit Handling

If adding a chunk would exceed blob size limit:

```go
if err == blob.ErrBlobSizeLimitExceeded {
    if err := s.packer.FinalizeBlob(); err != nil { ... }
    // Retry adding the chunk
    if err := s.packer.AddChunk(...); err != nil { ... }
}
```

**FinalizeBlob() transactions:**
```
T_blob_finish: BEGIN
               UPDATE blobs SET blob_hash=?, uncompressed_size=?, compressed_size=?, finished_ts=? WHERE id=?
               COMMIT
```

Then blob handler is called (handleBlobReady):
```
(Upload to S3 - no transaction)

T_blob_uploaded: BEGIN
                 UPDATE blobs SET uploaded_ts=? WHERE id=?
                 INSERT INTO snapshot_blobs (snapshot_id, blob_id, blob_hash) VALUES (?, ?, ?)
                 INSERT INTO uploads (blob_hash, snapshot_id, uploaded_at, size, duration_ms) VALUES (?, ?, ?, ?, ?)
                 COMMIT
```

---

#### Step 2c: Queue File for Batch Insertion

After all chunks for a file are processed:

```go
// Build file data (in-memory, no DB)
fileChunks := make([]database.FileChunk, len(chunks))
chunkFiles := make([]database.ChunkFile, len(chunks))

// Queue for batch insertion
return s.addPendingFile(ctx, pendingFileData{
    file:       fileToProcess.File,
    fileChunks: fileChunks,
    chunkFiles: chunkFiles,
})
```

**No transaction yet** - just adds to `pendingFiles` slice.

If `len(pendingFiles) >= fileBatchSize (100)`, triggers `flushPendingFiles()`.

---

### Step 2d: Flush Pending Files

**Location:** `flushPendingFiles()` - called when batch is full or at end of processing

```go
return s.repos.WithTx(ctx, func(txCtx context.Context, tx *sql.Tx) error {
    for _, data := range files {
        // 1. Create file record
        s.repos.Files.Create(txCtx, tx, data.file) // INSERT OR REPLACE

        // 2. Delete old associations
        s.repos.FileChunks.DeleteByFileID(txCtx, tx, data.file.ID)
        s.repos.ChunkFiles.DeleteByFileID(txCtx, tx, data.file.ID)

        // 3. Create file_chunks records
        for _, fc := range data.fileChunks {
            s.repos.FileChunks.Create(txCtx, tx, &fc) // FK: chunks.chunk_hash
        }

        // 4. Create chunk_files records
        for _, cf := range data.chunkFiles {
            s.repos.ChunkFiles.Create(txCtx, tx, &cf) // FK: chunks.chunk_hash
        }

        // 5. Add file to snapshot
        s.repos.Snapshots.AddFileByID(txCtx, tx, s.snapshotID, data.file.ID)
    }
    return nil
})
// COMMIT (all or nothing for the batch)
```

**Transaction:**
```
T_files_batch: BEGIN
               -- For each file in batch:
               INSERT OR REPLACE INTO files (...) VALUES (...)
               DELETE FROM file_chunks WHERE file_id = ?
               DELETE FROM chunk_files WHERE file_id = ?
               INSERT INTO file_chunks (file_id, idx, chunk_hash) VALUES (?, ?, ?) -- FK: chunks
               INSERT INTO chunk_files (chunk_hash, file_id, ...) VALUES (?, ?, ...) -- FK: chunks
               INSERT INTO snapshot_files (snapshot_id, file_id) VALUES (?, ?)
               -- Repeat for each file
               COMMIT
```

**⚠️ CRITICAL DEPENDENCY**: `file_chunks` and `chunk_files` require `chunks.chunk_hash` to exist.

---

### Phase 2 End: Final Flush

```go
// Flush any remaining pending files
if err := s.flushAllPending(ctx); err != nil { ... }

// Final packer flush
s.packer.Flush()
```
---

## The Current Bug

### Problem

The current code attempts to batch file insertions, but `file_chunks` and `chunk_files` have foreign keys to `chunks.chunk_hash`. The batched file flush tries to insert these records, but if the chunks haven't been committed yet, the FK constraint fails.

### Why It's Happening

Looking at the sequence:

1. Process file A, chunk X
2. Create chunk X in DB (Transaction commits)
3. Add chunk X to packer
4. Packer creates blob_chunks for chunk X (needs chunk X - OK, committed in step 2)
5. Queue file A with chunk references
6. Process file B, chunk Y
7. Create chunk Y in DB (Transaction commits)
8. ... etc ...
9. At end: flushPendingFiles()
10. Insert file_chunks for file A referencing chunk X (chunk X committed - should work)

The chunks ARE being created individually. But something is going wrong.

### Actual Issue

Wait - let me re-read the code. The issue is:

In `processFileStreaming`, when we queue file data:
```go
fileChunks[i] = database.FileChunk{
    FileID:    fileToProcess.File.ID,
    Idx:       ci.fileChunk.Idx,
    ChunkHash: ci.fileChunk.ChunkHash,
}
```

The `FileID` is set, but `fileToProcess.File.ID` might be empty at this point because the file record hasn't been created yet!

Looking at `checkFileInMemory`:
```go
// For new files:
if !exists {
    return file, true // file.ID is empty string!
}

// For existing files:
file.ID = existingFile.ID // Reuse existing ID
```

**For NEW files, `file.ID` is empty!**

Then in `flushPendingFiles`:
```go
s.repos.Files.Create(txCtx, tx, data.file) // This generates/uses the ID
```

But `data.fileChunks` was built with the EMPTY ID!

### The Real Problem

For new files:
1. `checkFileInMemory` creates file record with empty ID
2. `processFileStreaming` queues file_chunks with empty `FileID`
3. `flushPendingFiles` creates file (generates ID), but file_chunks still have empty `FileID`

Wait, but `Files.Create` should be INSERT OR REPLACE by path, and the file struct should get updated... Let me check.

Actually, looking more carefully at the code path - the file IS created first in the flush, but the `fileChunks` slice was already built with the old (possibly empty) ID. The ID isn't updated after the file is created.

Hmm, but looking at the current code:
```go
fileChunks[i] = database.FileChunk{
    FileID: fileToProcess.File.ID, // This uses the ID from the File struct
```

And in `checkFileInMemory` for new files, we create a file struct but don't set the ID. However, looking at the database repository, `Files.Create` should be doing `INSERT OR REPLACE` and the ID should be pre-generated...

Let me check if IDs are being generated. Looking at the File struct usage, it seems like UUIDs should be generated somewhere...

Actually, looking at the test failures again:
```
creating file chunk: inserting file_chunk: constraint failed: FOREIGN KEY constraint failed (787)
```

Error 787 is SQLite's foreign key constraint error. The failing FK is on `file_chunks.chunk_hash → chunks.chunk_hash`.

So the chunks ARE NOT in the database when we try to insert file_chunks. Let me trace through more carefully...
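One quick way to confirm this hypothesis is to check, inside the failing batch transaction, whether the parent chunk row is actually visible just before the insert. A minimal sketch, assuming `fc` is the `database.FileChunk` about to be inserted and using only the column names already shown in this document:

```go
// Sketch: run right before the FileChunks.Create call in flushPendingFiles.
var n int
if err := tx.QueryRowContext(ctx,
    `SELECT COUNT(*) FROM chunks WHERE chunk_hash = ?`, fc.ChunkHash,
).Scan(&n); err != nil {
    return fmt.Errorf("checking chunk existence: %w", err)
}
if n == 0 {
    // If this fires, the chunk insert really did not commit (or committed to a
    // different database) before the batch flush ran.
    log.Debug("parent chunk missing at flush time",
        "chunk_hash", fc.ChunkHash, "file_id", fc.FileID)
}
```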
---

## Transaction Timing Issue

The problem is transaction visibility in SQLite.

Each `WithTx` creates a new transaction that commits at the end. But with batched file insertion:

1. Chunk transactions commit one at a time
2. File batch transaction runs later

If chunks are being inserted but something goes wrong with transaction isolation, the file batch might not see them.

But actually SQLite in WAL mode should have SERIALIZABLE isolation by default, so committed transactions should be visible.

Let me check if the in-memory cache is masking a database problem...

Actually, wait. Let me re-check the current broken code more carefully. The issue might be simpler.

---

## Current Code Flow Analysis

Looking at `processFileStreaming` in the current broken state:

```go
// For each chunk:
if !chunkExists {
    err := s.repos.WithTx(ctx, func(txCtx context.Context, tx *sql.Tx) error {
        dbChunk := &database.Chunk{ChunkHash: chunk.Hash, Size: chunk.Size}
        return s.repos.Chunks.Create(txCtx, tx, dbChunk)
    })
    // ... check error ...
    s.addKnownChunk(chunk.Hash)
}

// ... add to packer (creates blob_chunks) ...

// Collect chunk info for file
chunks = append(chunks, chunkInfo{...})
```

Then at end of function:
```go
// Queue file for batch insertion
return s.addPendingFile(ctx, pendingFileData{
    file:       fileToProcess.File,
    fileChunks: fileChunks,
    chunkFiles: chunkFiles,
})
```

At end of `processPhase`:
```go
if err := s.flushAllPending(ctx); err != nil { ... }
```

The chunks are being created one-by-one with individual transactions. By the time `flushPendingFiles` runs, all chunk transactions should have committed.

Unless... there's a bug in how the chunks are being referenced. Let me check if the chunk_hash values are correct.

Or... maybe the test database is being recreated between operations somehow?

Actually, let me check the test setup. Maybe the issue is specific to the test environment.

---

## Summary of Object Lifecycle

| Object | When Created | Transaction | Dependencies |
|--------|--------------|-------------|--------------|
| snapshot | Before scan | Individual tx | None |
| blob | When packer needs new blob | Individual tx | None |
| chunk | During file chunking (each chunk) | Individual tx | None |
| blob_chunks | Immediately after adding chunk to packer | Individual tx | chunks, blobs |
| files | Batched at end of processing | Batch tx | None |
| file_chunks | With file (batched) | Batch tx | files, chunks |
| chunk_files | With file (batched) | Batch tx | files, chunks |
| snapshot_files | With file (batched) | Batch tx | snapshots, files |
| snapshot_blobs | After blob upload | Individual tx | snapshots, blobs |
| uploads | After blob upload | Same tx as snapshot_blobs | blobs, snapshots |

---

## Root Cause Analysis

After detailed analysis, I believe the issue is one of the following:

### Hypothesis 1: File ID Not Set

Looking at `checkFileInMemory()` for NEW files:
```go
if !exists {
    return file, true // file.ID is empty string!
}
```

For new files, `file.ID` is empty. Then in `processFileStreaming`:
```go
fileChunks[i] = database.FileChunk{
    FileID: fileToProcess.File.ID, // Empty for new files!
    ...
}
```

The `FileID` in the built `fileChunks` slice is empty.

Then in `flushPendingFiles`:
```go
s.repos.Files.Create(txCtx, tx, data.file) // This generates the ID
// But data.fileChunks still has empty FileID!
for i := range data.fileChunks {
    s.repos.FileChunks.Create(...) // Uses empty FileID
}
```

**Solution**: Generate file IDs upfront in `checkFileInMemory()`:
```go
file := &database.File{
    ID:   uuid.New().String(), // Generate ID immediately
    Path: path,
    ...
}
```

### Hypothesis 2: Transaction Isolation

SQLite with a single connection pool (`MaxOpenConns(1)`) should serialize all transactions. Committed data should be visible to subsequent transactions.

However, there might be a subtle issue with how `context.Background()` is used in the packer vs the scanner's context.
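For reference, this is the kind of connection setup under which those assumptions hold (single connection, WAL, foreign keys enforced). This is a minimal sketch, not vaultik's actual database initialization; the driver name and pragma choices are assumptions:

```go
// Sketch only - assumed SQLite setup, not the project's real init code.
db, err := sql.Open("sqlite", "state.db")
if err != nil {
    return err
}
// A single connection serializes all transactions, so anything committed by an
// earlier WithTx call is visible to every later one.
db.SetMaxOpenConns(1)
if _, err := db.Exec(`PRAGMA journal_mode = WAL`); err != nil {
    return err
}
// FK enforcement is per connection and OFF by default in SQLite.
if _, err := db.Exec(`PRAGMA foreign_keys = ON`); err != nil {
    return err
}
```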
## Recommended Fix

**Step 1: Generate file IDs upfront**

In `checkFileInMemory()`, generate the UUID for new files immediately:
```go
file := &database.File{
    ID:   uuid.New().String(), // Always generate ID
    Path: path,
    ...
}
```

This ensures `file.ID` is set when building `fileChunks` and `chunkFiles` slices.

**Step 2: Verify by reverting to per-file transactions**

If Step 1 doesn't fix it, revert to non-batched file insertion to isolate the issue:

```go
// Instead of queuing:
// return s.addPendingFile(ctx, pendingFileData{...})

// Do immediate insertion:
return s.repos.WithTx(ctx, func(txCtx context.Context, tx *sql.Tx) error {
    // Create file
    s.repos.Files.Create(txCtx, tx, fileToProcess.File)
    // Delete old associations
    s.repos.FileChunks.DeleteByFileID(...)
    s.repos.ChunkFiles.DeleteByFileID(...)
    // Create new associations
    for _, fc := range fileChunks {
        s.repos.FileChunks.Create(...)
    }
    for _, cf := range chunkFiles {
        s.repos.ChunkFiles.Create(...)
    }
    // Add to snapshot
    s.repos.Snapshots.AddFileByID(...)
    return nil
})
```

**Step 3: If batching is still desired**

After confirming per-file transactions work, re-implement batching with the ID fix in place, and add debug logging to trace exactly which chunk_hash is failing and why.
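A sketch of the kind of debug logging Step 3 refers to, wrapped around the batched inserts in `flushPendingFiles`. It uses the key/value logging style that appears elsewhere in this changeset; the exact field names are taken from the structs referenced above, but this is an illustration rather than the committed implementation:

```go
// Sketch: log each file_chunks insert with enough context to identify a failing row.
for i := range data.fileChunks {
    fc := &data.fileChunks[i]
    log.Debug("inserting file_chunk",
        "file_id", fc.FileID,
        "idx", fc.Idx,
        "chunk_hash", fc.ChunkHash,
        "path", data.file.Path)
    if err := s.repos.FileChunks.Create(txCtx, tx, fc); err != nil {
        return fmt.Errorf("file_chunk for %s (chunk %s): %w",
            data.file.Path, fc.ChunkHash, err)
    }
}
```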
README.md (28 changed lines)
@@ -1,21 +1,22 @@
 # vaultik (ваултик)
 
-`vaultik` is a incremental backup daemon written in Go. It
-encrypts data using an `age` public key and uploads each encrypted blob
-directly to a remote S3-compatible object store. It requires no private
-keys, secrets, or credentials stored on the backed-up system.
+WIP: pre-1.0, some functions may not be fully implemented yet
+
+`vaultik` is an incremental backup daemon written in Go. It encrypts data
+using an `age` public key and uploads each encrypted blob directly to a
+remote S3-compatible object store. It requires no private keys, secrets, or
+credentials (other than those required to PUT to encrypted object storage,
+such as S3 API keys) stored on the backed-up system.
 
 It includes table-stakes features such as:
 
-* modern authenticated encryption
+* modern encryption (the excellent `age`)
 * deduplication
 * incremental backups
 * modern multithreaded zstd compression with configurable levels
 * content-addressed immutable storage
-* local state tracking in standard SQLite database
-* inotify-based change detection
-* streaming processing of all data to not require lots of ram or temp file
-storage
+* local state tracking in standard SQLite database, enables write-only
+incremental backups to destination
 * no mutable remote metadata
 * no plaintext file paths or metadata stored in remote
 * does not create huge numbers of small files (to keep S3 operation counts
@@ -27,12 +28,12 @@ It includes table-stakes features such as:
 content-addressable chunk map of changed files using deterministic chunking.
 Each chunk is streamed into a blob packer. Blobs are compressed with `zstd`,
 encrypted with `age`, and uploaded directly to remote storage under a
-content-addressed S3 path.
+content-addressed S3 path. at the end, a pruned snapshot-specific sqlite
+database of metadata is created, encrypted, and uploaded alongside the
+blobs.
 
 No plaintext file contents ever hit disk. No private key or secret
-passphrase is needed or stored locally. All encrypted data is
-streaming-processed and immediately discarded once uploaded. Metadata is
-encrypted and pushed with the same mechanism.
+passphrase is needed or stored locally.
 
 ## why
 
@@ -42,6 +43,7 @@ Existing backup software fails under one or more of these conditions:
 compromises encrypted backups in the case of host system compromise
 * Depends on symmetric encryption unsuitable for zero-trust environments
 * Creates one-blob-per-file, which results in excessive S3 operation counts
+* is slow
 
 `vaultik` addresses these by using:
 
TODO.md (22 changed lines)
@@ -40,6 +40,28 @@ Reorganize commands to provide better visibility into stored data and snapshots.
 - `--deep` mode: Downloads each blob and verifies its hash matches the stored hash
 - **Stub implementation for now**
 
+- `vaultik snapshot remove <snapshot-id>` (alias: `rm`)
+  - Removes a snapshot and any blobs that become orphaned
+  - Algorithm (see the sketch after this section):
+    1. Validate target snapshot exists in storage
+    2. List all snapshots in storage
+    3. Download manifests from all OTHER snapshots to build "in-use" blob set
+    4. Download target snapshot's manifest to get its blob hashes
+    5. Identify orphaned blobs: target blobs NOT in the in-use set
+    6. Delete orphaned blobs from storage
+    7. Delete snapshot metadata using existing `deleteSnapshot()` helper
+  - Flags:
+    - `--force` / `-f`: Skip confirmation prompt
+    - `--dry-run`: Show what would be deleted without deleting
+  - Files to modify:
+    - `internal/cli/snapshot.go`: Add `newSnapshotRemoveCommand()`
+    - `internal/vaultik/snapshot.go`: Add `RemoveSnapshot()` method
+  - Reuse existing code:
+    - Snapshot enumeration pattern from `PruneBlobs()` in `prune.go`
+    - `v.downloadManifest(snapshotID)` for manifest downloading
+    - Blob path format: `blobs/{hash[:2]}/{hash[2:4]}/{hash}`
+    - `v.deleteSnapshot(snapshotID)` for metadata deletion
+
 ### Implementation Notes
 
 1. **No Decryption Required**: All commands work with unencrypted blob manifests
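The orphan computation in the `snapshot remove` algorithm above is a plain set difference. A minimal sketch, not part of the diff itself: `downloadManifest` and the blob path format are the ones named in the TODO entry, while `otherSnapshotIDs` and the `BlobHashes` field are hypothetical names used only for illustration:

```go
// Sketch of steps 3-5: build the in-use set from all other snapshots, then
// keep only target blobs that nothing else references.
inUse := make(map[string]bool)
for _, id := range otherSnapshotIDs { // hypothetical: every snapshot except the target
    manifest, err := v.downloadManifest(id)
    if err != nil {
        return err
    }
    for _, h := range manifest.BlobHashes { // hypothetical field name
        inUse[h] = true
    }
}

target, err := v.downloadManifest(targetID)
if err != nil {
    return err
}
var orphaned []string
for _, h := range target.BlobHashes {
    if !inUse[h] {
        // Each orphan lives at blobs/{hash[:2]}/{hash[2:4]}/{hash} per the path format above.
        orphaned = append(orphaned, h)
    }
}
```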
@@ -1,9 +1,41 @@
 package main
 
 import (
+    "os"
+    "runtime"
+    "runtime/pprof"
+
     "git.eeqj.de/sneak/vaultik/internal/cli"
 )
 
 func main() {
+    // CPU profiling: set VAULTIK_CPUPROFILE=/path/to/cpu.prof
+    if cpuProfile := os.Getenv("VAULTIK_CPUPROFILE"); cpuProfile != "" {
+        f, err := os.Create(cpuProfile)
+        if err != nil {
+            panic("could not create CPU profile: " + err.Error())
+        }
+        defer func() { _ = f.Close() }()
+        if err := pprof.StartCPUProfile(f); err != nil {
+            panic("could not start CPU profile: " + err.Error())
+        }
+        defer pprof.StopCPUProfile()
+    }
+
+    // Memory profiling: set VAULTIK_MEMPROFILE=/path/to/mem.prof
+    if memProfile := os.Getenv("VAULTIK_MEMPROFILE"); memProfile != "" {
+        defer func() {
+            f, err := os.Create(memProfile)
+            if err != nil {
+                panic("could not create memory profile: " + err.Error())
+            }
+            defer func() { _ = f.Close() }()
+            runtime.GC() // get up-to-date statistics
+            if err := pprof.WriteHeapProfile(f); err != nil {
+                panic("could not write memory profile: " + err.Error())
+            }
+        }()
+    }
+
     cli.CLIEntry()
 }
go.mod (1 changed line)
@@ -68,6 +68,7 @@ require (
 	github.com/go-openapi/jsonpointer v0.21.0 // indirect
 	github.com/go-openapi/jsonreference v0.20.2 // indirect
 	github.com/go-openapi/swag v0.23.0 // indirect
+	github.com/gobwas/glob v0.2.3 // indirect
 	github.com/gogo/protobuf v1.3.2 // indirect
 	github.com/golang-jwt/jwt/v5 v5.2.2 // indirect
 	github.com/golang/protobuf v1.5.4 // indirect

go.sum (2 changed lines)
@@ -149,6 +149,8 @@ github.com/go-task/slim-sprig/v3 v3.0.0 h1:sUs3vkvUymDpBKi3qH1YSqBQk9+9D/8M2mN1v
 github.com/go-task/slim-sprig/v3 v3.0.0/go.mod h1:W848ghGpv3Qj3dhTPRyJypKRiqCdHZiAzKg9hl15HA8=
 github.com/go-test/deep v1.0.2 h1:onZX1rnHT3Wv6cqNgYyFOOlgVKJrksuCMCRvJStbMYw=
 github.com/go-test/deep v1.0.2/go.mod h1:wGDj63lr65AM2AQyKZd/NYHGb0R+1RLqB8NKt3aSFNA=
+github.com/gobwas/glob v0.2.3 h1:A4xDbljILXROh+kObIiy5kIaPYD8e96x1tgBhUI5J+Y=
+github.com/gobwas/glob v0.2.3/go.mod h1:d3Ez4x06l9bZtSvzIay5+Yzi0fmZzPgnTbPcKjJAkT8=
 github.com/godbus/dbus/v5 v5.0.4/go.mod h1:xhWf0FNVPg57R7Z0UbKHbJfkEywrmjJnf7w5xrFpKfA=
 github.com/gogo/protobuf v1.1.1/go.mod h1:r8qH/GZQm5c6nD/R0oafs1akxWv10x8SbQlK7atdtwQ=
 github.com/gogo/protobuf v1.3.2 h1:Ov1cvc58UF3b5XjBnZv7+opcTcQFZebYjWzi34vdm4Q=
@@ -47,6 +47,12 @@ type PackerConfig struct {
     Fs afero.Fs // Filesystem for temporary files
 }
 
+// PendingChunk represents a chunk waiting to be inserted into the database.
+type PendingChunk struct {
+    Hash string
+    Size int64
+}
+
 // Packer accumulates chunks and packs them into blobs.
 // It handles compression, encryption, and coordination with the database
 // to track blob metadata. Packer is thread-safe.
@@ -64,6 +70,9 @@ type Packer struct {
     // Current blob being packed
     currentBlob   *blobInProgress
     finishedBlobs []*FinishedBlob // Only used if no handler provided
+
+    // Pending chunks to be inserted when blob finalizes
+    pendingChunks []PendingChunk
 }
 
 // blobInProgress represents a blob being assembled
@@ -116,6 +125,7 @@ type BlobWithReader struct {
     *FinishedBlob
     Reader   io.ReadSeeker
     TempFile afero.File // Optional, only set for disk-based blobs
+    InsertedChunkHashes []string // Chunk hashes that were inserted to DB with this blob
 }
 
 // NewPacker creates a new blob packer that accumulates chunks into blobs.
@@ -152,6 +162,15 @@ func (p *Packer) SetBlobHandler(handler BlobHandler) {
     p.blobHandler = handler
 }
 
+// AddPendingChunk queues a chunk to be inserted into the database when the
+// current blob is finalized. This batches chunk inserts to reduce transaction
+// overhead. Thread-safe.
+func (p *Packer) AddPendingChunk(hash string, size int64) {
+    p.mu.Lock()
+    defer p.mu.Unlock()
+    p.pendingChunks = append(p.pendingChunks, PendingChunk{Hash: hash, Size: size})
+}
+
 // AddChunk adds a chunk to the current blob being packed.
 // If adding the chunk would exceed MaxBlobSize, returns ErrBlobSizeLimitExceeded.
 // In this case, the caller should finalize the current blob and retry.
@@ -314,23 +333,9 @@ func (p *Packer) addChunkToCurrentBlob(chunk *ChunkRef) error {
     p.currentBlob.chunks = append(p.currentBlob.chunks, chunkInfo)
     p.currentBlob.chunkSet[chunk.Hash] = true
 
-    // Store blob-chunk association in database immediately
-    if p.repos != nil {
-        blobChunk := &database.BlobChunk{
-            BlobID:    p.currentBlob.id,
-            ChunkHash: chunk.Hash,
-            Offset:    offset,
-            Length:    chunkSize,
-        }
-        err := p.repos.WithTx(context.Background(), func(ctx context.Context, tx *sql.Tx) error {
-            return p.repos.BlobChunks.Create(ctx, tx, blobChunk)
-        })
-        if err != nil {
-            log.Error("Failed to store blob-chunk association in database", "error", err,
-                "blob_id", p.currentBlob.id, "chunk_hash", chunk.Hash)
-            // Continue anyway - we can reconstruct this later if needed
-        }
-    }
+    // Note: blob_chunk records are inserted in batch when blob is finalized
+    // to reduce transaction overhead. The chunk info is already stored in
+    // p.currentBlob.chunks for later insertion.
 
     // Update total size
     p.currentBlob.size += chunkSize
@@ -392,16 +397,49 @@ func (p *Packer) finalizeCurrentBlob() error {
         })
     }
 
-    // Update blob record in database with hash and sizes
+    // Get pending chunks (will be inserted to DB and reported to handler)
+    chunksToInsert := p.pendingChunks
+    p.pendingChunks = nil // Clear pending list
+
+    // Insert pending chunks, blob_chunks, and update blob in a single transaction
     if p.repos != nil {
         err := p.repos.WithTx(context.Background(), func(ctx context.Context, tx *sql.Tx) error {
+            // First insert all pending chunks (required for blob_chunks FK)
+            for _, chunk := range chunksToInsert {
+                dbChunk := &database.Chunk{
+                    ChunkHash: chunk.Hash,
+                    Size:      chunk.Size,
+                }
+                if err := p.repos.Chunks.Create(ctx, tx, dbChunk); err != nil {
+                    return fmt.Errorf("creating chunk: %w", err)
+                }
+            }
+
+            // Insert all blob_chunk records in batch
+            for _, chunk := range p.currentBlob.chunks {
+                blobChunk := &database.BlobChunk{
+                    BlobID:    p.currentBlob.id,
+                    ChunkHash: chunk.Hash,
+                    Offset:    chunk.Offset,
+                    Length:    chunk.Size,
+                }
+                if err := p.repos.BlobChunks.Create(ctx, tx, blobChunk); err != nil {
+                    return fmt.Errorf("creating blob_chunk: %w", err)
+                }
+            }
+
+            // Update blob record with final hash and sizes
             return p.repos.Blobs.UpdateFinished(ctx, tx, p.currentBlob.id, blobHash,
                 p.currentBlob.size, finalSize)
         })
         if err != nil {
             p.cleanupTempFile()
-            return fmt.Errorf("updating blob record: %w", err)
+            return fmt.Errorf("finalizing blob transaction: %w", err)
         }
+
+        log.Debug("Committed blob transaction",
+            "chunks_inserted", len(chunksToInsert),
+            "blob_chunks_inserted", len(p.currentBlob.chunks))
     }
 
     // Create finished blob
@@ -424,6 +462,12 @@ func (p *Packer) finalizeCurrentBlob() error {
         "ratio", fmt.Sprintf("%.2f", compressionRatio),
         "duration", time.Since(p.currentBlob.startTime))
 
+    // Collect inserted chunk hashes for the scanner to track
+    var insertedChunkHashes []string
+    for _, chunk := range chunksToInsert {
+        insertedChunkHashes = append(insertedChunkHashes, chunk.Hash)
+    }
+
     // Call blob handler if set
     if p.blobHandler != nil {
         // Reset file position for handler
@@ -437,6 +481,7 @@ func (p *Packer) finalizeCurrentBlob() error {
         FinishedBlob: finished,
         Reader:       p.currentBlob.tempFile,
         TempFile:     p.currentBlob.tempFile,
+        InsertedChunkHashes: insertedChunkHashes,
     }
 
     if err := p.blobHandler(blobWithReader); err != nil {
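Taken together, the packer changes above move chunk and blob_chunk inserts into the blob-finalization transaction. A rough sketch of the intended call order from the scanner's side, using only the methods that appear in this changeset (`AddPendingChunk`, `AddChunk`, `FinalizeBlob`); the surrounding scanner code and error handling are abbreviated assumptions, not the committed implementation:

```go
// Sketch of the intended call order after this change.
for _, chunk := range fileChunks { // chunks produced for one file
    if !s.chunkExists(chunk.Hash) {
        // No per-chunk transaction any more: the insert happens when the
        // blob containing this chunk is finalized.
        s.packer.AddPendingChunk(chunk.Hash, chunk.Size)
        s.addKnownChunk(chunk.Hash)
    }
    err := s.packer.AddChunk(&blob.ChunkRef{Hash: chunk.Hash, Data: chunk.Data})
    if err == blob.ErrBlobSizeLimitExceeded {
        // FinalizeBlob commits pending chunks + blob_chunks in one transaction,
        // then the chunk is retried against a fresh blob.
        if err := s.packer.FinalizeBlob(); err != nil {
            return err
        }
        err = s.packer.AddChunk(&blob.ChunkRef{Hash: chunk.Hash, Data: chunk.Data})
    }
    if err != nil {
        return err
    }
}
```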
@@ -6,8 +6,6 @@ import (
     "fmt"
     "io"
     "os"
-
-    "github.com/jotfs/fastcdc-go"
 )
 
 // Chunk represents a single chunk of data produced by the content-defined chunking algorithm.
@@ -48,16 +46,8 @@ func NewChunker(avgChunkSize int64) *Chunker {
 // reasonably sized inputs. For large files or streams, use ChunkReaderStreaming instead.
 // Returns an error if chunking fails or if reading from the input fails.
 func (c *Chunker) ChunkReader(r io.Reader) ([]Chunk, error) {
-    opts := fastcdc.Options{
-        MinSize:     c.minChunkSize,
-        AverageSize: c.avgChunkSize,
-        MaxSize:     c.maxChunkSize,
-    }
-
-    chunker, err := fastcdc.NewChunker(r, opts)
-    if err != nil {
-        return nil, fmt.Errorf("creating chunker: %w", err)
-    }
+    chunker := AcquireReusableChunker(r, c.minChunkSize, c.avgChunkSize, c.maxChunkSize)
+    defer chunker.Release()
 
     var chunks []Chunk
     offset := int64(0)
@@ -74,7 +64,7 @@ func (c *Chunker) ChunkReader(r io.Reader) ([]Chunk, error) {
         // Calculate hash
         hash := sha256.Sum256(chunk.Data)
 
-        // Make a copy of the data since FastCDC reuses the buffer
+        // Make a copy of the data since the chunker reuses the buffer
         chunkData := make([]byte, len(chunk.Data))
         copy(chunkData, chunk.Data)
 
@@ -107,16 +97,8 @@ func (c *Chunker) ChunkReaderStreaming(r io.Reader, callback ChunkCallback) (str
     fileHasher := sha256.New()
     teeReader := io.TeeReader(r, fileHasher)
 
-    opts := fastcdc.Options{
-        MinSize:     c.minChunkSize,
-        AverageSize: c.avgChunkSize,
-        MaxSize:     c.maxChunkSize,
-    }
-
-    chunker, err := fastcdc.NewChunker(teeReader, opts)
-    if err != nil {
-        return "", fmt.Errorf("creating chunker: %w", err)
-    }
+    chunker := AcquireReusableChunker(teeReader, c.minChunkSize, c.avgChunkSize, c.maxChunkSize)
+    defer chunker.Release()
 
     offset := int64(0)
 
@@ -132,13 +114,12 @@ func (c *Chunker) ChunkReaderStreaming(r io.Reader, callback ChunkCallback) (str
         // Calculate chunk hash
         hash := sha256.Sum256(chunk.Data)
 
-        // Make a copy of the data since FastCDC reuses the buffer
-        chunkData := make([]byte, len(chunk.Data))
-        copy(chunkData, chunk.Data)
+        // Pass the data directly - caller must process it before we call Next() again
+        // (chunker reuses its internal buffer, but since we process synchronously
+        // and completely before continuing, no copy is needed)
 
         if err := callback(Chunk{
             Hash: hex.EncodeToString(hash[:]),
-            Data: chunkData,
+            Data: chunk.Data,
             Offset: offset,
             Size: int64(len(chunk.Data)),
         }); err != nil {

internal/chunker/fastcdc.go (new file, 265 lines)
@@ -0,0 +1,265 @@
package chunker

import (
    "io"
    "math"
    "sync"
)

// ReusableChunker implements FastCDC with reusable buffers to minimize allocations.
// Unlike the upstream fastcdc-go library which allocates a new buffer per file,
// this implementation uses sync.Pool to reuse buffers across files.
type ReusableChunker struct {
    minSize  int
    maxSize  int
    normSize int
    bufSize  int

    maskS uint64
    maskL uint64

    rd io.Reader

    buf    []byte
    cursor int
    offset int
    eof    bool
}

// reusableChunkerPool pools ReusableChunker instances to avoid allocations.
var reusableChunkerPool = sync.Pool{
    New: func() interface{} {
        return &ReusableChunker{}
    },
}

// bufferPools contains pools for different buffer sizes.
// Key is the buffer size.
var bufferPools = sync.Map{}

func getBuffer(size int) []byte {
    poolI, _ := bufferPools.LoadOrStore(size, &sync.Pool{
        New: func() interface{} {
            buf := make([]byte, size)
            return &buf
        },
    })
    pool := poolI.(*sync.Pool)
    return *pool.Get().(*[]byte)
}

func putBuffer(buf []byte) {
    size := cap(buf)
    poolI, ok := bufferPools.Load(size)
    if ok {
        pool := poolI.(*sync.Pool)
        b := buf[:size]
        pool.Put(&b)
    }
}

// FastCDCChunk represents a chunk from the FastCDC algorithm.
type FastCDCChunk struct {
    Offset      int
    Length      int
    Data        []byte
    Fingerprint uint64
}

// AcquireReusableChunker gets a chunker from the pool and initializes it for the given reader.
func AcquireReusableChunker(rd io.Reader, minSize, avgSize, maxSize int) *ReusableChunker {
    c := reusableChunkerPool.Get().(*ReusableChunker)

    bufSize := maxSize * 2

    // Reuse buffer if it's the right size, otherwise get a new one
    if c.buf == nil || cap(c.buf) != bufSize {
        if c.buf != nil {
            putBuffer(c.buf)
        }
        c.buf = getBuffer(bufSize)
    } else {
        // Restore buffer to full capacity (may have been truncated by previous EOF)
        c.buf = c.buf[:cap(c.buf)]
    }

    bits := int(math.Round(math.Log2(float64(avgSize))))
    normalization := 2
    smallBits := bits + normalization
    largeBits := bits - normalization

    c.minSize = minSize
    c.maxSize = maxSize
    c.normSize = avgSize
    c.bufSize = bufSize
    c.maskS = (1 << smallBits) - 1
    c.maskL = (1 << largeBits) - 1
    c.rd = rd
    c.cursor = bufSize
    c.offset = 0
    c.eof = false

    return c
}

// Release returns the chunker to the pool for reuse.
func (c *ReusableChunker) Release() {
    c.rd = nil
    reusableChunkerPool.Put(c)
}

func (c *ReusableChunker) fillBuffer() error {
    n := len(c.buf) - c.cursor
    if n >= c.maxSize {
        return nil
    }

    // Move all data after the cursor to the start of the buffer
    copy(c.buf[:n], c.buf[c.cursor:])
    c.cursor = 0

    if c.eof {
        c.buf = c.buf[:n]
        return nil
    }

    // Restore buffer to full capacity for reading
    c.buf = c.buf[:c.bufSize]

    // Fill the rest of the buffer
    m, err := io.ReadFull(c.rd, c.buf[n:])
    if err == io.EOF || err == io.ErrUnexpectedEOF {
        c.buf = c.buf[:n+m]
        c.eof = true
    } else if err != nil {
        return err
    }
    return nil
}

// Next returns the next chunk or io.EOF when done.
// The returned Data slice is only valid until the next call to Next.
func (c *ReusableChunker) Next() (FastCDCChunk, error) {
    if err := c.fillBuffer(); err != nil {
        return FastCDCChunk{}, err
    }
    if len(c.buf) == 0 {
        return FastCDCChunk{}, io.EOF
    }

    length, fp := c.nextChunk(c.buf[c.cursor:])

    chunk := FastCDCChunk{
        Offset:      c.offset,
        Length:      length,
        Data:        c.buf[c.cursor : c.cursor+length],
        Fingerprint: fp,
    }

    c.cursor += length
    c.offset += chunk.Length

    return chunk, nil
}

func (c *ReusableChunker) nextChunk(data []byte) (int, uint64) {
    fp := uint64(0)
    i := c.minSize

    if len(data) <= c.minSize {
        return len(data), fp
    }

    n := min(len(data), c.maxSize)

    for ; i < min(n, c.normSize); i++ {
        fp = (fp << 1) + table[data[i]]
        if (fp & c.maskS) == 0 {
            return i + 1, fp
        }
    }

    for ; i < n; i++ {
        fp = (fp << 1) + table[data[i]]
        if (fp & c.maskL) == 0 {
            return i + 1, fp
        }
    }

    return i, fp
}

func min(a, b int) int {
    if a < b {
        return a
    }
    return b
}

// 256 random uint64s for the rolling hash function (from FastCDC paper)
var table = [256]uint64{
0xe80e8d55032474b3, 0x11b25b61f5924e15, 0x03aa5bd82a9eb669, 0xc45a153ef107a38c,
0xeac874b86f0f57b9, 0xa5ccedec95ec79c7, 0xe15a3320ad42ac0a, 0x5ed3583fa63cec15,
0xcd497bf624a4451d, 0xf9ade5b059683605, 0x773940c03fb11ca1, 0xa36b16e4a6ae15b2,
0x67afd1adb5a89eac, 0xc44c75ee32f0038e, 0x2101790f365c0967, 0x76415c64a222fc4a,
0x579929249a1e577a, 0xe4762fc41fdbf750, 0xea52198e57dfcdcc, 0xe2535aafe30b4281,
0xcb1a1bd6c77c9056, 0x5a1aa9bfc4612a62, 0x15a728aef8943eb5, 0x2f8f09738a8ec8d9,
0x200f3dec9fac8074, 0x0fa9a7b1e0d318df, 0x06c0804ffd0d8e3a, 0x630cbc412669dd25,
0x10e34f85f4b10285, 0x2a6fe8164b9b6410, 0xcacb57d857d55810, 0x77f8a3a36ff11b46,
0x66af517e0dc3003e, 0x76c073c789b4009a, 0x853230dbb529f22a, 0x1e9e9c09a1f77e56,
0x1e871223802ee65d, 0x37fe4588718ff813, 0x10088539f30db464, 0x366f7470b80b72d1,
0x33f2634d9a6b31db, 0xd43917751d69ea18, 0xa0f492bc1aa7b8de, 0x3f94e5a8054edd20,
0xedfd6e25eb8b1dbf, 0x759517a54f196a56, 0xe81d5006ec7b6b17, 0x8dd8385fa894a6b7,
0x45f4d5467b0d6f91, 0xa1f894699de22bc8, 0x33829d09ef93e0fe, 0x3e29e250caed603c,
0xf7382cba7f63a45e, 0x970f95412bb569d1, 0xc7fcea456d356b4b, 0x723042513f3e7a57,
0x17ae7688de3596f1, 0x27ac1fcd7cd23c1a, 0xf429beeb78b3f71f, 0xd0780692fb93a3f9,
0x9f507e28a7c9842f, 0x56001ad536e433ae, 0x7e1dd1ecf58be306, 0x15fee353aa233fc6,
0xb033a0730b7638e8, 0xeb593ad6bd2406d1, 0x7c86502574d0f133, 0xce3b008d4ccb4be7,
0xf8566e3d383594c8, 0xb2c261e9b7af4429, 0xf685e7e253799dbb, 0x05d33ed60a494cbc,
0xeaf88d55a4cb0d1a, 0x3ee9368a902415a1, 0x8980fe6a8493a9a4, 0x358ed008cb448631,
0xd0cb7e37b46824b8, 0xe9bc375c0bc94f84, 0xea0bf1d8e6b55bb3, 0xb66a60d0f9f6f297,
0x66db2cc4807b3758, 0x7e4e014afbca8b4d, 0xa5686a4938b0c730, 0xa5f0d7353d623316,
0x26e38c349242d5e8, 0xeeefa80a29858e30, 0x8915cb912aa67386, 0x4b957a47bfc420d4,
0xbb53d051a895f7e1, 0x09f5e3235f6911ce, 0x416b98e695cfb7ce, 0x97a08183344c5c86,
0xbf68e0791839a861, 0xea05dde59ed3ed56, 0x0ca732280beda160, 0xac748ed62fe7f4e2,
0xc686da075cf6e151, 0xe1ba5658f4af05c8, 0xe9ff09fbeb67cc35, 0xafaea9470323b28d,
0x0291e8db5bb0ac2a, 0x342072a9bbee77ae, 0x03147eed6b3d0a9c, 0x21379d4de31dbadb,
0x2388d965226fb986, 0x52c96988bfebabfa, 0xa6fc29896595bc2d, 0x38fa4af70aa46b8b,
0xa688dd13939421ee, 0x99d5275d9b1415da, 0x453d31bb4fe73631, 0xde51debc1fbe3356,
0x75a3c847a06c622f, 0xe80e32755d272579, 0x5444052250d8ec0d, 0x8f17dfda19580a3b,
0xf6b3e9363a185e42, 0x7a42adec6868732f, 0x32cb6a07629203a2, 0x1eca8957defe56d9,
0x9fa85e4bc78ff9ed, 0x20ff07224a499ca7, 0x3fa6295ff9682c70, 0xe3d5b1e3ce993eff,
0xa341209362e0b79a, 0x64bd9eae5712ffe8, 0xceebb537babbd12a, 0x5586ef404315954f,
0x46c3085c938ab51a, 0xa82ccb9199907cee, 0x8c51b6690a3523c8, 0xc4dbd4c9ae518332,
0x979898dbb23db7b2, 0x1b5b585e6f672a9d, 0xce284da7c4903810, 0x841166e8bb5f1c4f,
0xb7d884a3fceca7d0, 0xa76468f5a4572374, 0xc10c45f49ee9513d, 0x68f9a5663c1908c9,
0x0095a13476a6339d, 0xd1d7516ffbe9c679, 0xfd94ab0c9726f938, 0x627468bbdb27c959,
0xedc3f8988e4a8c9a, 0x58efd33f0dfaa499, 0x21e37d7e2ef4ac8b, 0x297f9ab5586259c6,
0xda3ba4dc6cb9617d, 0xae11d8d9de2284d2, 0xcfeed88cb3729865, 0xefc2f9e4f03e2633,
0x8226393e8f0855a4, 0xd6e25fd7acf3a767, 0x435784c3bfd6d14a, 0xf97142e6343fe757,
0xd73b9fe826352f85, 0x6c3ac444b5b2bd76, 0xd8e88f3e9fd4a3fd, 0x31e50875c36f3460,
0xa824f1bf88cf4d44, 0x54a4d2c8f5f25899, 0xbff254637ce3b1e6, 0xa02cfe92561b3caa,
0x7bedb4edee9f0af7, 0x879c0620ac49a102, 0xa12c4ccd23b332e7, 0x09a5ff47bf94ed1e,
0x7b62f43cd3046fa0, 0xaa3af0476b9c2fb9, 0x22e55301abebba8e, 0x3a6035c42747bd58,
0x1705373106c8ec07, 0xb1f660de828d0628, 0x065fe82d89ca563d, 0xf555c2d8074d516d,
0x6bb6c186b423ee99, 0x54a807be6f3120a8, 0x8a3c7fe2f88860b8, 0xbeffc344f5118e81,
0xd686e80b7d1bd268, 0x661aef4ef5e5e88b, 0x5bf256c654cd1dda, 0x9adb1ab85d7640f4,
0x68449238920833a2, 0x843279f4cebcb044, 0xc8710cdefa93f7bb, 0x236943294538f3e6,
0x80d7d136c486d0b4, 0x61653956b28851d3, 0x3f843be9a9a956b5, 0xf73cfbbf137987e5,
0xcf0cb6dee8ceac2c, 0x50c401f52f185cae, 0xbdbe89ce735c4c1c, 0xeef3ade9c0570bc7,
0xbe8b066f8f64cbf6, 0x5238d6131705dcb9, 0x20219086c950e9f6, 0x634468d9ed74de02,
0x0aba4b3d705c7fa5, 0x3374416f725a6672, 0xe7378bdf7beb3bc6, 0x0f7b6a1b1cee565b,
0x234e4c41b0c33e64, 0x4efa9a0c3f21fe28, 0x1167fc551643e514, 0x9f81a69d3eb01fa4,
0xdb75c22b12306ed0, 0xe25055d738fc9686, 0x9f9f167a3f8507bb, 0x195f8336d3fbe4d3,
0x8442b6feffdcb6f6, 0x1e07ed24746ffde9, 0x140e31462d555266, 0x8bd0ce515ae1406e,
0x2c0be0042b5584b3, 0x35a23d0e15d45a60, 0xc14f1ba147d9bc83, 0xbbf168691264b23f,
0xad2cc7b57e589ade, 0x9501963154c7815c, 0x9664afa6b8d67d47, 0x7f9e5101fea0a81c,
|
||||||
|
0x45ecffb610d25bfd, 0x3157f7aecf9b6ab3, 0xc43ca6f88d87501d, 0x9576ff838dee38dc,
|
||||||
|
0x93f21afe0ce1c7d7, 0xceac699df343d8f9, 0x2fec49e29f03398d, 0x8805ccd5730281ed,
|
||||||
|
0xf9fc16fc750a8e59, 0x35308cc771adf736, 0x4a57b7c9ee2b7def, 0x03a4c6cdc937a02a,
|
||||||
|
0x6c9a8a269fc8c4fc, 0x4681decec7a03f43, 0x342eecded1353ef9, 0x8be0552d8413a867,
|
||||||
|
0xc7b4ac51beda8be8, 0xebcc64fb719842c0, 0xde8e4c7fb6d40c1c, 0xcc8263b62f9738b1,
|
||||||
|
0xd3cfc0f86511929a, 0x466024ce8bb226ea, 0x459ff690253a3c18, 0x98b27e9d91284c9c,
|
||||||
|
0x75c3ae8aa3af373d, 0xfbf8f8e79a866ffc, 0x32327f59d0662799, 0x8228b57e729e9830,
|
||||||
|
0x065ceb7a18381b58, 0xd2177671a31dc5ff, 0x90cd801f2f8701f9, 0x9d714428471c65fe,
|
||||||
|
}
|
||||||
@@ -8,7 +8,6 @@ import (
 	"git.eeqj.de/sneak/vaultik/internal/database"
 	"git.eeqj.de/sneak/vaultik/internal/globals"
 	"git.eeqj.de/sneak/vaultik/internal/log"
-	"git.eeqj.de/sneak/vaultik/internal/snapshot"
 	"git.eeqj.de/sneak/vaultik/internal/storage"
 	"github.com/spf13/cobra"
 	"go.uber.org/fx"
@@ -60,7 +59,6 @@ The age_secret_key must be configured in the config file for decryption.`,
 					Debug:   rootFlags.Debug,
 				},
 				Modules: []fx.Option{
-					snapshot.Module,
 					fx.Provide(fx.Annotate(
 						func(g *globals.Globals, cfg *config.Config, repos *database.Repositories,
 							storer storage.Storer, db *database.DB, shutdowner fx.Shutdowner) *RestoreApp {
@@ -113,7 +111,7 @@ The age_secret_key must be configured in the config file for decryption.`,
 func (app *RestoreApp) runRestore(ctx context.Context, snapshotID string, opts *RestoreOptions) error {
 	// Check for age_secret_key
 	if app.Config.AgeSecretKey == "" {
-		return fmt.Errorf("age_secret_key missing from config - required for restore")
+		return fmt.Errorf("age_secret_key required for restore - set in config file or VAULTIK_AGE_SECRET_KEY environment variable")
 	}

 	log.Info("Starting restore operation",
@@ -24,6 +24,8 @@ func NewSnapshotCommand() *cobra.Command {
 	cmd.AddCommand(newSnapshotListCommand())
 	cmd.AddCommand(newSnapshotPurgeCommand())
 	cmd.AddCommand(newSnapshotVerifyCommand())
+	cmd.AddCommand(newSnapshotRemoveCommand())
+	cmd.AddCommand(newSnapshotPruneCommand())

 	return cmd
 }
@@ -281,3 +283,123 @@ func newSnapshotVerifyCommand() *cobra.Command {

 	return cmd
 }
+
+// newSnapshotRemoveCommand creates the 'snapshot remove' subcommand
+func newSnapshotRemoveCommand() *cobra.Command {
+    opts := &vaultik.RemoveOptions{}
+
+    cmd := &cobra.Command{
+        Use:     "remove <snapshot-id>",
+        Aliases: []string{"rm"},
+        Short:   "Remove a snapshot and its orphaned blobs",
+        Long: `Removes a snapshot and any blobs that are no longer referenced by other snapshots.
+
+This command downloads manifests from all other snapshots to determine which blobs
+are still in use, then deletes any blobs that would become orphaned.`,
+        Args: cobra.ExactArgs(1),
+        RunE: func(cmd *cobra.Command, args []string) error {
+            snapshotID := args[0]
+
+            // Use unified config resolution
+            configPath, err := ResolveConfigPath()
+            if err != nil {
+                return err
+            }
+
+            rootFlags := GetRootFlags()
+            return RunWithApp(cmd.Context(), AppOptions{
+                ConfigPath: configPath,
+                LogOptions: log.LogOptions{
+                    Verbose: rootFlags.Verbose,
+                    Debug:   rootFlags.Debug,
+                },
+                Modules: []fx.Option{},
+                Invokes: []fx.Option{
+                    fx.Invoke(func(v *vaultik.Vaultik, lc fx.Lifecycle) {
+                        lc.Append(fx.Hook{
+                            OnStart: func(ctx context.Context) error {
+                                go func() {
+                                    if _, err := v.RemoveSnapshot(snapshotID, opts); err != nil {
+                                        if err != context.Canceled {
+                                            log.Error("Failed to remove snapshot", "error", err)
+                                            os.Exit(1)
+                                        }
+                                    }
+                                    if err := v.Shutdowner.Shutdown(); err != nil {
+                                        log.Error("Failed to shutdown", "error", err)
+                                    }
+                                }()
+                                return nil
+                            },
+                            OnStop: func(ctx context.Context) error {
+                                v.Cancel()
+                                return nil
+                            },
+                        })
+                    }),
+                },
+            })
+        },
+    }
+
+    cmd.Flags().BoolVarP(&opts.Force, "force", "f", false, "Skip confirmation prompt")
+    cmd.Flags().BoolVar(&opts.DryRun, "dry-run", false, "Show what would be deleted without deleting")
+
+    return cmd
+}
+
+// newSnapshotPruneCommand creates the 'snapshot prune' subcommand
+func newSnapshotPruneCommand() *cobra.Command {
+    cmd := &cobra.Command{
+        Use:   "prune",
+        Short: "Remove orphaned data from local database",
+        Long: `Removes orphaned files, chunks, and blobs from the local database.
+
+This cleans up data that is no longer referenced by any snapshot, which can
+accumulate from incomplete backups or deleted snapshots.`,
+        Args: cobra.NoArgs,
+        RunE: func(cmd *cobra.Command, args []string) error {
+            // Use unified config resolution
+            configPath, err := ResolveConfigPath()
+            if err != nil {
+                return err
+            }
+
+            rootFlags := GetRootFlags()
+            return RunWithApp(cmd.Context(), AppOptions{
+                ConfigPath: configPath,
+                LogOptions: log.LogOptions{
+                    Verbose: rootFlags.Verbose,
+                    Debug:   rootFlags.Debug,
+                },
+                Modules: []fx.Option{},
+                Invokes: []fx.Option{
+                    fx.Invoke(func(v *vaultik.Vaultik, lc fx.Lifecycle) {
+                        lc.Append(fx.Hook{
+                            OnStart: func(ctx context.Context) error {
+                                go func() {
+                                    if _, err := v.PruneDatabase(); err != nil {
+                                        if err != context.Canceled {
+                                            log.Error("Failed to prune database", "error", err)
+                                            os.Exit(1)
+                                        }
+                                    }
+                                    if err := v.Shutdowner.Shutdown(); err != nil {
+                                        log.Error("Failed to shutdown", "error", err)
+                                    }
+                                }()
+                                return nil
+                            },
+                            OnStop: func(ctx context.Context) error {
+                                v.Cancel()
+                                return nil
+                            },
+                        })
+                    }),
+                },
+            })
+        },
+    }
+
+    return cmd
+}
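Both new subcommands hand the real work off to the application object once fx has wired it up. A minimal sketch of the calls the RunE handlers above ultimately make, assuming a constructed `*vaultik.Vaultik` named `v`; the snapshot ID below is hypothetical and the fx lifecycle plumbing is omitted:

```go
// Sketch only: mirrors the calls wired up in the commands above.
opts := &vaultik.RemoveOptions{
    Force:  false, // keep the confirmation prompt
    DryRun: true,  // report what would be deleted without deleting
}
if _, err := v.RemoveSnapshot("2024-01-01-120000-abcd", opts); err != nil { // hypothetical ID
    log.Error("remove failed", "error", err)
}
if _, err := v.PruneDatabase(); err != nil {
    log.Error("prune failed", "error", err)
}
```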
@@ -146,6 +146,11 @@ func Load(path string) (*Config, error) {
 		cfg.IndexPath = expandTilde(envIndexPath)
 	}

+	// Check for environment variable override for AgeSecretKey
+	if envAgeSecretKey := os.Getenv("VAULTIK_AGE_SECRET_KEY"); envAgeSecretKey != "" {
+		cfg.AgeSecretKey = envAgeSecretKey
+	}
+
 	// Get hostname if not set
 	if cfg.Hostname == "" {
 		hostname, err := os.Hostname()
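This override pairs with the new restore error message: the key no longer has to live in the config file. A minimal sketch of exercising the precedence, assuming the `config.Load` shown in the hunk above; the path and key value are placeholders:

```go
// Sketch: the environment variable wins over whatever the file contains.
os.Setenv("VAULTIK_AGE_SECRET_KEY", "AGE-SECRET-KEY-1EXAMPLEONLY") // placeholder value
cfg, err := config.Load("/home/user/etc/vaultik/config.yml")       // hypothetical path
if err != nil {
    panic(err)
}
fmt.Println(cfg.AgeSecretKey != "") // true even if age_secret_key is absent from the file
```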
@@ -132,3 +132,80 @@ func (r *ChunkFileRepository) DeleteByFileID(ctx context.Context, tx *sql.Tx, fi

 	return nil
 }
+
+// DeleteByFileIDs deletes all chunk_files for multiple files in a single statement.
+func (r *ChunkFileRepository) DeleteByFileIDs(ctx context.Context, tx *sql.Tx, fileIDs []string) error {
+    if len(fileIDs) == 0 {
+        return nil
+    }
+
+    // Batch at 500 to stay within SQLite's variable limit
+    const batchSize = 500
+
+    for i := 0; i < len(fileIDs); i += batchSize {
+        end := i + batchSize
+        if end > len(fileIDs) {
+            end = len(fileIDs)
+        }
+        batch := fileIDs[i:end]
+
+        query := "DELETE FROM chunk_files WHERE file_id IN (?" + repeatPlaceholder(len(batch)-1) + ")"
+        args := make([]interface{}, len(batch))
+        for j, id := range batch {
+            args[j] = id
+        }
+
+        var err error
+        if tx != nil {
+            _, err = tx.ExecContext(ctx, query, args...)
+        } else {
+            _, err = r.db.ExecWithLog(ctx, query, args...)
+        }
+        if err != nil {
+            return fmt.Errorf("batch deleting chunk_files: %w", err)
+        }
+    }
+
+    return nil
+}
+
+// CreateBatch inserts multiple chunk_files in a single statement for efficiency.
+func (r *ChunkFileRepository) CreateBatch(ctx context.Context, tx *sql.Tx, cfs []ChunkFile) error {
+    if len(cfs) == 0 {
+        return nil
+    }
+
+    // Each ChunkFile has 4 values, so batch at 200 to be safe with SQLite's variable limit
+    const batchSize = 200
+
+    for i := 0; i < len(cfs); i += batchSize {
+        end := i + batchSize
+        if end > len(cfs) {
+            end = len(cfs)
+        }
+        batch := cfs[i:end]
+
+        query := "INSERT INTO chunk_files (chunk_hash, file_id, file_offset, length) VALUES "
+        args := make([]interface{}, 0, len(batch)*4)
+        for j, cf := range batch {
+            if j > 0 {
+                query += ", "
+            }
+            query += "(?, ?, ?, ?)"
+            args = append(args, cf.ChunkHash, cf.FileID, cf.FileOffset, cf.Length)
+        }
+        query += " ON CONFLICT(chunk_hash, file_id) DO NOTHING"
+
+        var err error
+        if tx != nil {
+            _, err = tx.ExecContext(ctx, query, args...)
+        } else {
+            _, err = r.db.ExecWithLog(ctx, query, args...)
+        }
+        if err != nil {
+            return fmt.Errorf("batch inserting chunk_files: %w", err)
+        }
+    }
+
+    return nil
+}
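The IN-clause deletes above rely on the `repeatPlaceholder` helper added to the database package later in this diff. A small sketch of how the statement text expands for a batch of three IDs (the IDs are hypothetical):

```go
// repeatPlaceholder(n) returns ", ?" repeated n times, so the first
// placeholder is written literally and the helper supplies the rest.
batch := []string{"id-a", "id-b", "id-c"} // hypothetical file IDs
query := "DELETE FROM chunk_files WHERE file_id IN (?" + repeatPlaceholder(len(batch)-1) + ")"
fmt.Println(query) // DELETE FROM chunk_files WHERE file_id IN (?, ?, ?)
```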
@@ -36,26 +36,17 @@ type DB struct {
 }

 // New creates a new database connection at the specified path.
-// It automatically handles database recovery, creates the schema if needed,
-// and configures SQLite with appropriate settings for performance and reliability.
-// The database uses WAL mode for better concurrency and sets a busy timeout
-// to handle concurrent access gracefully.
-//
-// If the database appears locked, it will attempt recovery by removing stale
-// lock files and switching temporarily to TRUNCATE journal mode.
-//
-// New creates a new database connection at the specified path.
-// It automatically handles recovery from stale locks, creates the schema if needed,
-// and configures SQLite with WAL mode for better concurrency.
+// It creates the schema if needed and configures SQLite with WAL mode for
+// better concurrency. SQLite handles crash recovery automatically when
+// opening a database with journal/WAL files present.
 // The path parameter can be a file path for persistent storage or ":memory:"
 // for an in-memory database (useful for testing).
 func New(ctx context.Context, path string) (*DB, error) {
 	log.Debug("Opening database connection", "path", path)

-	// First, try to recover from any stale locks
-	if err := recoverDatabase(ctx, path); err != nil {
-		log.Warn("Failed to recover database", "error", err)
-	}
+	// Note: We do NOT delete journal/WAL files before opening.
+	// SQLite handles crash recovery automatically when the database is opened.
+	// Deleting these files would corrupt the database after an unclean shutdown.

 	// First attempt with standard WAL mode
 	log.Debug("Attempting to open database with WAL mode", "path", path)
@@ -156,62 +147,6 @@ func (db *DB) Close() error {
 	return nil
 }

-// recoverDatabase attempts to recover a locked database
-func recoverDatabase(ctx context.Context, path string) error {
-    // Check if database file exists
-    if _, err := os.Stat(path); os.IsNotExist(err) {
-        // No database file, nothing to recover
-        return nil
-    }
-
-    // Remove stale lock files
-    // SQLite creates -wal and -shm files for WAL mode
-    walPath := path + "-wal"
-    shmPath := path + "-shm"
-    journalPath := path + "-journal"
-
-    log.Info("Attempting database recovery", "path", path)
-
-    // Always remove lock files on startup to ensure clean state
-    removed := false
-
-    // Check for and remove journal file (from non-WAL mode)
-    if _, err := os.Stat(journalPath); err == nil {
-        log.Info("Found journal file, removing", "path", journalPath)
-        if err := os.Remove(journalPath); err != nil {
-            log.Warn("Failed to remove journal file", "error", err)
-        } else {
-            removed = true
-        }
-    }
-
-    // Remove WAL file
-    if _, err := os.Stat(walPath); err == nil {
-        log.Info("Found WAL file, removing", "path", walPath)
-        if err := os.Remove(walPath); err != nil {
-            log.Warn("Failed to remove WAL file", "error", err)
-        } else {
-            removed = true
-        }
-    }
-
-    // Remove SHM file
-    if _, err := os.Stat(shmPath); err == nil {
-        log.Info("Found shared memory file, removing", "path", shmPath)
-        if err := os.Remove(shmPath); err != nil {
-            log.Warn("Failed to remove shared memory file", "error", err)
-        } else {
-            removed = true
-        }
-    }
-
-    if removed {
-        log.Info("Database lock files removed")
-    }
-
-    return nil
-}
-
 // Conn returns the underlying *sql.DB connection.
 // This should be used sparingly and primarily for read operations.
 // For write operations, prefer using the ExecWithLog method.
@@ -270,6 +205,15 @@ func NewTestDB() (*DB, error) {
 	return New(context.Background(), ":memory:")
 }

+// repeatPlaceholder generates a string of ", ?" repeated n times for IN clause construction.
+// For example, repeatPlaceholder(2) returns ", ?, ?".
+func repeatPlaceholder(n int) string {
+	if n <= 0 {
+		return ""
+	}
+	return strings.Repeat(", ?", n)
+}
+
 // LogSQL logs SQL queries and their arguments when debug mode is enabled.
 // Debug mode is activated by setting the GODEBUG environment variable to include "vaultik".
 // This is useful for troubleshooting database operations and understanding query patterns.
@@ -157,6 +157,86 @@ func (r *FileChunkRepository) DeleteByFileID(ctx context.Context, tx *sql.Tx, fi
 	return nil
 }

+// DeleteByFileIDs deletes all chunks for multiple files in a single statement.
+func (r *FileChunkRepository) DeleteByFileIDs(ctx context.Context, tx *sql.Tx, fileIDs []string) error {
+    if len(fileIDs) == 0 {
+        return nil
+    }
+
+    // Batch at 500 to stay within SQLite's variable limit
+    const batchSize = 500
+
+    for i := 0; i < len(fileIDs); i += batchSize {
+        end := i + batchSize
+        if end > len(fileIDs) {
+            end = len(fileIDs)
+        }
+        batch := fileIDs[i:end]
+
+        query := "DELETE FROM file_chunks WHERE file_id IN (?" + repeatPlaceholder(len(batch)-1) + ")"
+        args := make([]interface{}, len(batch))
+        for j, id := range batch {
+            args[j] = id
+        }
+
+        var err error
+        if tx != nil {
+            _, err = tx.ExecContext(ctx, query, args...)
+        } else {
+            _, err = r.db.ExecWithLog(ctx, query, args...)
+        }
+        if err != nil {
+            return fmt.Errorf("batch deleting file_chunks: %w", err)
+        }
+    }
+
+    return nil
+}
+
+// CreateBatch inserts multiple file_chunks in a single statement for efficiency.
+// Batches are automatically split to stay within SQLite's variable limit.
+func (r *FileChunkRepository) CreateBatch(ctx context.Context, tx *sql.Tx, fcs []FileChunk) error {
+    if len(fcs) == 0 {
+        return nil
+    }
+
+    // SQLite has a limit on variables (typically 999 or 32766).
+    // Each FileChunk has 3 values, so batch at 300 to be safe.
+    const batchSize = 300
+
+    for i := 0; i < len(fcs); i += batchSize {
+        end := i + batchSize
+        if end > len(fcs) {
+            end = len(fcs)
+        }
+        batch := fcs[i:end]
+
+        // Build the query with multiple value sets
+        query := "INSERT INTO file_chunks (file_id, idx, chunk_hash) VALUES "
+        args := make([]interface{}, 0, len(batch)*3)
+        for j, fc := range batch {
+            if j > 0 {
+                query += ", "
+            }
+            query += "(?, ?, ?)"
+            args = append(args, fc.FileID, fc.Idx, fc.ChunkHash)
+        }
+        query += " ON CONFLICT(file_id, idx) DO NOTHING"
+
+        var err error
+        if tx != nil {
+            _, err = tx.ExecContext(ctx, query, args...)
+        } else {
+            _, err = r.db.ExecWithLog(ctx, query, args...)
+        }
+        if err != nil {
+            return fmt.Errorf("batch inserting file_chunks: %w", err)
+        }
+    }
+
+    return nil
+}
+
 // GetByFile is an alias for GetByPath for compatibility
 func (r *FileChunkRepository) GetByFile(ctx context.Context, path string) ([]*FileChunk, error) {
 	LogSQL("GetByFile", "Starting", path)
@@ -302,6 +302,55 @@ func (r *FileRepository) ListByPrefix(ctx context.Context, prefix string) ([]*Fi
 	return files, rows.Err()
 }

+// CreateBatch inserts or updates multiple files in a single statement for efficiency.
+// File IDs must be pre-generated before calling this method.
+func (r *FileRepository) CreateBatch(ctx context.Context, tx *sql.Tx, files []*File) error {
+    if len(files) == 0 {
+        return nil
+    }
+
+    // Each File has 9 values, so batch at 100 to be safe with SQLite's variable limit
+    const batchSize = 100
+
+    for i := 0; i < len(files); i += batchSize {
+        end := i + batchSize
+        if end > len(files) {
+            end = len(files)
+        }
+        batch := files[i:end]
+
+        query := `INSERT INTO files (id, path, mtime, ctime, size, mode, uid, gid, link_target) VALUES `
+        args := make([]interface{}, 0, len(batch)*9)
+        for j, f := range batch {
+            if j > 0 {
+                query += ", "
+            }
+            query += "(?, ?, ?, ?, ?, ?, ?, ?, ?)"
+            args = append(args, f.ID, f.Path, f.MTime.Unix(), f.CTime.Unix(), f.Size, f.Mode, f.UID, f.GID, f.LinkTarget)
+        }
+        query += ` ON CONFLICT(path) DO UPDATE SET
+            mtime = excluded.mtime,
+            ctime = excluded.ctime,
+            size = excluded.size,
+            mode = excluded.mode,
+            uid = excluded.uid,
+            gid = excluded.gid,
+            link_target = excluded.link_target`
+
+        var err error
+        if tx != nil {
+            _, err = tx.ExecContext(ctx, query, args...)
+        } else {
+            _, err = r.db.ExecWithLog(ctx, query, args...)
+        }
+        if err != nil {
+            return fmt.Errorf("batch inserting files: %w", err)
+        }
+    }
+
+    return nil
+}
+
 // DeleteOrphaned deletes files that are not referenced by any snapshot
 func (r *FileRepository) DeleteOrphaned(ctx context.Context) error {
 	query := `
@@ -28,6 +28,9 @@ CREATE TABLE IF NOT EXISTS file_chunks (
     FOREIGN KEY (chunk_hash) REFERENCES chunks(chunk_hash)
 );

+-- Index for efficient chunk lookups (used in orphan detection)
+CREATE INDEX IF NOT EXISTS idx_file_chunks_chunk_hash ON file_chunks(chunk_hash);
+
 -- Chunks table: stores unique content-defined chunks
 CREATE TABLE IF NOT EXISTS chunks (
     chunk_hash TEXT PRIMARY KEY,
@@ -56,6 +59,9 @@ CREATE TABLE IF NOT EXISTS blob_chunks (
     FOREIGN KEY (chunk_hash) REFERENCES chunks(chunk_hash)
 );

+-- Index for efficient chunk lookups (used in orphan detection)
+CREATE INDEX IF NOT EXISTS idx_blob_chunks_chunk_hash ON blob_chunks(chunk_hash);
+
 -- Chunk files table: reverse mapping of chunks to files
 CREATE TABLE IF NOT EXISTS chunk_files (
     chunk_hash TEXT NOT NULL,
@@ -67,6 +73,9 @@ CREATE TABLE IF NOT EXISTS chunk_files (
     FOREIGN KEY (file_id) REFERENCES files(id) ON DELETE CASCADE
 );

+-- Index for efficient file lookups (used in orphan detection)
+CREATE INDEX IF NOT EXISTS idx_chunk_files_file_id ON chunk_files(file_id);
+
 -- Snapshots table: tracks backup snapshots
 CREATE TABLE IF NOT EXISTS snapshots (
     id TEXT PRIMARY KEY,
@@ -96,6 +105,9 @@ CREATE TABLE IF NOT EXISTS snapshot_files (
     FOREIGN KEY (file_id) REFERENCES files(id)
 );

+-- Index for efficient file lookups (used in orphan detection)
+CREATE INDEX IF NOT EXISTS idx_snapshot_files_file_id ON snapshot_files(file_id);
+
 -- Snapshot blobs table: maps snapshots to blobs
 CREATE TABLE IF NOT EXISTS snapshot_blobs (
     snapshot_id TEXT NOT NULL,
@@ -106,6 +118,9 @@ CREATE TABLE IF NOT EXISTS snapshot_blobs (
     FOREIGN KEY (blob_id) REFERENCES blobs(id)
 );

+-- Index for efficient blob lookups (used in orphan detection)
+CREATE INDEX IF NOT EXISTS idx_snapshot_blobs_blob_id ON snapshot_blobs(blob_id);
+
 -- Uploads table: tracks blob upload metrics
 CREATE TABLE IF NOT EXISTS uploads (
     blob_hash TEXT PRIMARY KEY,
@@ -116,3 +131,6 @@ CREATE TABLE IF NOT EXISTS uploads (
     FOREIGN KEY (blob_hash) REFERENCES blobs(blob_hash),
     FOREIGN KEY (snapshot_id) REFERENCES snapshots(id)
 );
+
+-- Index for efficient snapshot lookups
+CREATE INDEX IF NOT EXISTS idx_uploads_snapshot_id ON uploads(snapshot_id);
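These indexes all cover the reverse lookups that orphan detection needs. As one illustration only (not necessarily the exact statement in the codebase), a DeleteOrphaned-style sweep of the files table filters on `snapshot_files.file_id`, which `idx_snapshot_files_file_id` now serves:

```go
// Illustrative sketch of the kind of query DeleteOrphaned runs; the new
// index lets the NOT IN subquery avoid a full scan of snapshot_files.
const deleteOrphanedFiles = `
    DELETE FROM files
    WHERE id NOT IN (SELECT file_id FROM snapshot_files)`
```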
@@ -289,6 +289,46 @@ func (r *SnapshotRepository) AddFileByID(ctx context.Context, tx *sql.Tx, snapsh
 	return nil
 }

+// AddFilesByIDBatch adds multiple files to a snapshot in batched inserts
+func (r *SnapshotRepository) AddFilesByIDBatch(ctx context.Context, tx *sql.Tx, snapshotID string, fileIDs []string) error {
+    if len(fileIDs) == 0 {
+        return nil
+    }
+
+    // Each entry has 2 values, so batch at 400 to be safe
+    const batchSize = 400
+
+    for i := 0; i < len(fileIDs); i += batchSize {
+        end := i + batchSize
+        if end > len(fileIDs) {
+            end = len(fileIDs)
+        }
+        batch := fileIDs[i:end]
+
+        query := "INSERT OR IGNORE INTO snapshot_files (snapshot_id, file_id) VALUES "
+        args := make([]interface{}, 0, len(batch)*2)
+        for j, fileID := range batch {
+            if j > 0 {
+                query += ", "
+            }
+            query += "(?, ?)"
+            args = append(args, snapshotID, fileID)
+        }
+
+        var err error
+        if tx != nil {
+            _, err = tx.ExecContext(ctx, query, args...)
+        } else {
+            _, err = r.db.ExecWithLog(ctx, query, args...)
+        }
+        if err != nil {
+            return fmt.Errorf("batch adding files to snapshot: %w", err)
+        }
+    }
+
+    return nil
+}
+
 // AddBlob adds a blob to a snapshot
 func (r *SnapshotRepository) AddBlob(ctx context.Context, tx *sql.Tx, snapshotID string, blobID string, blobHash string) error {
 	query := `
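The batch sizes across these repository methods all follow from SQLite's default limit of 999 bound variables per statement: 400 rows x 2 placeholders = 800, 300 x 3 = 900, 200 x 4 = 800, and 100 x 9 = 900, each comfortably under the limit.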
453 internal/snapshot/exclude_test.go (Normal file)
@@ -0,0 +1,453 @@
package snapshot_test

import (
    "context"
    "database/sql"
    "path/filepath"
    "testing"
    "time"

    "git.eeqj.de/sneak/vaultik/internal/database"
    "git.eeqj.de/sneak/vaultik/internal/log"
    "git.eeqj.de/sneak/vaultik/internal/snapshot"
    "github.com/spf13/afero"
    "github.com/stretchr/testify/require"
)

func setupExcludeTestFS(t *testing.T) afero.Fs {
    t.Helper()

    // Create in-memory filesystem
    fs := afero.NewMemMapFs()

    // Create test directory structure:
    // /backup/
    //   file1.txt (should be backed up)
    //   file2.log (should be excluded if *.log is in patterns)
    //   .git/
    //     config (should be excluded if .git is in patterns)
    //     objects/
    //       pack/
    //         data.pack (should be excluded if .git is in patterns)
    //   src/
    //     main.go (should be backed up)
    //     test.go (should be backed up)
    //   node_modules/
    //     package/
    //       index.js (should be excluded if node_modules is in patterns)
    //   cache/
    //     temp.dat (should be excluded if cache/ is in patterns)
    //   build/
    //     output.bin (should be excluded if build is in patterns)
    //   docs/
    //     readme.md (should be backed up)
    //   .DS_Store (should be excluded if .DS_Store is in patterns)
    //   thumbs.db (should be excluded if thumbs.db is in patterns)

    files := map[string]string{
        "/backup/file1.txt":                     "content1",
        "/backup/file2.log":                     "log content",
        "/backup/.git/config":                   "git config",
        "/backup/.git/objects/pack/data.pack":   "pack data",
        "/backup/src/main.go":                   "package main",
        "/backup/src/test.go":                   "package main_test",
        "/backup/node_modules/package/index.js": "module.exports = {}",
        "/backup/cache/temp.dat":                "cached data",
        "/backup/build/output.bin":              "binary data",
        "/backup/docs/readme.md":                "# Documentation",
        "/backup/.DS_Store":                     "ds store data",
        "/backup/thumbs.db":                     "thumbs data",
        "/backup/src/.hidden":                   "hidden file",
        "/backup/important.log.bak":             "backup of log",
    }

    testTime := time.Date(2024, 1, 1, 12, 0, 0, 0, time.UTC)
    for path, content := range files {
        dir := filepath.Dir(path)
        err := fs.MkdirAll(dir, 0755)
        require.NoError(t, err)
        err = afero.WriteFile(fs, path, []byte(content), 0644)
        require.NoError(t, err)
        err = fs.Chtimes(path, testTime, testTime)
        require.NoError(t, err)
    }

    return fs
}

func createTestScanner(t *testing.T, fs afero.Fs, excludePatterns []string) (*snapshot.Scanner, *database.Repositories, func()) {
    t.Helper()

    // Initialize logger
    log.Initialize(log.Config{})

    // Create test database
    db, err := database.NewTestDB()
    require.NoError(t, err)

    repos := database.NewRepositories(db)

    scanner := snapshot.NewScanner(snapshot.ScannerConfig{
        FS:               fs,
        ChunkSize:        64 * 1024,
        Repositories:     repos,
        MaxBlobSize:      1024 * 1024,
        CompressionLevel: 3,
        AgeRecipients:    []string{"age1ql3z7hjy54pw3hyww5ayyfg7zqgvc7w3j2elw8zmrj2kg5sfn9aqmcac8p"},
        Exclude:          excludePatterns,
    })

    cleanup := func() {
        _ = db.Close()
    }

    return scanner, repos, cleanup
}

func createSnapshotRecord(t *testing.T, ctx context.Context, repos *database.Repositories, snapshotID string) {
    t.Helper()
    err := repos.WithTx(ctx, func(ctx context.Context, tx *sql.Tx) error {
        snap := &database.Snapshot{
            ID:               snapshotID,
            Hostname:         "test-host",
            VaultikVersion:   "test",
            StartedAt:        time.Now(),
            CompletedAt:      nil,
            FileCount:        0,
            ChunkCount:       0,
            BlobCount:        0,
            TotalSize:        0,
            BlobSize:         0,
            CompressionRatio: 1.0,
        }
        return repos.Snapshots.Create(ctx, tx, snap)
    })
    require.NoError(t, err)
}

func TestExcludePatterns_ExcludeGitDirectory(t *testing.T) {
    fs := setupExcludeTestFS(t)
    scanner, repos, cleanup := createTestScanner(t, fs, []string{".git"})
    defer cleanup()
    require.NotNil(t, scanner)

    ctx := context.Background()
    createSnapshotRecord(t, ctx, repos, "test-snapshot")

    result, err := scanner.Scan(ctx, "/backup", "test-snapshot")
    require.NoError(t, err)

    // Should have scanned files but NOT .git directory contents
    // Expected: file1.txt, file2.log, src/main.go, src/test.go, node_modules/package/index.js,
    //           cache/temp.dat, build/output.bin, docs/readme.md, .DS_Store, thumbs.db,
    //           src/.hidden, important.log.bak
    // Excluded: .git/config, .git/objects/pack/data.pack
    require.Equal(t, 12, result.FilesScanned, "Should exclude .git directory contents")
}

func TestExcludePatterns_ExcludeByExtension(t *testing.T) {
    fs := setupExcludeTestFS(t)
    scanner, repos, cleanup := createTestScanner(t, fs, []string{"*.log"})
    defer cleanup()
    require.NotNil(t, scanner)

    ctx := context.Background()
    createSnapshotRecord(t, ctx, repos, "test-snapshot")

    result, err := scanner.Scan(ctx, "/backup", "test-snapshot")
    require.NoError(t, err)

    // Should exclude file2.log but NOT important.log.bak (different extension)
    // Total files: 14, excluded: 1 (file2.log)
    require.Equal(t, 13, result.FilesScanned, "Should exclude *.log files")
}

func TestExcludePatterns_ExcludeNodeModules(t *testing.T) {
    fs := setupExcludeTestFS(t)
    scanner, repos, cleanup := createTestScanner(t, fs, []string{"node_modules"})
    defer cleanup()
    require.NotNil(t, scanner)

    ctx := context.Background()
    createSnapshotRecord(t, ctx, repos, "test-snapshot")

    result, err := scanner.Scan(ctx, "/backup", "test-snapshot")
    require.NoError(t, err)

    // Should exclude node_modules/package/index.js
    // Total files: 14, excluded: 1
    require.Equal(t, 13, result.FilesScanned, "Should exclude node_modules directory")
}

func TestExcludePatterns_MultiplePatterns(t *testing.T) {
    fs := setupExcludeTestFS(t)
    scanner, repos, cleanup := createTestScanner(t, fs, []string{".git", "node_modules", "*.log", ".DS_Store", "thumbs.db", "cache", "build"})
    defer cleanup()
    require.NotNil(t, scanner)

    ctx := context.Background()
    createSnapshotRecord(t, ctx, repos, "test-snapshot")

    result, err := scanner.Scan(ctx, "/backup", "test-snapshot")
    require.NoError(t, err)

    // Should only have: file1.txt, src/main.go, src/test.go, docs/readme.md, src/.hidden, important.log.bak
    // Excluded: .git/*, node_modules/*, *.log (file2.log), .DS_Store, thumbs.db, cache/*, build/*
    require.Equal(t, 6, result.FilesScanned, "Should exclude multiple patterns")
}

func TestExcludePatterns_NoExclusions(t *testing.T) {
    fs := setupExcludeTestFS(t)
    scanner, repos, cleanup := createTestScanner(t, fs, []string{})
    defer cleanup()
    require.NotNil(t, scanner)

    ctx := context.Background()
    createSnapshotRecord(t, ctx, repos, "test-snapshot")

    result, err := scanner.Scan(ctx, "/backup", "test-snapshot")
    require.NoError(t, err)

    // Should scan all 14 files
    require.Equal(t, 14, result.FilesScanned, "Should scan all files when no exclusions")
}

func TestExcludePatterns_ExcludeHiddenFiles(t *testing.T) {
    fs := setupExcludeTestFS(t)
    scanner, repos, cleanup := createTestScanner(t, fs, []string{".*"})
    defer cleanup()
    require.NotNil(t, scanner)

    ctx := context.Background()
    createSnapshotRecord(t, ctx, repos, "test-snapshot")

    result, err := scanner.Scan(ctx, "/backup", "test-snapshot")
    require.NoError(t, err)

    // Should exclude: .git/*, .DS_Store, src/.hidden
    // Total files: 14, excluded: 4 (.git/config, .git/objects/pack/data.pack, .DS_Store, src/.hidden)
    require.Equal(t, 10, result.FilesScanned, "Should exclude hidden files and directories")
}

func TestExcludePatterns_DoubleStarGlob(t *testing.T) {
    fs := setupExcludeTestFS(t)
    scanner, repos, cleanup := createTestScanner(t, fs, []string{"**/*.pack"})
    defer cleanup()
    require.NotNil(t, scanner)

    ctx := context.Background()
    createSnapshotRecord(t, ctx, repos, "test-snapshot")

    result, err := scanner.Scan(ctx, "/backup", "test-snapshot")
    require.NoError(t, err)

    // Should exclude .git/objects/pack/data.pack
    // Total files: 14, excluded: 1
    require.Equal(t, 13, result.FilesScanned, "Should exclude **/*.pack files")
}

func TestExcludePatterns_ExactFileName(t *testing.T) {
    fs := setupExcludeTestFS(t)
    scanner, repos, cleanup := createTestScanner(t, fs, []string{"thumbs.db", ".DS_Store"})
    defer cleanup()
    require.NotNil(t, scanner)

    ctx := context.Background()
    createSnapshotRecord(t, ctx, repos, "test-snapshot")

    result, err := scanner.Scan(ctx, "/backup", "test-snapshot")
    require.NoError(t, err)

    // Should exclude thumbs.db and .DS_Store
    // Total files: 14, excluded: 2
    require.Equal(t, 12, result.FilesScanned, "Should exclude exact file names")
}

func TestExcludePatterns_CaseSensitive(t *testing.T) {
    // Pattern matching should be case-sensitive
    fs := setupExcludeTestFS(t)
    scanner, repos, cleanup := createTestScanner(t, fs, []string{"THUMBS.DB"})
    defer cleanup()
    require.NotNil(t, scanner)

    ctx := context.Background()
    createSnapshotRecord(t, ctx, repos, "test-snapshot")

    result, err := scanner.Scan(ctx, "/backup", "test-snapshot")
    require.NoError(t, err)

    // Case-sensitive matching: THUMBS.DB should NOT match thumbs.db
    // All 14 files should be scanned
    require.Equal(t, 14, result.FilesScanned, "Pattern matching should be case-sensitive")
}

func TestExcludePatterns_DirectoryWithTrailingSlash(t *testing.T) {
    fs := setupExcludeTestFS(t)
    // Some users might add trailing slashes to directory patterns
    scanner, repos, cleanup := createTestScanner(t, fs, []string{"cache/", "build/"})
    defer cleanup()
    require.NotNil(t, scanner)

    ctx := context.Background()
    createSnapshotRecord(t, ctx, repos, "test-snapshot")

    result, err := scanner.Scan(ctx, "/backup", "test-snapshot")
    require.NoError(t, err)

    // Should exclude cache/temp.dat and build/output.bin
    // Total files: 14, excluded: 2
    require.Equal(t, 12, result.FilesScanned, "Should handle directory patterns with trailing slashes")
}

func TestExcludePatterns_PatternInSubdirectory(t *testing.T) {
    fs := setupExcludeTestFS(t)
    // Exclude .hidden file specifically in src directory
    scanner, repos, cleanup := createTestScanner(t, fs, []string{"src/.hidden"})
    defer cleanup()
    require.NotNil(t, scanner)

    ctx := context.Background()
    createSnapshotRecord(t, ctx, repos, "test-snapshot")

    result, err := scanner.Scan(ctx, "/backup", "test-snapshot")
    require.NoError(t, err)

    // Should exclude only src/.hidden
    // Total files: 14, excluded: 1
    require.Equal(t, 13, result.FilesScanned, "Should exclude specific subdirectory files")
}

// setupAnchoredTestFS creates a filesystem for testing anchored patterns
// Source dir: /backup
// Structure:
//
//	/backup/
//	  projectname/
//	    file.txt (should be excluded with /projectname)
//	  otherproject/
//	    projectname/
//	      file.txt (should NOT be excluded with /projectname, only with projectname)
//	  src/
//	    file.go
func setupAnchoredTestFS(t *testing.T) afero.Fs {
    t.Helper()

    fs := afero.NewMemMapFs()

    files := map[string]string{
        "/backup/projectname/file.txt":              "root project file",
        "/backup/otherproject/projectname/file.txt": "nested project file",
        "/backup/src/file.go":                       "source file",
        "/backup/file.txt":                          "root file",
    }

    testTime := time.Date(2024, 1, 1, 12, 0, 0, 0, time.UTC)
    for path, content := range files {
        dir := filepath.Dir(path)
        err := fs.MkdirAll(dir, 0755)
        require.NoError(t, err)
        err = afero.WriteFile(fs, path, []byte(content), 0644)
        require.NoError(t, err)
        err = fs.Chtimes(path, testTime, testTime)
        require.NoError(t, err)
    }

    return fs
}

func TestExcludePatterns_AnchoredPattern(t *testing.T) {
    // Pattern starting with / should only match from root of source dir
    fs := setupAnchoredTestFS(t)
    scanner, repos, cleanup := createTestScanner(t, fs, []string{"/projectname"})
    defer cleanup()
    require.NotNil(t, scanner)

    ctx := context.Background()
    createSnapshotRecord(t, ctx, repos, "test-snapshot")

    result, err := scanner.Scan(ctx, "/backup", "test-snapshot")
    require.NoError(t, err)

    // /projectname should ONLY exclude /backup/projectname/file.txt (1 file)
    // /backup/otherproject/projectname/file.txt should NOT be excluded
    // Total files: 4, excluded: 1
    require.Equal(t, 3, result.FilesScanned, "Anchored pattern /projectname should only match at root of source dir")
}

func TestExcludePatterns_UnanchoredPattern(t *testing.T) {
    // Pattern without leading / should match anywhere in path
    fs := setupAnchoredTestFS(t)
    scanner, repos, cleanup := createTestScanner(t, fs, []string{"projectname"})
    defer cleanup()
    require.NotNil(t, scanner)

    ctx := context.Background()
    createSnapshotRecord(t, ctx, repos, "test-snapshot")

    result, err := scanner.Scan(ctx, "/backup", "test-snapshot")
    require.NoError(t, err)

    // projectname (without /) should exclude BOTH:
    // - /backup/projectname/file.txt
    // - /backup/otherproject/projectname/file.txt
    // Total files: 4, excluded: 2
    require.Equal(t, 2, result.FilesScanned, "Unanchored pattern should match anywhere in path")
}

func TestExcludePatterns_AnchoredPatternWithGlob(t *testing.T) {
    // Anchored pattern with glob
    fs := setupAnchoredTestFS(t)
    scanner, repos, cleanup := createTestScanner(t, fs, []string{"/src/*.go"})
    defer cleanup()
    require.NotNil(t, scanner)

    ctx := context.Background()
    createSnapshotRecord(t, ctx, repos, "test-snapshot")

    result, err := scanner.Scan(ctx, "/backup", "test-snapshot")
    require.NoError(t, err)

    // /src/*.go should exclude /backup/src/file.go
    // Total files: 4, excluded: 1
    require.Equal(t, 3, result.FilesScanned, "Anchored pattern with glob should work")
}

func TestExcludePatterns_AnchoredPatternFile(t *testing.T) {
    // Anchored pattern for exact file at root
    fs := setupAnchoredTestFS(t)
    scanner, repos, cleanup := createTestScanner(t, fs, []string{"/file.txt"})
    defer cleanup()
    require.NotNil(t, scanner)

    ctx := context.Background()
    createSnapshotRecord(t, ctx, repos, "test-snapshot")

    result, err := scanner.Scan(ctx, "/backup", "test-snapshot")
    require.NoError(t, err)

    // /file.txt should ONLY exclude /backup/file.txt
    // NOT /backup/projectname/file.txt or /backup/otherproject/projectname/file.txt
    // Total files: 4, excluded: 1
    require.Equal(t, 3, result.FilesScanned, "Anchored pattern for file should only match at root")
}

func TestExcludePatterns_UnanchoredPatternFile(t *testing.T) {
    // Unanchored pattern for file should match anywhere
    fs := setupAnchoredTestFS(t)
    scanner, repos, cleanup := createTestScanner(t, fs, []string{"file.txt"})
    defer cleanup()
    require.NotNil(t, scanner)

    ctx := context.Background()
    createSnapshotRecord(t, ctx, repos, "test-snapshot")

    result, err := scanner.Scan(ctx, "/backup", "test-snapshot")
    require.NoError(t, err)

    // file.txt should exclude ALL file.txt files:
    // - /backup/file.txt
    // - /backup/projectname/file.txt
    // - /backup/otherproject/projectname/file.txt
    // Total files: 4, excluded: 3
    require.Equal(t, 1, result.FilesScanned, "Unanchored pattern for file should match anywhere")
}
@@ -38,6 +38,7 @@ func provideScannerFactory(cfg *config.Config, repos *database.Repositories, sto
 			CompressionLevel: cfg.CompressionLevel,
 			AgeRecipients:    cfg.AgeRecipients,
 			EnableProgress:   params.EnableProgress,
+			Exclude:          cfg.Exclude,
 		})
 	}
 }
@@ -3,8 +3,10 @@ package snapshot
 import (
 	"context"
 	"database/sql"
+	"errors"
 	"fmt"
 	"os"
+	"path/filepath"
 	"strings"
 	"sync"
 	"time"
@@ -15,6 +17,8 @@ import (
 	"git.eeqj.de/sneak/vaultik/internal/log"
 	"git.eeqj.de/sneak/vaultik/internal/storage"
 	"github.com/dustin/go-humanize"
+	"github.com/gobwas/glob"
+	"github.com/google/uuid"
 	"github.com/spf13/afero"
 )

@@ -25,6 +29,20 @@ type FileToProcess struct {
 	File *database.File
 }

+// pendingFileData holds all data needed to commit a file to the database
+type pendingFileData struct {
+	file       *database.File
+	fileChunks []database.FileChunk
+	chunkFiles []database.ChunkFile
+}
+
+// compiledPattern holds a compiled glob pattern and whether it's anchored
+type compiledPattern struct {
+	pattern  glob.Glob
+	anchored bool // If true, only matches from root of source dir
+	original string
+}
+
 // Scanner scans directories and populates the database with file and chunk information
 type Scanner struct {
 	fs afero.Fs
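The compiledPattern values are produced by compileExcludePatterns, which NewScanner calls further down in this diff; its body is not visible in this excerpt. The following is only a sketch of a compile step consistent with the struct fields above and with the anchored/unanchored behaviour exercised in exclude_test.go:

```go
// Sketch, not the project's actual implementation: patterns beginning with
// "/" are anchored to the scan root, everything else may match at any depth.
// glob.Compile with '/' as separator gives gitignore-like matching.
func compileExcludePatterns(patterns []string) []compiledPattern {
    compiled := make([]compiledPattern, 0, len(patterns))
    for _, p := range patterns {
        anchored := strings.HasPrefix(p, "/")
        g, err := glob.Compile(strings.TrimPrefix(p, "/"), '/')
        if err != nil {
            log.Warn("ignoring invalid exclude pattern", "pattern", p, "error", err)
            continue
        }
        compiled = append(compiled, compiledPattern{pattern: g, anchored: anchored, original: p})
    }
    return compiled
}
```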
@@ -36,12 +54,24 @@ type Scanner struct {
 	compressionLevel int
 	ageRecipient     string
 	snapshotID       string // Current snapshot being processed
+	exclude          []string          // Glob patterns for files/directories to exclude
+	compiledExclude  []compiledPattern // Compiled glob patterns
 	progress         *ProgressReporter

 	// In-memory cache of known chunk hashes for fast existence checks
 	knownChunks   map[string]struct{}
 	knownChunksMu sync.RWMutex

+	// Pending chunk hashes - chunks that have been added to packer but not yet committed to DB
+	// When a blob finalizes, the committed chunks are removed from this set
+	pendingChunkHashes   map[string]struct{}
+	pendingChunkHashesMu sync.Mutex
+
+	// Pending file data buffer for batch insertion
+	// Files are flushed when all their chunks have been committed to DB
+	pendingFiles   []pendingFileData
+	pendingFilesMu sync.Mutex
+
 	// Mutex for coordinating blob creation
 	packerMu sync.Mutex // Blocks chunk production during blob creation

@@ -59,6 +89,7 @@ type ScannerConfig struct {
 	CompressionLevel int
 	AgeRecipients    []string // Optional, empty means no encryption
 	EnableProgress   bool     // Enable progress reporting
+	Exclude          []string // Glob patterns for files/directories to exclude
 }

 // ScanResult contains the results of a scan operation
@@ -102,6 +133,9 @@ func NewScanner(cfg ScannerConfig) *Scanner {
 		progress = NewProgressReporter()
 	}

+	// Compile exclude patterns
+	compiledExclude := compileExcludePatterns(cfg.Exclude)
+
 	return &Scanner{
 		fs:      cfg.FS,
 		chunker: chunker.NewChunker(cfg.ChunkSize),
@@ -111,7 +145,10 @@ func NewScanner(cfg ScannerConfig) *Scanner {
 		maxBlobSize:      cfg.MaxBlobSize,
 		compressionLevel: cfg.CompressionLevel,
 		ageRecipient:     strings.Join(cfg.AgeRecipients, ","),
+		exclude:          cfg.Exclude,
+		compiledExclude:  compiledExclude,
 		progress:         progress,
+		pendingChunkHashes: make(map[string]struct{}),
 	}
 }

@@ -286,6 +323,222 @@ func (s *Scanner) addKnownChunk(hash string) {
 	s.knownChunksMu.Unlock()
 }

+// addPendingChunkHash marks a chunk as pending (not yet committed to DB)
+func (s *Scanner) addPendingChunkHash(hash string) {
+    s.pendingChunkHashesMu.Lock()
+    s.pendingChunkHashes[hash] = struct{}{}
+    s.pendingChunkHashesMu.Unlock()
+}
+
+// removePendingChunkHashes removes committed chunk hashes from the pending set
+func (s *Scanner) removePendingChunkHashes(hashes []string) {
+    log.Debug("removePendingChunkHashes: starting", "count", len(hashes))
+    start := time.Now()
+    s.pendingChunkHashesMu.Lock()
+    for _, hash := range hashes {
+        delete(s.pendingChunkHashes, hash)
+    }
+    s.pendingChunkHashesMu.Unlock()
+    log.Debug("removePendingChunkHashes: done", "count", len(hashes), "duration", time.Since(start))
+}
+
+// isChunkPending returns true if the chunk is still pending (not yet committed to DB)
+func (s *Scanner) isChunkPending(hash string) bool {
+    s.pendingChunkHashesMu.Lock()
+    _, pending := s.pendingChunkHashes[hash]
+    s.pendingChunkHashesMu.Unlock()
+    return pending
+}
+
+// addPendingFile adds a file to the pending buffer
+// Files are NOT auto-flushed here - they are flushed when their chunks are committed
+// (in handleBlobReady after blob finalize)
+func (s *Scanner) addPendingFile(_ context.Context, data pendingFileData) {
+    s.pendingFilesMu.Lock()
+    s.pendingFiles = append(s.pendingFiles, data)
+    s.pendingFilesMu.Unlock()
+}
+
+// flushPendingFiles writes all pending files to the database in a single transaction
+func (s *Scanner) flushPendingFiles(ctx context.Context) error {
+    s.pendingFilesMu.Lock()
+    files := s.pendingFiles
+    s.pendingFiles = nil
+    s.pendingFilesMu.Unlock()
+
+    if len(files) == 0 {
+        return nil
+    }
+
+    return s.repos.WithTx(ctx, func(txCtx context.Context, tx *sql.Tx) error {
+        for _, data := range files {
+            // Create or update the file record
+            if err := s.repos.Files.Create(txCtx, tx, data.file); err != nil {
+                return fmt.Errorf("creating file record: %w", err)
+            }
+
+            // Delete any existing file_chunks and chunk_files for this file
+            if err := s.repos.FileChunks.DeleteByFileID(txCtx, tx, data.file.ID); err != nil {
+                return fmt.Errorf("deleting old file chunks: %w", err)
+            }
+            if err := s.repos.ChunkFiles.DeleteByFileID(txCtx, tx, data.file.ID); err != nil {
+                return fmt.Errorf("deleting old chunk files: %w", err)
+            }
+
+            // Create file-chunk mappings
+            for i := range data.fileChunks {
+                if err := s.repos.FileChunks.Create(txCtx, tx, &data.fileChunks[i]); err != nil {
+                    return fmt.Errorf("creating file chunk: %w", err)
+                }
+            }
+
+            // Create chunk-file mappings
+            for i := range data.chunkFiles {
+                if err := s.repos.ChunkFiles.Create(txCtx, tx, &data.chunkFiles[i]); err != nil {
+                    return fmt.Errorf("creating chunk file: %w", err)
+                }
+            }
+
+            // Add file to snapshot
+            if err := s.repos.Snapshots.AddFileByID(txCtx, tx, s.snapshotID, data.file.ID); err != nil {
+                return fmt.Errorf("adding file to snapshot: %w", err)
+            }
+        }
+        return nil
+    })
+}
+
+// flushAllPending flushes all pending files to the database
+func (s *Scanner) flushAllPending(ctx context.Context) error {
+    return s.flushPendingFiles(ctx)
+}
+
+// flushCompletedPendingFiles flushes only files whose chunks are all committed to DB
+// Files with pending chunks are kept in the queue for later flushing
+func (s *Scanner) flushCompletedPendingFiles(ctx context.Context) error {
+    flushStart := time.Now()
+    log.Debug("flushCompletedPendingFiles: starting")
+
+    log.Debug("flushCompletedPendingFiles: acquiring pendingFilesMu lock")
+    s.pendingFilesMu.Lock()
+    log.Debug("flushCompletedPendingFiles: acquired lock", "pending_files", len(s.pendingFiles))
+
+    // Separate files into complete (can flush) and incomplete (keep pending)
+    var canFlush []pendingFileData
+    var stillPending []pendingFileData
+
+    log.Debug("flushCompletedPendingFiles: checking which files can flush")
+    checkStart := time.Now()
+    for _, data := range s.pendingFiles {
+        allChunksCommitted := true
+        for _, fc := range data.fileChunks {
+            if s.isChunkPending(fc.ChunkHash) {
+                allChunksCommitted = false
+                break
+            }
+        }
+        if allChunksCommitted {
+            canFlush = append(canFlush, data)
+        } else {
+            stillPending = append(stillPending, data)
+        }
+    }
+    log.Debug("flushCompletedPendingFiles: check done", "duration", time.Since(checkStart), "can_flush", len(canFlush), "still_pending", len(stillPending))
+
+    s.pendingFiles = stillPending
+    s.pendingFilesMu.Unlock()
+    log.Debug("flushCompletedPendingFiles: released lock")
+
+    if len(canFlush) == 0 {
+        log.Debug("flushCompletedPendingFiles: nothing to flush")
+        return nil
+    }
+
+    log.Debug("Flushing completed files after blob finalize",
+        "files_to_flush", len(canFlush),
+        "files_still_pending", len(stillPending))
+
+    // Collect all data for batch operations
+    log.Debug("flushCompletedPendingFiles: collecting data for batch ops")
+    collectStart := time.Now()
+    var allFileChunks []database.FileChunk
+    var allChunkFiles []database.ChunkFile
+    var allFileIDs []string
+    var allFiles []*database.File
+
+    for _, data := range canFlush {
+        allFileChunks = append(allFileChunks, data.fileChunks...)
+        allChunkFiles = append(allChunkFiles, data.chunkFiles...)
+        allFileIDs = append(allFileIDs, data.file.ID)
+        allFiles = append(allFiles, data.file)
+    }
+    log.Debug("flushCompletedPendingFiles: collected data",
+        "duration", time.Since(collectStart),
+        "file_chunks", len(allFileChunks),
+        "chunk_files", len(allChunkFiles),
+        "files", len(allFiles))
+
+    // Flush the complete files using batch operations
+    log.Debug("flushCompletedPendingFiles: starting transaction")
|
||||||
|
txStart := time.Now()
|
||||||
|
err := s.repos.WithTx(ctx, func(txCtx context.Context, tx *sql.Tx) error {
|
||||||
|
log.Debug("flushCompletedPendingFiles: inside transaction")
|
||||||
|
|
||||||
|
// Batch delete old file_chunks and chunk_files
|
||||||
|
log.Debug("flushCompletedPendingFiles: deleting old file_chunks")
|
||||||
|
opStart := time.Now()
|
||||||
|
if err := s.repos.FileChunks.DeleteByFileIDs(txCtx, tx, allFileIDs); err != nil {
|
||||||
|
return fmt.Errorf("batch deleting old file chunks: %w", err)
|
||||||
|
}
|
||||||
|
log.Debug("flushCompletedPendingFiles: deleted file_chunks", "duration", time.Since(opStart))
|
||||||
|
|
||||||
|
log.Debug("flushCompletedPendingFiles: deleting old chunk_files")
|
||||||
|
opStart = time.Now()
|
||||||
|
if err := s.repos.ChunkFiles.DeleteByFileIDs(txCtx, tx, allFileIDs); err != nil {
|
||||||
|
return fmt.Errorf("batch deleting old chunk files: %w", err)
|
||||||
|
}
|
||||||
|
log.Debug("flushCompletedPendingFiles: deleted chunk_files", "duration", time.Since(opStart))
|
||||||
|
|
||||||
|
// Batch create/update file records
|
||||||
|
log.Debug("flushCompletedPendingFiles: creating files")
|
||||||
|
opStart = time.Now()
|
||||||
|
if err := s.repos.Files.CreateBatch(txCtx, tx, allFiles); err != nil {
|
||||||
|
return fmt.Errorf("batch creating file records: %w", err)
|
||||||
|
}
|
||||||
|
log.Debug("flushCompletedPendingFiles: created files", "duration", time.Since(opStart))
|
||||||
|
|
||||||
|
// Batch insert file_chunks
|
||||||
|
log.Debug("flushCompletedPendingFiles: inserting file_chunks")
|
||||||
|
opStart = time.Now()
|
||||||
|
if err := s.repos.FileChunks.CreateBatch(txCtx, tx, allFileChunks); err != nil {
|
||||||
|
return fmt.Errorf("batch creating file chunks: %w", err)
|
||||||
|
}
|
||||||
|
log.Debug("flushCompletedPendingFiles: inserted file_chunks", "duration", time.Since(opStart))
|
||||||
|
|
||||||
|
// Batch insert chunk_files
|
||||||
|
log.Debug("flushCompletedPendingFiles: inserting chunk_files")
|
||||||
|
opStart = time.Now()
|
||||||
|
if err := s.repos.ChunkFiles.CreateBatch(txCtx, tx, allChunkFiles); err != nil {
|
||||||
|
return fmt.Errorf("batch creating chunk files: %w", err)
|
||||||
|
}
|
||||||
|
log.Debug("flushCompletedPendingFiles: inserted chunk_files", "duration", time.Since(opStart))
|
||||||
|
|
||||||
|
// Batch add files to snapshot
|
||||||
|
log.Debug("flushCompletedPendingFiles: adding files to snapshot")
|
||||||
|
opStart = time.Now()
|
||||||
|
if err := s.repos.Snapshots.AddFilesByIDBatch(txCtx, tx, s.snapshotID, allFileIDs); err != nil {
|
||||||
|
return fmt.Errorf("batch adding files to snapshot: %w", err)
|
||||||
|
}
|
||||||
|
log.Debug("flushCompletedPendingFiles: added files to snapshot", "duration", time.Since(opStart))
|
||||||
|
|
||||||
|
log.Debug("flushCompletedPendingFiles: transaction complete")
|
||||||
|
return nil
|
||||||
|
})
|
||||||
|
log.Debug("flushCompletedPendingFiles: transaction done", "duration", time.Since(txStart))
|
||||||
|
log.Debug("flushCompletedPendingFiles: total duration", "duration", time.Since(flushStart))
|
||||||
|
return err
|
||||||
|
}
|
||||||
|
|
||||||
// ScanPhaseResult contains the results of the scan phase
|
// ScanPhaseResult contains the results of the scan phase
|
||||||
type ScanPhaseResult struct {
|
type ScanPhaseResult struct {
|
||||||
FilesToProcess []*FileToProcess
|
FilesToProcess []*FileToProcess
|
||||||
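
Illustrative aside (not part of the diff): the helpers above exist so that file_chunks rows never reference a chunk row that has not been committed yet, since chunk rows only reach the database when their blob finalizes. A minimal, self-contained sketch of that flush gate; FileChunk here is a stand-in for database.FileChunk and the map stands in for s.pendingChunkHashes:

```go
package main

import "fmt"

// FileChunk is a stand-in for database.FileChunk; only ChunkHash matters here.
type FileChunk struct{ ChunkHash string }

// canFlushFile mirrors the gate in flushCompletedPendingFiles: a file may be
// written to the DB only once none of its chunk hashes are still pending,
// because chunk rows are committed together with their finalized blob and
// file_chunks references chunks.
func canFlushFile(pending map[string]struct{}, chunks []FileChunk) bool {
	for _, fc := range chunks {
		if _, isPending := pending[fc.ChunkHash]; isPending {
			return false
		}
	}
	return true
}

func main() {
	pending := map[string]struct{}{"c2": {}}
	fmt.Println(canFlushFile(pending, []FileChunk{{"c1"}, {"c2"}})) // false: c2 not committed yet
	fmt.Println(canFlushFile(pending, []FileChunk{{"c1"}}))         // true
}
```
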
@@ -324,6 +577,14 @@ func (s *Scanner) scanPhase(ctx context.Context, path string, result *ScanResult
 		default:
 		}
 
+		// Check exclude patterns - for directories, skip the entire subtree
+		if s.shouldExclude(filePath, path) {
+			if info.IsDir() {
+				return filepath.SkipDir
+			}
+			return nil
+		}
+
 		// Skip non-regular files for processing (but still count them)
 		if !info.Mode().IsRegular() {
 			return nil
@@ -429,8 +690,21 @@ func (s *Scanner) checkFileInMemory(path string, info os.FileInfo, knownFiles ma
 		gid = stat.Gid()
 	}
 
-	// Create file record
+	// Check against in-memory map first to get existing ID if available
+	existingFile, exists := knownFiles[path]
+
+	// Create file record with ID set upfront
+	// For new files, generate UUID immediately so it's available for chunk associations
+	// For existing files, reuse the existing ID
+	var fileID string
+	if exists {
+		fileID = existingFile.ID
+	} else {
+		fileID = uuid.New().String()
+	}
+
 	file := &database.File{
+		ID:    fileID,
 		Path:  path,
 		MTime: info.ModTime(),
 		CTime: info.ModTime(), // afero doesn't provide ctime
@@ -440,16 +714,11 @@ func (s *Scanner) checkFileInMemory(path string, info os.FileInfo, knownFiles ma
 		GID:   gid,
 	}
 
-	// Check against in-memory map
-	existingFile, exists := knownFiles[path]
+	// New file - needs processing
 	if !exists {
-		// New file
 		return file, true
 	}
 
-	// Reuse existing ID
-	file.ID = existingFile.ID
-
 	// Check if file has changed
 	if existingFile.Size != file.Size ||
 		existingFile.MTime.Unix() != file.MTime.Unix() ||
@@ -542,6 +811,12 @@ func (s *Scanner) processPhase(ctx context.Context, filesToProcess []*FileToProc
 
 		// Process file in streaming fashion
 		if err := s.processFileStreaming(ctx, fileToProcess, result); err != nil {
+			// Handle files that were deleted between scan and process phases
+			if errors.Is(err, os.ErrNotExist) {
+				log.Warn("File was deleted during backup, skipping", "path", fileToProcess.Path)
+				result.FilesSkipped++
+				continue
+			}
 			return fmt.Errorf("processing file %s: %w", fileToProcess.Path, err)
 		}
 
@@ -585,7 +860,8 @@ func (s *Scanner) processPhase(ctx context.Context, filesToProcess []*FileToProc
 		}
 	}
 
-	// Final flush (outside any transaction)
+	// Final packer flush first - this commits remaining chunks to DB
+	// and handleBlobReady will flush files whose chunks are now committed
 	s.packerMu.Lock()
 	if err := s.packer.Flush(); err != nil {
 		s.packerMu.Unlock()
@@ -593,6 +869,12 @@ func (s *Scanner) processPhase(ctx context.Context, filesToProcess []*FileToProc
 	}
 	s.packerMu.Unlock()
 
+	// Flush any remaining pending files (e.g., files with only pre-existing chunks
+	// that didn't trigger a blob finalize)
+	if err := s.flushAllPending(ctx); err != nil {
+		return fmt.Errorf("flushing remaining pending files: %w", err)
+	}
+
 	// If no storage configured, store any remaining blobs locally
 	if s.storage == nil {
 		blobs := s.packer.GetFinishedBlobs()
@@ -739,7 +1021,25 @@ func (s *Scanner) handleBlobReady(blobWithReader *blob.BlobWithReader) error {
 		}
 	}
 
+	if err != nil {
 		return err
+	}
+
+	// Chunks from this blob are now committed to DB - remove from pending set
+	log.Debug("handleBlobReady: removing pending chunk hashes")
+	s.removePendingChunkHashes(blobWithReader.InsertedChunkHashes)
+	log.Debug("handleBlobReady: removed pending chunk hashes")
+
+	// Flush files whose chunks are now all committed
+	// This maintains database consistency after each blob
+	log.Debug("handleBlobReady: calling flushCompletedPendingFiles")
+	if err := s.flushCompletedPendingFiles(dbCtx); err != nil {
+		return fmt.Errorf("flushing completed files: %w", err)
+	}
+	log.Debug("handleBlobReady: flushCompletedPendingFiles returned")
+
+	log.Debug("handleBlobReady: complete")
+	return nil
 }
 
 // processFileStreaming processes a file by streaming chunks directly to the packer
@@ -779,23 +1079,14 @@ func (s *Scanner) processFileStreaming(ctx context.Context, fileToProcess *FileT
 		// Check if chunk already exists (fast in-memory lookup)
 		chunkExists := s.chunkExists(chunk.Hash)
 
-		// Store chunk if new
+		// Queue new chunks for batch insert when blob finalizes
+		// This dramatically reduces transaction overhead
 		if !chunkExists {
-			err := s.repos.WithTx(ctx, func(txCtx context.Context, tx *sql.Tx) error {
-				dbChunk := &database.Chunk{
-					ChunkHash: chunk.Hash,
-					Size:      chunk.Size,
-				}
-				if err := s.repos.Chunks.Create(txCtx, tx, dbChunk); err != nil {
-					return fmt.Errorf("creating chunk: %w", err)
-				}
-				return nil
-			})
-			if err != nil {
-				return fmt.Errorf("storing chunk: %w", err)
-			}
-			// Add to in-memory cache for fast duplicate detection
+			s.packer.AddPendingChunk(chunk.Hash, chunk.Size)
+			// Add to in-memory cache immediately for fast duplicate detection
 			s.addKnownChunk(chunk.Hash)
+			// Track as pending until blob finalizes and commits to DB
+			s.addPendingChunkHash(chunk.Hash)
 		}
 
 		// Track file chunk association for later storage
@@ -871,56 +1162,32 @@ func (s *Scanner) processFileStreaming(ctx context.Context, fileToProcess *FileT
 		"file_hash", fileHash,
 		"chunks", len(chunks))
 
-	// Store file record, chunk associations, and snapshot association in database
-	// This happens AFTER successful chunking to avoid orphaned records on interruption
-	err = s.repos.WithTx(ctx, func(txCtx context.Context, tx *sql.Tx) error {
-		// Create or update the file record
-		// Files.Create uses INSERT OR REPLACE, so it handles both new and changed files
-		if err := s.repos.Files.Create(txCtx, tx, fileToProcess.File); err != nil {
-			return fmt.Errorf("creating file record: %w", err)
+	// Build file data for batch insertion
+	// Update chunk associations with the file ID
+	fileChunks := make([]database.FileChunk, len(chunks))
+	chunkFiles := make([]database.ChunkFile, len(chunks))
+	for i, ci := range chunks {
+		fileChunks[i] = database.FileChunk{
+			FileID:    fileToProcess.File.ID,
+			Idx:       ci.fileChunk.Idx,
+			ChunkHash: ci.fileChunk.ChunkHash,
 		}
-
-		// Delete any existing file_chunks and chunk_files for this file
-		// This ensures old chunks are no longer associated when file content changes
-		if err := s.repos.FileChunks.DeleteByFileID(txCtx, tx, fileToProcess.File.ID); err != nil {
-			return fmt.Errorf("deleting old file chunks: %w", err)
-		}
-		if err := s.repos.ChunkFiles.DeleteByFileID(txCtx, tx, fileToProcess.File.ID); err != nil {
-			return fmt.Errorf("deleting old chunk files: %w", err)
-		}
-
-		// Update chunk associations with the file ID (now that we have it)
-		for i := range chunks {
-			chunks[i].fileChunk.FileID = fileToProcess.File.ID
-		}
-
-		for _, ci := range chunks {
-			// Create file-chunk mapping
-			if err := s.repos.FileChunks.Create(txCtx, tx, &ci.fileChunk); err != nil {
-				return fmt.Errorf("creating file chunk: %w", err)
-			}
-
-			// Create chunk-file mapping
-			chunkFile := &database.ChunkFile{
+		chunkFiles[i] = database.ChunkFile{
			ChunkHash:  ci.fileChunk.ChunkHash,
			FileID:     fileToProcess.File.ID,
			FileOffset: ci.offset,
			Length:     ci.size,
		}
-		if err := s.repos.ChunkFiles.Create(txCtx, tx, chunkFile); err != nil {
-			return fmt.Errorf("creating chunk file: %w", err)
-		}
 	}
 
-	// Add file to snapshot
-	if err := s.repos.Snapshots.AddFileByID(txCtx, tx, s.snapshotID, fileToProcess.File.ID); err != nil {
-		return fmt.Errorf("adding file to snapshot: %w", err)
-	}
-	return nil
+	// Queue file for batch insertion
+	// Files will be flushed when their chunks are committed (after blob finalize)
+	s.addPendingFile(ctx, pendingFileData{
+		file:       fileToProcess.File,
+		fileChunks: fileChunks,
+		chunkFiles: chunkFiles,
 	})
-	return err
+	return nil
 }
 
 // GetProgress returns the progress reporter for this scanner
@@ -960,6 +1227,105 @@ func (s *Scanner) detectDeletedFilesFromMap(ctx context.Context, knownFiles map[
 	return nil
 }
 
+// compileExcludePatterns compiles the exclude patterns into glob matchers
+func compileExcludePatterns(patterns []string) []compiledPattern {
+	var compiled []compiledPattern
+	for _, p := range patterns {
+		if p == "" {
+			continue
+		}
+
+		// Check if pattern is anchored (starts with /)
+		anchored := strings.HasPrefix(p, "/")
+		pattern := p
+		if anchored {
+			pattern = p[1:] // Remove leading /
+		}
+
+		// Remove trailing slash if present (directory indicator)
+		pattern = strings.TrimSuffix(pattern, "/")
+
+		// Compile the glob pattern
+		// For patterns without path separators, we need to match them as components
+		// e.g., ".git" should match ".git" anywhere in the path
+		g, err := glob.Compile(pattern, '/')
+		if err != nil {
+			log.Warn("Invalid exclude pattern, skipping", "pattern", p, "error", err)
+			continue
+		}
+
+		compiled = append(compiled, compiledPattern{
+			pattern:  g,
+			anchored: anchored,
+			original: p,
+		})
+	}
+	return compiled
+}
+
+// shouldExclude checks if a path should be excluded based on exclude patterns
+// filePath is the full path to the file
+// rootPath is the root of the backup source directory
+func (s *Scanner) shouldExclude(filePath, rootPath string) bool {
+	if len(s.compiledExclude) == 0 {
+		return false
+	}
+
+	// Get the relative path from root
+	relPath, err := filepath.Rel(rootPath, filePath)
+	if err != nil {
+		return false
+	}
+
+	// Never exclude the root directory itself
+	if relPath == "." {
+		return false
+	}
+
+	// Normalize path separators
+	relPath = filepath.ToSlash(relPath)
+
+	// Check each pattern
+	for _, cp := range s.compiledExclude {
+		if cp.anchored {
+			// Anchored pattern: must match from the root
+			// Match the relative path directly
+			if cp.pattern.Match(relPath) {
+				return true
+			}
+			// Also check if any prefix of the path matches (for directory patterns)
+			parts := strings.Split(relPath, "/")
+			for i := 1; i <= len(parts); i++ {
+				prefix := strings.Join(parts[:i], "/")
+				if cp.pattern.Match(prefix) {
+					return true
+				}
+			}
+		} else {
+			// Unanchored pattern: can match anywhere in path
+			// Check the full relative path
+			if cp.pattern.Match(relPath) {
+				return true
+			}
+			// Check each path component and subpath
+			parts := strings.Split(relPath, "/")
+			for i := range parts {
+				// Match individual component (e.g., ".git" matches ".git" directory)
+				if cp.pattern.Match(parts[i]) {
+					return true
+				}
+				// Match subpath from this component onwards
+				subpath := strings.Join(parts[i:], "/")
+				if cp.pattern.Match(subpath) {
+					return true
+				}
+			}
+		}
+	}
+
+	return false
+}
+
 // formatNumber formats a number with comma separators
 func formatNumber(n int) string {
 	if n < 1000 {
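
For illustration only (not part of the change): the anchored/unanchored split in shouldExclude is easiest to see with concrete matches. A small standalone sketch, assuming the matcher behind glob.Compile(pattern, '/') above is github.com/gobwas/glob, with hypothetical paths:

```go
package main

import (
	"fmt"

	"github.com/gobwas/glob"
)

func main() {
	// Unanchored pattern ".git": shouldExclude tests it against every path
	// component, so a .git directory is excluded at any depth.
	unanchored := glob.MustCompile(".git", '/')
	fmt.Println(unanchored.Match(".git"))     // true (matches the component)
	fmt.Println(unanchored.Match("src/x.go")) // false

	// Anchored pattern "/build" (leading slash stripped before compiling):
	// it is tested against the path relative to the backup root, so only a
	// top-level "build" matches, not "src/build", because '/' is a separator.
	anchored := glob.MustCompile("build", '/')
	fmt.Println(anchored.Match("build"))     // true
	fmt.Println(anchored.Match("src/build")) // false
}
```
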
@@ -46,6 +46,7 @@ import (
 	"io"
 	"os/exec"
 	"path/filepath"
+	"strings"
 	"time"
 
 	"git.eeqj.de/sneak/vaultik/internal/blobgen"
@@ -91,7 +92,12 @@ func (sm *SnapshotManager) SetFilesystem(fs afero.Fs) {
 
 // CreateSnapshot creates a new snapshot record in the database at the start of a backup
 func (sm *SnapshotManager) CreateSnapshot(ctx context.Context, hostname, version, gitRevision string) (string, error) {
-	snapshotID := fmt.Sprintf("%s-%s", hostname, time.Now().UTC().Format("20060102-150405Z"))
+	// Use short hostname (strip domain if present)
+	shortHostname := hostname
+	if idx := strings.Index(hostname, "."); idx != -1 {
+		shortHostname = hostname[:idx]
+	}
+	snapshotID := fmt.Sprintf("%s_%s", shortHostname, time.Now().UTC().Format("2006-01-02T15:04:05Z"))
 
 	snapshot := &database.Snapshot{
 		ID: snapshotID,
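
A quick illustration (not from the diff): with this change a snapshot ID is the short hostname joined by an underscore to a UTC timestamp in the "2006-01-02T15:04:05Z" layout. Hostname and time below are hypothetical:

```go
package main

import (
	"fmt"
	"time"
)

func main() {
	// Hypothetical inputs: hostname "backuphost.example.com" and a fixed UTC time.
	shortHostname := "backuphost" // domain part already stripped
	ts := time.Date(2024, 1, 15, 14, 30, 52, 0, time.UTC)
	id := fmt.Sprintf("%s_%s", shortHostname, ts.Format("2006-01-02T15:04:05Z"))
	fmt.Println(id) // backuphost_2024-01-15T14:30:52Z
}
```
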
@@ -688,15 +694,16 @@ func (sm *SnapshotManager) deleteSnapshot(ctx context.Context, snapshotID string
 
 	// Clean up orphaned data
 	log.Debug("Cleaning up orphaned records in main database")
-	if err := sm.cleanupOrphanedData(ctx); err != nil {
+	if err := sm.CleanupOrphanedData(ctx); err != nil {
 		return fmt.Errorf("cleaning up orphaned data: %w", err)
 	}
 
 	return nil
 }
 
-// cleanupOrphanedData removes files, chunks, and blobs that are no longer referenced by any snapshot
-func (sm *SnapshotManager) cleanupOrphanedData(ctx context.Context) error {
+// CleanupOrphanedData removes files, chunks, and blobs that are no longer referenced by any snapshot.
+// This should be called periodically to clean up data from deleted or incomplete snapshots.
+func (sm *SnapshotManager) CleanupOrphanedData(ctx context.Context) error {
 	// Order is important to respect foreign key constraints:
 	// 1. Delete orphaned files (will cascade delete file_chunks)
 	// 2. Delete orphaned blobs (will cascade delete blob_chunks for deleted blobs)
@@ -43,8 +43,12 @@ func (v *Vaultik) CreateSnapshot(opts *SnapshotCreateOptions) error {
 	// CRITICAL: This MUST succeed. If we fail to clean up incomplete snapshots,
 	// the deduplication logic will think files from the incomplete snapshot were
 	// already backed up and skip them, resulting in data loss.
-	if err := v.SnapshotManager.CleanupIncompleteSnapshots(v.ctx, hostname); err != nil {
-		return fmt.Errorf("cleanup incomplete snapshots: %w", err)
+	//
+	// Prune the database before starting: delete incomplete snapshots and orphaned data.
+	// This ensures the database is consistent before we start a new snapshot.
+	// Since we use locking, only one vaultik instance accesses the DB at a time.
+	if _, err := v.PruneDatabase(); err != nil {
+		return fmt.Errorf("prune database: %w", err)
 	}
 
 	if opts.Daemon {
@@ -633,7 +637,8 @@ func (v *Vaultik) deleteSnapshot(snapshotID string) error {
 		}
 	}
 
-	// Then, delete from local database
+	// Then, delete from local database (if we have a local database)
+	if v.Repositories != nil {
 		// Delete related records first to avoid foreign key constraints
 		if err := v.Repositories.Snapshots.DeleteSnapshotFiles(v.ctx, snapshotID); err != nil {
 			log.Error("Failed to delete snapshot files", "snapshot_id", snapshotID, "error", err)
@@ -649,6 +654,7 @@ func (v *Vaultik) deleteSnapshot(snapshotID string) error {
 		if err := v.Repositories.Snapshots.Delete(v.ctx, snapshotID); err != nil {
 			return fmt.Errorf("deleting snapshot from database: %w", err)
 		}
+	}
 
 	return nil
 }
@@ -699,3 +705,277 @@ func (v *Vaultik) syncWithRemote() error {
 
 	return nil
 }
+
+// RemoveOptions contains options for the snapshot remove command
+type RemoveOptions struct {
+	Force  bool
+	DryRun bool
+}
+
+// RemoveResult contains the result of a snapshot removal
+type RemoveResult struct {
+	SnapshotID   string
+	BlobsDeleted int
+	BytesFreed   int64
+	BlobsFailed  int
+}
+
+// RemoveSnapshot removes a snapshot and any blobs that become orphaned
+func (v *Vaultik) RemoveSnapshot(snapshotID string, opts *RemoveOptions) (*RemoveResult, error) {
+	log.Info("Starting snapshot removal", "snapshot_id", snapshotID)
+
+	result := &RemoveResult{
+		SnapshotID: snapshotID,
+	}
+
+	// Step 1: List all snapshots in storage
+	log.Info("Listing remote snapshots")
+	objectCh := v.Storage.ListStream(v.ctx, "metadata/")
+
+	var allSnapshotIDs []string
+	targetExists := false
+	for object := range objectCh {
+		if object.Err != nil {
+			return nil, fmt.Errorf("listing remote snapshots: %w", object.Err)
+		}
+
+		// Extract snapshot ID from paths like metadata/hostname-20240115-143052Z/
+		parts := strings.Split(object.Key, "/")
+		if len(parts) >= 2 && parts[0] == "metadata" && parts[1] != "" {
+			if strings.HasSuffix(object.Key, "/") || strings.Contains(object.Key, "/manifest.json.zst") {
+				sid := parts[1]
+				// Only add unique snapshot IDs
+				found := false
+				for _, id := range allSnapshotIDs {
+					if id == sid {
+						found = true
+						break
+					}
+				}
+				if !found {
+					allSnapshotIDs = append(allSnapshotIDs, sid)
+					if sid == snapshotID {
+						targetExists = true
+					}
+				}
+			}
+		}
+	}
+
+	if !targetExists {
+		return nil, fmt.Errorf("snapshot not found: %s", snapshotID)
+	}
+
+	log.Info("Found snapshots", "total", len(allSnapshotIDs))
+
+	// Step 2: Download target snapshot's manifest
+	log.Info("Downloading target manifest", "snapshot_id", snapshotID)
+	targetManifest, err := v.downloadManifest(snapshotID)
+	if err != nil {
+		return nil, fmt.Errorf("downloading target manifest: %w", err)
+	}
+
+	// Build set of target blob hashes with sizes
+	targetBlobs := make(map[string]int64) // hash -> size
+	for _, blob := range targetManifest.Blobs {
+		targetBlobs[blob.Hash] = blob.CompressedSize
+	}
+	log.Info("Target snapshot has blobs", "count", len(targetBlobs))
+
+	// Step 3: Download manifests from all OTHER snapshots to build "in-use" set
+	inUseBlobs := make(map[string]bool)
+	otherCount := 0
+
+	for _, sid := range allSnapshotIDs {
+		if sid == snapshotID {
+			continue // Skip target snapshot
+		}
+
+		log.Debug("Processing manifest", "snapshot_id", sid)
+		manifest, err := v.downloadManifest(sid)
+		if err != nil {
+			log.Error("Failed to download manifest", "snapshot_id", sid, "error", err)
+			continue
+		}
+
+		for _, blob := range manifest.Blobs {
+			inUseBlobs[blob.Hash] = true
+		}
+		otherCount++
+	}
+
+	log.Info("Processed other manifests", "count", otherCount, "in_use_blobs", len(inUseBlobs))
+
+	// Step 4: Find orphaned blobs (in target but not in use by others)
+	var orphanedBlobs []string
+	var totalSize int64
+	for hash, size := range targetBlobs {
+		if !inUseBlobs[hash] {
+			orphanedBlobs = append(orphanedBlobs, hash)
+			totalSize += size
+		}
+	}
+
+	log.Info("Found orphaned blobs",
+		"count", len(orphanedBlobs),
+		"total_size", humanize.Bytes(uint64(totalSize)),
+	)
+
+	// Show summary
+	_, _ = fmt.Fprintf(v.Stdout, "\nSnapshot: %s\n", snapshotID)
+	_, _ = fmt.Fprintf(v.Stdout, "Blobs in snapshot: %d\n", len(targetBlobs))
+	_, _ = fmt.Fprintf(v.Stdout, "Orphaned blobs to delete: %d (%s)\n", len(orphanedBlobs), humanize.Bytes(uint64(totalSize)))
+
+	if opts.DryRun {
+		_, _ = fmt.Fprintln(v.Stdout, "\n[Dry run - no changes made]")
+		return result, nil
+	}
+
+	// Confirm unless --force is used
+	if !opts.Force {
+		_, _ = fmt.Fprintf(v.Stdout, "\nDelete snapshot and %d orphaned blob(s)? [y/N] ", len(orphanedBlobs))
+		var confirm string
+		if _, err := fmt.Fscanln(v.Stdin, &confirm); err != nil {
+			_, _ = fmt.Fprintln(v.Stdout, "Cancelled")
+			return result, nil
+		}
+		if strings.ToLower(confirm) != "y" {
+			_, _ = fmt.Fprintln(v.Stdout, "Cancelled")
+			return result, nil
+		}
+	}
+
+	// Step 5: Delete orphaned blobs
+	if len(orphanedBlobs) > 0 {
+		log.Info("Deleting orphaned blobs")
+		for i, hash := range orphanedBlobs {
+			blobPath := fmt.Sprintf("blobs/%s/%s/%s", hash[:2], hash[2:4], hash)
+
+			if err := v.Storage.Delete(v.ctx, blobPath); err != nil {
+				log.Error("Failed to delete blob", "hash", hash, "error", err)
+				result.BlobsFailed++
+				continue
+			}
+
+			result.BlobsDeleted++
+			result.BytesFreed += targetBlobs[hash]
+
+			// Progress update every 100 blobs
+			if (i+1)%100 == 0 || i == len(orphanedBlobs)-1 {
+				log.Info("Deletion progress",
+					"deleted", i+1,
+					"total", len(orphanedBlobs),
+					"percent", fmt.Sprintf("%.1f%%", float64(i+1)/float64(len(orphanedBlobs))*100),
+				)
+			}
+		}
+	}
+
+	// Step 6: Delete snapshot metadata
+	log.Info("Deleting snapshot metadata")
+	if err := v.deleteSnapshot(snapshotID); err != nil {
+		return result, fmt.Errorf("deleting snapshot metadata: %w", err)
+	}
+
+	// Print summary
+	_, _ = fmt.Fprintf(v.Stdout, "\nRemoved snapshot %s\n", snapshotID)
+	_, _ = fmt.Fprintf(v.Stdout, " Blobs deleted: %d\n", result.BlobsDeleted)
+	_, _ = fmt.Fprintf(v.Stdout, " Storage freed: %s\n", humanize.Bytes(uint64(result.BytesFreed)))
+	if result.BlobsFailed > 0 {
+		_, _ = fmt.Fprintf(v.Stdout, " Blobs failed: %d\n", result.BlobsFailed)
+	}
+
+	return result, nil
+}
+
+// PruneResult contains statistics about the prune operation
+type PruneResult struct {
+	SnapshotsDeleted int64
+	FilesDeleted     int64
+	ChunksDeleted    int64
+	BlobsDeleted     int64
+}
+
+// PruneDatabase removes incomplete snapshots and orphaned files, chunks,
+// and blobs from the local database. This ensures database consistency
+// before starting a new backup or on-demand via the prune command.
+func (v *Vaultik) PruneDatabase() (*PruneResult, error) {
+	log.Info("Pruning database: removing incomplete snapshots and orphaned data")
+
+	result := &PruneResult{}
+
+	// First, delete any incomplete snapshots
+	incompleteSnapshots, err := v.Repositories.Snapshots.GetIncompleteSnapshots(v.ctx)
+	if err != nil {
+		return nil, fmt.Errorf("getting incomplete snapshots: %w", err)
+	}
+
+	for _, snapshot := range incompleteSnapshots {
+		log.Info("Deleting incomplete snapshot", "snapshot_id", snapshot.ID)
+		// Delete related records first
+		if err := v.Repositories.Snapshots.DeleteSnapshotFiles(v.ctx, snapshot.ID); err != nil {
+			log.Error("Failed to delete snapshot files", "snapshot_id", snapshot.ID, "error", err)
+		}
+		if err := v.Repositories.Snapshots.DeleteSnapshotBlobs(v.ctx, snapshot.ID); err != nil {
+			log.Error("Failed to delete snapshot blobs", "snapshot_id", snapshot.ID, "error", err)
+		}
+		if err := v.Repositories.Snapshots.DeleteSnapshotUploads(v.ctx, snapshot.ID); err != nil {
+			log.Error("Failed to delete snapshot uploads", "snapshot_id", snapshot.ID, "error", err)
+		}
+		if err := v.Repositories.Snapshots.Delete(v.ctx, snapshot.ID); err != nil {
+			log.Error("Failed to delete snapshot", "snapshot_id", snapshot.ID, "error", err)
+		} else {
+			result.SnapshotsDeleted++
+		}
+	}
+
+	// Get counts before cleanup for reporting
+	fileCountBefore, _ := v.getTableCount("files")
+	chunkCountBefore, _ := v.getTableCount("chunks")
+	blobCountBefore, _ := v.getTableCount("blobs")
+
+	// Run the cleanup
+	if err := v.SnapshotManager.CleanupOrphanedData(v.ctx); err != nil {
+		return nil, fmt.Errorf("cleanup orphaned data: %w", err)
+	}
+
+	// Get counts after cleanup
+	fileCountAfter, _ := v.getTableCount("files")
+	chunkCountAfter, _ := v.getTableCount("chunks")
+	blobCountAfter, _ := v.getTableCount("blobs")
+
+	result.FilesDeleted = fileCountBefore - fileCountAfter
+	result.ChunksDeleted = chunkCountBefore - chunkCountAfter
+	result.BlobsDeleted = blobCountBefore - blobCountAfter
+
+	log.Info("Prune complete",
+		"incomplete_snapshots", result.SnapshotsDeleted,
+		"orphaned_files", result.FilesDeleted,
+		"orphaned_chunks", result.ChunksDeleted,
+		"orphaned_blobs", result.BlobsDeleted,
+	)
+
+	// Print summary
+	_, _ = fmt.Fprintf(v.Stdout, "Prune complete:\n")
+	_, _ = fmt.Fprintf(v.Stdout, " Incomplete snapshots removed: %d\n", result.SnapshotsDeleted)
+	_, _ = fmt.Fprintf(v.Stdout, " Orphaned files removed: %d\n", result.FilesDeleted)
+	_, _ = fmt.Fprintf(v.Stdout, " Orphaned chunks removed: %d\n", result.ChunksDeleted)
+	_, _ = fmt.Fprintf(v.Stdout, " Orphaned blobs removed: %d\n", result.BlobsDeleted)
+
+	return result, nil
+}
+
+// getTableCount returns the count of rows in a table
+func (v *Vaultik) getTableCount(tableName string) (int64, error) {
+	if v.DB == nil {
+		return 0, nil
+	}
+
+	var count int64
+	query := fmt.Sprintf("SELECT COUNT(*) FROM %s", tableName)
+	err := v.DB.Conn().QueryRowContext(v.ctx, query).Scan(&count)
+	if err != nil {
+		return 0, err
+	}
+	return count, nil
+}
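
A small worked example (not from the diff): Step 5 above derives each blob's object key from the first two byte-pairs of its hex hash, giving a two-level directory fan-out. The hash below is hypothetical:

```go
package main

import "fmt"

func main() {
	// Hypothetical 64-hex-character blob hash.
	hash := "d3adbeef00112233445566778899aabbccddeeff00112233445566778899aabb"
	blobPath := fmt.Sprintf("blobs/%s/%s/%s", hash[:2], hash[2:4], hash)
	fmt.Println(blobPath) // prints "blobs/d3/ad/<full hash>"
}
```
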
@@ -1,6 +1,7 @@
 package vaultik
 
 import (
+	"bytes"
 	"context"
 	"fmt"
 	"io"
@@ -122,3 +123,34 @@ func (v *Vaultik) GetDecryptor() (*crypto.Decryptor, error) {
 func (v *Vaultik) GetFilesystem() afero.Fs {
 	return v.Fs
 }
+
+// TestVaultik wraps a Vaultik with captured stdout/stderr for testing
+type TestVaultik struct {
+	*Vaultik
+	Stdout *bytes.Buffer
+	Stderr *bytes.Buffer
+	Stdin  *bytes.Buffer
+}
+
+// NewForTesting creates a minimal Vaultik instance for testing purposes.
+// Only the Storage field is populated; other fields are nil.
+// Returns a TestVaultik that captures stdout/stderr in buffers.
+func NewForTesting(storage storage.Storer) *TestVaultik {
+	ctx, cancel := context.WithCancel(context.Background())
+	stdout := &bytes.Buffer{}
+	stderr := &bytes.Buffer{}
+	stdin := &bytes.Buffer{}
+	return &TestVaultik{
+		Vaultik: &Vaultik{
+			Storage: storage,
+			ctx:     ctx,
+			cancel:  cancel,
+			Stdout:  stdout,
+			Stderr:  stderr,
+			Stdin:   stdin,
+		},
+		Stdout: stdout,
+		Stderr: stderr,
+		Stdin:  stdin,
+	}
+}
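
A usage sketch (not from the diff) of the new test helper: only NewForTesting, TestVaultik, RemoveOptions, and RemoveSnapshot come from this change; newMemoryStorer is a hypothetical in-memory storage.Storer test double.

```go
package vaultik

import "testing"

// Sketch only: newMemoryStorer is a hypothetical storage.Storer for tests.
func TestRemoveSnapshotNotFound(t *testing.T) {
	tv := NewForTesting(newMemoryStorer(t))

	// With an empty store, RemoveSnapshot should fail with "snapshot not found";
	// anything the command prints is captured in tv.Stdout for assertions.
	_, err := tv.RemoveSnapshot("backuphost_2024-01-15T14:30:52Z", &RemoveOptions{DryRun: true})
	if err == nil {
		t.Fatalf("expected error, stdout: %q", tv.Stdout.String())
	}
}
```
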