Add exclude patterns, snapshot prune, and other improvements

- Implement exclude patterns with anchored pattern support:
  - Patterns starting with / are anchored: they match only from the root of the source directory
  - Unanchored patterns match anywhere in path
  - Support glob patterns (*.log, .*, **/*.pack)
  - Directory patterns skip entire subtrees
  - Add gobwas/glob dependency for pattern matching
  - Add 16 comprehensive tests for exclude functionality

- Add snapshot prune command to clean orphaned data:
  - Removes incomplete snapshots from database
  - Cleans orphaned files, chunks, and blobs
  - Runs automatically at backup start for consistency

- Add snapshot remove command for deleting snapshots

- Add VAULTIK_AGE_SECRET_KEY environment variable support

- Fix duplicate fx module provider in restore command

- Change snapshot ID format to hostname_YYYY-MM-DDTHH:MM:SSZ
This commit is contained in:
2026-01-01 05:42:56 -08:00
parent 05286bed01
commit 2afd54d693
23 changed files with 1769 additions and 98 deletions

View File

@@ -6,8 +6,6 @@ import (
"fmt"
"io"
"os"
"github.com/jotfs/fastcdc-go"
)
// Chunk represents a single chunk of data produced by the content-defined chunking algorithm.
@@ -48,16 +46,8 @@ func NewChunker(avgChunkSize int64) *Chunker {
// reasonably sized inputs. For large files or streams, use ChunkReaderStreaming instead.
// Returns an error if chunking fails or if reading from the input fails.
func (c *Chunker) ChunkReader(r io.Reader) ([]Chunk, error) {
opts := fastcdc.Options{
MinSize: c.minChunkSize,
AverageSize: c.avgChunkSize,
MaxSize: c.maxChunkSize,
}
chunker, err := fastcdc.NewChunker(r, opts)
if err != nil {
return nil, fmt.Errorf("creating chunker: %w", err)
}
chunker := AcquireReusableChunker(r, c.minChunkSize, c.avgChunkSize, c.maxChunkSize)
defer chunker.Release()
var chunks []Chunk
offset := int64(0)
@@ -74,7 +64,7 @@ func (c *Chunker) ChunkReader(r io.Reader) ([]Chunk, error) {
// Calculate hash
hash := sha256.Sum256(chunk.Data)
// Make a copy of the data since FastCDC reuses the buffer
// Make a copy of the data since the chunker reuses the buffer
chunkData := make([]byte, len(chunk.Data))
copy(chunkData, chunk.Data)
@@ -107,16 +97,8 @@ func (c *Chunker) ChunkReaderStreaming(r io.Reader, callback ChunkCallback) (str
fileHasher := sha256.New()
teeReader := io.TeeReader(r, fileHasher)
opts := fastcdc.Options{
MinSize: c.minChunkSize,
AverageSize: c.avgChunkSize,
MaxSize: c.maxChunkSize,
}
chunker, err := fastcdc.NewChunker(teeReader, opts)
if err != nil {
return "", fmt.Errorf("creating chunker: %w", err)
}
chunker := AcquireReusableChunker(teeReader, c.minChunkSize, c.avgChunkSize, c.maxChunkSize)
defer chunker.Release()
offset := int64(0)
@@ -132,13 +114,12 @@ func (c *Chunker) ChunkReaderStreaming(r io.Reader, callback ChunkCallback) (str
// Calculate chunk hash
hash := sha256.Sum256(chunk.Data)
// Make a copy of the data since FastCDC reuses the buffer
chunkData := make([]byte, len(chunk.Data))
copy(chunkData, chunk.Data)
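// Pass the chunker's buffer to the callback directly, without copying.
// The chunker reuses its internal buffer on the next call to Next(), so the
// callback must finish with chunk.Data (or copy it) before returning and must
// not retain the slice. The callback runs synchronously and to completion
// before we continue, so no copy is needed here.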
if err := callback(Chunk{
Hash: hex.EncodeToString(hash[:]),
Data: chunkData,
Data: chunk.Data,
Offset: offset,
Size: int64(len(chunk.Data)),
}); err != nil {