Add exclude patterns, snapshot prune, and other improvements
- Implement exclude patterns with anchored pattern support:
  - Patterns starting with `/` only match from the root of the source dir
  - Unanchored patterns match anywhere in the path
  - Support for glob patterns (`*.log`, `.*`, `**/*.pack`)
  - Directory patterns skip entire subtrees
  - Add gobwas/glob dependency for pattern matching
  - Add 16 comprehensive tests for exclude functionality
- Add snapshot prune command to clean orphaned data:
  - Removes incomplete snapshots from the database
  - Cleans orphaned files, chunks, and blobs
  - Runs automatically at backup start for consistency
- Add snapshot remove command for deleting snapshots
- Add VAULTIK_AGE_SECRET_KEY environment variable support
- Fix duplicate fx module provider in restore command
- Change snapshot ID format to hostname_YYYY-MM-DDTHH:MM:SSZ
This commit is contained in:
@@ -6,8 +6,6 @@ import (
|
||||
"fmt"
|
||||
"io"
|
||||
"os"
|
||||
|
||||
"github.com/jotfs/fastcdc-go"
|
||||
)
|
||||
|
||||
// Chunk represents a single chunk of data produced by the content-defined chunking algorithm.
|
||||
@@ -48,16 +46,8 @@ func NewChunker(avgChunkSize int64) *Chunker {
|
||||
// reasonably sized inputs. For large files or streams, use ChunkReaderStreaming instead.
|
||||
// Returns an error if chunking fails or if reading from the input fails.
|
||||
func (c *Chunker) ChunkReader(r io.Reader) ([]Chunk, error) {
|
||||
opts := fastcdc.Options{
|
||||
MinSize: c.minChunkSize,
|
||||
AverageSize: c.avgChunkSize,
|
||||
MaxSize: c.maxChunkSize,
|
||||
}
|
||||
|
||||
chunker, err := fastcdc.NewChunker(r, opts)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("creating chunker: %w", err)
|
||||
}
|
||||
chunker := AcquireReusableChunker(r, c.minChunkSize, c.avgChunkSize, c.maxChunkSize)
|
||||
defer chunker.Release()
|
||||
|
||||
var chunks []Chunk
|
||||
offset := int64(0)
|
||||
@@ -74,7 +64,7 @@ func (c *Chunker) ChunkReader(r io.Reader) ([]Chunk, error) {
|
||||
// Calculate hash
|
||||
hash := sha256.Sum256(chunk.Data)
|
||||
|
||||
// Make a copy of the data since FastCDC reuses the buffer
|
||||
// Make a copy of the data since the chunker reuses the buffer
|
||||
chunkData := make([]byte, len(chunk.Data))
|
||||
copy(chunkData, chunk.Data)
|
||||
|
||||
@@ -107,16 +97,8 @@ func (c *Chunker) ChunkReaderStreaming(r io.Reader, callback ChunkCallback) (str
|
||||
fileHasher := sha256.New()
|
||||
teeReader := io.TeeReader(r, fileHasher)
|
||||
|
||||
opts := fastcdc.Options{
|
||||
MinSize: c.minChunkSize,
|
||||
AverageSize: c.avgChunkSize,
|
||||
MaxSize: c.maxChunkSize,
|
||||
}
|
||||
|
||||
chunker, err := fastcdc.NewChunker(teeReader, opts)
|
||||
if err != nil {
|
||||
return "", fmt.Errorf("creating chunker: %w", err)
|
||||
}
|
||||
chunker := AcquireReusableChunker(teeReader, c.minChunkSize, c.avgChunkSize, c.maxChunkSize)
|
||||
defer chunker.Release()
|
||||
|
||||
offset := int64(0)
|
||||
|
||||
@@ -132,13 +114,12 @@ func (c *Chunker) ChunkReaderStreaming(r io.Reader, callback ChunkCallback) (str
|
||||
// Calculate chunk hash
|
||||
hash := sha256.Sum256(chunk.Data)
|
||||
|
||||
// Make a copy of the data since FastCDC reuses the buffer
|
||||
chunkData := make([]byte, len(chunk.Data))
|
||||
copy(chunkData, chunk.Data)
|
||||
|
||||
// Pass the data directly - caller must process it before we call Next() again
|
||||
// (chunker reuses its internal buffer, but since we process synchronously
|
||||
// and completely before continuing, no copy is needed)
|
||||
if err := callback(Chunk{
|
||||
Hash: hex.EncodeToString(hash[:]),
|
||||
Data: chunkData,
|
||||
Data: chunk.Data,
|
||||
Offset: offset,
|
||||
Size: int64(len(chunk.Data)),
|
||||
}); err != nil {
|
||||
|
||||
Reference in New Issue
Block a user