Add exclude patterns, snapshot prune, and other improvements

- Implement exclude patterns with anchored pattern support:
  - Patterns starting with / only match from root of source dir
  - Unanchored patterns match anywhere in path
  - Support for glob patterns (*.log, .*, **/*.pack)
  - Directory patterns skip entire subtrees
- Add gobwas/glob dependency for pattern matching
- Add 16 comprehensive tests for exclude functionality
- Add snapshot prune command to clean orphaned data:
  - Removes incomplete snapshots from database
  - Cleans orphaned files, chunks, and blobs
  - Runs automatically at backup start for consistency
- Add snapshot remove command for deleting snapshots
- Add VAULTIK_AGE_SECRET_KEY environment variable support
- Fix duplicate fx module provider in restore command
- Change snapshot ID format to hostname_YYYY-MM-DDTHH:MM:SSZ
2026-01-01 05:42:56 -08:00
parent 05286bed01
commit 2afd54d693
23 changed files with 1769 additions and 98 deletions
--- a/internal/chunker/chunker.go
+++ b/internal/chunker/chunker.go
@@ -6,8 +6,6 @@ import (
 	"fmt"
 	"io"
 	"os"
-
-	"github.com/jotfs/fastcdc-go"
 )

 // Chunk represents a single chunk of data produced by the content-defined chunking algorithm.
@@ -48,16 +46,8 @@ func NewChunker(avgChunkSize int64) *Chunker {
 // reasonably sized inputs. For large files or streams, use ChunkReaderStreaming instead.
 // Returns an error if chunking fails or if reading from the input fails.
 func (c *Chunker) ChunkReader(r io.Reader) ([]Chunk, error) {
-	opts := fastcdc.Options{
-		MinSize:     c.minChunkSize,
-		AverageSize: c.avgChunkSize,
-		MaxSize:     c.maxChunkSize,
-	}
-
-	chunker, err := fastcdc.NewChunker(r, opts)
-	if err != nil {
-		return nil, fmt.Errorf("creating chunker: %w", err)
-	}
+	chunker := AcquireReusableChunker(r, c.minChunkSize, c.avgChunkSize, c.maxChunkSize)
+	defer chunker.Release()

 	var chunks []Chunk
 	offset := int64(0)
@@ -74,7 +64,7 @@ func (c *Chunker) ChunkReader(r io.Reader) ([]Chunk, error) {
 		// Calculate hash
 		hash := sha256.Sum256(chunk.Data)

-		// Make a copy of the data since FastCDC reuses the buffer
+		// Make a copy of the data since the chunker reuses the buffer
 		chunkData := make([]byte, len(chunk.Data))
 		copy(chunkData, chunk.Data)

@@ -107,16 +97,8 @@ func (c *Chunker) ChunkReaderStreaming(r io.Reader, callback ChunkCallback) (str
 	fileHasher := sha256.New()
 	teeReader := io.TeeReader(r, fileHasher)

-	opts := fastcdc.Options{
-		MinSize:     c.minChunkSize,
-		AverageSize: c.avgChunkSize,
-		MaxSize:     c.maxChunkSize,
-	}
-
-	chunker, err := fastcdc.NewChunker(teeReader, opts)
-	if err != nil {
-		return "", fmt.Errorf("creating chunker: %w", err)
-	}
+	chunker := AcquireReusableChunker(teeReader, c.minChunkSize, c.avgChunkSize, c.maxChunkSize)
+	defer chunker.Release()

 	offset := int64(0)

@@ -132,13 +114,12 @@ func (c *Chunker) ChunkReaderStreaming(r io.Reader, callback ChunkCallback) (str
 		// Calculate chunk hash
 		hash := sha256.Sum256(chunk.Data)

-		// Make a copy of the data since FastCDC reuses the buffer
-		chunkData := make([]byte, len(chunk.Data))
-		copy(chunkData, chunk.Data)
-
+		// Pass the data directly - caller must process it before we call Next() again
+		// (chunker reuses its internal buffer, but since we process synchronously
+		// and completely before continuing, no copy is needed)
 		if err := callback(Chunk{
 			Hash:   hex.EncodeToString(hash[:]),
-			Data:   chunkData,
+			Data:   chunk.Data,
 			Offset: offset,
 			Size:   int64(len(chunk.Data)),
 		}); err != nil {