Add deterministic deduplication, rclone backend, and database purge command

- Implement deterministic blob hashing using double SHA256 of uncompressed
  plaintext data, enabling deduplication even after local DB is cleared
- Add Stat() check before blob upload to skip existing blobs in storage
- Add rclone storage backend for additional remote storage options
- Add 'vaultik database purge' command to erase local state DB
- Add 'vaultik remote check' command to verify remote connectivity
- Show configured snapshots in 'vaultik snapshot list' output
- Skip macOS resource fork files (._*) when listing remote snapshots
- Use multi-threaded zstd compression (thread count = number of CPUs minus 2)
- Add writer tests for double hashing behavior
This commit is contained in:
2026-01-28 15:50:17 -08:00
parent bdaaadf990
commit 470bf648c4
26 changed files with 2966 additions and 777 deletions

View File

@@ -73,6 +73,9 @@ func storerFromURL(rawURL string, cfg *config.Config) (Storer, error) {
}
return NewS3Storer(client), nil
case "rclone":
return NewRcloneStorer(context.Background(), parsed.RcloneRemote, parsed.Prefix)
default:
return nil, fmt.Errorf("unsupported storage scheme: %s", parsed.Scheme)
}

236
internal/storage/rclone.go Normal file
View File

@@ -0,0 +1,236 @@
package storage
import (
"bytes"
"context"
"errors"
"fmt"
"io"
"strings"
"time"
"github.com/rclone/rclone/fs"
"github.com/rclone/rclone/fs/config/configfile"
"github.com/rclone/rclone/fs/operations"
// Import all rclone backends
_ "github.com/rclone/rclone/backend/all"
)
// ErrRemoteNotFound is returned when an rclone remote is not configured.
// Callers can distinguish a missing remote from other filesystem-creation
// failures with errors.Is(err, ErrRemoteNotFound).
var ErrRemoteNotFound = errors.New("rclone remote not found in config")
// RcloneStorer implements Storer using rclone's filesystem abstraction.
// This allows vaultik to use any of rclone's 70+ supported storage providers.
//
// Construct instances with NewRcloneStorer; the zero value is not usable.
type RcloneStorer struct {
	fsys   fs.Fs  // rclone filesystem handle created by fs.NewFs
	remote string // remote name (for Info())
	path   string // path within remote (for Info())
}
// NewRcloneStorer creates a new rclone storage backend.
// The remote parameter is the rclone remote name (as configured via `rclone config`).
// The path parameter is the path within the remote.
//
// If the named remote does not exist in the rclone config, the returned
// error wraps ErrRemoteNotFound so callers can detect it with errors.Is.
func NewRcloneStorer(ctx context.Context, remote, path string) (*RcloneStorer, error) {
	// Install the default config file handler so rclone can read the
	// user's configured remotes.
	configfile.Install()

	// Build the rclone path string (e.g., "myremote:path/to/backups").
	// Appending an empty path is harmless, so no conditional is needed.
	rclonePath := remote + ":" + path

	// Create the rclone filesystem.
	fsys, err := fs.NewFs(ctx, rclonePath)
	if err != nil {
		// Prefer the sentinel rclone exports for a missing config section
		// (robust against message changes); keep the substring checks as a
		// fallback for error shapes that are not wrapped with the sentinel.
		if errors.Is(err, fs.ErrorNotFoundInConfigFile) ||
			strings.Contains(err.Error(), "didn't find section in config file") ||
			strings.Contains(err.Error(), "failed to find remote") {
			return nil, fmt.Errorf("%w: %s", ErrRemoteNotFound, remote)
		}
		return nil, fmt.Errorf("creating rclone filesystem: %w", err)
	}

	return &RcloneStorer{
		fsys:   fsys,
		remote: remote,
		path:   path,
	}, nil
}
// Put stores data at the specified key.
//
// The entire payload is buffered in memory before the upload starts.
// NOTE(review): operations.Rcat can stream a reader of unknown length
// (PutWithProgress does exactly that) — consider streaming here too so
// large blobs are not held in RAM; confirm no caller relies on the
// full pre-read before switching.
func (r *RcloneStorer) Put(ctx context.Context, key string, data io.Reader) error {
	payload, err := io.ReadAll(data)
	if err != nil {
		return fmt.Errorf("reading data: %w", err)
	}

	body := io.NopCloser(bytes.NewReader(payload))
	if _, err := operations.Rcat(ctx, r.fsys, key, body, time.Now(), nil); err != nil {
		return fmt.Errorf("uploading object: %w", err)
	}
	return nil
}
// PutWithProgress stores data with progress reporting.
//
// The size parameter is accepted for interface compatibility but is not
// used here: rclone streams the reader without needing a length up front.
// Progress is reported through the wrapping reader as bytes are consumed;
// a non-nil error from the callback aborts the upload.
func (r *RcloneStorer) PutWithProgress(ctx context.Context, key string, data io.Reader, size int64, progress ProgressCallback) error {
	tracked := io.NopCloser(&progressReader{
		reader:   data,
		callback: progress,
	})

	if _, err := operations.Rcat(ctx, r.fsys, key, tracked, time.Now(), nil); err != nil {
		return fmt.Errorf("uploading object: %w", err)
	}
	return nil
}
// Get retrieves data from the specified key.
//
// Returns ErrNotFound when neither the object nor its parent directory
// exists on the remote. The caller is responsible for closing the
// returned reader.
func (r *RcloneStorer) Get(ctx context.Context, key string) (io.ReadCloser, error) {
	obj, err := r.fsys.NewObject(ctx, key)
	switch {
	case err == nil:
		// Found — fall through to open.
	case errors.Is(err, fs.ErrorObjectNotFound), errors.Is(err, fs.ErrorDirNotFound):
		return nil, ErrNotFound
	default:
		return nil, fmt.Errorf("getting object: %w", err)
	}

	rc, err := obj.Open(ctx)
	if err != nil {
		return nil, fmt.Errorf("opening object: %w", err)
	}
	return rc, nil
}
// Stat returns metadata about an object without retrieving its contents.
//
// Returns ErrNotFound when the object (or its containing directory) does
// not exist, mirroring Get's behavior.
func (r *RcloneStorer) Stat(ctx context.Context, key string) (*ObjectInfo, error) {
	obj, err := r.fsys.NewObject(ctx, key)
	if err != nil {
		if errors.Is(err, fs.ErrorObjectNotFound) || errors.Is(err, fs.ErrorDirNotFound) {
			return nil, ErrNotFound
		}
		return nil, fmt.Errorf("getting object: %w", err)
	}

	info := ObjectInfo{
		Key:  key,
		Size: obj.Size(),
	}
	return &info, nil
}
// Delete removes an object.
//
// Deleting a key that does not exist is a no-op (matching S3 semantics),
// so callers never need to Stat before Delete.
func (r *RcloneStorer) Delete(ctx context.Context, key string) error {
	obj, err := r.fsys.NewObject(ctx, key)
	switch {
	case errors.Is(err, fs.ErrorObjectNotFound), errors.Is(err, fs.ErrorDirNotFound):
		// Already absent: nothing to do.
		return nil
	case err != nil:
		return fmt.Errorf("getting object: %w", err)
	}

	if err := obj.Remove(ctx); err != nil {
		return fmt.Errorf("removing object: %w", err)
	}
	return nil
}
// List returns all keys with the given prefix.
//
// Prefix filtering happens client-side after rclone enumerates the
// filesystem; an empty prefix returns every key.
func (r *RcloneStorer) List(ctx context.Context, prefix string) ([]string, error) {
	var keys []string
	collect := func(obj fs.Object) {
		name := obj.Remote()
		if prefix != "" && !strings.HasPrefix(name, prefix) {
			return
		}
		keys = append(keys, name)
	}

	if err := operations.ListFn(ctx, r.fsys, collect); err != nil {
		return nil, fmt.Errorf("listing objects: %w", err)
	}
	return keys, nil
}
// ListStream returns a channel of ObjectInfo for large result sets.
//
// The channel is unbuffered and closed when listing completes. If the
// listing fails, a final ObjectInfo with Err set is sent. Every send
// selects on ctx.Done() — previously the sends were unconditional, so a
// consumer that stopped reading after cancellation would leave the
// producer goroutine blocked forever (a goroutine leak).
func (r *RcloneStorer) ListStream(ctx context.Context, prefix string) <-chan ObjectInfo {
	ch := make(chan ObjectInfo)
	go func() {
		defer close(ch)
		err := operations.ListFn(ctx, r.fsys, func(obj fs.Object) {
			// Stop emitting once the context is cancelled. ListFn's
			// callback cannot abort the walk directly, but rclone also
			// receives ctx and aborts the underlying listing.
			if ctx.Err() != nil {
				return
			}
			key := obj.Remote()
			if prefix != "" && !strings.HasPrefix(key, prefix) {
				return
			}
			select {
			case ch <- ObjectInfo{Key: key, Size: obj.Size()}:
			case <-ctx.Done():
				// Consumer is gone; drop the item instead of blocking.
			}
		})
		if err != nil && ctx.Err() == nil {
			select {
			case ch <- ObjectInfo{Err: fmt.Errorf("listing objects: %w", err)}:
			case <-ctx.Done():
			}
		}
	}()
	return ch
}
// Info returns human-readable storage location information.
//
// The location is "remote" or "remote:path", matching how rclone paths
// are written on the command line.
func (r *RcloneStorer) Info() StorageInfo {
	loc := r.remote
	if r.path != "" {
		loc = r.remote + ":" + r.path
	}
	return StorageInfo{
		Type:     "rclone",
		Location: loc,
	}
}
// progressReader wraps an io.Reader to track read progress.
type progressReader struct {
	reader   io.Reader        // underlying data source
	read     int64            // cumulative bytes read so far
	callback ProgressCallback // invoked with the running total; may be nil
}

// Read forwards to the underlying reader, accumulates the byte count,
// and reports the running total to the callback. A non-nil callback
// error is returned alongside the byte count so the caller can abort
// the transfer.
func (pr *progressReader) Read(p []byte) (int, error) {
	n, err := pr.reader.Read(p)
	if n == 0 {
		return n, err
	}
	pr.read += int64(n)
	if pr.callback == nil {
		return n, err
	}
	if cbErr := pr.callback(pr.read); cbErr != nil {
		return n, cbErr
	}
	return n, err
}

View File

@@ -8,18 +8,20 @@ import (
// StorageURL represents a parsed storage URL.
type StorageURL struct {
Scheme string // "s3" or "file"
Bucket string // S3 bucket name (empty for file)
Prefix string // Path within bucket or filesystem base path
Endpoint string // S3 endpoint (optional, default AWS)
Region string // S3 region (optional)
UseSSL bool // Use HTTPS for S3 (default true)
Scheme string // "s3", "file", or "rclone"
Bucket string // S3 bucket name (empty for file/rclone)
Prefix string // Path within bucket or filesystem base path
Endpoint string // S3 endpoint (optional, default AWS)
Region string // S3 region (optional)
UseSSL bool // Use HTTPS for S3 (default true)
RcloneRemote string // rclone remote name (for rclone:// URLs)
}
// ParseStorageURL parses a storage URL string.
// Supported formats:
// - s3://bucket/prefix?endpoint=host&region=us-east-1&ssl=true
// - file:///absolute/path/to/backup
// - rclone://remote/path/to/backups
func ParseStorageURL(rawURL string) (*StorageURL, error) {
if rawURL == "" {
return nil, fmt.Errorf("storage URL is empty")
@@ -67,7 +69,28 @@ func ParseStorageURL(rawURL string) (*StorageURL, error) {
}, nil
}
return nil, fmt.Errorf("unsupported URL scheme: must start with s3:// or file://")
// Handle rclone:// URLs
if strings.HasPrefix(rawURL, "rclone://") {
u, err := url.Parse(rawURL)
if err != nil {
return nil, fmt.Errorf("invalid URL: %w", err)
}
remote := u.Host
if remote == "" {
return nil, fmt.Errorf("rclone URL missing remote name")
}
path := strings.TrimPrefix(u.Path, "/")
return &StorageURL{
Scheme: "rclone",
Prefix: path,
RcloneRemote: remote,
}, nil
}
return nil, fmt.Errorf("unsupported URL scheme: must start with s3://, file://, or rclone://")
}
// String returns a human-readable representation of the storage URL.
@@ -84,6 +107,11 @@ func (u *StorageURL) String() string {
return fmt.Sprintf("s3://%s/%s (endpoint: %s)", u.Bucket, u.Prefix, endpoint)
}
return fmt.Sprintf("s3://%s (endpoint: %s)", u.Bucket, endpoint)
case "rclone":
if u.Prefix != "" {
return fmt.Sprintf("rclone://%s/%s", u.RcloneRemote, u.Prefix)
}
return fmt.Sprintf("rclone://%s", u.RcloneRemote)
default:
return fmt.Sprintf("%s://?", u.Scheme)
}