Add deterministic deduplication, rclone backend, and database purge command
- Implement deterministic blob hashing using a double SHA256 of the uncompressed plaintext data, enabling deduplication even after the local DB is cleared
- Add a Stat() check before blob upload to skip blobs that already exist in storage
- Add an rclone storage backend for additional remote storage options
- Add 'vaultik database purge' command to erase the local state DB
- Add 'vaultik remote check' command to verify remote connectivity
- Show configured snapshots in 'vaultik snapshot list' output
- Skip macOS resource-fork files (._*) when listing remote snapshots
- Use multi-threaded zstd compression (CPUs - 2 threads)
- Add writer tests for the double-hashing behavior
This commit is contained in:
@@ -73,6 +73,9 @@ func storerFromURL(rawURL string, cfg *config.Config) (Storer, error) {
|
||||
}
|
||||
return NewS3Storer(client), nil
|
||||
|
||||
case "rclone":
|
||||
return NewRcloneStorer(context.Background(), parsed.RcloneRemote, parsed.Prefix)
|
||||
|
||||
default:
|
||||
return nil, fmt.Errorf("unsupported storage scheme: %s", parsed.Scheme)
|
||||
}
|
||||
|
||||
236
internal/storage/rclone.go
Normal file
236
internal/storage/rclone.go
Normal file
@@ -0,0 +1,236 @@
|
||||
package storage
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"context"
|
||||
"errors"
|
||||
"fmt"
|
||||
"io"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"github.com/rclone/rclone/fs"
|
||||
"github.com/rclone/rclone/fs/config/configfile"
|
||||
"github.com/rclone/rclone/fs/operations"
|
||||
|
||||
// Import all rclone backends
|
||||
_ "github.com/rclone/rclone/backend/all"
|
||||
)
|
||||
|
||||
// ErrRemoteNotFound is returned when an rclone remote is not configured.
// NewRcloneStorer wraps it with %w, so callers can detect the condition
// with errors.Is(err, ErrRemoteNotFound).
var ErrRemoteNotFound = errors.New("rclone remote not found in config")
|
||||
|
||||
// RcloneStorer implements Storer using rclone's filesystem abstraction.
// This allows vaultik to use any of rclone's 70+ supported storage providers.
type RcloneStorer struct {
	fsys   fs.Fs  // rclone filesystem rooted at "remote:path"
	remote string // remote name as configured in rclone.conf (for Info())
	path   string // path within remote (for Info())
}
|
||||
|
||||
// NewRcloneStorer creates a new rclone storage backend.
|
||||
// The remote parameter is the rclone remote name (as configured via `rclone config`).
|
||||
// The path parameter is the path within the remote.
|
||||
func NewRcloneStorer(ctx context.Context, remote, path string) (*RcloneStorer, error) {
|
||||
// Install the default config file handler
|
||||
configfile.Install()
|
||||
|
||||
// Build the rclone path string (e.g., "myremote:path/to/backups")
|
||||
rclonePath := remote + ":"
|
||||
if path != "" {
|
||||
rclonePath += path
|
||||
}
|
||||
|
||||
// Create the rclone filesystem
|
||||
fsys, err := fs.NewFs(ctx, rclonePath)
|
||||
if err != nil {
|
||||
// Check for remote not found error
|
||||
if strings.Contains(err.Error(), "didn't find section in config file") ||
|
||||
strings.Contains(err.Error(), "failed to find remote") {
|
||||
return nil, fmt.Errorf("%w: %s", ErrRemoteNotFound, remote)
|
||||
}
|
||||
return nil, fmt.Errorf("creating rclone filesystem: %w", err)
|
||||
}
|
||||
|
||||
return &RcloneStorer{
|
||||
fsys: fsys,
|
||||
remote: remote,
|
||||
path: path,
|
||||
}, nil
|
||||
}
|
||||
|
||||
// Put stores data at the specified key.
//
// The entire payload is buffered in memory before upload. NOTE(review):
// the buffered size is never used, and operations.Rcat accepts a plain
// stream (PutWithProgress below uploads without buffering), so this
// copy looks unnecessary — confirm and consider streaming `data`
// directly to avoid holding large blobs in memory.
func (r *RcloneStorer) Put(ctx context.Context, key string, data io.Reader) error {
	// Read the full payload up front so it can be replayed from memory.
	buf, err := io.ReadAll(data)
	if err != nil {
		return fmt.Errorf("reading data: %w", err)
	}

	// Upload the object, stamping it with the current time.
	_, err = operations.Rcat(ctx, r.fsys, key, io.NopCloser(bytes.NewReader(buf)), time.Now(), nil)
	if err != nil {
		return fmt.Errorf("uploading object: %w", err)
	}

	return nil
}
|
||||
|
||||
// PutWithProgress stores data with progress reporting.
|
||||
func (r *RcloneStorer) PutWithProgress(ctx context.Context, key string, data io.Reader, size int64, progress ProgressCallback) error {
|
||||
// Wrap reader with progress tracking
|
||||
pr := &progressReader{
|
||||
reader: data,
|
||||
callback: progress,
|
||||
}
|
||||
|
||||
// Upload the object
|
||||
_, err := operations.Rcat(ctx, r.fsys, key, io.NopCloser(pr), time.Now(), nil)
|
||||
if err != nil {
|
||||
return fmt.Errorf("uploading object: %w", err)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// Get retrieves data from the specified key.
|
||||
func (r *RcloneStorer) Get(ctx context.Context, key string) (io.ReadCloser, error) {
|
||||
// Get the object
|
||||
obj, err := r.fsys.NewObject(ctx, key)
|
||||
if err != nil {
|
||||
if errors.Is(err, fs.ErrorObjectNotFound) {
|
||||
return nil, ErrNotFound
|
||||
}
|
||||
if errors.Is(err, fs.ErrorDirNotFound) {
|
||||
return nil, ErrNotFound
|
||||
}
|
||||
return nil, fmt.Errorf("getting object: %w", err)
|
||||
}
|
||||
|
||||
// Open the object for reading
|
||||
reader, err := obj.Open(ctx)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("opening object: %w", err)
|
||||
}
|
||||
|
||||
return reader, nil
|
||||
}
|
||||
|
||||
// Stat returns metadata about an object without retrieving its contents.
|
||||
func (r *RcloneStorer) Stat(ctx context.Context, key string) (*ObjectInfo, error) {
|
||||
obj, err := r.fsys.NewObject(ctx, key)
|
||||
if err != nil {
|
||||
if errors.Is(err, fs.ErrorObjectNotFound) {
|
||||
return nil, ErrNotFound
|
||||
}
|
||||
if errors.Is(err, fs.ErrorDirNotFound) {
|
||||
return nil, ErrNotFound
|
||||
}
|
||||
return nil, fmt.Errorf("getting object: %w", err)
|
||||
}
|
||||
|
||||
return &ObjectInfo{
|
||||
Key: key,
|
||||
Size: obj.Size(),
|
||||
}, nil
|
||||
}
|
||||
|
||||
// Delete removes an object.
|
||||
func (r *RcloneStorer) Delete(ctx context.Context, key string) error {
|
||||
obj, err := r.fsys.NewObject(ctx, key)
|
||||
if err != nil {
|
||||
if errors.Is(err, fs.ErrorObjectNotFound) {
|
||||
return nil // Match S3 behavior: no error if doesn't exist
|
||||
}
|
||||
if errors.Is(err, fs.ErrorDirNotFound) {
|
||||
return nil
|
||||
}
|
||||
return fmt.Errorf("getting object: %w", err)
|
||||
}
|
||||
|
||||
if err := obj.Remove(ctx); err != nil {
|
||||
return fmt.Errorf("removing object: %w", err)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// List returns all keys with the given prefix.
|
||||
func (r *RcloneStorer) List(ctx context.Context, prefix string) ([]string, error) {
|
||||
var keys []string
|
||||
|
||||
err := operations.ListFn(ctx, r.fsys, func(obj fs.Object) {
|
||||
key := obj.Remote()
|
||||
if prefix == "" || strings.HasPrefix(key, prefix) {
|
||||
keys = append(keys, key)
|
||||
}
|
||||
})
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("listing objects: %w", err)
|
||||
}
|
||||
|
||||
return keys, nil
|
||||
}
|
||||
|
||||
// ListStream returns a channel of ObjectInfo for large result sets.
|
||||
func (r *RcloneStorer) ListStream(ctx context.Context, prefix string) <-chan ObjectInfo {
|
||||
ch := make(chan ObjectInfo)
|
||||
|
||||
go func() {
|
||||
defer close(ch)
|
||||
|
||||
err := operations.ListFn(ctx, r.fsys, func(obj fs.Object) {
|
||||
// Check context cancellation
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
return
|
||||
default:
|
||||
}
|
||||
|
||||
key := obj.Remote()
|
||||
if prefix == "" || strings.HasPrefix(key, prefix) {
|
||||
ch <- ObjectInfo{
|
||||
Key: key,
|
||||
Size: obj.Size(),
|
||||
}
|
||||
}
|
||||
})
|
||||
if err != nil {
|
||||
ch <- ObjectInfo{Err: fmt.Errorf("listing objects: %w", err)}
|
||||
}
|
||||
}()
|
||||
|
||||
return ch
|
||||
}
|
||||
|
||||
// Info returns human-readable storage location information.
|
||||
func (r *RcloneStorer) Info() StorageInfo {
|
||||
location := r.remote
|
||||
if r.path != "" {
|
||||
location += ":" + r.path
|
||||
}
|
||||
return StorageInfo{
|
||||
Type: "rclone",
|
||||
Location: location,
|
||||
}
|
||||
}
|
||||
|
||||
// progressReader wraps an io.Reader to track read progress.
type progressReader struct {
	reader   io.Reader        // underlying data source
	read     int64            // cumulative bytes read so far
	callback ProgressCallback // invoked with the running total after each read; may be nil
}
|
||||
|
||||
func (pr *progressReader) Read(p []byte) (int, error) {
|
||||
n, err := pr.reader.Read(p)
|
||||
if n > 0 {
|
||||
pr.read += int64(n)
|
||||
if pr.callback != nil {
|
||||
if callbackErr := pr.callback(pr.read); callbackErr != nil {
|
||||
return n, callbackErr
|
||||
}
|
||||
}
|
||||
}
|
||||
return n, err
|
||||
}
|
||||
@@ -8,18 +8,20 @@ import (
|
||||
|
||||
// StorageURL represents a parsed storage URL.
//
// Fix: the scraped diff left both the pre- and post-change field lists
// concatenated (duplicate Scheme/Bucket/Prefix/Endpoint/Region/UseSSL
// declarations), which cannot compile; this is the post-change struct
// with the rclone scheme and RcloneRemote field.
type StorageURL struct {
	Scheme       string // "s3", "file", or "rclone"
	Bucket       string // S3 bucket name (empty for file/rclone)
	Prefix       string // Path within bucket or filesystem base path
	Endpoint     string // S3 endpoint (optional, default AWS)
	Region       string // S3 region (optional)
	UseSSL       bool   // Use HTTPS for S3 (default true)
	RcloneRemote string // rclone remote name (for rclone:// URLs)
}
|
||||
|
||||
// ParseStorageURL parses a storage URL string.
|
||||
// Supported formats:
|
||||
// - s3://bucket/prefix?endpoint=host®ion=us-east-1&ssl=true
|
||||
// - file:///absolute/path/to/backup
|
||||
// - rclone://remote/path/to/backups
|
||||
func ParseStorageURL(rawURL string) (*StorageURL, error) {
|
||||
if rawURL == "" {
|
||||
return nil, fmt.Errorf("storage URL is empty")
|
||||
@@ -67,7 +69,28 @@ func ParseStorageURL(rawURL string) (*StorageURL, error) {
|
||||
}, nil
|
||||
}
|
||||
|
||||
return nil, fmt.Errorf("unsupported URL scheme: must start with s3:// or file://")
|
||||
// Handle rclone:// URLs
|
||||
if strings.HasPrefix(rawURL, "rclone://") {
|
||||
u, err := url.Parse(rawURL)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("invalid URL: %w", err)
|
||||
}
|
||||
|
||||
remote := u.Host
|
||||
if remote == "" {
|
||||
return nil, fmt.Errorf("rclone URL missing remote name")
|
||||
}
|
||||
|
||||
path := strings.TrimPrefix(u.Path, "/")
|
||||
|
||||
return &StorageURL{
|
||||
Scheme: "rclone",
|
||||
Prefix: path,
|
||||
RcloneRemote: remote,
|
||||
}, nil
|
||||
}
|
||||
|
||||
return nil, fmt.Errorf("unsupported URL scheme: must start with s3://, file://, or rclone://")
|
||||
}
|
||||
|
||||
// String returns a human-readable representation of the storage URL.
|
||||
@@ -84,6 +107,11 @@ func (u *StorageURL) String() string {
|
||||
return fmt.Sprintf("s3://%s/%s (endpoint: %s)", u.Bucket, u.Prefix, endpoint)
|
||||
}
|
||||
return fmt.Sprintf("s3://%s (endpoint: %s)", u.Bucket, endpoint)
|
||||
case "rclone":
|
||||
if u.Prefix != "" {
|
||||
return fmt.Sprintf("rclone://%s/%s", u.RcloneRemote, u.Prefix)
|
||||
}
|
||||
return fmt.Sprintf("rclone://%s", u.RcloneRemote)
|
||||
default:
|
||||
return fmt.Sprintf("%s://?", u.Scheme)
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user