vaultik/internal/storage/rclone.go
sneak 470bf648c4 Add deterministic deduplication, rclone backend, and database purge command
- Implement deterministic blob hashing using double SHA256 of uncompressed
  plaintext data, enabling deduplication even after local DB is cleared
- Add Stat() check before blob upload to skip existing blobs in storage
- Add rclone storage backend for additional remote storage options
- Add 'vaultik database purge' command to erase local state DB
- Add 'vaultik remote check' command to verify remote connectivity
- Show configured snapshots in 'vaultik snapshot list' output
- Skip macOS resource fork files (._*) when listing remote snapshots
- Use multi-threaded zstd compression (CPUs - 2 threads)
- Add writer tests for double hashing behavior
2026-01-28 15:50:17 -08:00

237 lines
5.8 KiB
Go

package storage
import (
"bytes"
"context"
"errors"
"fmt"
"io"
"strings"
"time"
"github.com/rclone/rclone/fs"
"github.com/rclone/rclone/fs/config/configfile"
"github.com/rclone/rclone/fs/operations"
// Import all rclone backends
_ "github.com/rclone/rclone/backend/all"
)
// ErrRemoteNotFound is returned when an rclone remote is not configured.
// NewRcloneStorer wraps it with %w together with the remote name, so callers
// should match it with errors.Is rather than comparing directly.
var ErrRemoteNotFound = errors.New("rclone remote not found in config")
// RcloneStorer implements Storer using rclone's filesystem abstraction.
// This allows vaultik to use any of rclone's 70+ supported storage providers.
//
// Values are built by NewRcloneStorer; every storage operation goes through
// fsys, while remote and path are kept only so Info can describe the location.
type RcloneStorer struct {
fsys fs.Fs // rclone filesystem all object operations are issued against
remote string // remote name as passed to NewRcloneStorer (for Info())
path string // path within remote; may be empty (for Info())
}
// NewRcloneStorer opens the rclone filesystem "<remote>:<path>" and wraps it
// in a RcloneStorer.
//
// remote is the rclone remote name (as configured via `rclone config`) and
// path is the location inside that remote; an empty path yields "<remote>:".
//
// If rclone's error text indicates that the remote is missing from the
// configuration, the returned error wraps ErrRemoteNotFound and names the
// remote. Any other failure is wrapped as "creating rclone filesystem".
func NewRcloneStorer(ctx context.Context, remote, path string) (*RcloneStorer, error) {
	// Install the default config file handler before resolving the remote.
	configfile.Install()

	// e.g. "myremote:path/to/backups"; appending an empty path is a no-op.
	target := remote + ":" + path

	fsys, err := fs.NewFs(ctx, target)
	if err != nil {
		// The missing-remote case is recognised by message text.
		msg := err.Error()
		missing := strings.Contains(msg, "didn't find section in config file") ||
			strings.Contains(msg, "failed to find remote")
		if missing {
			return nil, fmt.Errorf("%w: %s", ErrRemoteNotFound, remote)
		}
		return nil, fmt.Errorf("creating rclone filesystem: %w", err)
	}

	storer := &RcloneStorer{fsys: fsys, remote: remote, path: path}
	return storer, nil
}
// Put uploads everything readable from data to the object named key.
//
// The reader is drained completely into memory first, so the whole payload is
// held in RAM for the duration of the upload. The object's modification time
// is set to the current time.
//
// A failure while draining data is wrapped as "reading data"; a failure from
// rclone is wrapped as "uploading object".
func (r *RcloneStorer) Put(ctx context.Context, key string, data io.Reader) error {
	payload, readErr := io.ReadAll(data)
	if readErr != nil {
		return fmt.Errorf("reading data: %w", readErr)
	}

	// Rcat takes an io.ReadCloser, hence the NopCloser wrapper.
	src := io.NopCloser(bytes.NewReader(payload))
	if _, upErr := operations.Rcat(ctx, r.fsys, key, src, time.Now(), nil); upErr != nil {
		return fmt.Errorf("uploading object: %w", upErr)
	}
	return nil
}
// PutWithProgress uploads data to the object named key, reporting progress as
// the bytes are consumed.
//
// data is streamed through a progressReader, which invokes progress with the
// cumulative byte count after each non-empty read. progress may be nil. An
// error returned by progress surfaces as a read error and aborts the upload.
//
// The size parameter is accepted for interface compatibility but is not used
// by this implementation.
func (r *RcloneStorer) PutWithProgress(ctx context.Context, key string, data io.Reader, size int64, progress ProgressCallback) error {
	// Rcat takes an io.ReadCloser, hence the NopCloser wrapper.
	tracked := io.NopCloser(&progressReader{reader: data, callback: progress})

	if _, err := operations.Rcat(ctx, r.fsys, key, tracked, time.Now(), nil); err != nil {
		return fmt.Errorf("uploading object: %w", err)
	}
	return nil
}
// Get opens the object stored under key for reading.
//
// The caller owns the returned reader and must close it. ErrNotFound is
// returned (unwrapped) when rclone reports either fs.ErrorObjectNotFound or
// fs.ErrorDirNotFound for the key; other lookup failures are wrapped as
// "getting object" and open failures as "opening object".
func (r *RcloneStorer) Get(ctx context.Context, key string) (io.ReadCloser, error) {
	obj, lookupErr := r.fsys.NewObject(ctx, key)
	switch {
	case lookupErr == nil:
		// Object exists; fall through to opening it.
	case errors.Is(lookupErr, fs.ErrorObjectNotFound), errors.Is(lookupErr, fs.ErrorDirNotFound):
		return nil, ErrNotFound
	default:
		return nil, fmt.Errorf("getting object: %w", lookupErr)
	}

	rc, openErr := obj.Open(ctx)
	if openErr != nil {
		return nil, fmt.Errorf("opening object: %w", openErr)
	}
	return rc, nil
}
// Stat looks up the object stored under key and reports its metadata without
// reading its contents.
//
// Only Key and Size are populated in the returned ObjectInfo. ErrNotFound is
// returned when rclone reports fs.ErrorObjectNotFound or fs.ErrorDirNotFound;
// any other lookup failure is wrapped as "getting object".
func (r *RcloneStorer) Stat(ctx context.Context, key string) (*ObjectInfo, error) {
	obj, err := r.fsys.NewObject(ctx, key)
	if err != nil {
		if errors.Is(err, fs.ErrorObjectNotFound) || errors.Is(err, fs.ErrorDirNotFound) {
			return nil, ErrNotFound
		}
		return nil, fmt.Errorf("getting object: %w", err)
	}

	info := &ObjectInfo{Key: key}
	info.Size = obj.Size()
	return info, nil
}
// Delete removes the object stored under key.
//
// Deleting a key that does not exist is not an error (matching the S3
// behavior): both fs.ErrorObjectNotFound and fs.ErrorDirNotFound from the
// lookup result in a nil return. Other lookup failures are wrapped as
// "getting object" and removal failures as "removing object".
func (r *RcloneStorer) Delete(ctx context.Context, key string) error {
	obj, lookupErr := r.fsys.NewObject(ctx, key)
	switch {
	case lookupErr == nil:
		// Object exists; proceed to remove it.
	case errors.Is(lookupErr, fs.ErrorObjectNotFound), errors.Is(lookupErr, fs.ErrorDirNotFound):
		return nil
	default:
		return fmt.Errorf("getting object: %w", lookupErr)
	}

	if rmErr := obj.Remove(ctx); rmErr != nil {
		return fmt.Errorf("removing object: %w", rmErr)
	}
	return nil
}
// List collects the keys of all objects whose remote name starts with prefix.
//
// The whole filesystem is enumerated via operations.ListFn and filtered
// client-side; an empty prefix matches every object. The result is nil when
// nothing matches. Enumeration failures are wrapped as "listing objects".
func (r *RcloneStorer) List(ctx context.Context, prefix string) ([]string, error) {
	var matched []string

	// NOTE(review): matched is appended to without synchronisation; confirm
	// that ListFn never invokes the callback from multiple goroutines at once.
	collect := func(obj fs.Object) {
		// HasPrefix is true for an empty prefix, so no special case is needed.
		if name := obj.Remote(); strings.HasPrefix(name, prefix) {
			matched = append(matched, name)
		}
	}

	if err := operations.ListFn(ctx, r.fsys, collect); err != nil {
		return nil, fmt.Errorf("listing objects: %w", err)
	}
	return matched, nil
}
// ListStream enumerates objects whose remote name starts with prefix and
// delivers them on the returned channel, for result sets too large to hold in
// a slice.
//
// Each item carries Key and Size. If enumeration fails, a final item with only
// Err set is sent. The channel is unbuffered and is closed when enumeration
// finishes or ctx is cancelled.
//
// Every send is abandoned once ctx is done, so a consumer that stops reading
// must cancel ctx to release the producer goroutine. After cancellation the
// trailing error item may not be delivered; callers should consult ctx.Err()
// when the channel closes.
func (r *RcloneStorer) ListStream(ctx context.Context, prefix string) <-chan ObjectInfo {
	ch := make(chan ObjectInfo)

	go func() {
		defer close(ch)

		// send blocks until the consumer takes info or ctx is cancelled.
		// Without the ctx arm, a consumer that went away would leave this
		// goroutine blocked forever on the unbuffered channel.
		send := func(info ObjectInfo) bool {
			select {
			case ch <- info:
				return true
			case <-ctx.Done():
				return false
			}
		}

		err := operations.ListFn(ctx, r.fsys, func(obj fs.Object) {
			// Skip work for remaining objects once the context is cancelled.
			if ctx.Err() != nil {
				return
			}
			key := obj.Remote()
			if prefix == "" || strings.HasPrefix(key, prefix) {
				send(ObjectInfo{
					Key:  key,
					Size: obj.Size(),
				})
			}
		})
		if err != nil {
			send(ObjectInfo{Err: fmt.Errorf("listing objects: %w", err)})
		}
	}()

	return ch
}
// Info describes where this storer keeps its data, for display purposes.
//
// Type is always "rclone". Location is "<remote>:<path>", or just the remote
// name when no path was configured.
func (r *RcloneStorer) Info() StorageInfo {
	info := StorageInfo{
		Type:     "rclone",
		Location: r.remote,
	}
	if r.path != "" {
		info.Location = r.remote + ":" + r.path
	}
	return info
}
// progressReader decorates an io.Reader, keeping a running total of the bytes
// read and reporting that total to an optional callback.
type progressReader struct {
	reader   io.Reader        // underlying source
	read     int64            // cumulative bytes returned so far
	callback ProgressCallback // invoked with the running total; may be nil
}

// Read forwards to the wrapped reader. When at least one byte was read, the
// running total is advanced and, if a callback is set, reported to it.
//
// An error from the callback is returned together with n and takes the place
// of whatever error the underlying read produced. Reads that return no bytes
// never trigger the callback.
func (pr *progressReader) Read(p []byte) (int, error) {
	n, err := pr.reader.Read(p)
	if n <= 0 {
		return n, err
	}

	pr.read += int64(n)
	if pr.callback == nil {
		return n, err
	}
	if cbErr := pr.callback(pr.read); cbErr != nil {
		return n, cbErr
	}
	return n, err
}