Add pluggable storage backend, PID locking, and improved scan progress

Storage backend:
- Add internal/storage package with Storer interface
- Implement FileStorer for local filesystem storage (file:// URLs)
- Implement S3Storer wrapping existing s3.Client
- Support storage_url config field (s3:// or file://)
- Migrate all consumers to use storage.Storer interface

PID locking:
- Add internal/pidlock package to prevent concurrent instances
- Acquire lock before app start, release on exit
- Detect stale locks from crashed processes

Scan progress improvements:
- Add fast file enumeration pass before stat() phase
- Use enumerated set for deletion detection (no extra filesystem access)
- Show progress with percentage, files/sec, elapsed time, and ETA
- Change "changed" to "changed/new" for clarity

Config improvements:
- Add tilde expansion for paths (~/)
- Use xdg library for platform-specific default index path
This commit is contained in:
2025-12-19 11:52:51 +07:00
parent cda0cf865a
commit badc0c07e0
22 changed files with 1245 additions and 188 deletions

View File

@@ -4,7 +4,6 @@ import (
"bytes"
"context"
"database/sql"
"fmt"
"io"
"sync"
"testing"
@@ -13,100 +12,122 @@ import (
"git.eeqj.de/sneak/vaultik/internal/config"
"git.eeqj.de/sneak/vaultik/internal/database"
"git.eeqj.de/sneak/vaultik/internal/log"
"git.eeqj.de/sneak/vaultik/internal/s3"
"git.eeqj.de/sneak/vaultik/internal/snapshot"
"git.eeqj.de/sneak/vaultik/internal/storage"
"github.com/spf13/afero"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)
// MockS3Client implements a mock S3 client for testing
type MockS3Client struct {
mu sync.Mutex
storage map[string][]byte
calls []string
// MockStorer implements storage.Storer for testing
type MockStorer struct {
mu sync.Mutex
data map[string][]byte
calls []string
}
func NewMockS3Client() *MockS3Client {
return &MockS3Client{
storage: make(map[string][]byte),
calls: make([]string, 0),
func NewMockStorer() *MockStorer {
return &MockStorer{
data: make(map[string][]byte),
calls: make([]string, 0),
}
}
func (m *MockS3Client) PutObject(ctx context.Context, key string, reader io.Reader) error {
func (m *MockStorer) Put(ctx context.Context, key string, reader io.Reader) error {
m.mu.Lock()
defer m.mu.Unlock()
m.calls = append(m.calls, "PutObject:"+key)
m.calls = append(m.calls, "Put:"+key)
data, err := io.ReadAll(reader)
if err != nil {
return err
}
m.storage[key] = data
m.data[key] = data
return nil
}
func (m *MockS3Client) PutObjectWithProgress(ctx context.Context, key string, reader io.Reader, size int64, progress s3.ProgressCallback) error {
// For testing, just call PutObject
return m.PutObject(ctx, key, reader)
func (m *MockStorer) PutWithProgress(ctx context.Context, key string, reader io.Reader, size int64, progress storage.ProgressCallback) error {
return m.Put(ctx, key, reader)
}
func (m *MockS3Client) GetObject(ctx context.Context, key string) (io.ReadCloser, error) {
func (m *MockStorer) Get(ctx context.Context, key string) (io.ReadCloser, error) {
m.mu.Lock()
defer m.mu.Unlock()
m.calls = append(m.calls, "GetObject:"+key)
data, exists := m.storage[key]
m.calls = append(m.calls, "Get:"+key)
data, exists := m.data[key]
if !exists {
return nil, fmt.Errorf("key not found: %s", key)
return nil, storage.ErrNotFound
}
return io.NopCloser(bytes.NewReader(data)), nil
}
func (m *MockS3Client) StatObject(ctx context.Context, key string) (*s3.ObjectInfo, error) {
func (m *MockStorer) Stat(ctx context.Context, key string) (*storage.ObjectInfo, error) {
m.mu.Lock()
defer m.mu.Unlock()
m.calls = append(m.calls, "StatObject:"+key)
data, exists := m.storage[key]
m.calls = append(m.calls, "Stat:"+key)
data, exists := m.data[key]
if !exists {
return nil, fmt.Errorf("key not found: %s", key)
return nil, storage.ErrNotFound
}
return &s3.ObjectInfo{
return &storage.ObjectInfo{
Key: key,
Size: int64(len(data)),
}, nil
}
func (m *MockS3Client) DeleteObject(ctx context.Context, key string) error {
func (m *MockStorer) Delete(ctx context.Context, key string) error {
m.mu.Lock()
defer m.mu.Unlock()
m.calls = append(m.calls, "DeleteObject:"+key)
delete(m.storage, key)
m.calls = append(m.calls, "Delete:"+key)
delete(m.data, key)
return nil
}
func (m *MockS3Client) ListObjects(ctx context.Context, prefix string) ([]*s3.ObjectInfo, error) {
func (m *MockStorer) List(ctx context.Context, prefix string) ([]string, error) {
m.mu.Lock()
defer m.mu.Unlock()
m.calls = append(m.calls, "ListObjects:"+prefix)
var objects []*s3.ObjectInfo
for key, data := range m.storage {
m.calls = append(m.calls, "List:"+prefix)
var keys []string
for key := range m.data {
if len(prefix) == 0 || (len(key) >= len(prefix) && key[:len(prefix)] == prefix) {
objects = append(objects, &s3.ObjectInfo{
Key: key,
Size: int64(len(data)),
})
keys = append(keys, key)
}
}
return objects, nil
return keys, nil
}
// GetCalls returns the list of S3 operations that were called
func (m *MockS3Client) GetCalls() []string {
// ListStream returns a channel that yields one storage.ObjectInfo (Key and
// Size populated) for every stored object whose key begins with prefix. An
// empty prefix matches every object. The channel is closed once all matches
// have been delivered or ctx is cancelled, whichever happens first.
//
// The set of matches is captured while holding the mutex and then delivered
// without it. Holding the lock across the (unbuffered, blocking) channel
// sends would deadlock any consumer that calls another MockStorer method
// while ranging over the stream, and would leave the mock permanently locked
// if the consumer stopped reading early.
//
// ctx must be non-nil. On cancellation the stream simply ends early; no error
// entry is emitted.
func (m *MockStorer) ListStream(ctx context.Context, prefix string) <-chan storage.ObjectInfo {
	// Snapshot matching entries under the lock so the sender goroutine never
	// touches m.data concurrently with Put/Delete.
	m.mu.Lock()
	matches := make([]storage.ObjectInfo, 0, len(m.data))
	for key, data := range m.data {
		// Manual prefix test; an empty prefix trivially matches every key.
		if len(key) >= len(prefix) && key[:len(prefix)] == prefix {
			matches = append(matches, storage.ObjectInfo{
				Key:  key,
				Size: int64(len(data)),
			})
		}
	}
	m.mu.Unlock()

	ch := make(chan storage.ObjectInfo)
	go func() {
		defer close(ch)
		for _, info := range matches {
			select {
			case ch <- info:
			case <-ctx.Done():
				// Consumer gave up; exit instead of blocking forever.
				return
			}
		}
	}()
	return ch
}
// Info reports a fixed description of this backend: type "mock", located in
// "memory". It does not read or modify any MockStorer state.
func (m *MockStorer) Info() storage.StorageInfo {
	var info storage.StorageInfo
	info.Type = "mock"
	info.Location = "memory"
	return info
}
// GetCalls returns the list of operations that were called
func (m *MockStorer) GetCalls() []string {
m.mu.Lock()
defer m.mu.Unlock()
@@ -116,11 +137,11 @@ func (m *MockS3Client) GetCalls() []string {
}
// GetStorageSize returns the number of objects in storage
func (m *MockS3Client) GetStorageSize() int {
func (m *MockStorer) GetStorageSize() int {
m.mu.Lock()
defer m.mu.Unlock()
return len(m.storage)
return len(m.data)
}
// TestEndToEndBackup tests the full backup workflow with mocked dependencies
@@ -158,8 +179,8 @@ func TestEndToEndBackup(t *testing.T) {
}
}
// Create mock S3 client
mockS3 := NewMockS3Client()
// Create mock storage
mockStorage := NewMockStorer()
// Create test configuration
cfg := &config.Config{
@@ -181,7 +202,7 @@ func TestEndToEndBackup(t *testing.T) {
}
// For a true end-to-end test, we'll create a simpler test that focuses on
// the core backup logic using the scanner directly with our mock S3 client
// the core backup logic using the scanner directly with our mock storage
ctx := context.Background()
// Create in-memory database
@@ -195,12 +216,12 @@ func TestEndToEndBackup(t *testing.T) {
repos := database.NewRepositories(db)
// Create scanner with mock S3 client
// Create scanner with mock storage
scanner := snapshot.NewScanner(snapshot.ScannerConfig{
FS: fs,
ChunkSize: cfg.ChunkSize.Int64(),
Repositories: repos,
S3Client: mockS3,
Storage: mockStorage,
MaxBlobSize: cfg.BlobSizeLimit.Int64(),
CompressionLevel: cfg.CompressionLevel,
AgeRecipients: cfg.AgeRecipients,
@@ -232,15 +253,15 @@ func TestEndToEndBackup(t *testing.T) {
assert.Greater(t, result.ChunksCreated, 0, "Should create chunks")
assert.Greater(t, result.BlobsCreated, 0, "Should create blobs")
// Verify S3 operations
calls := mockS3.GetCalls()
t.Logf("S3 operations performed: %v", calls)
// Verify storage operations
calls := mockStorage.GetCalls()
t.Logf("Storage operations performed: %v", calls)
// Should have uploaded at least one blob
blobUploads := 0
for _, call := range calls {
if len(call) > 10 && call[:10] == "PutObject:" {
if len(call) > 16 && call[10:16] == "blobs/" {
if len(call) > 4 && call[:4] == "Put:" {
if len(call) > 10 && call[4:10] == "blobs/" {
blobUploads++
}
}
@@ -264,8 +285,8 @@ func TestEndToEndBackup(t *testing.T) {
require.NoError(t, err)
assert.Greater(t, len(fileChunks), 0, "Should have chunks for file1.txt")
// Verify blobs were uploaded to S3
assert.Greater(t, mockS3.GetStorageSize(), 0, "Should have blobs in S3 storage")
// Verify blobs were uploaded to storage
assert.Greater(t, mockStorage.GetStorageSize(), 0, "Should have blobs in storage")
// Complete the snapshot - just verify we got results
// In a real integration test, we'd update the snapshot record
@@ -283,7 +304,7 @@ func TestEndToEndBackup(t *testing.T) {
t.Logf(" Bytes scanned: %d", result.BytesScanned)
t.Logf(" Chunks created: %d", result.ChunksCreated)
t.Logf(" Blobs created: %d", result.BlobsCreated)
t.Logf(" S3 storage size: %d objects", mockS3.GetStorageSize())
t.Logf(" Storage size: %d objects", mockStorage.GetStorageSize())
}
// TestBackupAndVerify tests backing up files and verifying the blobs
@@ -301,8 +322,8 @@ func TestBackupAndVerify(t *testing.T) {
err = afero.WriteFile(fs, "/data/test.txt", []byte(testContent), 0644)
require.NoError(t, err)
// Create mock S3 client
mockS3 := NewMockS3Client()
// Create mock storage
mockStorage := NewMockStorer()
// Create test database
ctx := context.Background()
@@ -321,7 +342,7 @@ func TestBackupAndVerify(t *testing.T) {
FS: fs,
ChunkSize: int64(1024 * 16), // 16KB chunks
Repositories: repos,
S3Client: mockS3,
Storage: mockStorage,
MaxBlobSize: int64(1024 * 1024), // 1MB blobs
CompressionLevel: 3,
AgeRecipients: []string{"age1ezrjmfpwsc95svdg0y54mums3zevgzu0x0ecq2f7tp8a05gl0sjq9q9wjg"}, // Test public key
@@ -346,25 +367,25 @@ func TestBackupAndVerify(t *testing.T) {
// Verify backup created blobs
assert.Greater(t, result.BlobsCreated, 0, "Should create at least one blob")
assert.Equal(t, mockS3.GetStorageSize(), result.BlobsCreated, "S3 should have the blobs")
assert.Equal(t, mockStorage.GetStorageSize(), result.BlobsCreated, "Storage should have the blobs")
// Verify we can retrieve the blob from S3
objects, err := mockS3.ListObjects(ctx, "blobs/")
// Verify we can retrieve the blob from storage
objects, err := mockStorage.List(ctx, "blobs/")
require.NoError(t, err)
assert.Len(t, objects, result.BlobsCreated, "Should have correct number of blobs in S3")
assert.Len(t, objects, result.BlobsCreated, "Should have correct number of blobs in storage")
// Get the first blob and verify it exists
if len(objects) > 0 {
blobKey := objects[0].Key
blobKey := objects[0]
t.Logf("Verifying blob: %s", blobKey)
// Get blob info
blobInfo, err := mockS3.StatObject(ctx, blobKey)
blobInfo, err := mockStorage.Stat(ctx, blobKey)
require.NoError(t, err)
assert.Greater(t, blobInfo.Size, int64(0), "Blob should have content")
// Get blob content
reader, err := mockS3.GetObject(ctx, blobKey)
reader, err := mockStorage.Get(ctx, blobKey)
require.NoError(t, err)
defer func() { _ = reader.Close() }()

View File

@@ -21,9 +21,9 @@ func (v *Vaultik) PruneBlobs(opts *PruneOptions) error {
allBlobsReferenced := make(map[string]bool)
manifestCount := 0
// List all snapshots in S3
// List all snapshots in storage
log.Info("Listing remote snapshots")
objectCh := v.S3Client.ListObjectsStream(v.ctx, "metadata/", false)
objectCh := v.Storage.ListStream(v.ctx, "metadata/")
var snapshotIDs []string
for object := range objectCh {
@@ -73,10 +73,10 @@ func (v *Vaultik) PruneBlobs(opts *PruneOptions) error {
log.Info("Processed manifests", "count", manifestCount, "unique_blobs_referenced", len(allBlobsReferenced))
// List all blobs in S3
// List all blobs in storage
log.Info("Listing all blobs in storage")
allBlobs := make(map[string]int64) // hash -> size
blobObjectCh := v.S3Client.ListObjectsStream(v.ctx, "blobs/", true)
blobObjectCh := v.Storage.ListStream(v.ctx, "blobs/")
for object := range blobObjectCh {
if object.Err != nil {
@@ -136,7 +136,7 @@ func (v *Vaultik) PruneBlobs(opts *PruneOptions) error {
for i, hash := range unreferencedBlobs {
blobPath := fmt.Sprintf("blobs/%s/%s/%s", hash[:2], hash[2:4], hash)
if err := v.S3Client.RemoveObject(v.ctx, blobPath); err != nil {
if err := v.Storage.Delete(v.ctx, blobPath); err != nil {
log.Error("Failed to delete blob", "hash", hash, "error", err)
continue
}

View File

@@ -265,7 +265,7 @@ func (v *Vaultik) CreateSnapshot(opts *SnapshotCreateOptions) error {
func (v *Vaultik) ListSnapshots(jsonOutput bool) error {
// Get all remote snapshots
remoteSnapshots := make(map[string]bool)
objectCh := v.S3Client.ListObjectsStream(v.ctx, "metadata/", false)
objectCh := v.Storage.ListStream(v.ctx, "metadata/")
for object := range objectCh {
if object.Err != nil {
@@ -546,7 +546,7 @@ func (v *Vaultik) VerifySnapshot(snapshotID string, deep bool) error {
return nil
} else {
// Just check existence
_, err := v.S3Client.StatObject(v.ctx, blobPath)
_, err := v.Storage.Stat(v.ctx, blobPath)
if err != nil {
fmt.Printf(" Missing: %s (%s)\n", blob.Hash, humanize.Bytes(uint64(blob.CompressedSize)))
missing++
@@ -581,7 +581,7 @@ func (v *Vaultik) VerifySnapshot(snapshotID string, deep bool) error {
func (v *Vaultik) getManifestSize(snapshotID string) (int64, error) {
manifestPath := fmt.Sprintf("metadata/%s/manifest.json.zst", snapshotID)
reader, err := v.S3Client.GetObject(v.ctx, manifestPath)
reader, err := v.Storage.Get(v.ctx, manifestPath)
if err != nil {
return 0, fmt.Errorf("downloading manifest: %w", err)
}
@@ -598,7 +598,7 @@ func (v *Vaultik) getManifestSize(snapshotID string) (int64, error) {
func (v *Vaultik) downloadManifest(snapshotID string) (*snapshot.Manifest, error) {
manifestPath := fmt.Sprintf("metadata/%s/manifest.json.zst", snapshotID)
reader, err := v.S3Client.GetObject(v.ctx, manifestPath)
reader, err := v.Storage.Get(v.ctx, manifestPath)
if err != nil {
return nil, err
}
@@ -613,10 +613,10 @@ func (v *Vaultik) downloadManifest(snapshotID string) (*snapshot.Manifest, error
}
func (v *Vaultik) deleteSnapshot(snapshotID string) error {
// First, delete from S3
// First, delete from storage
// List all objects under metadata/{snapshotID}/
prefix := fmt.Sprintf("metadata/%s/", snapshotID)
objectCh := v.S3Client.ListObjectsStream(v.ctx, prefix, true)
objectCh := v.Storage.ListStream(v.ctx, prefix)
var objectsToDelete []string
for object := range objectCh {
@@ -628,7 +628,7 @@ func (v *Vaultik) deleteSnapshot(snapshotID string) error {
// Delete all objects
for _, key := range objectsToDelete {
if err := v.S3Client.RemoveObject(v.ctx, key); err != nil {
if err := v.Storage.Delete(v.ctx, key); err != nil {
return fmt.Errorf("removing %s: %w", key, err)
}
}
@@ -658,7 +658,7 @@ func (v *Vaultik) syncWithRemote() error {
// Get all remote snapshot IDs
remoteSnapshots := make(map[string]bool)
objectCh := v.S3Client.ListObjectsStream(v.ctx, "metadata/", false)
objectCh := v.Storage.ListStream(v.ctx, "metadata/")
for object := range objectCh {
if object.Err != nil {

View File

@@ -10,8 +10,8 @@ import (
"git.eeqj.de/sneak/vaultik/internal/crypto"
"git.eeqj.de/sneak/vaultik/internal/database"
"git.eeqj.de/sneak/vaultik/internal/globals"
"git.eeqj.de/sneak/vaultik/internal/s3"
"git.eeqj.de/sneak/vaultik/internal/snapshot"
"git.eeqj.de/sneak/vaultik/internal/storage"
"github.com/spf13/afero"
"go.uber.org/fx"
)
@@ -22,7 +22,7 @@ type Vaultik struct {
Config *config.Config
DB *database.DB
Repositories *database.Repositories
S3Client *s3.Client
Storage storage.Storer
ScannerFactory snapshot.ScannerFactory
SnapshotManager *snapshot.SnapshotManager
Shutdowner fx.Shutdowner
@@ -46,7 +46,7 @@ type VaultikParams struct {
Config *config.Config
DB *database.DB
Repositories *database.Repositories
S3Client *s3.Client
Storage storage.Storer
ScannerFactory snapshot.ScannerFactory
SnapshotManager *snapshot.SnapshotManager
Shutdowner fx.Shutdowner
@@ -72,7 +72,7 @@ func New(params VaultikParams) *Vaultik {
Config: params.Config,
DB: params.DB,
Repositories: params.Repositories,
S3Client: params.S3Client,
Storage: params.Storage,
ScannerFactory: params.ScannerFactory,
SnapshotManager: params.SnapshotManager,
Shutdowner: params.Shutdowner,

View File

@@ -36,7 +36,7 @@ func (v *Vaultik) RunDeepVerify(snapshotID string, opts *VerifyOptions) error {
manifestPath := fmt.Sprintf("metadata/%s/manifest.json.zst", snapshotID)
log.Info("Downloading manifest", "path", manifestPath)
manifestReader, err := v.S3Client.GetObject(v.ctx, manifestPath)
manifestReader, err := v.Storage.Get(v.ctx, manifestPath)
if err != nil {
return fmt.Errorf("failed to download manifest: %w", err)
}
@@ -57,7 +57,7 @@ func (v *Vaultik) RunDeepVerify(snapshotID string, opts *VerifyOptions) error {
dbPath := fmt.Sprintf("metadata/%s/db.zst.age", snapshotID)
log.Info("Downloading encrypted database", "path", dbPath)
dbReader, err := v.S3Client.GetObject(v.ctx, dbPath)
dbReader, err := v.Storage.Get(v.ctx, dbPath)
if err != nil {
return fmt.Errorf("failed to download database: %w", err)
}
@@ -236,10 +236,10 @@ func (v *Vaultik) verifyBlobExistence(manifest *snapshot.Manifest) error {
// Construct blob path
blobPath := fmt.Sprintf("blobs/%s/%s/%s", blob.Hash[:2], blob.Hash[2:4], blob.Hash)
// Check blob exists with HeadObject
stat, err := v.S3Client.StatObject(v.ctx, blobPath)
// Check blob exists
stat, err := v.Storage.Stat(v.ctx, blobPath)
if err != nil {
return fmt.Errorf("blob %s missing from S3: %w", blob.Hash, err)
return fmt.Errorf("blob %s missing from storage: %w", blob.Hash, err)
}
// Verify size matches
@@ -258,7 +258,7 @@ func (v *Vaultik) verifyBlobExistence(manifest *snapshot.Manifest) error {
}
}
log.Info("✓ All blobs exist in S3")
log.Info("✓ All blobs exist in storage")
return nil
}
@@ -295,7 +295,7 @@ func (v *Vaultik) performDeepVerification(manifest *snapshot.Manifest, db *sql.D
func (v *Vaultik) verifyBlob(blobInfo snapshot.BlobInfo, db *sql.DB) error {
// Download blob
blobPath := fmt.Sprintf("blobs/%s/%s/%s", blobInfo.Hash[:2], blobInfo.Hash[2:4], blobInfo.Hash)
reader, err := v.S3Client.GetObject(v.ctx, blobPath)
reader, err := v.Storage.Get(v.ctx, blobPath)
if err != nil {
return fmt.Errorf("failed to download: %w", err)
}