Add pluggable storage backend, PID locking, and improved scan progress
Storage backend: - Add internal/storage package with Storer interface - Implement FileStorer for local filesystem storage (file:// URLs) - Implement S3Storer wrapping existing s3.Client - Support storage_url config field (s3:// or file://) - Migrate all consumers to use storage.Storer interface PID locking: - Add internal/pidlock package to prevent concurrent instances - Acquire lock before app start, release on exit - Detect stale locks from crashed processes Scan progress improvements: - Add fast file enumeration pass before stat() phase - Use enumerated set for deletion detection (no extra filesystem access) - Show progress with percentage, files/sec, elapsed time, and ETA - Change "changed" to "changed/new" for clarity Config improvements: - Add tilde expansion for paths (~/) - Use xdg library for platform-specific default index path
This commit is contained in:
262
internal/storage/file.go
Normal file
262
internal/storage/file.go
Normal file
@@ -0,0 +1,262 @@
|
||||
package storage
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"io"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
|
||||
"github.com/spf13/afero"
|
||||
)
|
||||
|
||||
// FileStorer implements Storer using the local filesystem.
|
||||
// It mirrors the S3 path structure for consistency.
|
||||
type FileStorer struct {
|
||||
fs afero.Fs
|
||||
basePath string
|
||||
}
|
||||
|
||||
// NewFileStorer creates a new filesystem storage backend.
|
||||
// The basePath directory will be created if it doesn't exist.
|
||||
// Uses the real OS filesystem by default; call SetFilesystem to override for testing.
|
||||
func NewFileStorer(basePath string) (*FileStorer, error) {
|
||||
fs := afero.NewOsFs()
|
||||
// Ensure base path exists
|
||||
if err := fs.MkdirAll(basePath, 0755); err != nil {
|
||||
return nil, fmt.Errorf("creating base path: %w", err)
|
||||
}
|
||||
return &FileStorer{
|
||||
fs: fs,
|
||||
basePath: basePath,
|
||||
}, nil
|
||||
}
|
||||
|
||||
// SetFilesystem overrides the filesystem for testing.
|
||||
func (f *FileStorer) SetFilesystem(fs afero.Fs) {
|
||||
f.fs = fs
|
||||
}
|
||||
|
||||
// fullPath returns the full filesystem path for a key.
|
||||
func (f *FileStorer) fullPath(key string) string {
|
||||
return filepath.Join(f.basePath, key)
|
||||
}
|
||||
|
||||
// Put stores data at the specified key.
|
||||
func (f *FileStorer) Put(ctx context.Context, key string, data io.Reader) error {
|
||||
path := f.fullPath(key)
|
||||
|
||||
// Create parent directories
|
||||
dir := filepath.Dir(path)
|
||||
if err := f.fs.MkdirAll(dir, 0755); err != nil {
|
||||
return fmt.Errorf("creating directories: %w", err)
|
||||
}
|
||||
|
||||
file, err := f.fs.Create(path)
|
||||
if err != nil {
|
||||
return fmt.Errorf("creating file: %w", err)
|
||||
}
|
||||
defer func() { _ = file.Close() }()
|
||||
|
||||
if _, err := io.Copy(file, data); err != nil {
|
||||
return fmt.Errorf("writing file: %w", err)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// PutWithProgress stores data with progress reporting.
|
||||
func (f *FileStorer) PutWithProgress(ctx context.Context, key string, data io.Reader, size int64, progress ProgressCallback) error {
|
||||
path := f.fullPath(key)
|
||||
|
||||
// Create parent directories
|
||||
dir := filepath.Dir(path)
|
||||
if err := f.fs.MkdirAll(dir, 0755); err != nil {
|
||||
return fmt.Errorf("creating directories: %w", err)
|
||||
}
|
||||
|
||||
file, err := f.fs.Create(path)
|
||||
if err != nil {
|
||||
return fmt.Errorf("creating file: %w", err)
|
||||
}
|
||||
defer func() { _ = file.Close() }()
|
||||
|
||||
// Wrap with progress tracking
|
||||
pw := &progressWriter{
|
||||
writer: file,
|
||||
callback: progress,
|
||||
}
|
||||
|
||||
if _, err := io.Copy(pw, data); err != nil {
|
||||
return fmt.Errorf("writing file: %w", err)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// Get retrieves data from the specified key.
|
||||
func (f *FileStorer) Get(ctx context.Context, key string) (io.ReadCloser, error) {
|
||||
path := f.fullPath(key)
|
||||
file, err := f.fs.Open(path)
|
||||
if err != nil {
|
||||
if os.IsNotExist(err) {
|
||||
return nil, ErrNotFound
|
||||
}
|
||||
return nil, fmt.Errorf("opening file: %w", err)
|
||||
}
|
||||
return file, nil
|
||||
}
|
||||
|
||||
// Stat returns metadata about an object without retrieving its contents.
|
||||
func (f *FileStorer) Stat(ctx context.Context, key string) (*ObjectInfo, error) {
|
||||
path := f.fullPath(key)
|
||||
info, err := f.fs.Stat(path)
|
||||
if err != nil {
|
||||
if os.IsNotExist(err) {
|
||||
return nil, ErrNotFound
|
||||
}
|
||||
return nil, fmt.Errorf("stat file: %w", err)
|
||||
}
|
||||
return &ObjectInfo{
|
||||
Key: key,
|
||||
Size: info.Size(),
|
||||
}, nil
|
||||
}
|
||||
|
||||
// Delete removes an object.
|
||||
func (f *FileStorer) Delete(ctx context.Context, key string) error {
|
||||
path := f.fullPath(key)
|
||||
err := f.fs.Remove(path)
|
||||
if os.IsNotExist(err) {
|
||||
return nil // Match S3 behavior: no error if doesn't exist
|
||||
}
|
||||
if err != nil {
|
||||
return fmt.Errorf("removing file: %w", err)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// List returns all keys with the given prefix.
|
||||
func (f *FileStorer) List(ctx context.Context, prefix string) ([]string, error) {
|
||||
var keys []string
|
||||
basePath := f.fullPath(prefix)
|
||||
|
||||
// Check if base path exists
|
||||
exists, err := afero.Exists(f.fs, basePath)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("checking path: %w", err)
|
||||
}
|
||||
if !exists {
|
||||
return keys, nil // Empty list for non-existent prefix
|
||||
}
|
||||
|
||||
err = afero.Walk(f.fs, basePath, func(path string, info os.FileInfo, err error) error {
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
// Check context cancellation
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
return ctx.Err()
|
||||
default:
|
||||
}
|
||||
|
||||
if !info.IsDir() {
|
||||
// Convert back to key (relative path from basePath)
|
||||
relPath, err := filepath.Rel(f.basePath, path)
|
||||
if err != nil {
|
||||
return fmt.Errorf("computing relative path: %w", err)
|
||||
}
|
||||
// Normalize path separators to forward slashes for consistency
|
||||
relPath = strings.ReplaceAll(relPath, string(filepath.Separator), "/")
|
||||
keys = append(keys, relPath)
|
||||
}
|
||||
return nil
|
||||
})
|
||||
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("walking directory: %w", err)
|
||||
}
|
||||
|
||||
return keys, nil
|
||||
}
|
||||
|
||||
// ListStream returns a channel of ObjectInfo for large result sets.
|
||||
func (f *FileStorer) ListStream(ctx context.Context, prefix string) <-chan ObjectInfo {
|
||||
ch := make(chan ObjectInfo)
|
||||
go func() {
|
||||
defer close(ch)
|
||||
basePath := f.fullPath(prefix)
|
||||
|
||||
// Check if base path exists
|
||||
exists, err := afero.Exists(f.fs, basePath)
|
||||
if err != nil {
|
||||
ch <- ObjectInfo{Err: fmt.Errorf("checking path: %w", err)}
|
||||
return
|
||||
}
|
||||
if !exists {
|
||||
return // Empty channel for non-existent prefix
|
||||
}
|
||||
|
||||
_ = afero.Walk(f.fs, basePath, func(path string, info os.FileInfo, err error) error {
|
||||
// Check context cancellation
|
||||
select {
|
||||
case <-ctx.Done():
|
||||
ch <- ObjectInfo{Err: ctx.Err()}
|
||||
return ctx.Err()
|
||||
default:
|
||||
}
|
||||
|
||||
if err != nil {
|
||||
ch <- ObjectInfo{Err: err}
|
||||
return nil // Continue walking despite errors
|
||||
}
|
||||
|
||||
if !info.IsDir() {
|
||||
relPath, err := filepath.Rel(f.basePath, path)
|
||||
if err != nil {
|
||||
ch <- ObjectInfo{Err: fmt.Errorf("computing relative path: %w", err)}
|
||||
return nil
|
||||
}
|
||||
// Normalize path separators
|
||||
relPath = strings.ReplaceAll(relPath, string(filepath.Separator), "/")
|
||||
ch <- ObjectInfo{
|
||||
Key: relPath,
|
||||
Size: info.Size(),
|
||||
}
|
||||
}
|
||||
return nil
|
||||
})
|
||||
}()
|
||||
return ch
|
||||
}
|
||||
|
||||
// Info returns human-readable storage location information.
|
||||
func (f *FileStorer) Info() StorageInfo {
|
||||
return StorageInfo{
|
||||
Type: "file",
|
||||
Location: f.basePath,
|
||||
}
|
||||
}
|
||||
|
||||
// progressWriter wraps an io.Writer to track write progress.
|
||||
type progressWriter struct {
|
||||
writer io.Writer
|
||||
written int64
|
||||
callback ProgressCallback
|
||||
}
|
||||
|
||||
func (pw *progressWriter) Write(p []byte) (int, error) {
|
||||
n, err := pw.writer.Write(p)
|
||||
if n > 0 {
|
||||
pw.written += int64(n)
|
||||
if pw.callback != nil {
|
||||
if callbackErr := pw.callback(pw.written); callbackErr != nil {
|
||||
return n, callbackErr
|
||||
}
|
||||
}
|
||||
}
|
||||
return n, err
|
||||
}
|
||||
Reference in New Issue
Block a user