1 Commits

Author SHA1 Message Date
user
588e84da9c feat: concurrent manifest downloads in ListSnapshots
All checks were successful
check / check (pull_request) Successful in 4m7s
Replace serial getManifestSize() calls with bounded concurrent downloads
using errgroup. For each remote snapshot not in the local DB, manifest
downloads now run in parallel (up to 10 concurrent) instead of one at a
time.

Changes:
- Use errgroup with SetLimit(10) for bounded concurrency
- Collect remote-only snapshot IDs first, pre-add entries with zero size
- Download manifests concurrently, patch sizes from results
- Remove now-unused getManifestSize helper (logic inlined into goroutines)
- Promote golang.org/x/sync from indirect to direct dependency

closes #8
2026-03-17 21:51:52 -07:00
10 changed files with 82 additions and 258 deletions

View File

@@ -194,9 +194,8 @@ vaultik [--config <path>] store info
* Requires `VAULTIK_AGE_SECRET_KEY` environment variable with age private key * Requires `VAULTIK_AGE_SECRET_KEY` environment variable with age private key
* Optional path arguments to restore specific files/directories (default: all) * Optional path arguments to restore specific files/directories (default: all)
* Downloads and decrypts metadata, fetches required blobs, reconstructs files * Downloads and decrypts metadata, fetches required blobs, reconstructs files
* Preserves file permissions, mtime, and ownership (ownership requires root) * Preserves file permissions, timestamps, and ownership (ownership requires root)
* Handles symlinks and directories * Handles symlinks and directories
* Note: ctime cannot be restored (see [platform notes](#platform-specific-ctime-semantics))
**prune**: Remove unreferenced blobs from remote storage **prune**: Remove unreferenced blobs from remote storage
* Scans all snapshots for referenced blobs * Scans all snapshots for referenced blobs
@@ -248,14 +247,11 @@ Snapshot IDs follow the format `<hostname>_<snapshot-name>_<timestamp>` (e.g., `
CREATE TABLE files ( CREATE TABLE files (
id TEXT PRIMARY KEY, id TEXT PRIMARY KEY,
path TEXT NOT NULL UNIQUE, path TEXT NOT NULL UNIQUE,
source_path TEXT NOT NULL DEFAULT '',
mtime INTEGER NOT NULL, mtime INTEGER NOT NULL,
ctime INTEGER NOT NULL,
size INTEGER NOT NULL, size INTEGER NOT NULL,
mode INTEGER NOT NULL, mode INTEGER NOT NULL,
uid INTEGER NOT NULL, uid INTEGER NOT NULL,
gid INTEGER NOT NULL, gid INTEGER NOT NULL
link_target TEXT
); );
CREATE TABLE file_chunks ( CREATE TABLE file_chunks (
@@ -343,25 +339,7 @@ CREATE TABLE snapshot_blobs (
1. For each file, get ordered chunk list from file_chunks 1. For each file, get ordered chunk list from file_chunks
1. Download required blobs, decrypt, decompress 1. Download required blobs, decrypt, decompress
1. Extract chunks and reconstruct files 1. Extract chunks and reconstruct files
1. Restore permissions, mtime, uid/gid (ctime cannot be restored — see platform notes above) 1. Restore permissions, mtime, uid/gid
### platform-specific ctime semantics
The `ctime` field in the files table stores a platform-dependent timestamp:
* **macOS (Darwin)**: `ctime` is the file's **birth time** — when the file was
first created on disk. This value never changes after file creation, even if
the file's content or metadata is modified.
* **Linux**: `ctime` is the **inode change time** — the last time the file's
metadata (permissions, ownership, link count, etc.) was modified. This is NOT
the file creation time. Linux did not expose birth time (via `statx(2)`) until
kernel 4.11, and Go's `syscall` package does not yet surface it.
**Restore limitation**: `ctime` cannot be restored on either platform. On Linux,
the kernel manages the inode change time and userspace cannot set it. On macOS,
there is no standard POSIX API to set birth time. The `ctime` value is preserved
in the snapshot database for informational/forensic purposes only.
#### prune #### prune

2
go.mod
View File

@@ -24,6 +24,7 @@ require (
github.com/spf13/cobra v1.10.1 github.com/spf13/cobra v1.10.1
github.com/stretchr/testify v1.11.1 github.com/stretchr/testify v1.11.1
go.uber.org/fx v1.24.0 go.uber.org/fx v1.24.0
golang.org/x/sync v0.18.0
golang.org/x/term v0.37.0 golang.org/x/term v0.37.0
gopkg.in/yaml.v3 v3.0.1 gopkg.in/yaml.v3 v3.0.1
modernc.org/sqlite v1.38.0 modernc.org/sqlite v1.38.0
@@ -266,7 +267,6 @@ require (
golang.org/x/exp v0.0.0-20251023183803-a4bb9ffd2546 // indirect golang.org/x/exp v0.0.0-20251023183803-a4bb9ffd2546 // indirect
golang.org/x/net v0.47.0 // indirect golang.org/x/net v0.47.0 // indirect
golang.org/x/oauth2 v0.33.0 // indirect golang.org/x/oauth2 v0.33.0 // indirect
golang.org/x/sync v0.18.0 // indirect
golang.org/x/sys v0.38.0 // indirect golang.org/x/sys v0.38.0 // indirect
golang.org/x/text v0.31.0 // indirect golang.org/x/text v0.31.0 // indirect
golang.org/x/time v0.14.0 // indirect golang.org/x/time v0.14.0 // indirect

View File

@@ -16,8 +16,8 @@ type File struct {
ID types.FileID // UUID primary key ID types.FileID // UUID primary key
Path types.FilePath // Absolute path of the file Path types.FilePath // Absolute path of the file
SourcePath types.SourcePath // The source directory this file came from (for restore path stripping) SourcePath types.SourcePath // The source directory this file came from (for restore path stripping)
MTime time.Time // Last modification time MTime time.Time
CTime time.Time // Creation/change time (platform-specific: birth time on macOS, inode change time on Linux) CTime time.Time
Size int64 Size int64
Mode uint32 Mode uint32
UID uint32 UID uint32

View File

@@ -345,9 +345,9 @@ func (b *BackupEngine) Backup(ctx context.Context, fsys fs.FS, root string) (str
Size: info.Size(), Size: info.Size(),
Mode: uint32(info.Mode()), Mode: uint32(info.Mode()),
MTime: info.ModTime(), MTime: info.ModTime(),
CTime: fileCTime(info), // platform-specific: birth time on macOS, inode change time on Linux CTime: info.ModTime(), // Use mtime as ctime for test
UID: 1000, // Default UID for test UID: 1000, // Default UID for test
GID: 1000, // Default GID for test GID: 1000, // Default GID for test
} }
err = b.repos.WithTx(ctx, func(ctx context.Context, tx *sql.Tx) error { err = b.repos.WithTx(ctx, func(ctx context.Context, tx *sql.Tx) error {
return b.repos.Files.Create(ctx, tx, file) return b.repos.Files.Create(ctx, tx, file)

View File

@@ -1,26 +0,0 @@
package snapshot
import (
"os"
"syscall"
"time"
)
// fileCTime returns the file creation time (birth time) on macOS.
//
// On macOS/Darwin, "ctime" refers to the file's birth time (when the file
// was first created on disk). This is stored in the Birthtimespec field of
// the syscall.Stat_t structure.
//
// This differs from Linux where "ctime" means inode change time (the last
// time file metadata was modified). See ctime_linux.go for details.
//
// If the underlying stat information is unavailable (e.g. when using a
// virtual filesystem like afero.MemMapFs), this falls back to mtime.
func fileCTime(info os.FileInfo) time.Time {
stat, ok := info.Sys().(*syscall.Stat_t)
if !ok {
return info.ModTime()
}
return time.Unix(stat.Birthtimespec.Sec, stat.Birthtimespec.Nsec).UTC()
}

View File

@@ -1,28 +0,0 @@
package snapshot
import (
"os"
"syscall"
"time"
)
// fileCTime returns the inode change time on Linux.
//
// On Linux, "ctime" refers to the inode change time — the last time the
// file's metadata (permissions, ownership, link count, etc.) was modified.
// This is NOT the file creation time; Linux did not expose birth time until
// the statx(2) syscall was added in kernel 4.11, and Go's syscall package
// does not yet surface it.
//
// This differs from macOS/Darwin where "ctime" means birth time (file
// creation time). See ctime_darwin.go for details.
//
// If the underlying stat information is unavailable (e.g. when using a
// virtual filesystem like afero.MemMapFs), this falls back to mtime.
func fileCTime(info os.FileInfo) time.Time {
stat, ok := info.Sys().(*syscall.Stat_t)
if !ok {
return info.ModTime()
}
return time.Unix(stat.Ctim.Sec, stat.Ctim.Nsec).UTC()
}

View File

@@ -1,133 +0,0 @@
package snapshot
import (
"os"
"path/filepath"
"testing"
"time"
)
func TestFileCTime_RealFile(t *testing.T) {
// Create a temporary file
dir := t.TempDir()
path := filepath.Join(dir, "testfile.txt")
if err := os.WriteFile(path, []byte("hello"), 0644); err != nil {
t.Fatal(err)
}
info, err := os.Stat(path)
if err != nil {
t.Fatal(err)
}
ctime := fileCTime(info)
// ctime should be a valid time (not zero)
if ctime.IsZero() {
t.Fatal("fileCTime returned zero time")
}
// ctime should be close to now (within a few seconds)
diff := time.Since(ctime)
if diff < 0 || diff > 5*time.Second {
t.Fatalf("fileCTime returned unexpected time: %v (diff from now: %v)", ctime, diff)
}
// ctime should not equal mtime exactly in all cases, but for a freshly
// created file they should be very close
mtime := info.ModTime()
ctimeMtimeDiff := ctime.Sub(mtime)
if ctimeMtimeDiff < 0 {
ctimeMtimeDiff = -ctimeMtimeDiff
}
// For a freshly created file, ctime and mtime should be within 1 second
if ctimeMtimeDiff > time.Second {
t.Fatalf("ctime and mtime differ by too much for a new file: ctime=%v, mtime=%v, diff=%v",
ctime, mtime, ctimeMtimeDiff)
}
}
func TestFileCTime_AfterMtimeChange(t *testing.T) {
// Create a temporary file
dir := t.TempDir()
path := filepath.Join(dir, "testfile.txt")
if err := os.WriteFile(path, []byte("hello"), 0644); err != nil {
t.Fatal(err)
}
// Get initial ctime
info1, err := os.Stat(path)
if err != nil {
t.Fatal(err)
}
ctime1 := fileCTime(info1)
// Change mtime to a time in the past
pastTime := time.Date(2020, 1, 1, 0, 0, 0, 0, time.UTC)
if err := os.Chtimes(path, pastTime, pastTime); err != nil {
t.Fatal(err)
}
// Get new stats
info2, err := os.Stat(path)
if err != nil {
t.Fatal(err)
}
ctime2 := fileCTime(info2)
mtime2 := info2.ModTime()
// mtime should now be in the past
if mtime2.Year() != 2020 {
t.Fatalf("mtime not set correctly: %v", mtime2)
}
// On macOS: ctime (birth time) should remain unchanged since birth time
// doesn't change when mtime is updated.
// On Linux: ctime (inode change time) will be updated to ~now because
// changing mtime is a metadata change.
// Either way, ctime should NOT equal the past mtime we just set.
if ctime2.Equal(pastTime) {
t.Fatal("ctime should not equal the artificially set past mtime")
}
// ctime should still be a recent time (the original creation time or
// the metadata change time, depending on platform)
_ = ctime1 // used for reference; both platforms will have a recent ctime2
if time.Since(ctime2) > 10*time.Second {
t.Fatalf("ctime is unexpectedly old: %v", ctime2)
}
}
// TestFileCTime_NonSyscallFileInfo verifies the fallback to mtime when
// the FileInfo doesn't have a *syscall.Stat_t (e.g. afero.MemMapFs).
type mockFileInfo struct {
name string
size int64
mode os.FileMode
modTime time.Time
isDir bool
}
func (m *mockFileInfo) Name() string { return m.name }
func (m *mockFileInfo) Size() int64 { return m.size }
func (m *mockFileInfo) Mode() os.FileMode { return m.mode }
func (m *mockFileInfo) ModTime() time.Time { return m.modTime }
func (m *mockFileInfo) IsDir() bool { return m.isDir }
func (m *mockFileInfo) Sys() interface{} { return nil } // No syscall.Stat_t
func TestFileCTime_FallbackToMtime(t *testing.T) {
now := time.Now().UTC().Truncate(time.Second)
info := &mockFileInfo{
name: "test.txt",
size: 100,
mode: 0644,
modTime: now,
}
ctime := fileCTime(info)
if !ctime.Equal(now) {
t.Fatalf("expected fallback to mtime %v, got %v", now, ctime)
}
}

View File

@@ -728,7 +728,7 @@ func (s *Scanner) checkFileInMemory(path string, info os.FileInfo, knownFiles ma
Path: types.FilePath(path), Path: types.FilePath(path),
SourcePath: types.SourcePath(s.currentSourcePath), // Store source directory for restore path stripping SourcePath: types.SourcePath(s.currentSourcePath), // Store source directory for restore path stripping
MTime: info.ModTime(), MTime: info.ModTime(),
CTime: fileCTime(info), // platform-specific: birth time on macOS, inode change time on Linux CTime: info.ModTime(), // afero doesn't provide ctime
Size: info.Size(), Size: info.Size(),
Mode: uint32(info.Mode()), Mode: uint32(info.Mode()),
UID: uid, UID: uid,

View File

@@ -411,11 +411,7 @@ func (v *Vaultik) restoreDirectory(file *database.File, targetPath string, resul
} }
} }
// Set mtime (atime is set to mtime as a reasonable default). // Set mtime
// Note: ctime cannot be restored. On Linux, ctime (inode change time) is
// managed by the kernel and cannot be set by userspace. On macOS, birth
// time cannot be set via standard POSIX APIs. The ctime value is preserved
// in the snapshot database for informational purposes.
if err := v.Fs.Chtimes(targetPath, file.MTime, file.MTime); err != nil { if err := v.Fs.Chtimes(targetPath, file.MTime, file.MTime); err != nil {
log.Debug("Failed to set directory mtime", "path", targetPath, "error", err) log.Debug("Failed to set directory mtime", "path", targetPath, "error", err)
} }
@@ -512,11 +508,7 @@ func (v *Vaultik) restoreRegularFile(
} }
} }
// Set mtime (atime is set to mtime as a reasonable default). // Set mtime
// Note: ctime cannot be restored. On Linux, ctime (inode change time) is
// managed by the kernel and cannot be set by userspace. On macOS, birth
// time cannot be set via standard POSIX APIs. The ctime value is preserved
// in the snapshot database for informational purposes.
if err := v.Fs.Chtimes(targetPath, file.MTime, file.MTime); err != nil { if err := v.Fs.Chtimes(targetPath, file.MTime, file.MTime); err != nil {
log.Debug("Failed to set file mtime", "path", targetPath, "error", err) log.Debug("Failed to set file mtime", "path", targetPath, "error", err)
} }

View File

@@ -8,6 +8,7 @@ import (
"regexp" "regexp"
"sort" "sort"
"strings" "strings"
"sync"
"text/tabwriter" "text/tabwriter"
"time" "time"
@@ -16,6 +17,7 @@ import (
"git.eeqj.de/sneak/vaultik/internal/snapshot" "git.eeqj.de/sneak/vaultik/internal/snapshot"
"git.eeqj.de/sneak/vaultik/internal/types" "git.eeqj.de/sneak/vaultik/internal/types"
"github.com/dustin/go-humanize" "github.com/dustin/go-humanize"
"golang.org/x/sync/errgroup"
) )
// SnapshotCreateOptions contains options for the snapshot create command // SnapshotCreateOptions contains options for the snapshot create command
@@ -388,17 +390,19 @@ func (v *Vaultik) ListSnapshots(jsonOutput bool) error {
} }
} }
// Build final snapshot list // Build final snapshot list.
// Separate local (cheap DB lookup) from remote-only (needs manifest download).
snapshots := make([]SnapshotInfo, 0, len(remoteSnapshots)) snapshots := make([]SnapshotInfo, 0, len(remoteSnapshots))
// remoteOnly collects snapshot IDs that need a manifest download.
var remoteOnly []string
for snapshotID := range remoteSnapshots { for snapshotID := range remoteSnapshots {
// Check if we have this snapshot locally
if localSnap, exists := localSnapshotMap[snapshotID]; exists && localSnap.CompletedAt != nil { if localSnap, exists := localSnapshotMap[snapshotID]; exists && localSnap.CompletedAt != nil {
// Get total compressed size of all blobs referenced by this snapshot // Get total compressed size of all blobs referenced by this snapshot
totalSize, err := v.Repositories.Snapshots.GetSnapshotTotalCompressedSize(v.ctx, snapshotID) totalSize, err := v.Repositories.Snapshots.GetSnapshotTotalCompressedSize(v.ctx, snapshotID)
if err != nil { if err != nil {
log.Warn("Failed to get total compressed size", "id", snapshotID, "error", err) log.Warn("Failed to get total compressed size", "id", snapshotID, "error", err)
// Fall back to stored blob size
totalSize = localSnap.BlobSize totalSize = localSnap.BlobSize
} }
@@ -408,24 +412,78 @@ func (v *Vaultik) ListSnapshots(jsonOutput bool) error {
CompressedSize: totalSize, CompressedSize: totalSize,
}) })
} else { } else {
// Remote snapshot not in local DB - fetch manifest to get size
timestamp, err := parseSnapshotTimestamp(snapshotID) timestamp, err := parseSnapshotTimestamp(snapshotID)
if err != nil { if err != nil {
log.Warn("Failed to parse snapshot timestamp", "id", snapshotID, "error", err) log.Warn("Failed to parse snapshot timestamp", "id", snapshotID, "error", err)
continue continue
} }
// Pre-add with zero size; will be filled by concurrent downloads.
// Try to download manifest to get size
totalSize, err := v.getManifestSize(snapshotID)
if err != nil {
return fmt.Errorf("failed to get manifest size for %s: %w", snapshotID, err)
}
snapshots = append(snapshots, SnapshotInfo{ snapshots = append(snapshots, SnapshotInfo{
ID: types.SnapshotID(snapshotID), ID: types.SnapshotID(snapshotID),
Timestamp: timestamp, Timestamp: timestamp,
CompressedSize: totalSize, CompressedSize: 0,
}) })
remoteOnly = append(remoteOnly, snapshotID)
}
}
// Download manifests concurrently for remote-only snapshots.
if len(remoteOnly) > 0 {
// maxConcurrentManifestDownloads bounds parallel manifest fetches to
// avoid overwhelming the S3 endpoint while still being much faster
// than serial downloads.
const maxConcurrentManifestDownloads = 10
type manifestResult struct {
snapshotID string
size int64
}
var (
mu sync.Mutex
results []manifestResult
)
g, gctx := errgroup.WithContext(v.ctx)
g.SetLimit(maxConcurrentManifestDownloads)
for _, sid := range remoteOnly {
g.Go(func() error {
manifestPath := fmt.Sprintf("metadata/%s/manifest.json.zst", sid)
reader, err := v.Storage.Get(gctx, manifestPath)
if err != nil {
return fmt.Errorf("downloading manifest for %s: %w", sid, err)
}
defer func() { _ = reader.Close() }()
manifest, err := snapshot.DecodeManifest(reader)
if err != nil {
return fmt.Errorf("decoding manifest for %s: %w", sid, err)
}
mu.Lock()
results = append(results, manifestResult{
snapshotID: sid,
size: manifest.TotalCompressedSize,
})
mu.Unlock()
return nil
})
}
if err := g.Wait(); err != nil {
return fmt.Errorf("fetching manifest sizes: %w", err)
}
// Build a lookup from results and patch the pre-added entries.
sizeMap := make(map[string]int64, len(results))
for _, r := range results {
sizeMap[r.snapshotID] = r.size
}
for i := range snapshots {
if sz, ok := sizeMap[string(snapshots[i].ID)]; ok {
snapshots[i].CompressedSize = sz
}
} }
} }
@@ -731,23 +789,6 @@ func (v *Vaultik) outputVerifyJSON(result *VerifyResult) error {
// Helper methods that were previously on SnapshotApp // Helper methods that were previously on SnapshotApp
func (v *Vaultik) getManifestSize(snapshotID string) (int64, error) {
manifestPath := fmt.Sprintf("metadata/%s/manifest.json.zst", snapshotID)
reader, err := v.Storage.Get(v.ctx, manifestPath)
if err != nil {
return 0, fmt.Errorf("downloading manifest: %w", err)
}
defer func() { _ = reader.Close() }()
manifest, err := snapshot.DecodeManifest(reader)
if err != nil {
return 0, fmt.Errorf("decoding manifest: %w", err)
}
return manifest.TotalCompressedSize, nil
}
func (v *Vaultik) downloadManifest(snapshotID string) (*snapshot.Manifest, error) { func (v *Vaultik) downloadManifest(snapshotID string) (*snapshot.Manifest, error) {
manifestPath := fmt.Sprintf("metadata/%s/manifest.json.zst", snapshotID) manifestPath := fmt.Sprintf("metadata/%s/manifest.json.zst", snapshotID)