fix: populate ctime from actual filesystem stats instead of mtime fallback #48

Closed
clawbot wants to merge 1 commits from fix/ctime-scanner-population into main
8 changed files with 228 additions and 11 deletions
Showing only changes of commit 25860c03a9 - Show all commits

View File

@@ -194,8 +194,9 @@ vaultik [--config <path>] store info
* Requires `VAULTIK_AGE_SECRET_KEY` environment variable with age private key * Requires `VAULTIK_AGE_SECRET_KEY` environment variable with age private key
* Optional path arguments to restore specific files/directories (default: all) * Optional path arguments to restore specific files/directories (default: all)
* Downloads and decrypts metadata, fetches required blobs, reconstructs files * Downloads and decrypts metadata, fetches required blobs, reconstructs files
* Preserves file permissions, timestamps, and ownership (ownership requires root) * Preserves file permissions, mtime, and ownership (ownership requires root)
* Handles symlinks and directories * Handles symlinks and directories
* Note: ctime cannot be restored (see [platform notes](#platform-specific-ctime-semantics))
**prune**: Remove unreferenced blobs from remote storage **prune**: Remove unreferenced blobs from remote storage
* Scans all snapshots for referenced blobs * Scans all snapshots for referenced blobs
@@ -247,11 +248,14 @@ Snapshot IDs follow the format `<hostname>_<snapshot-name>_<timestamp>` (e.g., `
CREATE TABLE files ( CREATE TABLE files (
id TEXT PRIMARY KEY, id TEXT PRIMARY KEY,
path TEXT NOT NULL UNIQUE, path TEXT NOT NULL UNIQUE,
source_path TEXT NOT NULL DEFAULT '',
mtime INTEGER NOT NULL, mtime INTEGER NOT NULL,
ctime INTEGER NOT NULL,
size INTEGER NOT NULL, size INTEGER NOT NULL,
mode INTEGER NOT NULL, mode INTEGER NOT NULL,
uid INTEGER NOT NULL, uid INTEGER NOT NULL,
gid INTEGER NOT NULL gid INTEGER NOT NULL,
link_target TEXT
); );
CREATE TABLE file_chunks ( CREATE TABLE file_chunks (
@@ -339,7 +343,25 @@ CREATE TABLE snapshot_blobs (
1. For each file, get ordered chunk list from file_chunks 1. For each file, get ordered chunk list from file_chunks
1. Download required blobs, decrypt, decompress 1. Download required blobs, decrypt, decompress
1. Extract chunks and reconstruct files 1. Extract chunks and reconstruct files
1. Restore permissions, mtime, uid/gid 1. Restore permissions, mtime, uid/gid (ctime cannot be restored — see platform notes above)
### platform-specific ctime semantics
The `ctime` field in the files table stores a platform-dependent timestamp:
* **macOS (Darwin)**: `ctime` is the file's **birth time** — when the file was
first created on disk. This value never changes after file creation, even if
the file's content or metadata is modified.
* **Linux**: `ctime` is the **inode change time** — the last time the file's
metadata (permissions, ownership, link count, etc.) was modified. This is NOT
the file creation time. Linux did not expose birth time (via `statx(2)`) until
kernel 4.11, and Go's `syscall` package does not yet surface it.
**Restore limitation**: `ctime` cannot be restored on either platform. On Linux,
the kernel manages the inode change time and userspace cannot set it. On macOS,
there is no standard POSIX API to set birth time. The `ctime` value is preserved
in the snapshot database for informational/forensic purposes only.
#### prune #### prune

View File

@@ -16,8 +16,8 @@ type File struct {
ID types.FileID // UUID primary key ID types.FileID // UUID primary key
Path types.FilePath // Absolute path of the file Path types.FilePath // Absolute path of the file
SourcePath types.SourcePath // The source directory this file came from (for restore path stripping) SourcePath types.SourcePath // The source directory this file came from (for restore path stripping)
MTime time.Time MTime time.Time // Last modification time
CTime time.Time CTime time.Time // Creation/change time (platform-specific: birth time on macOS, inode change time on Linux)
Size int64 Size int64
Mode uint32 Mode uint32
UID uint32 UID uint32

View File

@@ -345,7 +345,7 @@ func (b *BackupEngine) Backup(ctx context.Context, fsys fs.FS, root string) (str
Size: info.Size(), Size: info.Size(),
Mode: uint32(info.Mode()), Mode: uint32(info.Mode()),
MTime: info.ModTime(), MTime: info.ModTime(),
CTime: info.ModTime(), // Use mtime as ctime for test CTime: fileCTime(info), // platform-specific: birth time on macOS, inode change time on Linux
UID: 1000, // Default UID for test UID: 1000, // Default UID for test
GID: 1000, // Default GID for test GID: 1000, // Default GID for test
} }

View File

@@ -0,0 +1,26 @@
package snapshot
import (
"os"
"syscall"
"time"
)
// fileCTime returns the file creation time (birth time) on macOS.
//
// On macOS/Darwin, "ctime" refers to the file's birth time (when the file
// was first created on disk). This is stored in the Birthtimespec field of
// the syscall.Stat_t structure.
//
// This differs from Linux where "ctime" means inode change time (the last
// time file metadata was modified). See ctime_linux.go for details.
//
// If the underlying stat information is unavailable (e.g. when using a
// virtual filesystem like afero.MemMapFs), this falls back to mtime.
func fileCTime(info os.FileInfo) time.Time {
stat, ok := info.Sys().(*syscall.Stat_t)
if !ok {
return info.ModTime()
}
return time.Unix(stat.Birthtimespec.Sec, stat.Birthtimespec.Nsec).UTC()
}

View File

@@ -0,0 +1,28 @@
package snapshot
import (
"os"
"syscall"
"time"
)
// fileCTime returns the inode change time on Linux.
//
// On Linux, "ctime" refers to the inode change time — the last time the
// file's metadata (permissions, ownership, link count, etc.) was modified.
// This is NOT the file creation time; Linux did not expose birth time until
// the statx(2) syscall was added in kernel 4.11, and Go's syscall package
// does not yet surface it.
//
// This differs from macOS/Darwin where "ctime" means birth time (file
// creation time). See ctime_darwin.go for details.
//
// If the underlying stat information is unavailable (e.g. when using a
// virtual filesystem like afero.MemMapFs), this falls back to mtime.
func fileCTime(info os.FileInfo) time.Time {
stat, ok := info.Sys().(*syscall.Stat_t)
if !ok {
return info.ModTime()
}
return time.Unix(stat.Ctim.Sec, stat.Ctim.Nsec).UTC()
}

View File

@@ -0,0 +1,133 @@
package snapshot
import (
"os"
"path/filepath"
"testing"
"time"
)
func TestFileCTime_RealFile(t *testing.T) {
// Create a temporary file
dir := t.TempDir()
path := filepath.Join(dir, "testfile.txt")
if err := os.WriteFile(path, []byte("hello"), 0644); err != nil {
t.Fatal(err)
}
info, err := os.Stat(path)
if err != nil {
t.Fatal(err)
}
ctime := fileCTime(info)
// ctime should be a valid time (not zero)
if ctime.IsZero() {
t.Fatal("fileCTime returned zero time")
}
// ctime should be close to now (within a few seconds)
diff := time.Since(ctime)
if diff < 0 || diff > 5*time.Second {
t.Fatalf("fileCTime returned unexpected time: %v (diff from now: %v)", ctime, diff)
}
// ctime should not equal mtime exactly in all cases, but for a freshly
// created file they should be very close
mtime := info.ModTime()
ctimeMtimeDiff := ctime.Sub(mtime)
if ctimeMtimeDiff < 0 {
ctimeMtimeDiff = -ctimeMtimeDiff
}
// For a freshly created file, ctime and mtime should be within 1 second
if ctimeMtimeDiff > time.Second {
t.Fatalf("ctime and mtime differ by too much for a new file: ctime=%v, mtime=%v, diff=%v",
ctime, mtime, ctimeMtimeDiff)
}
}
func TestFileCTime_AfterMtimeChange(t *testing.T) {
// Create a temporary file
dir := t.TempDir()
path := filepath.Join(dir, "testfile.txt")
if err := os.WriteFile(path, []byte("hello"), 0644); err != nil {
t.Fatal(err)
}
// Get initial ctime
info1, err := os.Stat(path)
if err != nil {
t.Fatal(err)
}
ctime1 := fileCTime(info1)
// Change mtime to a time in the past
pastTime := time.Date(2020, 1, 1, 0, 0, 0, 0, time.UTC)
if err := os.Chtimes(path, pastTime, pastTime); err != nil {
t.Fatal(err)
}
// Get new stats
info2, err := os.Stat(path)
if err != nil {
t.Fatal(err)
}
ctime2 := fileCTime(info2)
mtime2 := info2.ModTime()
// mtime should now be in the past
if mtime2.Year() != 2020 {
t.Fatalf("mtime not set correctly: %v", mtime2)
}
// On macOS: ctime (birth time) should remain unchanged since birth time
// doesn't change when mtime is updated.
// On Linux: ctime (inode change time) will be updated to ~now because
// changing mtime is a metadata change.
// Either way, ctime should NOT equal the past mtime we just set.
if ctime2.Equal(pastTime) {
t.Fatal("ctime should not equal the artificially set past mtime")
}
// ctime should still be a recent time (the original creation time or
// the metadata change time, depending on platform)
_ = ctime1 // used for reference; both platforms will have a recent ctime2
if time.Since(ctime2) > 10*time.Second {
t.Fatalf("ctime is unexpectedly old: %v", ctime2)
}
}
// TestFileCTime_NonSyscallFileInfo verifies the fallback to mtime when
// the FileInfo doesn't have a *syscall.Stat_t (e.g. afero.MemMapFs).
type mockFileInfo struct {
name string
size int64
mode os.FileMode
modTime time.Time
isDir bool
}
func (m *mockFileInfo) Name() string { return m.name }
func (m *mockFileInfo) Size() int64 { return m.size }
func (m *mockFileInfo) Mode() os.FileMode { return m.mode }
func (m *mockFileInfo) ModTime() time.Time { return m.modTime }
func (m *mockFileInfo) IsDir() bool { return m.isDir }
func (m *mockFileInfo) Sys() interface{} { return nil } // No syscall.Stat_t
func TestFileCTime_FallbackToMtime(t *testing.T) {
now := time.Now().UTC().Truncate(time.Second)
info := &mockFileInfo{
name: "test.txt",
size: 100,
mode: 0644,
modTime: now,
}
ctime := fileCTime(info)
if !ctime.Equal(now) {
t.Fatalf("expected fallback to mtime %v, got %v", now, ctime)
}
}

View File

@@ -728,7 +728,7 @@ func (s *Scanner) checkFileInMemory(path string, info os.FileInfo, knownFiles ma
Path: types.FilePath(path), Path: types.FilePath(path),
SourcePath: types.SourcePath(s.currentSourcePath), // Store source directory for restore path stripping SourcePath: types.SourcePath(s.currentSourcePath), // Store source directory for restore path stripping
MTime: info.ModTime(), MTime: info.ModTime(),
CTime: info.ModTime(), // afero doesn't provide ctime CTime: fileCTime(info), // platform-specific: birth time on macOS, inode change time on Linux
Size: info.Size(), Size: info.Size(),
Mode: uint32(info.Mode()), Mode: uint32(info.Mode()),
UID: uid, UID: uid,

View File

@@ -411,7 +411,11 @@ func (v *Vaultik) restoreDirectory(file *database.File, targetPath string, resul
} }
} }
// Set mtime // Set mtime (atime is set to mtime as a reasonable default).
// Note: ctime cannot be restored. On Linux, ctime (inode change time) is
// managed by the kernel and cannot be set by userspace. On macOS, birth
// time cannot be set via standard POSIX APIs. The ctime value is preserved
// in the snapshot database for informational purposes.
if err := v.Fs.Chtimes(targetPath, file.MTime, file.MTime); err != nil { if err := v.Fs.Chtimes(targetPath, file.MTime, file.MTime); err != nil {
log.Debug("Failed to set directory mtime", "path", targetPath, "error", err) log.Debug("Failed to set directory mtime", "path", targetPath, "error", err)
} }
@@ -508,7 +512,11 @@ func (v *Vaultik) restoreRegularFile(
} }
} }
// Set mtime // Set mtime (atime is set to mtime as a reasonable default).
// Note: ctime cannot be restored. On Linux, ctime (inode change time) is
// managed by the kernel and cannot be set by userspace. On macOS, birth
// time cannot be set via standard POSIX APIs. The ctime value is preserved
// in the snapshot database for informational purposes.
if err := v.Fs.Chtimes(targetPath, file.MTime, file.MTime); err != nil { if err := v.Fs.Chtimes(targetPath, file.MTime, file.MTime); err != nil {
log.Debug("Failed to set file mtime", "path", targetPath, "error", err) log.Debug("Failed to set file mtime", "path", targetPath, "error", err)
} }