diff --git a/README.md b/README.md index 32a5a37..3b271bd 100644 --- a/README.md +++ b/README.md @@ -194,8 +194,11 @@ vaultik [--config ] store info * Requires `VAULTIK_AGE_SECRET_KEY` environment variable with age private key * Optional path arguments to restore specific files/directories (default: all) * Downloads and decrypts metadata, fetches required blobs, reconstructs files -* Preserves file permissions, timestamps, and ownership (ownership requires root) +* Preserves file permissions, timestamps (mtime), and ownership (ownership requires root) * Handles symlinks and directories +* Note: ctime is recorded in the snapshot for informational purposes but is not + restored, as setting ctime is not possible through standard system calls on + most platforms **prune**: Remove unreferenced blobs from remote storage * Scans all snapshots for referenced blobs @@ -207,6 +210,42 @@ vaultik [--config ] store info --- +## file metadata + +vaultik records the following metadata for each file: path, size, mode +(permissions), uid, gid, mtime (modification time), ctime, and symlink +target. + +### ctime semantics (platform-specific) + +The `ctime` field has different meanings depending on the operating system: + +| Platform | ctime value | Source | +|----------|-------------|--------| +| **macOS** | File birth (creation) time | `syscall.Stat_t.Birthtimespec` | +| **Linux** | Inode change time | `syscall.Stat_t.Ctim` | +| **Other** | Falls back to mtime | `os.FileInfo.ModTime()` | + +**macOS (Darwin):** HFS+ and APFS filesystems natively track file creation +time. The `ctime` field contains the true file birth time — when the file was +first created on disk. + +**Linux:** Most Linux filesystems do not expose file creation time through +standard Go APIs. The `ctime` field contains the inode change time, which is +updated whenever file metadata (permissions, ownership, link count) or content +changes. 
Linux ext4 (kernel 4.11+) and btrfs do track birth time via the +`statx()` syscall, but this is not exposed through Go's `os.FileInfo.Sys()`. + +**Restore:** ctime is stored in the snapshot database for informational and +forensic purposes but is not restored to the filesystem. Setting ctime is not +possible through standard system calls on most Unix platforms — the kernel +manages ctime automatically. + +When using in-memory filesystems (e.g. afero `MemMapFs` in tests), ctime falls +back to mtime since there is no underlying `syscall.Stat_t`. + +--- + ## architecture ### s3 bucket layout @@ -247,11 +286,14 @@ Snapshot IDs follow the format `__` (e.g., ` CREATE TABLE files ( id TEXT PRIMARY KEY, path TEXT NOT NULL UNIQUE, + source_path TEXT NOT NULL DEFAULT '', mtime INTEGER NOT NULL, + ctime INTEGER NOT NULL, size INTEGER NOT NULL, mode INTEGER NOT NULL, uid INTEGER NOT NULL, - gid INTEGER NOT NULL + gid INTEGER NOT NULL, + link_target TEXT ); CREATE TABLE file_chunks ( diff --git a/internal/database/models.go b/internal/database/models.go index 729b576..db9080d 100644 --- a/internal/database/models.go +++ b/internal/database/models.go @@ -17,6 +17,10 @@ type File struct { Path types.FilePath // Absolute path of the file SourcePath types.SourcePath // The source directory this file came from (for restore path stripping) MTime time.Time + // CTime is the file creation/change time. On macOS this is the birth time + // (when the file was created). On Linux this is the inode change time + // (updated on metadata or content changes). See ctime_darwin.go and + // ctime_linux.go in the snapshot package for extraction details. 
CTime time.Time Size int64 Mode uint32
diff --git a/internal/snapshot/ctime_darwin.go b/internal/snapshot/ctime_darwin.go
new file mode 100644
index 0000000..f4cf4eb
--- /dev/null
+++ b/internal/snapshot/ctime_darwin.go
@@ -0,0 +1,23 @@
+package snapshot
+
+import (
+	"os"
+	"syscall"
+	"time"
+)
+
+// getCTime returns the timestamp recorded as a file's ctime on macOS.
+//
+// When info.Sys() yields a *syscall.Stat_t, the result is built from its
+// Birthtimespec field (the file's birth, i.e. creation, time) and converted
+// to UTC. This is a creation timestamp, not an inode change time.
+//
+// For any other Sys() value (e.g. in-memory filesystems used in tests) the
+// modification time from info.ModTime() is returned as-is.
+func getCTime(info os.FileInfo) time.Time {
+	if st, ok := info.Sys().(*syscall.Stat_t); ok {
+		birth := st.Birthtimespec
+		return time.Unix(birth.Sec, birth.Nsec).UTC()
+	}
+	return info.ModTime()
+}
diff --git a/internal/snapshot/ctime_linux.go b/internal/snapshot/ctime_linux.go
new file mode 100644
index 0000000..5f29438
--- /dev/null
+++ b/internal/snapshot/ctime_linux.go
@@ -0,0 +1,29 @@
+package snapshot
+
+import (
+	"os"
+	"syscall"
+	"time"
+)
+
+// getCTime extracts the inode change time (ctime) from os.FileInfo.
+//
+// On Linux, this returns the inode change time (Ctim) from the underlying
+// syscall.Stat_t. Linux ctime is updated whenever file metadata (permissions,
+// ownership, link count) or content changes. It is NOT the file creation
+// (birth) time.
+//
+// Note: Linux ext4 (kernel 4.11+) and btrfs do track birth time via the
+// statx() syscall, but this is not exposed through Go's os.FileInfo.Sys().
+// The inode change time is the best available approximation through standard
+// Go APIs.
+//
+// Falls back to modification time if the underlying Sys() data is not a
+// *syscall.Stat_t (e.g. when using in-memory filesystems for testing).
+func getCTime(info os.FileInfo) time.Time {
+	if stat, ok := info.Sys().(*syscall.Stat_t); ok {
+		// Timespec fields are int32 on 32-bit Linux; time.Unix takes int64.
+		return time.Unix(int64(stat.Ctim.Sec), int64(stat.Ctim.Nsec)).UTC()
+	}
+	return info.ModTime()
+}
diff --git a/internal/snapshot/ctime_other.go b/internal/snapshot/ctime_other.go
new file mode 100644
index 0000000..75c1d29
--- /dev/null
+++ b/internal/snapshot/ctime_other.go
@@ -0,0 +1,15 @@
+//go:build !darwin && !linux
+
+package snapshot
+
+import (
+	"os"
+	"time"
+)
+
+// getCTime returns the file's modification time as a fallback on unsupported
+// platforms. See ctime_darwin.go and ctime_linux.go for platform-specific
+// implementations that extract actual ctime/birth time from syscall data.
+func getCTime(info os.FileInfo) time.Time {
+	return info.ModTime()
+}
diff --git a/internal/snapshot/scanner.go b/internal/snapshot/scanner.go
index ca403b4..dec5fd5 100644
--- a/internal/snapshot/scanner.go
+++ b/internal/snapshot/scanner.go
@@ -728,7 +728,7 @@ func (s *Scanner) checkFileInMemory(path string, info os.FileInfo, knownFiles ma
 		Path:       types.FilePath(path),
 		SourcePath: types.SourcePath(s.currentSourcePath), // Store source directory for restore path stripping
 		MTime:      info.ModTime(),
-		CTime:      info.ModTime(), // afero doesn't provide ctime
+		CTime:      getCTime(info),
 		Size:       info.Size(),
 		Mode:       uint32(info.Mode()),
 		UID:        uid,