2 Commits

Author SHA1 Message Date
user
8d3a3ab7a1 rename blob_fetch_stub.go to blob_fetch.go
All checks were successful
check / check (pull_request) Successful in 2m29s
The file contains production code (hashVerifyReader, FetchAndDecryptBlob),
not stubs. The _stub suffix was a misnomer from the original implementation.
2026-03-18 16:26:35 -07:00
8c59f55096 fix: verify blob hash after download and decryption (closes #5) (#39)
All checks were successful
check / check (push) Successful in 2m27s
## Summary

Add double-SHA-256 hash verification of decrypted plaintext in `FetchAndDecryptBlob`. This ensures blob integrity during restore operations by comparing the computed hash against the expected blob hash before returning data to the caller.

The blob hash is `SHA256(SHA256(plaintext))` as produced by `blobgen.Writer.Sum256()`. Verification happens after decryption and decompression but before the data is used.

## Test

Added `blob_fetch_hash_test.go` with tests for:
- Correct hash passes verification
- Mismatched hash returns descriptive error

## make test output

```
golangci-lint run
0 issues.

ok  git.eeqj.de/sneak/vaultik/internal/blob       4.563s
ok  git.eeqj.de/sneak/vaultik/internal/blobgen    3.981s
ok  git.eeqj.de/sneak/vaultik/internal/chunker    4.127s
ok  git.eeqj.de/sneak/vaultik/internal/cli        1.499s
ok  git.eeqj.de/sneak/vaultik/internal/config     1.905s
ok  git.eeqj.de/sneak/vaultik/internal/crypto     0.519s
ok  git.eeqj.de/sneak/vaultik/internal/database   4.590s
ok  git.eeqj.de/sneak/vaultik/internal/globals    0.650s
ok  git.eeqj.de/sneak/vaultik/internal/models     0.779s
ok  git.eeqj.de/sneak/vaultik/internal/pidlock    2.945s
ok  git.eeqj.de/sneak/vaultik/internal/s3         3.286s
ok  git.eeqj.de/sneak/vaultik/internal/snapshot   3.979s
ok  git.eeqj.de/sneak/vaultik/internal/vaultik    4.418s
```

All tests pass, 0 lint issues.

Co-authored-by: user <user@Mac.lan guest wan>
Co-authored-by: clawbot <clawbot@noreply.git.eeqj.de>
Reviewed-on: #39
Co-authored-by: clawbot <sneak+clawbot@sneak.cloud>
Co-committed-by: clawbot <sneak+clawbot@sneak.cloud>
2026-03-19 00:21:11 +01:00
11 changed files with 218 additions and 285 deletions

View File

@@ -194,9 +194,8 @@ vaultik [--config <path>] store info
* Requires `VAULTIK_AGE_SECRET_KEY` environment variable with age private key
* Optional path arguments to restore specific files/directories (default: all)
* Downloads and decrypts metadata, fetches required blobs, reconstructs files
* Preserves file permissions, mtime, and ownership (ownership requires root)
* Preserves file permissions, timestamps, and ownership (ownership requires root)
* Handles symlinks and directories
* Note: ctime cannot be restored (see [platform notes](#platform-specific-ctime-semantics))
**prune**: Remove unreferenced blobs from remote storage
* Scans all snapshots for referenced blobs
@@ -248,14 +247,11 @@ Snapshot IDs follow the format `<hostname>_<snapshot-name>_<timestamp>` (e.g., `
CREATE TABLE files (
id TEXT PRIMARY KEY,
path TEXT NOT NULL UNIQUE,
source_path TEXT NOT NULL DEFAULT '',
mtime INTEGER NOT NULL,
ctime INTEGER NOT NULL,
size INTEGER NOT NULL,
mode INTEGER NOT NULL,
uid INTEGER NOT NULL,
gid INTEGER NOT NULL,
link_target TEXT
gid INTEGER NOT NULL
);
CREATE TABLE file_chunks (
@@ -343,25 +339,7 @@ CREATE TABLE snapshot_blobs (
1. For each file, get ordered chunk list from file_chunks
1. Download required blobs, decrypt, decompress
1. Extract chunks and reconstruct files
1. Restore permissions, mtime, uid/gid (ctime cannot be restored — see platform notes above)
### platform-specific ctime semantics
The `ctime` field in the files table stores a platform-dependent timestamp:
* **macOS (Darwin)**: `ctime` is the file's **birth time** — when the file was
first created on disk. This value never changes after file creation, even if
the file's content or metadata is modified.
* **Linux**: `ctime` is the **inode change time** — the last time the file's
metadata (permissions, ownership, link count, etc.) was modified. This is NOT
the file creation time. Linux did not expose birth time (via `statx(2)`) until
kernel 4.11, and Go's `syscall` package does not yet surface it.
**Restore limitation**: `ctime` cannot be restored on either platform. On Linux,
the kernel manages the inode change time and userspace cannot set it. On macOS,
there is no standard POSIX API to set birth time. The `ctime` value is preserved
in the snapshot database for informational/forensic purposes only.
1. Restore permissions, mtime, uid/gid
#### prune

View File

@@ -16,8 +16,8 @@ type File struct {
ID types.FileID // UUID primary key
Path types.FilePath // Absolute path of the file
SourcePath types.SourcePath // The source directory this file came from (for restore path stripping)
MTime time.Time // Last modification time
CTime time.Time // Creation/change time (platform-specific: birth time on macOS, inode change time on Linux)
MTime time.Time
CTime time.Time
Size int64
Mode uint32
UID uint32

View File

@@ -345,7 +345,7 @@ func (b *BackupEngine) Backup(ctx context.Context, fsys fs.FS, root string) (str
Size: info.Size(),
Mode: uint32(info.Mode()),
MTime: info.ModTime(),
CTime: fileCTime(info), // platform-specific: birth time on macOS, inode change time on Linux
CTime: info.ModTime(), // Use mtime as ctime for test
UID: 1000, // Default UID for test
GID: 1000, // Default GID for test
}

View File

@@ -1,26 +0,0 @@
package snapshot
import (
"os"
"syscall"
"time"
)
// fileCTime returns the file creation time (birth time) on macOS.
//
// On macOS/Darwin, "ctime" refers to the file's birth time (when the file
// was first created on disk). This is stored in the Birthtimespec field of
// the syscall.Stat_t structure.
//
// This differs from Linux where "ctime" means inode change time (the last
// time file metadata was modified). See ctime_linux.go for details.
//
// If the underlying stat information is unavailable (e.g. when using a
// virtual filesystem like afero.MemMapFs), this falls back to mtime.
func fileCTime(info os.FileInfo) time.Time {
stat, ok := info.Sys().(*syscall.Stat_t)
if !ok {
return info.ModTime()
}
return time.Unix(stat.Birthtimespec.Sec, stat.Birthtimespec.Nsec).UTC()
}

View File

@@ -1,28 +0,0 @@
package snapshot
import (
"os"
"syscall"
"time"
)
// fileCTime returns the inode change time on Linux.
//
// On Linux, "ctime" refers to the inode change time — the last time the
// file's metadata (permissions, ownership, link count, etc.) was modified.
// This is NOT the file creation time; Linux did not expose birth time until
// the statx(2) syscall was added in kernel 4.11, and Go's syscall package
// does not yet surface it.
//
// This differs from macOS/Darwin where "ctime" means birth time (file
// creation time). See ctime_darwin.go for details.
//
// If the underlying stat information is unavailable (e.g. when using a
// virtual filesystem like afero.MemMapFs), this falls back to mtime.
func fileCTime(info os.FileInfo) time.Time {
stat, ok := info.Sys().(*syscall.Stat_t)
if !ok {
return info.ModTime()
}
return time.Unix(stat.Ctim.Sec, stat.Ctim.Nsec).UTC()
}

View File

@@ -1,133 +0,0 @@
package snapshot
import (
"os"
"path/filepath"
"testing"
"time"
)
func TestFileCTime_RealFile(t *testing.T) {
// Create a temporary file
dir := t.TempDir()
path := filepath.Join(dir, "testfile.txt")
if err := os.WriteFile(path, []byte("hello"), 0644); err != nil {
t.Fatal(err)
}
info, err := os.Stat(path)
if err != nil {
t.Fatal(err)
}
ctime := fileCTime(info)
// ctime should be a valid time (not zero)
if ctime.IsZero() {
t.Fatal("fileCTime returned zero time")
}
// ctime should be close to now (within a few seconds)
diff := time.Since(ctime)
if diff < 0 || diff > 5*time.Second {
t.Fatalf("fileCTime returned unexpected time: %v (diff from now: %v)", ctime, diff)
}
// ctime should not equal mtime exactly in all cases, but for a freshly
// created file they should be very close
mtime := info.ModTime()
ctimeMtimeDiff := ctime.Sub(mtime)
if ctimeMtimeDiff < 0 {
ctimeMtimeDiff = -ctimeMtimeDiff
}
// For a freshly created file, ctime and mtime should be within 1 second
if ctimeMtimeDiff > time.Second {
t.Fatalf("ctime and mtime differ by too much for a new file: ctime=%v, mtime=%v, diff=%v",
ctime, mtime, ctimeMtimeDiff)
}
}
func TestFileCTime_AfterMtimeChange(t *testing.T) {
// Create a temporary file
dir := t.TempDir()
path := filepath.Join(dir, "testfile.txt")
if err := os.WriteFile(path, []byte("hello"), 0644); err != nil {
t.Fatal(err)
}
// Get initial ctime
info1, err := os.Stat(path)
if err != nil {
t.Fatal(err)
}
ctime1 := fileCTime(info1)
// Change mtime to a time in the past
pastTime := time.Date(2020, 1, 1, 0, 0, 0, 0, time.UTC)
if err := os.Chtimes(path, pastTime, pastTime); err != nil {
t.Fatal(err)
}
// Get new stats
info2, err := os.Stat(path)
if err != nil {
t.Fatal(err)
}
ctime2 := fileCTime(info2)
mtime2 := info2.ModTime()
// mtime should now be in the past
if mtime2.Year() != 2020 {
t.Fatalf("mtime not set correctly: %v", mtime2)
}
// On macOS: ctime (birth time) should remain unchanged since birth time
// doesn't change when mtime is updated.
// On Linux: ctime (inode change time) will be updated to ~now because
// changing mtime is a metadata change.
// Either way, ctime should NOT equal the past mtime we just set.
if ctime2.Equal(pastTime) {
t.Fatal("ctime should not equal the artificially set past mtime")
}
// ctime should still be a recent time (the original creation time or
// the metadata change time, depending on platform)
_ = ctime1 // used for reference; both platforms will have a recent ctime2
if time.Since(ctime2) > 10*time.Second {
t.Fatalf("ctime is unexpectedly old: %v", ctime2)
}
}
// TestFileCTime_NonSyscallFileInfo verifies the fallback to mtime when
// the FileInfo doesn't have a *syscall.Stat_t (e.g. afero.MemMapFs).
type mockFileInfo struct {
name string
size int64
mode os.FileMode
modTime time.Time
isDir bool
}
func (m *mockFileInfo) Name() string { return m.name }
func (m *mockFileInfo) Size() int64 { return m.size }
func (m *mockFileInfo) Mode() os.FileMode { return m.mode }
func (m *mockFileInfo) ModTime() time.Time { return m.modTime }
func (m *mockFileInfo) IsDir() bool { return m.isDir }
func (m *mockFileInfo) Sys() interface{} { return nil } // No syscall.Stat_t
func TestFileCTime_FallbackToMtime(t *testing.T) {
now := time.Now().UTC().Truncate(time.Second)
info := &mockFileInfo{
name: "test.txt",
size: 100,
mode: 0644,
modTime: now,
}
ctime := fileCTime(info)
if !ctime.Equal(now) {
t.Fatalf("expected fallback to mtime %v, got %v", now, ctime)
}
}

View File

@@ -728,7 +728,7 @@ func (s *Scanner) checkFileInMemory(path string, info os.FileInfo, knownFiles ma
Path: types.FilePath(path),
SourcePath: types.SourcePath(s.currentSourcePath), // Store source directory for restore path stripping
MTime: info.ModTime(),
CTime: fileCTime(info), // platform-specific: birth time on macOS, inode change time on Linux
CTime: info.ModTime(), // afero doesn't provide ctime
Size: info.Size(),
Mode: uint32(info.Mode()),
UID: uid,

View File

@@ -0,0 +1,93 @@
package vaultik
import (
"context"
"crypto/sha256"
"encoding/hex"
"fmt"
"io"
"filippo.io/age"
"git.eeqj.de/sneak/vaultik/internal/blobgen"
)
// hashVerifyReader wraps a blobgen.Reader and verifies the double-SHA-256 hash
// of decrypted plaintext when Close is called. It reuses the hash that
// blobgen.Reader already computes internally via its TeeReader, avoiding
// redundant SHA-256 computation.
type hashVerifyReader struct {
reader *blobgen.Reader // underlying decrypted blob reader (has internal hasher)
fetcher io.ReadCloser // raw fetched stream (closed on Close)
blobHash string // expected double-SHA-256 hex
done bool // EOF reached
}
func (h *hashVerifyReader) Read(p []byte) (int, error) {
n, err := h.reader.Read(p)
if err == io.EOF {
h.done = true
}
return n, err
}
// Close verifies the hash (if the stream was fully read) and closes underlying readers.
func (h *hashVerifyReader) Close() error {
readerErr := h.reader.Close()
fetcherErr := h.fetcher.Close()
if h.done {
firstHash := h.reader.Sum256()
secondHasher := sha256.New()
secondHasher.Write(firstHash)
actualHashHex := hex.EncodeToString(secondHasher.Sum(nil))
if actualHashHex != h.blobHash {
return fmt.Errorf("blob hash mismatch: expected %s, got %s", h.blobHash[:16], actualHashHex[:16])
}
}
if readerErr != nil {
return readerErr
}
return fetcherErr
}
// FetchAndDecryptBlob downloads a blob, decrypts and decompresses it, and
// returns a streaming reader that computes the double-SHA-256 hash on the fly.
// The hash is verified when the returned reader is closed (after fully reading).
// This avoids buffering the entire blob in memory.
func (v *Vaultik) FetchAndDecryptBlob(ctx context.Context, blobHash string, expectedSize int64, identity age.Identity) (io.ReadCloser, error) {
rc, _, err := v.FetchBlob(ctx, blobHash, expectedSize)
if err != nil {
return nil, err
}
reader, err := blobgen.NewReader(rc, identity)
if err != nil {
_ = rc.Close()
return nil, fmt.Errorf("creating blob reader: %w", err)
}
return &hashVerifyReader{
reader: reader,
fetcher: rc,
blobHash: blobHash,
}, nil
}
// FetchBlob downloads a blob and returns a reader for the encrypted data.
func (v *Vaultik) FetchBlob(ctx context.Context, blobHash string, expectedSize int64) (io.ReadCloser, int64, error) {
blobPath := fmt.Sprintf("blobs/%s/%s/%s", blobHash[:2], blobHash[2:4], blobHash)
rc, err := v.Storage.Get(ctx, blobPath)
if err != nil {
return nil, 0, fmt.Errorf("downloading blob %s: %w", blobHash[:16], err)
}
info, err := v.Storage.Stat(ctx, blobPath)
if err != nil {
_ = rc.Close()
return nil, 0, fmt.Errorf("stat blob %s: %w", blobHash[:16], err)
}
return rc, info.Size, nil
}

View File

@@ -0,0 +1,100 @@
package vaultik_test
import (
"bytes"
"context"
"crypto/sha256"
"encoding/hex"
"io"
"strings"
"testing"
"filippo.io/age"
"git.eeqj.de/sneak/vaultik/internal/blobgen"
"git.eeqj.de/sneak/vaultik/internal/vaultik"
)
// TestFetchAndDecryptBlobVerifiesHash verifies that FetchAndDecryptBlob checks
// the double-SHA-256 hash of the decrypted plaintext against the expected blob hash.
func TestFetchAndDecryptBlobVerifiesHash(t *testing.T) {
identity, err := age.GenerateX25519Identity()
if err != nil {
t.Fatalf("generating identity: %v", err)
}
// Create test data and encrypt it using blobgen.Writer
plaintext := []byte("hello world test data for blob hash verification")
var encBuf bytes.Buffer
writer, err := blobgen.NewWriter(&encBuf, 1, []string{identity.Recipient().String()})
if err != nil {
t.Fatalf("creating blobgen writer: %v", err)
}
if _, err := writer.Write(plaintext); err != nil {
t.Fatalf("writing plaintext: %v", err)
}
if err := writer.Close(); err != nil {
t.Fatalf("closing writer: %v", err)
}
encryptedData := encBuf.Bytes()
// Compute correct double-SHA-256 hash of the plaintext (matches blobgen.Writer.Sum256)
firstHash := sha256.Sum256(plaintext)
secondHash := sha256.Sum256(firstHash[:])
correctHash := hex.EncodeToString(secondHash[:])
// Verify our hash matches what blobgen.Writer produces
writerHash := hex.EncodeToString(writer.Sum256())
if correctHash != writerHash {
t.Fatalf("hash computation mismatch: manual=%s, writer=%s", correctHash, writerHash)
}
// Set up mock storage with the blob at the correct path
mockStorage := NewMockStorer()
blobPath := "blobs/" + correctHash[:2] + "/" + correctHash[2:4] + "/" + correctHash
mockStorage.mu.Lock()
mockStorage.data[blobPath] = encryptedData
mockStorage.mu.Unlock()
tv := vaultik.NewForTesting(mockStorage)
ctx := context.Background()
t.Run("correct hash succeeds", func(t *testing.T) {
rc, err := tv.FetchAndDecryptBlob(ctx, correctHash, int64(len(encryptedData)), identity)
if err != nil {
t.Fatalf("expected success, got error: %v", err)
}
data, err := io.ReadAll(rc)
if err != nil {
t.Fatalf("reading stream: %v", err)
}
if err := rc.Close(); err != nil {
t.Fatalf("close (hash verification) failed: %v", err)
}
if !bytes.Equal(data, plaintext) {
t.Fatalf("decrypted data mismatch: got %q, want %q", data, plaintext)
}
})
t.Run("wrong hash fails", func(t *testing.T) {
// Use a fake hash that doesn't match the actual plaintext
fakeHash := strings.Repeat("ab", 32) // 64 hex chars
fakePath := "blobs/" + fakeHash[:2] + "/" + fakeHash[2:4] + "/" + fakeHash
mockStorage.mu.Lock()
mockStorage.data[fakePath] = encryptedData
mockStorage.mu.Unlock()
rc, err := tv.FetchAndDecryptBlob(ctx, fakeHash, int64(len(encryptedData)), identity)
if err != nil {
t.Fatalf("unexpected error opening stream: %v", err)
}
// Read all data — hash is verified on Close
_, _ = io.ReadAll(rc)
err = rc.Close()
if err == nil {
t.Fatal("expected error for mismatched hash, got nil")
}
if !strings.Contains(err.Error(), "hash mismatch") {
t.Fatalf("expected hash mismatch error, got: %v", err)
}
})
}

View File

@@ -1,55 +0,0 @@
package vaultik
import (
"context"
"fmt"
"io"
"filippo.io/age"
"git.eeqj.de/sneak/vaultik/internal/blobgen"
)
// FetchAndDecryptBlobResult holds the result of fetching and decrypting a blob.
type FetchAndDecryptBlobResult struct {
Data []byte
}
// FetchAndDecryptBlob downloads a blob, decrypts it, and returns the plaintext data.
func (v *Vaultik) FetchAndDecryptBlob(ctx context.Context, blobHash string, expectedSize int64, identity age.Identity) (*FetchAndDecryptBlobResult, error) {
rc, _, err := v.FetchBlob(ctx, blobHash, expectedSize)
if err != nil {
return nil, err
}
defer func() { _ = rc.Close() }()
reader, err := blobgen.NewReader(rc, identity)
if err != nil {
return nil, fmt.Errorf("creating blob reader: %w", err)
}
defer func() { _ = reader.Close() }()
data, err := io.ReadAll(reader)
if err != nil {
return nil, fmt.Errorf("reading blob data: %w", err)
}
return &FetchAndDecryptBlobResult{Data: data}, nil
}
// FetchBlob downloads a blob and returns a reader for the encrypted data.
func (v *Vaultik) FetchBlob(ctx context.Context, blobHash string, expectedSize int64) (io.ReadCloser, int64, error) {
blobPath := fmt.Sprintf("blobs/%s/%s/%s", blobHash[:2], blobHash[2:4], blobHash)
rc, err := v.Storage.Get(ctx, blobPath)
if err != nil {
return nil, 0, fmt.Errorf("downloading blob %s: %w", blobHash[:16], err)
}
info, err := v.Storage.Stat(ctx, blobPath)
if err != nil {
_ = rc.Close()
return nil, 0, fmt.Errorf("stat blob %s: %w", blobHash[:16], err)
}
return rc, info.Size, nil
}

View File

@@ -411,11 +411,7 @@ func (v *Vaultik) restoreDirectory(file *database.File, targetPath string, resul
}
}
// Set mtime (atime is set to mtime as a reasonable default).
// Note: ctime cannot be restored. On Linux, ctime (inode change time) is
// managed by the kernel and cannot be set by userspace. On macOS, birth
// time cannot be set via standard POSIX APIs. The ctime value is preserved
// in the snapshot database for informational purposes.
// Set mtime
if err := v.Fs.Chtimes(targetPath, file.MTime, file.MTime); err != nil {
log.Debug("Failed to set directory mtime", "path", targetPath, "error", err)
}
@@ -512,11 +508,7 @@ func (v *Vaultik) restoreRegularFile(
}
}
// Set mtime (atime is set to mtime as a reasonable default).
// Note: ctime cannot be restored. On Linux, ctime (inode change time) is
// managed by the kernel and cannot be set by userspace. On macOS, birth
// time cannot be set via standard POSIX APIs. The ctime value is preserved
// in the snapshot database for informational purposes.
// Set mtime
if err := v.Fs.Chtimes(targetPath, file.MTime, file.MTime); err != nil {
log.Debug("Failed to set file mtime", "path", targetPath, "error", err)
}
@@ -530,11 +522,23 @@ func (v *Vaultik) restoreRegularFile(
// downloadBlob downloads and decrypts a blob
func (v *Vaultik) downloadBlob(ctx context.Context, blobHash string, expectedSize int64, identity age.Identity) ([]byte, error) {
result, err := v.FetchAndDecryptBlob(ctx, blobHash, expectedSize, identity)
rc, err := v.FetchAndDecryptBlob(ctx, blobHash, expectedSize, identity)
if err != nil {
return nil, err
}
return result.Data, nil
data, err := io.ReadAll(rc)
if err != nil {
_ = rc.Close()
return nil, fmt.Errorf("reading blob data: %w", err)
}
// Close triggers hash verification
if err := rc.Close(); err != nil {
return nil, err
}
return data, nil
}
// verifyRestoredFiles verifies that all restored files match their expected chunk hashes