Compare commits
7 Commits
71a402650c
...
fix/dedup-
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
ea8edd653f | ||
| 60b6746db9 | |||
| f28c8a73b7 | |||
| 1c0f5b8eb2 | |||
| 689109a2b8 | |||
| ac2f21a89d | |||
| 8c59f55096 |
@@ -103,7 +103,7 @@ CREATE TABLE IF NOT EXISTS snapshot_files (
|
|||||||
file_id TEXT NOT NULL,
|
file_id TEXT NOT NULL,
|
||||||
PRIMARY KEY (snapshot_id, file_id),
|
PRIMARY KEY (snapshot_id, file_id),
|
||||||
FOREIGN KEY (snapshot_id) REFERENCES snapshots(id) ON DELETE CASCADE,
|
FOREIGN KEY (snapshot_id) REFERENCES snapshots(id) ON DELETE CASCADE,
|
||||||
FOREIGN KEY (file_id) REFERENCES files(id)
|
FOREIGN KEY (file_id) REFERENCES files(id) ON DELETE CASCADE
|
||||||
);
|
);
|
||||||
|
|
||||||
-- Index for efficient file lookups (used in orphan detection)
|
-- Index for efficient file lookups (used in orphan detection)
|
||||||
@@ -116,7 +116,7 @@ CREATE TABLE IF NOT EXISTS snapshot_blobs (
|
|||||||
blob_hash TEXT NOT NULL,
|
blob_hash TEXT NOT NULL,
|
||||||
PRIMARY KEY (snapshot_id, blob_id),
|
PRIMARY KEY (snapshot_id, blob_id),
|
||||||
FOREIGN KEY (snapshot_id) REFERENCES snapshots(id) ON DELETE CASCADE,
|
FOREIGN KEY (snapshot_id) REFERENCES snapshots(id) ON DELETE CASCADE,
|
||||||
FOREIGN KEY (blob_id) REFERENCES blobs(id)
|
FOREIGN KEY (blob_id) REFERENCES blobs(id) ON DELETE CASCADE
|
||||||
);
|
);
|
||||||
|
|
||||||
-- Index for efficient blob lookups (used in orphan detection)
|
-- Index for efficient blob lookups (used in orphan detection)
|
||||||
@@ -130,7 +130,7 @@ CREATE TABLE IF NOT EXISTS uploads (
|
|||||||
size INTEGER NOT NULL,
|
size INTEGER NOT NULL,
|
||||||
duration_ms INTEGER NOT NULL,
|
duration_ms INTEGER NOT NULL,
|
||||||
FOREIGN KEY (blob_hash) REFERENCES blobs(blob_hash),
|
FOREIGN KEY (blob_hash) REFERENCES blobs(blob_hash),
|
||||||
FOREIGN KEY (snapshot_id) REFERENCES snapshots(id)
|
FOREIGN KEY (snapshot_id) REFERENCES snapshots(id) ON DELETE CASCADE
|
||||||
);
|
);
|
||||||
|
|
||||||
-- Index for efficient snapshot lookups
|
-- Index for efficient snapshot lookups
|
||||||
|
|||||||
93
internal/vaultik/blob_fetch.go
Normal file
93
internal/vaultik/blob_fetch.go
Normal file
@@ -0,0 +1,93 @@
|
|||||||
|
package vaultik
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"crypto/sha256"
|
||||||
|
"encoding/hex"
|
||||||
|
"fmt"
|
||||||
|
"io"
|
||||||
|
|
||||||
|
"filippo.io/age"
|
||||||
|
"git.eeqj.de/sneak/vaultik/internal/blobgen"
|
||||||
|
)
|
||||||
|
|
||||||
|
// hashVerifyReader wraps a blobgen.Reader and verifies the double-SHA-256 hash
|
||||||
|
// of decrypted plaintext when Close is called. It reuses the hash that
|
||||||
|
// blobgen.Reader already computes internally via its TeeReader, avoiding
|
||||||
|
// redundant SHA-256 computation.
|
||||||
|
type hashVerifyReader struct {
|
||||||
|
reader *blobgen.Reader // underlying decrypted blob reader (has internal hasher)
|
||||||
|
fetcher io.ReadCloser // raw fetched stream (closed on Close)
|
||||||
|
blobHash string // expected double-SHA-256 hex
|
||||||
|
done bool // EOF reached
|
||||||
|
}
|
||||||
|
|
||||||
|
func (h *hashVerifyReader) Read(p []byte) (int, error) {
|
||||||
|
n, err := h.reader.Read(p)
|
||||||
|
if err == io.EOF {
|
||||||
|
h.done = true
|
||||||
|
}
|
||||||
|
return n, err
|
||||||
|
}
|
||||||
|
|
||||||
|
// Close verifies the hash (if the stream was fully read) and closes underlying readers.
|
||||||
|
func (h *hashVerifyReader) Close() error {
|
||||||
|
readerErr := h.reader.Close()
|
||||||
|
fetcherErr := h.fetcher.Close()
|
||||||
|
|
||||||
|
if h.done {
|
||||||
|
firstHash := h.reader.Sum256()
|
||||||
|
secondHasher := sha256.New()
|
||||||
|
secondHasher.Write(firstHash)
|
||||||
|
actualHashHex := hex.EncodeToString(secondHasher.Sum(nil))
|
||||||
|
if actualHashHex != h.blobHash {
|
||||||
|
return fmt.Errorf("blob hash mismatch: expected %s, got %s", h.blobHash[:16], actualHashHex[:16])
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if readerErr != nil {
|
||||||
|
return readerErr
|
||||||
|
}
|
||||||
|
return fetcherErr
|
||||||
|
}
|
||||||
|
|
||||||
|
// FetchAndDecryptBlob downloads a blob, decrypts and decompresses it, and
|
||||||
|
// returns a streaming reader that computes the double-SHA-256 hash on the fly.
|
||||||
|
// The hash is verified when the returned reader is closed (after fully reading).
|
||||||
|
// This avoids buffering the entire blob in memory.
|
||||||
|
func (v *Vaultik) FetchAndDecryptBlob(ctx context.Context, blobHash string, expectedSize int64, identity age.Identity) (io.ReadCloser, error) {
|
||||||
|
rc, _, err := v.FetchBlob(ctx, blobHash, expectedSize)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
reader, err := blobgen.NewReader(rc, identity)
|
||||||
|
if err != nil {
|
||||||
|
_ = rc.Close()
|
||||||
|
return nil, fmt.Errorf("creating blob reader: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
return &hashVerifyReader{
|
||||||
|
reader: reader,
|
||||||
|
fetcher: rc,
|
||||||
|
blobHash: blobHash,
|
||||||
|
}, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// FetchBlob downloads a blob and returns a reader for the encrypted data.
|
||||||
|
func (v *Vaultik) FetchBlob(ctx context.Context, blobHash string, expectedSize int64) (io.ReadCloser, int64, error) {
|
||||||
|
blobPath := fmt.Sprintf("blobs/%s/%s/%s", blobHash[:2], blobHash[2:4], blobHash)
|
||||||
|
|
||||||
|
rc, err := v.Storage.Get(ctx, blobPath)
|
||||||
|
if err != nil {
|
||||||
|
return nil, 0, fmt.Errorf("downloading blob %s: %w", blobHash[:16], err)
|
||||||
|
}
|
||||||
|
|
||||||
|
info, err := v.Storage.Stat(ctx, blobPath)
|
||||||
|
if err != nil {
|
||||||
|
_ = rc.Close()
|
||||||
|
return nil, 0, fmt.Errorf("stat blob %s: %w", blobHash[:16], err)
|
||||||
|
}
|
||||||
|
|
||||||
|
return rc, info.Size, nil
|
||||||
|
}
|
||||||
100
internal/vaultik/blob_fetch_hash_test.go
Normal file
100
internal/vaultik/blob_fetch_hash_test.go
Normal file
@@ -0,0 +1,100 @@
|
|||||||
|
package vaultik_test
|
||||||
|
|
||||||
|
import (
|
||||||
|
"bytes"
|
||||||
|
"context"
|
||||||
|
"crypto/sha256"
|
||||||
|
"encoding/hex"
|
||||||
|
"io"
|
||||||
|
"strings"
|
||||||
|
"testing"
|
||||||
|
|
||||||
|
"filippo.io/age"
|
||||||
|
"git.eeqj.de/sneak/vaultik/internal/blobgen"
|
||||||
|
"git.eeqj.de/sneak/vaultik/internal/vaultik"
|
||||||
|
)
|
||||||
|
|
||||||
|
// TestFetchAndDecryptBlobVerifiesHash verifies that FetchAndDecryptBlob checks
|
||||||
|
// the double-SHA-256 hash of the decrypted plaintext against the expected blob hash.
|
||||||
|
func TestFetchAndDecryptBlobVerifiesHash(t *testing.T) {
|
||||||
|
identity, err := age.GenerateX25519Identity()
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("generating identity: %v", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Create test data and encrypt it using blobgen.Writer
|
||||||
|
plaintext := []byte("hello world test data for blob hash verification")
|
||||||
|
var encBuf bytes.Buffer
|
||||||
|
writer, err := blobgen.NewWriter(&encBuf, 1, []string{identity.Recipient().String()})
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("creating blobgen writer: %v", err)
|
||||||
|
}
|
||||||
|
if _, err := writer.Write(plaintext); err != nil {
|
||||||
|
t.Fatalf("writing plaintext: %v", err)
|
||||||
|
}
|
||||||
|
if err := writer.Close(); err != nil {
|
||||||
|
t.Fatalf("closing writer: %v", err)
|
||||||
|
}
|
||||||
|
encryptedData := encBuf.Bytes()
|
||||||
|
|
||||||
|
// Compute correct double-SHA-256 hash of the plaintext (matches blobgen.Writer.Sum256)
|
||||||
|
firstHash := sha256.Sum256(plaintext)
|
||||||
|
secondHash := sha256.Sum256(firstHash[:])
|
||||||
|
correctHash := hex.EncodeToString(secondHash[:])
|
||||||
|
|
||||||
|
// Verify our hash matches what blobgen.Writer produces
|
||||||
|
writerHash := hex.EncodeToString(writer.Sum256())
|
||||||
|
if correctHash != writerHash {
|
||||||
|
t.Fatalf("hash computation mismatch: manual=%s, writer=%s", correctHash, writerHash)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Set up mock storage with the blob at the correct path
|
||||||
|
mockStorage := NewMockStorer()
|
||||||
|
blobPath := "blobs/" + correctHash[:2] + "/" + correctHash[2:4] + "/" + correctHash
|
||||||
|
mockStorage.mu.Lock()
|
||||||
|
mockStorage.data[blobPath] = encryptedData
|
||||||
|
mockStorage.mu.Unlock()
|
||||||
|
|
||||||
|
tv := vaultik.NewForTesting(mockStorage)
|
||||||
|
ctx := context.Background()
|
||||||
|
|
||||||
|
t.Run("correct hash succeeds", func(t *testing.T) {
|
||||||
|
rc, err := tv.FetchAndDecryptBlob(ctx, correctHash, int64(len(encryptedData)), identity)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("expected success, got error: %v", err)
|
||||||
|
}
|
||||||
|
data, err := io.ReadAll(rc)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("reading stream: %v", err)
|
||||||
|
}
|
||||||
|
if err := rc.Close(); err != nil {
|
||||||
|
t.Fatalf("close (hash verification) failed: %v", err)
|
||||||
|
}
|
||||||
|
if !bytes.Equal(data, plaintext) {
|
||||||
|
t.Fatalf("decrypted data mismatch: got %q, want %q", data, plaintext)
|
||||||
|
}
|
||||||
|
})
|
||||||
|
|
||||||
|
t.Run("wrong hash fails", func(t *testing.T) {
|
||||||
|
// Use a fake hash that doesn't match the actual plaintext
|
||||||
|
fakeHash := strings.Repeat("ab", 32) // 64 hex chars
|
||||||
|
fakePath := "blobs/" + fakeHash[:2] + "/" + fakeHash[2:4] + "/" + fakeHash
|
||||||
|
mockStorage.mu.Lock()
|
||||||
|
mockStorage.data[fakePath] = encryptedData
|
||||||
|
mockStorage.mu.Unlock()
|
||||||
|
|
||||||
|
rc, err := tv.FetchAndDecryptBlob(ctx, fakeHash, int64(len(encryptedData)), identity)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("unexpected error opening stream: %v", err)
|
||||||
|
}
|
||||||
|
// Read all data — hash is verified on Close
|
||||||
|
_, _ = io.ReadAll(rc)
|
||||||
|
err = rc.Close()
|
||||||
|
if err == nil {
|
||||||
|
t.Fatal("expected error for mismatched hash, got nil")
|
||||||
|
}
|
||||||
|
if !strings.Contains(err.Error(), "hash mismatch") {
|
||||||
|
t.Fatalf("expected hash mismatch error, got: %v", err)
|
||||||
|
}
|
||||||
|
})
|
||||||
|
}
|
||||||
@@ -1,55 +0,0 @@
|
|||||||
package vaultik
|
|
||||||
|
|
||||||
import (
|
|
||||||
"context"
|
|
||||||
"fmt"
|
|
||||||
"io"
|
|
||||||
|
|
||||||
"filippo.io/age"
|
|
||||||
"git.eeqj.de/sneak/vaultik/internal/blobgen"
|
|
||||||
)
|
|
||||||
|
|
||||||
// FetchAndDecryptBlobResult holds the result of fetching and decrypting a blob.
|
|
||||||
type FetchAndDecryptBlobResult struct {
|
|
||||||
Data []byte
|
|
||||||
}
|
|
||||||
|
|
||||||
// FetchAndDecryptBlob downloads a blob, decrypts it, and returns the plaintext data.
|
|
||||||
func (v *Vaultik) FetchAndDecryptBlob(ctx context.Context, blobHash string, expectedSize int64, identity age.Identity) (*FetchAndDecryptBlobResult, error) {
|
|
||||||
rc, _, err := v.FetchBlob(ctx, blobHash, expectedSize)
|
|
||||||
if err != nil {
|
|
||||||
return nil, err
|
|
||||||
}
|
|
||||||
defer func() { _ = rc.Close() }()
|
|
||||||
|
|
||||||
reader, err := blobgen.NewReader(rc, identity)
|
|
||||||
if err != nil {
|
|
||||||
return nil, fmt.Errorf("creating blob reader: %w", err)
|
|
||||||
}
|
|
||||||
defer func() { _ = reader.Close() }()
|
|
||||||
|
|
||||||
data, err := io.ReadAll(reader)
|
|
||||||
if err != nil {
|
|
||||||
return nil, fmt.Errorf("reading blob data: %w", err)
|
|
||||||
}
|
|
||||||
|
|
||||||
return &FetchAndDecryptBlobResult{Data: data}, nil
|
|
||||||
}
|
|
||||||
|
|
||||||
// FetchBlob downloads a blob and returns a reader for the encrypted data.
|
|
||||||
func (v *Vaultik) FetchBlob(ctx context.Context, blobHash string, expectedSize int64) (io.ReadCloser, int64, error) {
|
|
||||||
blobPath := fmt.Sprintf("blobs/%s/%s/%s", blobHash[:2], blobHash[2:4], blobHash)
|
|
||||||
|
|
||||||
rc, err := v.Storage.Get(ctx, blobPath)
|
|
||||||
if err != nil {
|
|
||||||
return nil, 0, fmt.Errorf("downloading blob %s: %w", blobHash[:16], err)
|
|
||||||
}
|
|
||||||
|
|
||||||
info, err := v.Storage.Stat(ctx, blobPath)
|
|
||||||
if err != nil {
|
|
||||||
_ = rc.Close()
|
|
||||||
return nil, 0, fmt.Errorf("stat blob %s: %w", blobHash[:16], err)
|
|
||||||
}
|
|
||||||
|
|
||||||
return rc, info.Size, nil
|
|
||||||
}
|
|
||||||
@@ -558,11 +558,23 @@ func (v *Vaultik) restoreRegularFile(
|
|||||||
|
|
||||||
// downloadBlob downloads and decrypts a blob
|
// downloadBlob downloads and decrypts a blob
|
||||||
func (v *Vaultik) downloadBlob(ctx context.Context, blobHash string, expectedSize int64, identity age.Identity) ([]byte, error) {
|
func (v *Vaultik) downloadBlob(ctx context.Context, blobHash string, expectedSize int64, identity age.Identity) ([]byte, error) {
|
||||||
result, err := v.FetchAndDecryptBlob(ctx, blobHash, expectedSize, identity)
|
rc, err := v.FetchAndDecryptBlob(ctx, blobHash, expectedSize, identity)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return nil, err
|
return nil, err
|
||||||
}
|
}
|
||||||
return result.Data, nil
|
|
||||||
|
data, err := io.ReadAll(rc)
|
||||||
|
if err != nil {
|
||||||
|
_ = rc.Close()
|
||||||
|
return nil, fmt.Errorf("reading blob data: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Close triggers hash verification
|
||||||
|
if err := rc.Close(); err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
return data, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
// verifyRestoredFiles verifies that all restored files match their expected chunk hashes
|
// verifyRestoredFiles verifies that all restored files match their expected chunk hashes
|
||||||
|
|||||||
@@ -419,7 +419,7 @@ func (v *Vaultik) listRemoteSnapshotIDs() (map[string]bool, error) {
|
|||||||
return remoteSnapshots, nil
|
return remoteSnapshots, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
// reconcileLocalWithRemote removes local snapshots not in remote and returns the surviving local map
|
// reconcileLocalWithRemote builds a map of local snapshots keyed by ID for cross-referencing with remote
|
||||||
func (v *Vaultik) reconcileLocalWithRemote(remoteSnapshots map[string]bool) (map[string]*database.Snapshot, error) {
|
func (v *Vaultik) reconcileLocalWithRemote(remoteSnapshots map[string]bool) (map[string]*database.Snapshot, error) {
|
||||||
localSnapshots, err := v.Repositories.Snapshots.ListRecent(v.ctx, 10000)
|
localSnapshots, err := v.Repositories.Snapshots.ListRecent(v.ctx, 10000)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
@@ -431,19 +431,6 @@ func (v *Vaultik) reconcileLocalWithRemote(remoteSnapshots map[string]bool) (map
|
|||||||
localSnapshotMap[s.ID.String()] = s
|
localSnapshotMap[s.ID.String()] = s
|
||||||
}
|
}
|
||||||
|
|
||||||
for _, snap := range localSnapshots {
|
|
||||||
snapshotIDStr := snap.ID.String()
|
|
||||||
if !remoteSnapshots[snapshotIDStr] {
|
|
||||||
log.Info("Removing local snapshot not found in remote", "snapshot_id", snap.ID)
|
|
||||||
if err := v.deleteSnapshotFromLocalDB(snapshotIDStr); err != nil {
|
|
||||||
log.Error("Failed to delete local snapshot", "snapshot_id", snap.ID, "error", err)
|
|
||||||
} else {
|
|
||||||
log.Info("Deleted local snapshot not found in remote", "snapshot_id", snap.ID)
|
|
||||||
delete(localSnapshotMap, snapshotIDStr)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return localSnapshotMap, nil
|
return localSnapshotMap, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -872,7 +859,7 @@ func (v *Vaultik) syncWithRemote() error {
|
|||||||
snapshotIDStr := snapshot.ID.String()
|
snapshotIDStr := snapshot.ID.String()
|
||||||
if !remoteSnapshots[snapshotIDStr] {
|
if !remoteSnapshots[snapshotIDStr] {
|
||||||
log.Info("Removing local snapshot not found in remote", "snapshot_id", snapshot.ID)
|
log.Info("Removing local snapshot not found in remote", "snapshot_id", snapshot.ID)
|
||||||
if err := v.Repositories.Snapshots.Delete(v.ctx, snapshotIDStr); err != nil {
|
if err := v.deleteSnapshotFromLocalDB(snapshotIDStr); err != nil {
|
||||||
log.Error("Failed to delete local snapshot", "snapshot_id", snapshot.ID, "error", err)
|
log.Error("Failed to delete local snapshot", "snapshot_id", snapshot.ID, "error", err)
|
||||||
} else {
|
} else {
|
||||||
removedCount++
|
removedCount++
|
||||||
@@ -1001,6 +988,7 @@ func (v *Vaultik) listAllRemoteSnapshotIDs() ([]string, error) {
|
|||||||
log.Info("Listing all snapshots")
|
log.Info("Listing all snapshots")
|
||||||
objectCh := v.Storage.ListStream(v.ctx, "metadata/")
|
objectCh := v.Storage.ListStream(v.ctx, "metadata/")
|
||||||
|
|
||||||
|
seen := make(map[string]bool)
|
||||||
var snapshotIDs []string
|
var snapshotIDs []string
|
||||||
for object := range objectCh {
|
for object := range objectCh {
|
||||||
if object.Err != nil {
|
if object.Err != nil {
|
||||||
@@ -1015,14 +1003,8 @@ func (v *Vaultik) listAllRemoteSnapshotIDs() ([]string, error) {
|
|||||||
}
|
}
|
||||||
if strings.HasSuffix(object.Key, "/") || strings.Contains(object.Key, "/manifest.json.zst") {
|
if strings.HasSuffix(object.Key, "/") || strings.Contains(object.Key, "/manifest.json.zst") {
|
||||||
sid := parts[1]
|
sid := parts[1]
|
||||||
found := false
|
if !seen[sid] {
|
||||||
for _, id := range snapshotIDs {
|
seen[sid] = true
|
||||||
if id == sid {
|
|
||||||
found = true
|
|
||||||
break
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if !found {
|
|
||||||
snapshotIDs = append(snapshotIDs, sid)
|
snapshotIDs = append(snapshotIDs, sid)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user