Populate snapshot_blobs for dedup-referenced blobs at completion
The bug: fully-deduplicated snapshots (every chunk already in storage from a prior run) had an empty snapshot_blobs table. The metadata-export pipeline then dropped all blob/blob_chunks rows from the exported database, leaving file_chunks references to chunks whose blobs were no longer recorded. Restore fails on every file with "chunk X not found in any blob". Fix: at CompleteSnapshot time, run an INSERT OR IGNORE that links every blob holding a chunk referenced by this snapshot's files into snapshot_blobs. New blobs uploaded during the snapshot are already recorded (no-op for them); dedup-referenced blobs are added. The cleanup query in deleteOrphanedBlobs already restricts to snapshot_blobs entries for the current snapshot — so once snapshot_blobs is correctly populated, the exported database contains the full set of blob/blob_chunks rows needed for restore. Regression test: TestDedupOnlySnapshotRestores creates two identical snapshots (the second uploads zero new blobs) and restores the second. Without the fix, restore fails on every file.
This commit is contained in:
@@ -331,6 +331,43 @@ func (r *SnapshotRepository) AddFilesByIDBatch(ctx context.Context, tx *sql.Tx,
|
||||
return nil
|
||||
}
|
||||
|
||||
// PopulateReferencedBlobs ensures snapshot_blobs contains an entry for
|
||||
// every blob that holds a chunk referenced by any file in the snapshot.
|
||||
// This is necessary because the AddBlob hook only runs when a blob is
|
||||
// newly uploaded during a snapshot — fully-deduplicated snapshots (where
|
||||
// every chunk already exists in storage from a prior run) would otherwise
|
||||
// have an empty snapshot_blobs set and be impossible to restore.
|
||||
//
|
||||
// Returns the number of rows inserted (i.e. blobs that were previously
|
||||
// referenced indirectly via file_chunks but not yet recorded in
|
||||
// snapshot_blobs for this snapshot).
|
||||
func (r *SnapshotRepository) PopulateReferencedBlobs(ctx context.Context, tx *sql.Tx, snapshotID string) (int64, error) {
|
||||
query := `
|
||||
INSERT OR IGNORE INTO snapshot_blobs (snapshot_id, blob_id, blob_hash)
|
||||
SELECT DISTINCT ?, blobs.id, blobs.blob_hash
|
||||
FROM blobs
|
||||
JOIN blob_chunks ON blob_chunks.blob_id = blobs.id
|
||||
JOIN file_chunks ON file_chunks.chunk_hash = blob_chunks.chunk_hash
|
||||
JOIN snapshot_files ON snapshot_files.file_id = file_chunks.file_id
|
||||
WHERE snapshot_files.snapshot_id = ?
|
||||
AND blobs.blob_hash IS NOT NULL
|
||||
`
|
||||
|
||||
var result sql.Result
|
||||
var err error
|
||||
if tx != nil {
|
||||
result, err = tx.ExecContext(ctx, query, snapshotID, snapshotID)
|
||||
} else {
|
||||
result, err = r.db.ExecWithLog(ctx, query, snapshotID, snapshotID)
|
||||
}
|
||||
if err != nil {
|
||||
return 0, fmt.Errorf("populating referenced blobs: %w", err)
|
||||
}
|
||||
|
||||
n, _ := result.RowsAffected()
|
||||
return n, nil
|
||||
}
|
||||
|
||||
// AddBlob adds a blob to a snapshot
|
||||
func (r *SnapshotRepository) AddBlob(ctx context.Context, tx *sql.Tx, snapshotID string, blobID types.BlobID, blobHash types.BlobHash) error {
|
||||
query := `
|
||||
|
||||
Reference in New Issue
Block a user