Read chunks from cached blobs via ReadAt instead of full-blob Get
Restore's per-chunk loop called blobCache.Get(blobHash) and sliced the
returned []byte to extract the chunk it actually wanted. Get reads the
entire blob from disk into memory — so for a 10 GB blob, every chunk
extraction was a 10 GB ReadFile to get back a few KB. With ~40k files
each paying ~600ms per cache hit, that alone was burning ~6 hours of
wall-clock time on a real restore.
The hot loop now works as follows:
- If the blob isn't cached: download it (full plaintext into memory),
  Put it into the disk cache, and satisfy this chunk from the in-memory
  buffer.
- If it's cached: call blobCache.ReadAt(hash, offset, length), which
  reads only the chunk's bytes from the on-disk blob file.
ReadAt was already implemented on blobDiskCache; restore just wasn't
using it.
Debug timings from the user's photo-catalog restore showed
ms_cache_gets dominating every cache-hit file at 500-1000ms. With
ReadAt those should drop to sub-millisecond and the visible throughput
should be bound by single-stream blob download + decrypt/decompress
rather than disk-read amplification.
This commit is contained in:
@@ -569,14 +569,22 @@ func (v *Vaultik) restoreRegularFile(
|
|||||||
return fmt.Errorf("getting blob %s: %w", blobChunk.BlobID, err)
|
return fmt.Errorf("getting blob %s: %w", blobChunk.BlobID, err)
|
||||||
}
|
}
|
||||||
|
|
||||||
// Download and decrypt blob if not cached
|
// If the blob isn't on disk yet, download it (one full readout
|
||||||
|
// of the plaintext into memory), write it to the cache, and
|
||||||
|
// satisfy THIS chunk from the in-memory copy. Subsequent files
|
||||||
|
// needing chunks from the same blob hit the ReadAt branch and
|
||||||
|
// read only the bytes they actually want — never the whole
|
||||||
|
// blob — which is the difference between sub-millisecond chunk
|
||||||
|
// extraction and ~1 s per cache hit on a 10 GB blob.
|
||||||
blobHashStr := blob.Hash.String()
|
blobHashStr := blob.Hash.String()
|
||||||
|
var chunkData []byte
|
||||||
t0 = time.Now()
|
t0 = time.Now()
|
||||||
blobData, ok := blobCache.Get(blobHashStr)
|
blobInCache := blobCache.Has(blobHashStr)
|
||||||
cacheGetDur += time.Since(t0)
|
cacheGetDur += time.Since(t0)
|
||||||
if !ok {
|
|
||||||
|
if !blobInCache {
|
||||||
t0 = time.Now()
|
t0 = time.Now()
|
||||||
blobData, err = v.downloadBlob(ctx, blobHashStr, blob.CompressedSize, identity)
|
blobData, err := v.downloadBlob(ctx, blobHashStr, blob.CompressedSize, identity)
|
||||||
downloadDur += time.Since(t0)
|
downloadDur += time.Since(t0)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return fmt.Errorf("downloading blob %s: %w", blobHashStr[:16], err)
|
return fmt.Errorf("downloading blob %s: %w", blobHashStr[:16], err)
|
||||||
@@ -589,16 +597,21 @@ func (v *Vaultik) restoreRegularFile(
|
|||||||
downloadCount++
|
downloadCount++
|
||||||
result.BlobsDownloaded++
|
result.BlobsDownloaded++
|
||||||
result.BytesDownloaded += blob.CompressedSize
|
result.BytesDownloaded += blob.CompressedSize
|
||||||
} else {
|
|
||||||
cacheHitCount++
|
|
||||||
}
|
|
||||||
|
|
||||||
// Extract chunk from blob
|
|
||||||
if blobChunk.Offset+blobChunk.Length > int64(len(blobData)) {
|
if blobChunk.Offset+blobChunk.Length > int64(len(blobData)) {
|
||||||
return fmt.Errorf("chunk %s extends beyond blob data (offset=%d, length=%d, blob_size=%d)",
|
return fmt.Errorf("chunk %s extends beyond blob data (offset=%d, length=%d, blob_size=%d)",
|
||||||
fc.ChunkHash[:16], blobChunk.Offset, blobChunk.Length, len(blobData))
|
fc.ChunkHash[:16], blobChunk.Offset, blobChunk.Length, len(blobData))
|
||||||
}
|
}
|
||||||
chunkData := blobData[blobChunk.Offset : blobChunk.Offset+blobChunk.Length]
|
chunkData = blobData[blobChunk.Offset : blobChunk.Offset+blobChunk.Length]
|
||||||
|
} else {
|
||||||
|
cacheHitCount++
|
||||||
|
t0 = time.Now()
|
||||||
|
chunkData, err = blobCache.ReadAt(blobHashStr, blobChunk.Offset, blobChunk.Length)
|
||||||
|
cacheGetDur += time.Since(t0)
|
||||||
|
if err != nil {
|
||||||
|
return fmt.Errorf("reading chunk %s from cached blob %s: %w", fc.ChunkHash[:16], blobHashStr[:16], err)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// Write chunk to output file
|
// Write chunk to output file
|
||||||
t0 = time.Now()
|
t0 = time.Now()
|
||||||
|
|||||||
Reference in New Issue
Block a user