Optimize scan phase: in-memory change detection and batched DB writes
Performance improvements:
- Load all known files from DB into memory at startup
- Check file changes against in-memory map (no per-file DB queries)
- Batch database writes in groups of 1000 files per transaction
- Scan phase now only counts regular files, not directories

This should improve scan speed from ~600 files/sec to potentially 10,000+ files/sec by eliminating per-file database round trips.
This commit is contained in:
@@ -194,8 +194,8 @@ func TestMultipleFileChanges(t *testing.T) {
|
||||
// First scan
|
||||
result1, err := scanner.Scan(ctx, "/", snapshotID1)
|
||||
require.NoError(t, err)
|
||||
// 4 files because root directory is also counted
|
||||
assert.Equal(t, 4, result1.FilesScanned)
|
||||
// Only regular files are counted, not directories
|
||||
assert.Equal(t, 3, result1.FilesScanned)
|
||||
|
||||
// Modify two files
|
||||
time.Sleep(10 * time.Millisecond) // Ensure mtime changes
|
||||
@@ -221,9 +221,8 @@ func TestMultipleFileChanges(t *testing.T) {
|
||||
result2, err := scanner.Scan(ctx, "/", snapshotID2)
|
||||
require.NoError(t, err)
|
||||
|
||||
// The scanner might examine more items than just our files (includes directories, etc)
|
||||
// We should verify that at least our expected files were scanned
|
||||
assert.GreaterOrEqual(t, result2.FilesScanned, 4, "Should scan at least 4 files (3 files + root dir)")
|
||||
// Only regular files are counted, not directories
|
||||
assert.Equal(t, 3, result2.FilesScanned)
|
||||
|
||||
// Verify each file has exactly one set of chunks
|
||||
for path := range files {
|
||||
|
||||
Reference in New Issue
Block a user