Optimize scan phase: in-memory change detection and batched DB writes

Performance improvements:
- Load all known files from DB into memory at startup
- Check file changes against in-memory map (no per-file DB queries)
- Batch database writes in groups of 1000 files per transaction
- Scan phase now only counts regular files, not directories

This should improve scan speed from ~600 files/sec to potentially
10,000+ files/sec by eliminating per-file database round trips.
This commit is contained in:
2025-12-19 12:08:47 +07:00
parent badc0c07e0
commit c3725e745e
3 changed files with 187 additions and 258 deletions

View File

@@ -194,8 +194,8 @@ func TestMultipleFileChanges(t *testing.T) {
// First scan
result1, err := scanner.Scan(ctx, "/", snapshotID1)
require.NoError(t, err)
-	// 4 files because root directory is also counted
-	assert.Equal(t, 4, result1.FilesScanned)
+	// Only regular files are counted, not directories
+	assert.Equal(t, 3, result1.FilesScanned)
// Modify two files
time.Sleep(10 * time.Millisecond) // Ensure mtime changes
@@ -221,9 +221,8 @@ func TestMultipleFileChanges(t *testing.T) {
result2, err := scanner.Scan(ctx, "/", snapshotID2)
require.NoError(t, err)
-	// The scanner might examine more items than just our files (includes directories, etc)
-	// We should verify that at least our expected files were scanned
-	assert.GreaterOrEqual(t, result2.FilesScanned, 4, "Should scan at least 4 files (3 files + root dir)")
+	// Only regular files are counted, not directories
+	assert.Equal(t, 3, result2.FilesScanned)
// Verify each file has exactly one set of chunks
for path := range files {