Optimize scan phase: in-memory change detection and batched DB writes
Performance improvements:

- Load all known files from DB into memory at startup
- Check file changes against in-memory map (no per-file DB queries)
- Batch database writes in groups of 1000 files per transaction
- Scan phase now only counts regular files, not directories

This should improve scan speed from ~600 files/sec to potentially 10,000+ files/sec by eliminating per-file database round trips.
This commit is contained in:
@@ -99,26 +99,25 @@ func TestScannerSimpleDirectory(t *testing.T) {
|
||||
t.Fatalf("scan failed: %v", err)
|
||||
}
|
||||
|
||||
// Verify results
|
||||
// We now scan 6 files + 3 directories (source, subdir, subdir2) = 9 entries
|
||||
if result.FilesScanned != 9 {
|
||||
t.Errorf("expected 9 entries scanned, got %d", result.FilesScanned)
|
||||
// Verify results - we only scan regular files, not directories
|
||||
if result.FilesScanned != 6 {
|
||||
t.Errorf("expected 6 files scanned, got %d", result.FilesScanned)
|
||||
}
|
||||
|
||||
// Directories have their own sizes, so the total will be more than just file content
|
||||
// Total bytes should be the sum of all file contents
|
||||
if result.BytesScanned < 97 { // At minimum we have 97 bytes of file content
|
||||
t.Errorf("expected at least 97 bytes scanned, got %d", result.BytesScanned)
|
||||
}
|
||||
|
||||
// Verify files in database
|
||||
// Verify files in database - only regular files are stored
|
||||
files, err := repos.Files.ListByPrefix(ctx, "/source")
|
||||
if err != nil {
|
||||
t.Fatalf("failed to list files: %v", err)
|
||||
}
|
||||
|
||||
// We should have 6 files + 3 directories = 9 entries
|
||||
if len(files) != 9 {
|
||||
t.Errorf("expected 9 entries in database, got %d", len(files))
|
||||
// We should have 6 files (directories are not stored)
|
||||
if len(files) != 6 {
|
||||
t.Errorf("expected 6 files in database, got %d", len(files))
|
||||
}
|
||||
|
||||
// Verify specific file
|
||||
@@ -235,9 +234,9 @@ func TestScannerLargeFile(t *testing.T) {
|
||||
t.Fatalf("scan failed: %v", err)
|
||||
}
|
||||
|
||||
// We scan 1 file + 1 directory = 2 entries
|
||||
if result.FilesScanned != 2 {
|
||||
t.Errorf("expected 2 entries scanned, got %d", result.FilesScanned)
|
||||
// We scan only regular files, not directories
|
||||
if result.FilesScanned != 1 {
|
||||
t.Errorf("expected 1 file scanned, got %d", result.FilesScanned)
|
||||
}
|
||||
|
||||
// The file size should be at least 1MB
|
||||
|
||||
Reference in New Issue
Block a user