Fix deadlock in snapshotter and add timing logs

- Move GetDetailedStats() call outside of read lock to avoid deadlock
- Add timing logs to identify performance bottlenecks during snapshot
- Log duration for copying routes, marshaling JSON, and writing to disk
This commit is contained in:
Jeffrey Paul 2025-07-28 00:16:01 +02:00
parent fa9b086629
commit 5bd3add59b

View File

@ -138,11 +138,18 @@ func (s *Snapshotter) TakeSnapshot() error {
s.logger.Info("Starting routing table snapshot")
// Get a copy of all routes while holding read lock
copyStart := time.Now()
s.rt.RLock()
routes := s.rt.GetAllRoutesUnsafe() // We'll need to add this method
stats := s.rt.GetDetailedStats()
s.rt.RUnlock()
// Get stats separately to avoid deadlock
stats := s.rt.GetDetailedStats()
s.logger.Info("Copied routes from routing table",
"duration", time.Since(copyStart),
"route_count", len(routes))
// Create snapshot data structure
snapshot := struct {
Timestamp time.Time `json:"timestamp"`
@ -155,10 +162,14 @@ func (s *Snapshotter) TakeSnapshot() error {
}
// Serialize to JSON
marshalStart := time.Now()
jsonData, err := json.Marshal(snapshot)
if err != nil {
return fmt.Errorf("failed to marshal snapshot: %w", err)
}
s.logger.Info("Marshaled snapshot to JSON",
"duration", time.Since(marshalStart),
"size_bytes", len(jsonData))
// Write compressed data to temporary file
tempPath := filepath.Join(s.stateDir, snapshotFilename+tempFileSuffix)
@ -183,6 +194,7 @@ func (s *Snapshotter) TakeSnapshot() error {
gzipWriter.Comment = fmt.Sprintf("RouteWatch snapshot taken at %s", snapshot.Timestamp.Format(time.RFC3339))
// Write compressed data
writeStart := time.Now()
if _, err := gzipWriter.Write(jsonData); err != nil {
return fmt.Errorf("failed to write compressed data: %w", err)
}
@ -206,6 +218,8 @@ func (s *Snapshotter) TakeSnapshot() error {
if err := os.Rename(tempPath, finalPath); err != nil {
return fmt.Errorf("failed to rename temporary file: %w", err)
}
s.logger.Info("Wrote compressed snapshot to disk",
"duration", time.Since(writeStart))
duration := time.Since(start)
s.lastSnapshot = time.Now()