Fix deadlock in snapshotter and add timing logs
- Move GetDetailedStats() call outside of read lock to avoid deadlock - Add timing logs to identify performance bottlenecks during snapshot - Log duration for copying routes, marshaling JSON, and writing to disk
This commit is contained in:
parent
fa9b086629
commit
5bd3add59b
@ -138,11 +138,18 @@ func (s *Snapshotter) TakeSnapshot() error {
|
|||||||
s.logger.Info("Starting routing table snapshot")
|
s.logger.Info("Starting routing table snapshot")
|
||||||
|
|
||||||
// Get a copy of all routes while holding read lock
|
// Get a copy of all routes while holding read lock
|
||||||
|
copyStart := time.Now()
|
||||||
s.rt.RLock()
|
s.rt.RLock()
|
||||||
routes := s.rt.GetAllRoutesUnsafe() // We'll need to add this method
|
routes := s.rt.GetAllRoutesUnsafe() // We'll need to add this method
|
||||||
stats := s.rt.GetDetailedStats()
|
|
||||||
s.rt.RUnlock()
|
s.rt.RUnlock()
|
||||||
|
|
||||||
|
// Get stats separately to avoid deadlock
|
||||||
|
stats := s.rt.GetDetailedStats()
|
||||||
|
|
||||||
|
s.logger.Info("Copied routes from routing table",
|
||||||
|
"duration", time.Since(copyStart),
|
||||||
|
"route_count", len(routes))
|
||||||
|
|
||||||
// Create snapshot data structure
|
// Create snapshot data structure
|
||||||
snapshot := struct {
|
snapshot := struct {
|
||||||
Timestamp time.Time `json:"timestamp"`
|
Timestamp time.Time `json:"timestamp"`
|
||||||
@ -155,10 +162,14 @@ func (s *Snapshotter) TakeSnapshot() error {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Serialize to JSON
|
// Serialize to JSON
|
||||||
|
marshalStart := time.Now()
|
||||||
jsonData, err := json.Marshal(snapshot)
|
jsonData, err := json.Marshal(snapshot)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
return fmt.Errorf("failed to marshal snapshot: %w", err)
|
return fmt.Errorf("failed to marshal snapshot: %w", err)
|
||||||
}
|
}
|
||||||
|
s.logger.Info("Marshaled snapshot to JSON",
|
||||||
|
"duration", time.Since(marshalStart),
|
||||||
|
"size_bytes", len(jsonData))
|
||||||
|
|
||||||
// Write compressed data to temporary file
|
// Write compressed data to temporary file
|
||||||
tempPath := filepath.Join(s.stateDir, snapshotFilename+tempFileSuffix)
|
tempPath := filepath.Join(s.stateDir, snapshotFilename+tempFileSuffix)
|
||||||
@ -183,6 +194,7 @@ func (s *Snapshotter) TakeSnapshot() error {
|
|||||||
gzipWriter.Comment = fmt.Sprintf("RouteWatch snapshot taken at %s", snapshot.Timestamp.Format(time.RFC3339))
|
gzipWriter.Comment = fmt.Sprintf("RouteWatch snapshot taken at %s", snapshot.Timestamp.Format(time.RFC3339))
|
||||||
|
|
||||||
// Write compressed data
|
// Write compressed data
|
||||||
|
writeStart := time.Now()
|
||||||
if _, err := gzipWriter.Write(jsonData); err != nil {
|
if _, err := gzipWriter.Write(jsonData); err != nil {
|
||||||
return fmt.Errorf("failed to write compressed data: %w", err)
|
return fmt.Errorf("failed to write compressed data: %w", err)
|
||||||
}
|
}
|
||||||
@ -206,6 +218,8 @@ func (s *Snapshotter) TakeSnapshot() error {
|
|||||||
if err := os.Rename(tempPath, finalPath); err != nil {
|
if err := os.Rename(tempPath, finalPath); err != nil {
|
||||||
return fmt.Errorf("failed to rename temporary file: %w", err)
|
return fmt.Errorf("failed to rename temporary file: %w", err)
|
||||||
}
|
}
|
||||||
|
s.logger.Info("Wrote compressed snapshot to disk",
|
||||||
|
"duration", time.Since(writeStart))
|
||||||
|
|
||||||
duration := time.Since(start)
|
duration := time.Since(start)
|
||||||
s.lastSnapshot = time.Now()
|
s.lastSnapshot = time.Now()
|
||||||
|
Loading…
Reference in New Issue
Block a user