checkpointing, heavy dev
This commit is contained in:
@@ -1,11 +1,17 @@
|
||||
// Package statcollector provides system information collection
|
||||
package statcollector
|
||||
|
||||
import (
|
||||
"log/slog"
|
||||
"os"
|
||||
"os/exec"
|
||||
"regexp"
|
||||
"strconv"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"git.eeqj.de/sneak/hdmistat/internal/netmon"
|
||||
"github.com/dustin/go-humanize"
|
||||
"github.com/shirou/gopsutil/v3/cpu"
|
||||
"github.com/shirou/gopsutil/v3/disk"
|
||||
"github.com/shirou/gopsutil/v3/host"
|
||||
@@ -14,6 +20,17 @@ import (
|
||||
"github.com/shirou/gopsutil/v3/process"
|
||||
)
|
||||
|
||||
const (
	// maxProcesses caps how many processes are recorded per collection pass.
	maxProcesses = 100
	// processTimeout bounds the wait for a single process's CPU sample.
	processTimeout = 50 * time.Millisecond
	// processStableTime is the minimum process age; younger processes are skipped.
	processStableTime = 100 * time.Millisecond
	// msToSecondsDivisor converts a millisecond timestamp to whole seconds.
	msToSecondsDivisor = 1000

	// bitsPerMegabit converts a link speed in Mb/s to bits per second.
	bitsPerMegabit = 1_000_000
)
|
||||
|
||||
// SystemInfo represents overall system information
|
||||
type SystemInfo struct {
|
||||
Hostname string
|
||||
@@ -40,13 +57,23 @@ type DiskInfo struct {
|
||||
|
||||
// NetworkInfo represents network interface information.
//
// Every field is declared exactly once; the previous text repeated Name,
// IPAddresses, LinkSpeed, BytesSent and BytesRecv, which Go rejects as
// duplicate fields.
type NetworkInfo struct {
	// Name is the interface name.
	Name string
	// IPAddresses lists the addresses assigned to the interface.
	IPAddresses []string
	// LinkSpeed is the link speed in bits per second; zero when it is unknown.
	LinkSpeed uint64

	// BytesSent and BytesRecv are byte counters for the interface.
	BytesSent uint64
	BytesRecv uint64

	// PacketsSent and PacketsRecv are packet counters for the interface.
	PacketsSent uint64
	PacketsRecv uint64

	BitsSentRate uint64 // bits per second
	BitsRecvRate uint64 // bits per second
}
|
||||
|
||||
// FormatSentRate returns the send rate as a human-readable string
|
||||
func (n *NetworkInfo) FormatSentRate() string {
|
||||
return humanize.SI(float64(n.BitsSentRate), "bit/s")
|
||||
}
|
||||
|
||||
// FormatRecvRate returns the receive rate as a human-readable string
|
||||
func (n *NetworkInfo) FormatRecvRate() string {
|
||||
return humanize.SI(float64(n.BitsRecvRate), "bit/s")
|
||||
}
|
||||
|
||||
// ProcessInfo represents process information
|
||||
@@ -67,15 +94,25 @@ type Collector interface {
|
||||
// SystemCollector implements Collector
|
||||
type SystemCollector struct {
|
||||
logger *slog.Logger
|
||||
lastNetStats map[string]psnet.IOCountersStat
|
||||
netMonitor *netmon.Monitor
|
||||
lastCollectTime time.Time
|
||||
}
|
||||
|
||||
// NewSystemCollector creates a new system collector
|
||||
func NewSystemCollector(logger *slog.Logger) *SystemCollector {
|
||||
nm := netmon.New(logger)
|
||||
nm.Start()
|
||||
|
||||
return &SystemCollector{
|
||||
logger: logger,
|
||||
lastNetStats: make(map[string]psnet.IOCountersStat),
|
||||
logger: logger,
|
||||
netMonitor: nm,
|
||||
}
|
||||
}
|
||||
|
||||
// Stop stops the system collector
|
||||
func (c *SystemCollector) Stop() {
|
||||
if c.netMonitor != nil {
|
||||
c.netMonitor.Stop()
|
||||
}
|
||||
}
|
||||
|
||||
@@ -100,7 +137,13 @@ func (c *SystemCollector) Collect() (*SystemInfo, error) {
|
||||
if err != nil {
|
||||
c.logger.Warn("getting uptime", "error", err)
|
||||
} else {
|
||||
info.Uptime = time.Duration(uptimeSecs) * time.Second
|
||||
if uptimeSecs > 0 {
|
||||
// Convert uint64 to int64 safely to avoid overflow
|
||||
maxInt64 := ^uint64(0) >> 1
|
||||
if uptimeSecs <= maxInt64 {
|
||||
info.Uptime = time.Duration(int64(uptimeSecs)) * time.Second
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Memory
|
||||
@@ -160,37 +203,52 @@ func (c *SystemCollector) Collect() (*SystemInfo, error) {
|
||||
}
|
||||
}
|
||||
|
||||
// Network
|
||||
// Network - get rates from network monitor
|
||||
netStats := c.netMonitor.GetStats()
|
||||
|
||||
// Also get interface details for IP addresses
|
||||
interfaces, err := psnet.Interfaces()
|
||||
if err != nil {
|
||||
c.logger.Warn("getting network interfaces", "error", err)
|
||||
} else {
|
||||
ioCounters, _ := psnet.IOCounters(true)
|
||||
ioMap := make(map[string]psnet.IOCountersStat)
|
||||
for _, counter := range ioCounters {
|
||||
ioMap[counter.Name] = counter
|
||||
}
|
||||
// Create a map of interface names to IPs and link speeds
|
||||
ifaceIPs := make(map[string][]string)
|
||||
ifaceSpeeds := make(map[string]uint64)
|
||||
|
||||
for _, iface := range interfaces {
|
||||
if iface.Name == "lo" || strings.HasPrefix(iface.Name, "docker") {
|
||||
continue
|
||||
}
|
||||
|
||||
netInfo := NetworkInfo{
|
||||
Name: iface.Name,
|
||||
}
|
||||
|
||||
// Get IP addresses
|
||||
var ips []string
|
||||
for _, addr := range iface.Addrs {
|
||||
netInfo.IPAddresses = append(netInfo.IPAddresses, addr.Addr)
|
||||
ips = append(ips, addr.Addr)
|
||||
}
|
||||
ifaceIPs[iface.Name] = ips
|
||||
|
||||
// Try to get link speed with ethtool
|
||||
if speed := c.getLinkSpeed(iface.Name); speed > 0 {
|
||||
ifaceSpeeds[iface.Name] = speed
|
||||
}
|
||||
}
|
||||
|
||||
// Combine network monitor stats with interface details
|
||||
for _, stat := range netStats {
|
||||
netInfo := NetworkInfo{
|
||||
Name: stat.Name,
|
||||
BytesSent: stat.BytesSent,
|
||||
BytesRecv: stat.BytesRecv,
|
||||
BitsSentRate: stat.BitsSentRate,
|
||||
BitsRecvRate: stat.BitsRecvRate,
|
||||
}
|
||||
|
||||
// Get stats
|
||||
if stats, ok := ioMap[iface.Name]; ok {
|
||||
netInfo.BytesSent = stats.BytesSent
|
||||
netInfo.BytesRecv = stats.BytesRecv
|
||||
netInfo.PacketsSent = stats.PacketsSent
|
||||
netInfo.PacketsRecv = stats.PacketsRecv
|
||||
// Add IP addresses if available
|
||||
if ips, ok := ifaceIPs[stat.Name]; ok {
|
||||
netInfo.IPAddresses = ips
|
||||
}
|
||||
|
||||
// Add link speed if available
|
||||
if speed, ok := ifaceSpeeds[stat.Name]; ok {
|
||||
netInfo.LinkSpeed = speed
|
||||
}
|
||||
|
||||
info.Network = append(info.Network, netInfo)
|
||||
@@ -202,9 +260,43 @@ func (c *SystemCollector) Collect() (*SystemInfo, error) {
|
||||
if err != nil {
|
||||
c.logger.Warn("getting processes", "error", err)
|
||||
} else {
|
||||
// Limit to top processes to avoid hanging
|
||||
processCount := 0
|
||||
|
||||
for _, p := range processes {
|
||||
name, _ := p.Name()
|
||||
cpuPercent, _ := p.CPUPercent()
|
||||
if processCount >= maxProcesses {
|
||||
break
|
||||
}
|
||||
|
||||
// Skip kernel threads and very short-lived processes
|
||||
name, err := p.Name()
|
||||
if err != nil || name == "" {
|
||||
continue
|
||||
}
|
||||
|
||||
// Use CreateTime to skip very new processes that might not have stable stats
|
||||
createTime, err := p.CreateTime()
|
||||
if err != nil || time.Since(time.Unix(createTime/msToSecondsDivisor, 0)) < processStableTime {
|
||||
continue
|
||||
}
|
||||
|
||||
// Get CPU percent with timeout - this is the call that can hang
|
||||
cpuPercent := 0.0
|
||||
cpuChan := make(chan float64, 1)
|
||||
go func() {
|
||||
cpu, _ := p.CPUPercent()
|
||||
cpuChan <- cpu
|
||||
}()
|
||||
|
||||
select {
|
||||
case cpu := <-cpuChan:
|
||||
cpuPercent = cpu
|
||||
case <-time.After(processTimeout):
|
||||
// Skip this process if CPU sampling takes too long
|
||||
c.logger.Debug("skipping process due to CPU timeout", "pid", p.Pid, "name", name)
|
||||
continue
|
||||
}
|
||||
|
||||
memInfo, _ := p.MemoryInfo()
|
||||
username, _ := p.Username()
|
||||
|
||||
@@ -216,9 +308,36 @@ func (c *SystemCollector) Collect() (*SystemInfo, error) {
|
||||
MemoryVMS: memInfo.VMS,
|
||||
Username: username,
|
||||
})
|
||||
|
||||
processCount++
|
||||
}
|
||||
}
|
||||
|
||||
c.lastCollectTime = time.Now()
|
||||
return info, nil
|
||||
}
|
||||
|
||||
// getLinkSpeed gets the link speed for an interface using ethtool
|
||||
func (c *SystemCollector) getLinkSpeed(ifaceName string) uint64 {
|
||||
// Run ethtool to get link speed
|
||||
output, err := exec.Command("ethtool", ifaceName).Output()
|
||||
if err != nil {
|
||||
return 0
|
||||
}
|
||||
|
||||
// Parse the output for speed
|
||||
// Look for lines like "Speed: 1000Mb/s" or "Speed: 10000Mb/s"
|
||||
speedRegex := regexp.MustCompile(`Speed:\s+(\d+)Mb/s`)
|
||||
matches := speedRegex.FindSubmatch(output)
|
||||
if len(matches) < 2 {
|
||||
return 0
|
||||
}
|
||||
|
||||
// Convert from Mb/s to bits/s
|
||||
mbps, err := strconv.ParseUint(string(matches[1]), 10, 64)
|
||||
if err != nil {
|
||||
return 0
|
||||
}
|
||||
|
||||
return mbps * bitsPerMegabit // Convert to bits per second
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user