Files
server_status/server_status.go
2026-03-04 11:13:02 +08:00

364 lines
8.1 KiB
Go

package serverstatus
import (
"bufio"
"os"
"strconv"
"strings"
"sync"
"time"
"github.com/prometheus/client_golang/prometheus"
"github.com/shirou/gopsutil/v4/cpu"
"github.com/shirou/gopsutil/v4/disk"
"github.com/shirou/gopsutil/v4/mem"
"github.com/shirou/gopsutil/v4/net"
)
// collector gathers host-level CPU, memory, swap, network and disk figures
// and exposes them through its Describe and Collect methods.
type collector struct {
	// serverName is the value supplied for the single variable label
	// of every metric this collector emits.
	serverName string

	// Descriptors for the percentage gauges.
	cpuUsage    *prometheus.Desc
	memoryUsage *prometheus.Desc
	swapUsage   *prometheus.Desc

	// Descriptors for the network throughput gauges.
	networkReceiveBytes  *prometheus.Desc
	networkTransmitBytes *prometheus.Desc

	// Descriptors for the disk operation-rate gauges.
	diskReadOps  *prometheus.Desc
	diskWriteOps *prometheus.Desc

	// mu is held by collectNetwork and collectDiskIOPS while they read and
	// update the previous-sample fields below.
	mu sync.Mutex

	// Last network sample and when it was taken; rates are computed as the
	// difference between the current sample and this one.
	prevNetStats map[string]net.IOCountersStat
	prevNetTime  time.Time

	// Last disk sample and when it was taken, used the same way for IOPS.
	prevDiskStats map[string]disk.IOCountersStat
	prevDiskTime  time.Time
}
// newCollector builds a collector whose metrics all carry serverName as the
// value of their "server_name" label. The previous-sample maps start empty and
// both previous-sample timestamps are set to the construction time.
func newCollector(serverName string) *collector {
	labels := []string{"server_name"}

	// Every descriptor shares the same variable labels and has no constant
	// labels, so only the name and help text vary.
	describe := func(name, help string) *prometheus.Desc {
		return prometheus.NewDesc(name, help, labels, nil)
	}

	created := time.Now()

	c := &collector{
		serverName:    serverName,
		prevNetStats:  make(map[string]net.IOCountersStat),
		prevNetTime:   created,
		prevDiskStats: make(map[string]disk.IOCountersStat),
		prevDiskTime:  created,
	}

	c.cpuUsage = describe("server_cpu_usage_percent", "CPU usage percentage")
	c.memoryUsage = describe("server_memory_usage_percent", "Memory usage percentage")
	c.swapUsage = describe("server_swap_usage_percent", "Swap usage percentage")
	c.networkReceiveBytes = describe(
		"server_network_receive_bytes_per_second",
		"Network receive rate in bytes per second",
	)
	c.networkTransmitBytes = describe(
		"server_network_transmit_bytes_per_second",
		"Network transmit rate in bytes per second",
	)
	c.diskReadOps = describe("server_disk_read_ops_per_second", "Disk read operations per second")
	c.diskWriteOps = describe("server_disk_write_ops_per_second", "Disk write operations per second")

	return c
}
// Describe implements prometheus.Collector by sending each of the seven
// metric descriptors held by the collector to ch.
func (c *collector) Describe(ch chan<- *prometheus.Desc) {
	descs := []*prometheus.Desc{
		c.cpuUsage,
		c.memoryUsage,
		c.swapUsage,
		c.networkReceiveBytes,
		c.networkTransmitBytes,
		c.diskReadOps,
		c.diskWriteOps,
	}
	for _, d := range descs {
		ch <- d
	}
}
// Collect implements prometheus.Collector. It runs the CPU, memory/swap,
// network and disk sub-collectors one after another, each writing its
// metrics to ch.
func (c *collector) Collect(ch chan<- prometheus.Metric) {
	stages := []func(chan<- prometheus.Metric){
		c.collectCPU,      // CPU usage
		c.collectMemory,   // memory and swap usage
		c.collectNetwork,  // network throughput
		c.collectDiskIOPS, // disk operations per second
	}
	for _, run := range stages {
		run(ch)
	}
}
// collectCPU emits one CPU usage gauge. The value is the first element
// returned by cpu.Percent(0, false); if that call fails or returns nothing,
// the value from getCPUUsageFromProc is used instead.
func (c *collector) collectCPU(ch chan<- prometheus.Metric) {
	var usage float64

	if samples, err := cpu.Percent(0, false); err == nil && len(samples) > 0 {
		usage = samples[0]
	} else {
		// gopsutil gave us nothing usable; fall back to /proc/stat.
		usage = c.getCPUUsageFromProc()
	}

	ch <- prometheus.MustNewConstMetric(c.cpuUsage, prometheus.GaugeValue, usage, c.serverName)
}
// getCPUUsageFromProc is the fallback CPU reading used when gopsutil fails.
// It parses the aggregate "cpu" line (the first line) of /proc/stat and
// returns the non-idle share of the summed counters as a percentage in
// [0, 100].
//
// It returns 0 when the file cannot be opened or read, when the first line is
// not the aggregate "cpu" line, when a counter is not a valid unsigned
// integer, or when the counters sum to zero.
//
// NOTE: only a single sample is read, so the ratio covers whatever period the
// kernel counters cover (they are cumulative per proc(5)); it is not an
// instantaneous reading.
func (c *collector) getCPUUsageFromProc() float64 {
	file, err := os.Open("/proc/stat")
	if err != nil {
		return 0
	}
	defer file.Close()

	scanner := bufio.NewScanner(file)
	if !scanner.Scan() {
		return 0
	}
	line := scanner.Text()
	if !strings.HasPrefix(line, "cpu ") {
		return 0
	}

	// fields[0] is the "cpu" label; the counters follow in the order
	// user, nice, system, idle, iowait, irq, softirq, steal, guest, guest_nice.
	fields := strings.Fields(line)
	if len(fields) < 5 {
		return 0
	}

	const (
		idleIndex = 4 // position of the idle counter within fields
		// Per proc(5), guest and guest_nice time is already accounted in user
		// and nice, so summing past steal would count that time twice.
		lastCountedIndex = 8
	)

	last := len(fields) - 1
	if last > lastCountedIndex {
		last = lastCountedIndex
	}

	var total, idle uint64
	for i := 1; i <= last; i++ {
		val, err := strconv.ParseUint(fields[i], 10, 64)
		if err != nil {
			// A malformed counter makes the whole sample untrustworthy
			// (ParseUint yields MaxUint64 on range errors), so give up.
			return 0
		}
		total += val
		if i == idleIndex {
			idle = val
		}
	}
	if total == 0 {
		return 0
	}
	return float64(total-idle) / float64(total) * 100
}
// collectMemory emits the memory and swap usage gauges, taking each value
// from the UsedPercent field reported by gopsutil. The two readings are
// independent: a failure of either call simply omits that one metric.
func (c *collector) collectMemory(ch chan<- prometheus.Metric) {
	emit := func(desc *prometheus.Desc, percent float64) {
		ch <- prometheus.MustNewConstMetric(desc, prometheus.GaugeValue, percent, c.serverName)
	}

	// Physical memory.
	if vm, err := mem.VirtualMemory(); err == nil {
		emit(c.memoryUsage, vm.UsedPercent)
	}

	// Swap.
	if swap, err := mem.SwapMemory(); err == nil {
		emit(c.swapUsage, swap.UsedPercent)
	}
}
// isVirtualInterface reports whether name begins with one of a fixed set of
// prefixes associated with loopback, container, bridge, VPN and hypervisor
// interfaces. The match is a plain prefix test, so any interface whose name
// merely starts with one of these strings (for example anything starting
// with "lo") is treated as virtual.
func (c *collector) isVirtualInterface(name string) bool {
	knownPrefixes := [...]string{
		"lo",      // loopback
		"docker",  // Docker bridge
		"veth",    // virtual Ethernet pairs (Docker, K8s)
		"br-",     // Linux bridge
		"virbr",   // libvirt bridge
		"vnet",    // virtual network (KVM/QEMU)
		"tun",     // TUN devices (VPN)
		"tap",     // TAP devices
		"cni",     // Container Network Interface (K8s)
		"flannel", // Flannel (K8s)
		"calico",  // Calico (K8s)
		"weave",   // Weave (K8s)
		"kube",    // Kubernetes interfaces
		"cilium",  // Cilium (K8s)
		"lxc",     // LXC containers
		"lxd",     // LXD containers
		"vmbr",    // Proxmox bridge
		"vmnet",   // VMware network
		"ppp",     // Point-to-Point Protocol
	}

	for i := range knownPrefixes {
		if strings.HasPrefix(name, knownPrefixes[i]) {
			return true
		}
	}
	return false
}
// collectNetwork emits the receive and transmit rate gauges, in bytes per
// second, summed over every interface that isVirtualInterface does not
// filter out.
//
// Each rate is the change in an interface's byte counter since the previous
// call divided by the time elapsed since that call, so an interface only
// contributes once it has been seen in two consecutive samples. Nothing is
// emitted, and the stored sample is left untouched, when the counters cannot
// be read or when no time has elapsed since the previous sample.
//
// c.mu is held for the whole call because the previous-sample state is
// read and replaced here.
func (c *collector) collectNetwork(ch chan<- prometheus.Metric) {
	c.mu.Lock()
	defer c.mu.Unlock()

	netStats, err := net.IOCounters(true)
	if err != nil {
		return
	}

	now := time.Now()
	elapsed := now.Sub(c.prevNetTime).Seconds()
	if elapsed <= 0 {
		return
	}

	// Build the next baseline from scratch so that interfaces which have
	// disappeared are forgotten. Otherwise a reappearing interface would be
	// diffed against a sample older than prevNetTime while still being
	// divided by the short interval since the last scrape.
	current := make(map[string]net.IOCountersStat, len(netStats))

	var totalReceiveRate, totalTransmitRate float64
	for _, stat := range netStats {
		if c.isVirtualInterface(stat.Name) {
			continue
		}
		if prev, seen := c.prevNetStats[stat.Name]; seen {
			// The counters are unsigned: if one went backwards (interface
			// reset, counter wrap) the subtraction would wrap around to an
			// enormous value, so that direction is skipped for this sample.
			if stat.BytesRecv >= prev.BytesRecv {
				totalReceiveRate += float64(stat.BytesRecv-prev.BytesRecv) / elapsed
			}
			if stat.BytesSent >= prev.BytesSent {
				totalTransmitRate += float64(stat.BytesSent-prev.BytesSent) / elapsed
			}
		}
		current[stat.Name] = stat
	}
	c.prevNetStats = current
	c.prevNetTime = now

	ch <- prometheus.MustNewConstMetric(
		c.networkReceiveBytes,
		prometheus.GaugeValue,
		totalReceiveRate,
		c.serverName,
	)
	ch <- prometheus.MustNewConstMetric(
		c.networkTransmitBytes,
		prometheus.GaugeValue,
		totalTransmitRate,
		c.serverName,
	)
}
// collectDiskIOPS emits the read and write operation-rate gauges, in
// operations per second, summed over every device reported by
// disk.IOCounters whose name does not start with "loop" or "ram".
//
// Each rate is the change in a device's operation counter since the previous
// call divided by the time elapsed since that call, so a device only
// contributes once it has been seen in two consecutive samples. Nothing is
// emitted, and the stored sample is left untouched, when the counters cannot
// be read or when no time has elapsed since the previous sample.
//
// c.mu is held for the whole call because the previous-sample state is
// read and replaced here.
//
// NOTE(review): if disk.IOCounters reports both whole disks and their
// partitions, the totals would count the same I/O twice — confirm against
// the gopsutil behaviour on the target platform.
func (c *collector) collectDiskIOPS(ch chan<- prometheus.Metric) {
	c.mu.Lock()
	defer c.mu.Unlock()

	diskStats, err := disk.IOCounters()
	if err != nil {
		return
	}

	now := time.Now()
	elapsed := now.Sub(c.prevDiskTime).Seconds()
	if elapsed <= 0 {
		return
	}

	// Build the next baseline from scratch so that devices which have
	// disappeared are forgotten. Otherwise a reappearing device would be
	// diffed against a sample older than prevDiskTime while still being
	// divided by the short interval since the last scrape.
	current := make(map[string]disk.IOCountersStat, len(diskStats))

	var totalReadOps, totalWriteOps float64
	for name, stat := range diskStats {
		// Skip loop devices and RAM disks.
		if strings.HasPrefix(name, "loop") || strings.HasPrefix(name, "ram") {
			continue
		}
		if prev, seen := c.prevDiskStats[name]; seen {
			// The counters are unsigned: if one went backwards (device
			// re-attached, counter wrap) the subtraction would wrap around to
			// an enormous value, so that direction is skipped for this sample.
			if stat.ReadCount >= prev.ReadCount {
				totalReadOps += float64(stat.ReadCount-prev.ReadCount) / elapsed
			}
			if stat.WriteCount >= prev.WriteCount {
				totalWriteOps += float64(stat.WriteCount-prev.WriteCount) / elapsed
			}
		}
		current[name] = stat
	}
	c.prevDiskStats = current
	c.prevDiskTime = now

	ch <- prometheus.MustNewConstMetric(
		c.diskReadOps,
		prometheus.GaugeValue,
		totalReadOps,
		c.serverName,
	)
	ch <- prometheus.MustNewConstMetric(
		c.diskWriteOps,
		prometheus.GaugeValue,
		totalWriteOps,
		c.serverName,
	)
}
// Register creates a server status collector and registers it with
// registerer, returning whatever error the registration reports.
//
// A nil registerer selects prometheus.DefaultRegisterer. serverName becomes
// the value of the "server_name" label on every metric the collector emits.
func Register(registerer prometheus.Registerer, serverName string) error {
	target := registerer
	if target == nil {
		target = prometheus.DefaultRegisterer
	}
	return target.Register(newCollector(serverName))
}