652 lines
19 KiB
Go
652 lines
19 KiB
Go
package monitoring
|
|
|
|
import (
|
|
"bufio"
|
|
"context"
|
|
"database/sql"
|
|
"fmt"
|
|
"os"
|
|
"runtime"
|
|
"strconv"
|
|
"strings"
|
|
"time"
|
|
|
|
"github.com/atlasos/calypso/internal/common/database"
|
|
"github.com/atlasos/calypso/internal/common/logger"
|
|
)
|
|
|
|
// Metrics represents system metrics
|
|
type Metrics struct {
|
|
System SystemMetrics `json:"system"`
|
|
Storage StorageMetrics `json:"storage"`
|
|
SCST SCSTMetrics `json:"scst"`
|
|
Tape TapeMetrics `json:"tape"`
|
|
VTL VTLMetrics `json:"vtl"`
|
|
Tasks TaskMetrics `json:"tasks"`
|
|
API APIMetrics `json:"api"`
|
|
CollectedAt time.Time `json:"collected_at"`
|
|
}
|
|
|
|
// SystemMetrics holds host-level resource figures: CPU, memory, disk and
// uptime.
type SystemMetrics struct {
	// CPUUsagePercent is the CPU utilisation as a percentage.
	CPUUsagePercent float64 `json:"cpu_usage_percent"`

	// MemoryUsed is the amount of memory in use, in bytes.
	MemoryUsed int64 `json:"memory_used_bytes"`

	// MemoryTotal is the total amount of memory, in bytes.
	MemoryTotal int64 `json:"memory_total_bytes"`

	// MemoryPercent is the memory usage as a percentage.
	MemoryPercent float64 `json:"memory_usage_percent"`

	// DiskUsed is the used disk space, in bytes.
	DiskUsed int64 `json:"disk_used_bytes"`

	// DiskTotal is the total disk space, in bytes.
	DiskTotal int64 `json:"disk_total_bytes"`

	// DiskPercent is the disk usage as a percentage.
	DiskPercent float64 `json:"disk_usage_percent"`

	// UptimeSeconds is the uptime in whole seconds.
	UptimeSeconds int64 `json:"uptime_seconds"`
}
|
|
|
|
// StorageMetrics summarises the disks and disk repositories known to the
// database, as filled in by collectStorageMetrics.
type StorageMetrics struct {
	// TotalDisks is the number of active rows in physical_disks.
	TotalDisks int `json:"total_disks"`

	// TotalRepositories is the number of active rows in disk_repositories.
	TotalRepositories int `json:"total_repositories"`

	// TotalCapacityBytes is the summed total_bytes of the active repositories.
	TotalCapacityBytes int64 `json:"total_capacity_bytes"`

	// UsedCapacityBytes is the summed used_bytes of the active repositories.
	UsedCapacityBytes int64 `json:"used_capacity_bytes"`

	// AvailableBytes is TotalCapacityBytes minus UsedCapacityBytes.
	AvailableBytes int64 `json:"available_bytes"`

	// UsagePercent is used/total capacity as a percentage (0 when total is 0).
	UsagePercent float64 `json:"usage_percent"`
}
|
|
|
|
// SCSTMetrics holds the SCST object counts gathered by collectSCSTMetrics.
type SCSTMetrics struct {
	// TotalTargets is the number of rows in scst_targets.
	TotalTargets int `json:"total_targets"`

	// TotalLUNs is the number of rows in scst_luns.
	TotalLUNs int `json:"total_luns"`

	// TotalInitiators is the number of rows in scst_initiators.
	TotalInitiators int `json:"total_initiators"`

	// ActiveTargets is the number of distinct target_id values in scst_luns,
	// i.e. targets that have at least one LUN.
	ActiveTargets int `json:"active_targets"`
}
|
|
|
|
// TapeMetrics holds the physical tape counts gathered by collectTapeMetrics.
type TapeMetrics struct {
	// TotalLibraries is the number of rows in physical_tape_libraries.
	TotalLibraries int `json:"total_libraries"`

	// TotalDrives is the number of rows in physical_tape_drives.
	TotalDrives int `json:"total_drives"`

	// TotalSlots is the number of rows in physical_tape_slots.
	TotalSlots int `json:"total_slots"`

	// OccupiedSlots is the number of slots whose tape_barcode is not NULL.
	OccupiedSlots int `json:"occupied_slots"`
}
|
|
|
|
// VTLMetrics holds the virtual tape library counts gathered by
// collectVTLMetrics.
type VTLMetrics struct {
	// TotalLibraries is the number of rows in virtual_tape_libraries.
	TotalLibraries int `json:"total_libraries"`

	// TotalDrives is the number of rows in virtual_tape_drives.
	TotalDrives int `json:"total_drives"`

	// TotalTapes is the number of rows in virtual_tapes.
	TotalTapes int `json:"total_tapes"`

	// ActiveDrives is the number of virtual drives whose loaded_tape_id is
	// not NULL.
	ActiveDrives int `json:"active_drives"`

	// LoadedTapes is the number of virtual tapes with is_loaded = true.
	LoadedTapes int `json:"loaded_tapes"`
}
|
|
|
|
// TaskMetrics holds the task counts and timing gathered by
// collectTaskMetrics.
type TaskMetrics struct {
	// TotalTasks is the number of rows in tasks, regardless of status.
	TotalTasks int `json:"total_tasks"`

	// PendingTasks is the number of tasks with status 'pending'.
	PendingTasks int `json:"pending_tasks"`

	// RunningTasks is the number of tasks with status 'running'.
	RunningTasks int `json:"running_tasks"`

	// CompletedTasks is the number of tasks with status 'completed'.
	CompletedTasks int `json:"completed_tasks"`

	// FailedTasks is the number of tasks with status 'failed'.
	FailedTasks int `json:"failed_tasks"`

	// AvgDurationSec is the mean of completed_at - started_at, in seconds,
	// over completed tasks; 0 when no such average is available.
	AvgDurationSec float64 `json:"avg_duration_seconds"`
}
|
|
|
|
// APIMetrics holds API request figures. CollectMetrics only ever assigns an
// empty value of this type, noting that API metrics are collected separately
// via middleware.
type APIMetrics struct {
	// TotalRequests is the total number of requests.
	TotalRequests int64 `json:"total_requests"`

	// RequestsPerSec is the request rate per second.
	RequestsPerSec float64 `json:"requests_per_second"`

	// ErrorRate is the error rate.
	// NOTE(review): unit (fraction vs. percent) is not visible here; confirm
	// against the middleware that fills it in.
	ErrorRate float64 `json:"error_rate"`

	// AvgLatencyMs is the average latency in milliseconds.
	AvgLatencyMs float64 `json:"avg_latency_ms"`

	// ActiveConnections is the number of active connections.
	ActiveConnections int `json:"active_connections"`
}
|
|
|
|
// MetricsService collects and provides system metrics
|
|
type MetricsService struct {
|
|
db *database.DB
|
|
logger *logger.Logger
|
|
startTime time.Time
|
|
lastCPU *cpuStats // For CPU usage calculation
|
|
lastCPUTime time.Time
|
|
}
|
|
|
|
// cpuStats is one sample of the aggregate "cpu" line of /proc/stat. The
// fields are listed in the order in which readCPUStats takes them from the
// line (columns 1 through 9 after the "cpu" label).
type cpuStats struct {
	// user is column 1 of the cpu line.
	user uint64

	// nice is column 2 of the cpu line.
	nice uint64

	// system is column 3 of the cpu line.
	system uint64

	// idle is column 4 of the cpu line.
	idle uint64

	// iowait is column 5 of the cpu line.
	iowait uint64

	// irq is column 6 of the cpu line.
	irq uint64

	// softirq is column 7 of the cpu line.
	softirq uint64

	// steal is column 8 of the cpu line; left at 0 when the column is absent.
	steal uint64

	// guest is column 9 of the cpu line; left at 0 when the column is absent.
	guest uint64
}
|
|
|
|
// NewMetricsService creates a new metrics service
|
|
func NewMetricsService(db *database.DB, log *logger.Logger) *MetricsService {
|
|
return &MetricsService{
|
|
db: db,
|
|
logger: log,
|
|
startTime: time.Now(),
|
|
}
|
|
}
|
|
|
|
// CollectMetrics collects all system metrics
|
|
func (s *MetricsService) CollectMetrics(ctx context.Context) (*Metrics, error) {
|
|
metrics := &Metrics{
|
|
CollectedAt: time.Now(),
|
|
}
|
|
|
|
// Collect system metrics
|
|
sysMetrics, err := s.collectSystemMetrics(ctx)
|
|
if err != nil {
|
|
s.logger.Error("Failed to collect system metrics", "error", err)
|
|
// Set default/zero values if collection fails
|
|
metrics.System = SystemMetrics{}
|
|
} else {
|
|
metrics.System = *sysMetrics
|
|
}
|
|
|
|
// Collect storage metrics
|
|
storageMetrics, err := s.collectStorageMetrics(ctx)
|
|
if err != nil {
|
|
s.logger.Error("Failed to collect storage metrics", "error", err)
|
|
} else {
|
|
metrics.Storage = *storageMetrics
|
|
}
|
|
|
|
// Collect SCST metrics
|
|
scstMetrics, err := s.collectSCSTMetrics(ctx)
|
|
if err != nil {
|
|
s.logger.Error("Failed to collect SCST metrics", "error", err)
|
|
} else {
|
|
metrics.SCST = *scstMetrics
|
|
}
|
|
|
|
// Collect tape metrics
|
|
tapeMetrics, err := s.collectTapeMetrics(ctx)
|
|
if err != nil {
|
|
s.logger.Error("Failed to collect tape metrics", "error", err)
|
|
} else {
|
|
metrics.Tape = *tapeMetrics
|
|
}
|
|
|
|
// Collect VTL metrics
|
|
vtlMetrics, err := s.collectVTLMetrics(ctx)
|
|
if err != nil {
|
|
s.logger.Error("Failed to collect VTL metrics", "error", err)
|
|
} else {
|
|
metrics.VTL = *vtlMetrics
|
|
}
|
|
|
|
// Collect task metrics
|
|
taskMetrics, err := s.collectTaskMetrics(ctx)
|
|
if err != nil {
|
|
s.logger.Error("Failed to collect task metrics", "error", err)
|
|
} else {
|
|
metrics.Tasks = *taskMetrics
|
|
}
|
|
|
|
// API metrics are collected separately via middleware
|
|
metrics.API = APIMetrics{} // Placeholder
|
|
|
|
return metrics, nil
|
|
}
|
|
|
|
// collectSystemMetrics collects system-level metrics
|
|
func (s *MetricsService) collectSystemMetrics(ctx context.Context) (*SystemMetrics, error) {
|
|
// Get system memory from /proc/meminfo
|
|
memoryTotal, memoryUsed, memoryPercent := s.getSystemMemory()
|
|
|
|
// Get CPU usage from /proc/stat
|
|
cpuUsage := s.getCPUUsage()
|
|
|
|
// Get system uptime from /proc/uptime
|
|
uptime := s.getSystemUptime()
|
|
|
|
metrics := &SystemMetrics{
|
|
CPUUsagePercent: cpuUsage,
|
|
MemoryUsed: memoryUsed,
|
|
MemoryTotal: memoryTotal,
|
|
MemoryPercent: memoryPercent,
|
|
DiskUsed: 0, // Would need to read from df
|
|
DiskTotal: 0,
|
|
DiskPercent: 0,
|
|
UptimeSeconds: int64(uptime),
|
|
}
|
|
|
|
return metrics, nil
|
|
}
|
|
|
|
// collectStorageMetrics collects storage metrics
|
|
func (s *MetricsService) collectStorageMetrics(ctx context.Context) (*StorageMetrics, error) {
|
|
// Count disks
|
|
diskQuery := `SELECT COUNT(*) FROM physical_disks WHERE is_active = true`
|
|
var totalDisks int
|
|
if err := s.db.QueryRowContext(ctx, diskQuery).Scan(&totalDisks); err != nil {
|
|
return nil, fmt.Errorf("failed to count disks: %w", err)
|
|
}
|
|
|
|
// Count repositories and calculate capacity
|
|
repoQuery := `
|
|
SELECT COUNT(*), COALESCE(SUM(total_bytes), 0), COALESCE(SUM(used_bytes), 0)
|
|
FROM disk_repositories
|
|
WHERE is_active = true
|
|
`
|
|
var totalRepos int
|
|
var totalCapacity, usedCapacity int64
|
|
if err := s.db.QueryRowContext(ctx, repoQuery).Scan(&totalRepos, &totalCapacity, &usedCapacity); err != nil {
|
|
return nil, fmt.Errorf("failed to query repositories: %w", err)
|
|
}
|
|
|
|
availableBytes := totalCapacity - usedCapacity
|
|
usagePercent := 0.0
|
|
if totalCapacity > 0 {
|
|
usagePercent = float64(usedCapacity) / float64(totalCapacity) * 100
|
|
}
|
|
|
|
return &StorageMetrics{
|
|
TotalDisks: totalDisks,
|
|
TotalRepositories: totalRepos,
|
|
TotalCapacityBytes: totalCapacity,
|
|
UsedCapacityBytes: usedCapacity,
|
|
AvailableBytes: availableBytes,
|
|
UsagePercent: usagePercent,
|
|
}, nil
|
|
}
|
|
|
|
// collectSCSTMetrics collects SCST metrics
|
|
func (s *MetricsService) collectSCSTMetrics(ctx context.Context) (*SCSTMetrics, error) {
|
|
// Count targets
|
|
targetQuery := `SELECT COUNT(*) FROM scst_targets`
|
|
var totalTargets int
|
|
if err := s.db.QueryRowContext(ctx, targetQuery).Scan(&totalTargets); err != nil {
|
|
return nil, fmt.Errorf("failed to count targets: %w", err)
|
|
}
|
|
|
|
// Count LUNs
|
|
lunQuery := `SELECT COUNT(*) FROM scst_luns`
|
|
var totalLUNs int
|
|
if err := s.db.QueryRowContext(ctx, lunQuery).Scan(&totalLUNs); err != nil {
|
|
return nil, fmt.Errorf("failed to count LUNs: %w", err)
|
|
}
|
|
|
|
// Count initiators
|
|
initQuery := `SELECT COUNT(*) FROM scst_initiators`
|
|
var totalInitiators int
|
|
if err := s.db.QueryRowContext(ctx, initQuery).Scan(&totalInitiators); err != nil {
|
|
return nil, fmt.Errorf("failed to count initiators: %w", err)
|
|
}
|
|
|
|
// Active targets (targets with at least one LUN)
|
|
activeQuery := `
|
|
SELECT COUNT(DISTINCT target_id)
|
|
FROM scst_luns
|
|
`
|
|
var activeTargets int
|
|
if err := s.db.QueryRowContext(ctx, activeQuery).Scan(&activeTargets); err != nil {
|
|
activeTargets = 0 // Not critical
|
|
}
|
|
|
|
return &SCSTMetrics{
|
|
TotalTargets: totalTargets,
|
|
TotalLUNs: totalLUNs,
|
|
TotalInitiators: totalInitiators,
|
|
ActiveTargets: activeTargets,
|
|
}, nil
|
|
}
|
|
|
|
// collectTapeMetrics collects physical tape metrics
|
|
func (s *MetricsService) collectTapeMetrics(ctx context.Context) (*TapeMetrics, error) {
|
|
// Count libraries
|
|
libQuery := `SELECT COUNT(*) FROM physical_tape_libraries`
|
|
var totalLibraries int
|
|
if err := s.db.QueryRowContext(ctx, libQuery).Scan(&totalLibraries); err != nil {
|
|
return nil, fmt.Errorf("failed to count libraries: %w", err)
|
|
}
|
|
|
|
// Count drives
|
|
driveQuery := `SELECT COUNT(*) FROM physical_tape_drives`
|
|
var totalDrives int
|
|
if err := s.db.QueryRowContext(ctx, driveQuery).Scan(&totalDrives); err != nil {
|
|
return nil, fmt.Errorf("failed to count drives: %w", err)
|
|
}
|
|
|
|
// Count slots
|
|
slotQuery := `
|
|
SELECT COUNT(*), COUNT(CASE WHEN tape_barcode IS NOT NULL THEN 1 END)
|
|
FROM physical_tape_slots
|
|
`
|
|
var totalSlots, occupiedSlots int
|
|
if err := s.db.QueryRowContext(ctx, slotQuery).Scan(&totalSlots, &occupiedSlots); err != nil {
|
|
return nil, fmt.Errorf("failed to count slots: %w", err)
|
|
}
|
|
|
|
return &TapeMetrics{
|
|
TotalLibraries: totalLibraries,
|
|
TotalDrives: totalDrives,
|
|
TotalSlots: totalSlots,
|
|
OccupiedSlots: occupiedSlots,
|
|
}, nil
|
|
}
|
|
|
|
// collectVTLMetrics collects VTL metrics
|
|
func (s *MetricsService) collectVTLMetrics(ctx context.Context) (*VTLMetrics, error) {
|
|
// Count libraries
|
|
libQuery := `SELECT COUNT(*) FROM virtual_tape_libraries`
|
|
var totalLibraries int
|
|
if err := s.db.QueryRowContext(ctx, libQuery).Scan(&totalLibraries); err != nil {
|
|
return nil, fmt.Errorf("failed to count VTL libraries: %w", err)
|
|
}
|
|
|
|
// Count drives
|
|
driveQuery := `SELECT COUNT(*) FROM virtual_tape_drives`
|
|
var totalDrives int
|
|
if err := s.db.QueryRowContext(ctx, driveQuery).Scan(&totalDrives); err != nil {
|
|
return nil, fmt.Errorf("failed to count VTL drives: %w", err)
|
|
}
|
|
|
|
// Count tapes
|
|
tapeQuery := `SELECT COUNT(*) FROM virtual_tapes`
|
|
var totalTapes int
|
|
if err := s.db.QueryRowContext(ctx, tapeQuery).Scan(&totalTapes); err != nil {
|
|
return nil, fmt.Errorf("failed to count VTL tapes: %w", err)
|
|
}
|
|
|
|
// Count active drives (drives with loaded tape)
|
|
activeQuery := `
|
|
SELECT COUNT(*)
|
|
FROM virtual_tape_drives
|
|
WHERE loaded_tape_id IS NOT NULL
|
|
`
|
|
var activeDrives int
|
|
if err := s.db.QueryRowContext(ctx, activeQuery).Scan(&activeDrives); err != nil {
|
|
activeDrives = 0
|
|
}
|
|
|
|
// Count loaded tapes
|
|
loadedQuery := `
|
|
SELECT COUNT(*)
|
|
FROM virtual_tapes
|
|
WHERE is_loaded = true
|
|
`
|
|
var loadedTapes int
|
|
if err := s.db.QueryRowContext(ctx, loadedQuery).Scan(&loadedTapes); err != nil {
|
|
loadedTapes = 0
|
|
}
|
|
|
|
return &VTLMetrics{
|
|
TotalLibraries: totalLibraries,
|
|
TotalDrives: totalDrives,
|
|
TotalTapes: totalTapes,
|
|
ActiveDrives: activeDrives,
|
|
LoadedTapes: loadedTapes,
|
|
}, nil
|
|
}
|
|
|
|
// collectTaskMetrics collects task execution metrics
|
|
func (s *MetricsService) collectTaskMetrics(ctx context.Context) (*TaskMetrics, error) {
|
|
// Count tasks by status
|
|
query := `
|
|
SELECT
|
|
COUNT(*) as total,
|
|
COUNT(*) FILTER (WHERE status = 'pending') as pending,
|
|
COUNT(*) FILTER (WHERE status = 'running') as running,
|
|
COUNT(*) FILTER (WHERE status = 'completed') as completed,
|
|
COUNT(*) FILTER (WHERE status = 'failed') as failed
|
|
FROM tasks
|
|
`
|
|
var total, pending, running, completed, failed int
|
|
if err := s.db.QueryRowContext(ctx, query).Scan(&total, &pending, &running, &completed, &failed); err != nil {
|
|
return nil, fmt.Errorf("failed to count tasks: %w", err)
|
|
}
|
|
|
|
// Calculate average duration for completed tasks
|
|
avgDurationQuery := `
|
|
SELECT AVG(EXTRACT(EPOCH FROM (completed_at - started_at)))
|
|
FROM tasks
|
|
WHERE status = 'completed' AND started_at IS NOT NULL AND completed_at IS NOT NULL
|
|
`
|
|
var avgDuration sql.NullFloat64
|
|
if err := s.db.QueryRowContext(ctx, avgDurationQuery).Scan(&avgDuration); err != nil {
|
|
avgDuration = sql.NullFloat64{Valid: false}
|
|
}
|
|
|
|
avgDurationSec := 0.0
|
|
if avgDuration.Valid {
|
|
avgDurationSec = avgDuration.Float64
|
|
}
|
|
|
|
return &TaskMetrics{
|
|
TotalTasks: total,
|
|
PendingTasks: pending,
|
|
RunningTasks: running,
|
|
CompletedTasks: completed,
|
|
FailedTasks: failed,
|
|
AvgDurationSec: avgDurationSec,
|
|
}, nil
|
|
}
|
|
|
|
// getSystemUptime reads system uptime from /proc/uptime
|
|
// Returns uptime in seconds, or service uptime as fallback
|
|
func (s *MetricsService) getSystemUptime() float64 {
|
|
file, err := os.Open("/proc/uptime")
|
|
if err != nil {
|
|
// Fallback to service uptime if /proc/uptime is not available
|
|
s.logger.Warn("Failed to read /proc/uptime, using service uptime", "error", err)
|
|
return time.Since(s.startTime).Seconds()
|
|
}
|
|
defer file.Close()
|
|
|
|
scanner := bufio.NewScanner(file)
|
|
if !scanner.Scan() {
|
|
// Fallback to service uptime if file is empty
|
|
s.logger.Warn("Failed to read /proc/uptime content, using service uptime")
|
|
return time.Since(s.startTime).Seconds()
|
|
}
|
|
|
|
line := strings.TrimSpace(scanner.Text())
|
|
fields := strings.Fields(line)
|
|
if len(fields) == 0 {
|
|
// Fallback to service uptime if no data
|
|
s.logger.Warn("No data in /proc/uptime, using service uptime")
|
|
return time.Since(s.startTime).Seconds()
|
|
}
|
|
|
|
// First field is system uptime in seconds
|
|
uptimeSeconds, err := strconv.ParseFloat(fields[0], 64)
|
|
if err != nil {
|
|
// Fallback to service uptime if parsing fails
|
|
s.logger.Warn("Failed to parse /proc/uptime, using service uptime", "error", err)
|
|
return time.Since(s.startTime).Seconds()
|
|
}
|
|
|
|
return uptimeSeconds
|
|
}
|
|
|
|
// getSystemMemory reads system memory from /proc/meminfo
|
|
// Returns total, used (in bytes), and usage percentage
|
|
func (s *MetricsService) getSystemMemory() (int64, int64, float64) {
|
|
file, err := os.Open("/proc/meminfo")
|
|
if err != nil {
|
|
s.logger.Warn("Failed to read /proc/meminfo, using Go runtime memory", "error", err)
|
|
var m runtime.MemStats
|
|
runtime.ReadMemStats(&m)
|
|
memoryUsed := int64(m.Alloc)
|
|
memoryTotal := int64(m.Sys)
|
|
memoryPercent := float64(memoryUsed) / float64(memoryTotal) * 100
|
|
return memoryTotal, memoryUsed, memoryPercent
|
|
}
|
|
defer file.Close()
|
|
|
|
var memTotal, memAvailable, memFree, buffers, cached int64
|
|
scanner := bufio.NewScanner(file)
|
|
|
|
for scanner.Scan() {
|
|
line := strings.TrimSpace(scanner.Text())
|
|
if line == "" {
|
|
continue
|
|
}
|
|
|
|
// Parse line like "MemTotal: 16375596 kB"
|
|
// or "MemTotal: 16375596" (some systems don't have unit)
|
|
colonIdx := strings.Index(line, ":")
|
|
if colonIdx == -1 {
|
|
continue
|
|
}
|
|
|
|
key := strings.TrimSpace(line[:colonIdx])
|
|
valuePart := strings.TrimSpace(line[colonIdx+1:])
|
|
|
|
// Split value part to get number (ignore unit like "kB")
|
|
fields := strings.Fields(valuePart)
|
|
if len(fields) == 0 {
|
|
continue
|
|
}
|
|
|
|
value, err := strconv.ParseInt(fields[0], 10, 64)
|
|
if err != nil {
|
|
continue
|
|
}
|
|
|
|
// Values in /proc/meminfo are in KB, convert to bytes
|
|
valueBytes := value * 1024
|
|
|
|
switch key {
|
|
case "MemTotal":
|
|
memTotal = valueBytes
|
|
case "MemAvailable":
|
|
memAvailable = valueBytes
|
|
case "MemFree":
|
|
memFree = valueBytes
|
|
case "Buffers":
|
|
buffers = valueBytes
|
|
case "Cached":
|
|
cached = valueBytes
|
|
}
|
|
}
|
|
|
|
if err := scanner.Err(); err != nil {
|
|
s.logger.Warn("Error scanning /proc/meminfo", "error", err)
|
|
}
|
|
|
|
if memTotal == 0 {
|
|
s.logger.Warn("Failed to get MemTotal from /proc/meminfo, using Go runtime memory", "memTotal", memTotal)
|
|
var m runtime.MemStats
|
|
runtime.ReadMemStats(&m)
|
|
memoryUsed := int64(m.Alloc)
|
|
memoryTotal := int64(m.Sys)
|
|
memoryPercent := float64(memoryUsed) / float64(memoryTotal) * 100
|
|
return memoryTotal, memoryUsed, memoryPercent
|
|
}
|
|
|
|
// Calculate used memory
|
|
// If MemAvailable exists (kernel 3.14+), use it for more accurate calculation
|
|
var memoryUsed int64
|
|
if memAvailable > 0 {
|
|
memoryUsed = memTotal - memAvailable
|
|
} else {
|
|
// Fallback: MemTotal - MemFree - Buffers - Cached
|
|
memoryUsed = memTotal - memFree - buffers - cached
|
|
if memoryUsed < 0 {
|
|
memoryUsed = memTotal - memFree
|
|
}
|
|
}
|
|
|
|
memoryPercent := float64(memoryUsed) / float64(memTotal) * 100
|
|
|
|
s.logger.Debug("System memory stats",
|
|
"memTotal", memTotal,
|
|
"memAvailable", memAvailable,
|
|
"memoryUsed", memoryUsed,
|
|
"memoryPercent", memoryPercent)
|
|
|
|
return memTotal, memoryUsed, memoryPercent
|
|
}
|
|
|
|
// getCPUUsage reads CPU usage from /proc/stat
|
|
// Requires two readings to calculate percentage
|
|
func (s *MetricsService) getCPUUsage() float64 {
|
|
currentCPU, err := s.readCPUStats()
|
|
if err != nil {
|
|
s.logger.Warn("Failed to read CPU stats", "error", err)
|
|
return 0.0
|
|
}
|
|
|
|
// If this is the first reading, store it and return 0
|
|
if s.lastCPU == nil {
|
|
s.lastCPU = currentCPU
|
|
s.lastCPUTime = time.Now()
|
|
return 0.0
|
|
}
|
|
|
|
// Calculate time difference
|
|
timeDiff := time.Since(s.lastCPUTime).Seconds()
|
|
if timeDiff < 0.1 {
|
|
// Too soon, return previous value or 0
|
|
return 0.0
|
|
}
|
|
|
|
// Calculate total CPU time
|
|
prevTotal := s.lastCPU.user + s.lastCPU.nice + s.lastCPU.system + s.lastCPU.idle +
|
|
s.lastCPU.iowait + s.lastCPU.irq + s.lastCPU.softirq + s.lastCPU.steal + s.lastCPU.guest
|
|
currTotal := currentCPU.user + currentCPU.nice + currentCPU.system + currentCPU.idle +
|
|
currentCPU.iowait + currentCPU.irq + currentCPU.softirq + currentCPU.steal + currentCPU.guest
|
|
|
|
// Calculate idle time
|
|
prevIdle := s.lastCPU.idle + s.lastCPU.iowait
|
|
currIdle := currentCPU.idle + currentCPU.iowait
|
|
|
|
// Calculate used time
|
|
totalDiff := currTotal - prevTotal
|
|
idleDiff := currIdle - prevIdle
|
|
|
|
if totalDiff == 0 {
|
|
return 0.0
|
|
}
|
|
|
|
// Calculate CPU usage percentage
|
|
usagePercent := 100.0 * (1.0 - float64(idleDiff)/float64(totalDiff))
|
|
|
|
// Update last CPU stats
|
|
s.lastCPU = currentCPU
|
|
s.lastCPUTime = time.Now()
|
|
|
|
return usagePercent
|
|
}
|
|
|
|
// readCPUStats reads CPU statistics from /proc/stat
|
|
func (s *MetricsService) readCPUStats() (*cpuStats, error) {
|
|
file, err := os.Open("/proc/stat")
|
|
if err != nil {
|
|
return nil, fmt.Errorf("failed to open /proc/stat: %w", err)
|
|
}
|
|
defer file.Close()
|
|
|
|
scanner := bufio.NewScanner(file)
|
|
if !scanner.Scan() {
|
|
return nil, fmt.Errorf("failed to read /proc/stat")
|
|
}
|
|
|
|
line := strings.TrimSpace(scanner.Text())
|
|
if !strings.HasPrefix(line, "cpu ") {
|
|
return nil, fmt.Errorf("invalid /proc/stat format")
|
|
}
|
|
|
|
fields := strings.Fields(line)
|
|
if len(fields) < 8 {
|
|
return nil, fmt.Errorf("insufficient CPU stats fields")
|
|
}
|
|
|
|
stats := &cpuStats{}
|
|
stats.user, _ = strconv.ParseUint(fields[1], 10, 64)
|
|
stats.nice, _ = strconv.ParseUint(fields[2], 10, 64)
|
|
stats.system, _ = strconv.ParseUint(fields[3], 10, 64)
|
|
stats.idle, _ = strconv.ParseUint(fields[4], 10, 64)
|
|
stats.iowait, _ = strconv.ParseUint(fields[5], 10, 64)
|
|
stats.irq, _ = strconv.ParseUint(fields[6], 10, 64)
|
|
stats.softirq, _ = strconv.ParseUint(fields[7], 10, 64)
|
|
|
|
if len(fields) > 8 {
|
|
stats.steal, _ = strconv.ParseUint(fields[8], 10, 64)
|
|
}
|
|
if len(fields) > 9 {
|
|
stats.guest, _ = strconv.ParseUint(fields[9], 10, 64)
|
|
}
|
|
|
|
return stats, nil
|
|
}
|