start working on the frontend side
This commit is contained in:
405
backend/internal/monitoring/metrics.go
Normal file
405
backend/internal/monitoring/metrics.go
Normal file
@@ -0,0 +1,405 @@
|
||||
package monitoring
|
||||
|
||||
import (
|
||||
"context"
|
||||
"database/sql"
|
||||
"fmt"
|
||||
"runtime"
|
||||
"time"
|
||||
|
||||
"github.com/atlasos/calypso/internal/common/database"
|
||||
"github.com/atlasos/calypso/internal/common/logger"
|
||||
)
|
||||
|
||||
// Metrics represents system metrics
|
||||
type Metrics struct {
|
||||
System SystemMetrics `json:"system"`
|
||||
Storage StorageMetrics `json:"storage"`
|
||||
SCST SCSTMetrics `json:"scst"`
|
||||
Tape TapeMetrics `json:"tape"`
|
||||
VTL VTLMetrics `json:"vtl"`
|
||||
Tasks TaskMetrics `json:"tasks"`
|
||||
API APIMetrics `json:"api"`
|
||||
CollectedAt time.Time `json:"collected_at"`
|
||||
}
|
||||
|
||||
// SystemMetrics represents system-level metrics
|
||||
type SystemMetrics struct {
|
||||
CPUUsagePercent float64 `json:"cpu_usage_percent"`
|
||||
MemoryUsed int64 `json:"memory_used_bytes"`
|
||||
MemoryTotal int64 `json:"memory_total_bytes"`
|
||||
MemoryPercent float64 `json:"memory_usage_percent"`
|
||||
DiskUsed int64 `json:"disk_used_bytes"`
|
||||
DiskTotal int64 `json:"disk_total_bytes"`
|
||||
DiskPercent float64 `json:"disk_usage_percent"`
|
||||
UptimeSeconds int64 `json:"uptime_seconds"`
|
||||
}
|
||||
|
||||
// StorageMetrics represents storage metrics
|
||||
type StorageMetrics struct {
|
||||
TotalDisks int `json:"total_disks"`
|
||||
TotalRepositories int `json:"total_repositories"`
|
||||
TotalCapacityBytes int64 `json:"total_capacity_bytes"`
|
||||
UsedCapacityBytes int64 `json:"used_capacity_bytes"`
|
||||
AvailableBytes int64 `json:"available_bytes"`
|
||||
UsagePercent float64 `json:"usage_percent"`
|
||||
}
|
||||
|
||||
// SCSTMetrics represents SCST metrics
|
||||
type SCSTMetrics struct {
|
||||
TotalTargets int `json:"total_targets"`
|
||||
TotalLUNs int `json:"total_luns"`
|
||||
TotalInitiators int `json:"total_initiators"`
|
||||
ActiveTargets int `json:"active_targets"`
|
||||
}
|
||||
|
||||
// TapeMetrics represents physical tape metrics
|
||||
type TapeMetrics struct {
|
||||
TotalLibraries int `json:"total_libraries"`
|
||||
TotalDrives int `json:"total_drives"`
|
||||
TotalSlots int `json:"total_slots"`
|
||||
OccupiedSlots int `json:"occupied_slots"`
|
||||
}
|
||||
|
||||
// VTLMetrics represents virtual tape library metrics
|
||||
type VTLMetrics struct {
|
||||
TotalLibraries int `json:"total_libraries"`
|
||||
TotalDrives int `json:"total_drives"`
|
||||
TotalTapes int `json:"total_tapes"`
|
||||
ActiveDrives int `json:"active_drives"`
|
||||
LoadedTapes int `json:"loaded_tapes"`
|
||||
}
|
||||
|
||||
// TaskMetrics represents task execution metrics
|
||||
type TaskMetrics struct {
|
||||
TotalTasks int `json:"total_tasks"`
|
||||
PendingTasks int `json:"pending_tasks"`
|
||||
RunningTasks int `json:"running_tasks"`
|
||||
CompletedTasks int `json:"completed_tasks"`
|
||||
FailedTasks int `json:"failed_tasks"`
|
||||
AvgDurationSec float64 `json:"avg_duration_seconds"`
|
||||
}
|
||||
|
||||
// APIMetrics represents API metrics
|
||||
type APIMetrics struct {
|
||||
TotalRequests int64 `json:"total_requests"`
|
||||
RequestsPerSec float64 `json:"requests_per_second"`
|
||||
ErrorRate float64 `json:"error_rate"`
|
||||
AvgLatencyMs float64 `json:"avg_latency_ms"`
|
||||
ActiveConnections int `json:"active_connections"`
|
||||
}
|
||||
|
||||
// MetricsService collects and provides system metrics
|
||||
type MetricsService struct {
|
||||
db *database.DB
|
||||
logger *logger.Logger
|
||||
startTime time.Time
|
||||
}
|
||||
|
||||
// NewMetricsService creates a new metrics service
|
||||
func NewMetricsService(db *database.DB, log *logger.Logger) *MetricsService {
|
||||
return &MetricsService{
|
||||
db: db,
|
||||
logger: log,
|
||||
startTime: time.Now(),
|
||||
}
|
||||
}
|
||||
|
||||
// CollectMetrics collects all system metrics
|
||||
func (s *MetricsService) CollectMetrics(ctx context.Context) (*Metrics, error) {
|
||||
metrics := &Metrics{
|
||||
CollectedAt: time.Now(),
|
||||
}
|
||||
|
||||
// Collect system metrics
|
||||
sysMetrics, err := s.collectSystemMetrics(ctx)
|
||||
if err != nil {
|
||||
s.logger.Error("Failed to collect system metrics", "error", err)
|
||||
} else {
|
||||
metrics.System = *sysMetrics
|
||||
}
|
||||
|
||||
// Collect storage metrics
|
||||
storageMetrics, err := s.collectStorageMetrics(ctx)
|
||||
if err != nil {
|
||||
s.logger.Error("Failed to collect storage metrics", "error", err)
|
||||
} else {
|
||||
metrics.Storage = *storageMetrics
|
||||
}
|
||||
|
||||
// Collect SCST metrics
|
||||
scstMetrics, err := s.collectSCSTMetrics(ctx)
|
||||
if err != nil {
|
||||
s.logger.Error("Failed to collect SCST metrics", "error", err)
|
||||
} else {
|
||||
metrics.SCST = *scstMetrics
|
||||
}
|
||||
|
||||
// Collect tape metrics
|
||||
tapeMetrics, err := s.collectTapeMetrics(ctx)
|
||||
if err != nil {
|
||||
s.logger.Error("Failed to collect tape metrics", "error", err)
|
||||
} else {
|
||||
metrics.Tape = *tapeMetrics
|
||||
}
|
||||
|
||||
// Collect VTL metrics
|
||||
vtlMetrics, err := s.collectVTLMetrics(ctx)
|
||||
if err != nil {
|
||||
s.logger.Error("Failed to collect VTL metrics", "error", err)
|
||||
} else {
|
||||
metrics.VTL = *vtlMetrics
|
||||
}
|
||||
|
||||
// Collect task metrics
|
||||
taskMetrics, err := s.collectTaskMetrics(ctx)
|
||||
if err != nil {
|
||||
s.logger.Error("Failed to collect task metrics", "error", err)
|
||||
} else {
|
||||
metrics.Tasks = *taskMetrics
|
||||
}
|
||||
|
||||
// API metrics are collected separately via middleware
|
||||
metrics.API = APIMetrics{} // Placeholder
|
||||
|
||||
return metrics, nil
|
||||
}
|
||||
|
||||
// collectSystemMetrics collects system-level metrics
|
||||
func (s *MetricsService) collectSystemMetrics(ctx context.Context) (*SystemMetrics, error) {
|
||||
var m runtime.MemStats
|
||||
runtime.ReadMemStats(&m)
|
||||
|
||||
// Get memory info
|
||||
memoryUsed := int64(m.Alloc)
|
||||
memoryTotal := int64(m.Sys)
|
||||
memoryPercent := float64(memoryUsed) / float64(memoryTotal) * 100
|
||||
|
||||
// Uptime
|
||||
uptime := time.Since(s.startTime).Seconds()
|
||||
|
||||
// CPU and disk would require external tools or system calls
|
||||
// For now, we'll use placeholders
|
||||
metrics := &SystemMetrics{
|
||||
CPUUsagePercent: 0.0, // Would need to read from /proc/stat
|
||||
MemoryUsed: memoryUsed,
|
||||
MemoryTotal: memoryTotal,
|
||||
MemoryPercent: memoryPercent,
|
||||
DiskUsed: 0, // Would need to read from df
|
||||
DiskTotal: 0,
|
||||
DiskPercent: 0,
|
||||
UptimeSeconds: int64(uptime),
|
||||
}
|
||||
|
||||
return metrics, nil
|
||||
}
|
||||
|
||||
// collectStorageMetrics collects storage metrics
|
||||
func (s *MetricsService) collectStorageMetrics(ctx context.Context) (*StorageMetrics, error) {
|
||||
// Count disks
|
||||
diskQuery := `SELECT COUNT(*) FROM physical_disks WHERE is_active = true`
|
||||
var totalDisks int
|
||||
if err := s.db.QueryRowContext(ctx, diskQuery).Scan(&totalDisks); err != nil {
|
||||
return nil, fmt.Errorf("failed to count disks: %w", err)
|
||||
}
|
||||
|
||||
// Count repositories and calculate capacity
|
||||
repoQuery := `
|
||||
SELECT COUNT(*), COALESCE(SUM(total_bytes), 0), COALESCE(SUM(used_bytes), 0)
|
||||
FROM disk_repositories
|
||||
WHERE is_active = true
|
||||
`
|
||||
var totalRepos int
|
||||
var totalCapacity, usedCapacity int64
|
||||
if err := s.db.QueryRowContext(ctx, repoQuery).Scan(&totalRepos, &totalCapacity, &usedCapacity); err != nil {
|
||||
return nil, fmt.Errorf("failed to query repositories: %w", err)
|
||||
}
|
||||
|
||||
availableBytes := totalCapacity - usedCapacity
|
||||
usagePercent := 0.0
|
||||
if totalCapacity > 0 {
|
||||
usagePercent = float64(usedCapacity) / float64(totalCapacity) * 100
|
||||
}
|
||||
|
||||
return &StorageMetrics{
|
||||
TotalDisks: totalDisks,
|
||||
TotalRepositories: totalRepos,
|
||||
TotalCapacityBytes: totalCapacity,
|
||||
UsedCapacityBytes: usedCapacity,
|
||||
AvailableBytes: availableBytes,
|
||||
UsagePercent: usagePercent,
|
||||
}, nil
|
||||
}
|
||||
|
||||
// collectSCSTMetrics collects SCST metrics
|
||||
func (s *MetricsService) collectSCSTMetrics(ctx context.Context) (*SCSTMetrics, error) {
|
||||
// Count targets
|
||||
targetQuery := `SELECT COUNT(*) FROM scst_targets`
|
||||
var totalTargets int
|
||||
if err := s.db.QueryRowContext(ctx, targetQuery).Scan(&totalTargets); err != nil {
|
||||
return nil, fmt.Errorf("failed to count targets: %w", err)
|
||||
}
|
||||
|
||||
// Count LUNs
|
||||
lunQuery := `SELECT COUNT(*) FROM scst_luns`
|
||||
var totalLUNs int
|
||||
if err := s.db.QueryRowContext(ctx, lunQuery).Scan(&totalLUNs); err != nil {
|
||||
return nil, fmt.Errorf("failed to count LUNs: %w", err)
|
||||
}
|
||||
|
||||
// Count initiators
|
||||
initQuery := `SELECT COUNT(*) FROM scst_initiators`
|
||||
var totalInitiators int
|
||||
if err := s.db.QueryRowContext(ctx, initQuery).Scan(&totalInitiators); err != nil {
|
||||
return nil, fmt.Errorf("failed to count initiators: %w", err)
|
||||
}
|
||||
|
||||
// Active targets (targets with at least one LUN)
|
||||
activeQuery := `
|
||||
SELECT COUNT(DISTINCT target_id)
|
||||
FROM scst_luns
|
||||
`
|
||||
var activeTargets int
|
||||
if err := s.db.QueryRowContext(ctx, activeQuery).Scan(&activeTargets); err != nil {
|
||||
activeTargets = 0 // Not critical
|
||||
}
|
||||
|
||||
return &SCSTMetrics{
|
||||
TotalTargets: totalTargets,
|
||||
TotalLUNs: totalLUNs,
|
||||
TotalInitiators: totalInitiators,
|
||||
ActiveTargets: activeTargets,
|
||||
}, nil
|
||||
}
|
||||
|
||||
// collectTapeMetrics collects physical tape metrics
|
||||
func (s *MetricsService) collectTapeMetrics(ctx context.Context) (*TapeMetrics, error) {
|
||||
// Count libraries
|
||||
libQuery := `SELECT COUNT(*) FROM physical_tape_libraries`
|
||||
var totalLibraries int
|
||||
if err := s.db.QueryRowContext(ctx, libQuery).Scan(&totalLibraries); err != nil {
|
||||
return nil, fmt.Errorf("failed to count libraries: %w", err)
|
||||
}
|
||||
|
||||
// Count drives
|
||||
driveQuery := `SELECT COUNT(*) FROM physical_tape_drives`
|
||||
var totalDrives int
|
||||
if err := s.db.QueryRowContext(ctx, driveQuery).Scan(&totalDrives); err != nil {
|
||||
return nil, fmt.Errorf("failed to count drives: %w", err)
|
||||
}
|
||||
|
||||
// Count slots
|
||||
slotQuery := `
|
||||
SELECT COUNT(*), COUNT(CASE WHEN tape_barcode IS NOT NULL THEN 1 END)
|
||||
FROM physical_tape_slots
|
||||
`
|
||||
var totalSlots, occupiedSlots int
|
||||
if err := s.db.QueryRowContext(ctx, slotQuery).Scan(&totalSlots, &occupiedSlots); err != nil {
|
||||
return nil, fmt.Errorf("failed to count slots: %w", err)
|
||||
}
|
||||
|
||||
return &TapeMetrics{
|
||||
TotalLibraries: totalLibraries,
|
||||
TotalDrives: totalDrives,
|
||||
TotalSlots: totalSlots,
|
||||
OccupiedSlots: occupiedSlots,
|
||||
}, nil
|
||||
}
|
||||
|
||||
// collectVTLMetrics collects VTL metrics
|
||||
func (s *MetricsService) collectVTLMetrics(ctx context.Context) (*VTLMetrics, error) {
|
||||
// Count libraries
|
||||
libQuery := `SELECT COUNT(*) FROM virtual_tape_libraries`
|
||||
var totalLibraries int
|
||||
if err := s.db.QueryRowContext(ctx, libQuery).Scan(&totalLibraries); err != nil {
|
||||
return nil, fmt.Errorf("failed to count VTL libraries: %w", err)
|
||||
}
|
||||
|
||||
// Count drives
|
||||
driveQuery := `SELECT COUNT(*) FROM virtual_tape_drives`
|
||||
var totalDrives int
|
||||
if err := s.db.QueryRowContext(ctx, driveQuery).Scan(&totalDrives); err != nil {
|
||||
return nil, fmt.Errorf("failed to count VTL drives: %w", err)
|
||||
}
|
||||
|
||||
// Count tapes
|
||||
tapeQuery := `SELECT COUNT(*) FROM virtual_tapes`
|
||||
var totalTapes int
|
||||
if err := s.db.QueryRowContext(ctx, tapeQuery).Scan(&totalTapes); err != nil {
|
||||
return nil, fmt.Errorf("failed to count VTL tapes: %w", err)
|
||||
}
|
||||
|
||||
// Count active drives (drives with loaded tape)
|
||||
activeQuery := `
|
||||
SELECT COUNT(*)
|
||||
FROM virtual_tape_drives
|
||||
WHERE loaded_tape_id IS NOT NULL
|
||||
`
|
||||
var activeDrives int
|
||||
if err := s.db.QueryRowContext(ctx, activeQuery).Scan(&activeDrives); err != nil {
|
||||
activeDrives = 0
|
||||
}
|
||||
|
||||
// Count loaded tapes
|
||||
loadedQuery := `
|
||||
SELECT COUNT(*)
|
||||
FROM virtual_tapes
|
||||
WHERE is_loaded = true
|
||||
`
|
||||
var loadedTapes int
|
||||
if err := s.db.QueryRowContext(ctx, loadedQuery).Scan(&loadedTapes); err != nil {
|
||||
loadedTapes = 0
|
||||
}
|
||||
|
||||
return &VTLMetrics{
|
||||
TotalLibraries: totalLibraries,
|
||||
TotalDrives: totalDrives,
|
||||
TotalTapes: totalTapes,
|
||||
ActiveDrives: activeDrives,
|
||||
LoadedTapes: loadedTapes,
|
||||
}, nil
|
||||
}
|
||||
|
||||
// collectTaskMetrics collects task execution metrics
|
||||
func (s *MetricsService) collectTaskMetrics(ctx context.Context) (*TaskMetrics, error) {
|
||||
// Count tasks by status
|
||||
query := `
|
||||
SELECT
|
||||
COUNT(*) as total,
|
||||
COUNT(*) FILTER (WHERE status = 'pending') as pending,
|
||||
COUNT(*) FILTER (WHERE status = 'running') as running,
|
||||
COUNT(*) FILTER (WHERE status = 'completed') as completed,
|
||||
COUNT(*) FILTER (WHERE status = 'failed') as failed
|
||||
FROM tasks
|
||||
`
|
||||
var total, pending, running, completed, failed int
|
||||
if err := s.db.QueryRowContext(ctx, query).Scan(&total, &pending, &running, &completed, &failed); err != nil {
|
||||
return nil, fmt.Errorf("failed to count tasks: %w", err)
|
||||
}
|
||||
|
||||
// Calculate average duration for completed tasks
|
||||
avgDurationQuery := `
|
||||
SELECT AVG(EXTRACT(EPOCH FROM (completed_at - started_at)))
|
||||
FROM tasks
|
||||
WHERE status = 'completed' AND started_at IS NOT NULL AND completed_at IS NOT NULL
|
||||
`
|
||||
var avgDuration sql.NullFloat64
|
||||
if err := s.db.QueryRowContext(ctx, avgDurationQuery).Scan(&avgDuration); err != nil {
|
||||
avgDuration = sql.NullFloat64{Valid: false}
|
||||
}
|
||||
|
||||
avgDurationSec := 0.0
|
||||
if avgDuration.Valid {
|
||||
avgDurationSec = avgDuration.Float64
|
||||
}
|
||||
|
||||
return &TaskMetrics{
|
||||
TotalTasks: total,
|
||||
PendingTasks: pending,
|
||||
RunningTasks: running,
|
||||
CompletedTasks: completed,
|
||||
FailedTasks: failed,
|
||||
AvgDurationSec: avgDurationSec,
|
||||
}, nil
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user