218 lines
6.5 KiB
Go
218 lines
6.5 KiB
Go
package metrics
|
|
|
|
import (
|
|
"fmt"
|
|
"sync"
|
|
"time"
|
|
|
|
"gitea.avt.data-center.id/othman.suseno/atlas/internal/models"
|
|
)
|
|
|
|
// Collector gathers system metrics
|
|
type Collector struct {
|
|
mu sync.RWMutex
|
|
|
|
// ZFS metrics
|
|
poolCount int
|
|
datasetCount int
|
|
zvolCount int
|
|
snapshotCount int
|
|
totalCapacity uint64
|
|
totalAllocated uint64
|
|
totalFree uint64
|
|
|
|
// Service metrics
|
|
smbSharesCount int
|
|
nfsExportsCount int
|
|
iscsiTargetsCount int
|
|
smbServiceStatus int // 1 = running, 0 = stopped
|
|
nfsServiceStatus int
|
|
iscsiServiceStatus int
|
|
|
|
// Job metrics
|
|
jobsTotal int
|
|
jobsRunning int
|
|
jobsCompleted int
|
|
jobsFailed int
|
|
|
|
// System metrics
|
|
uptimeSeconds int64
|
|
lastUpdate time.Time
|
|
}
|
|
|
|
// NewCollector creates a new metrics collector
|
|
func NewCollector() *Collector {
|
|
return &Collector{
|
|
lastUpdate: time.Now(),
|
|
}
|
|
}
|
|
|
|
// UpdateZFSMetrics updates ZFS-related metrics
|
|
func (c *Collector) UpdateZFSMetrics(pools []models.Pool, datasets []models.Dataset, zvols []models.ZVOL, snapshots []models.Snapshot) {
|
|
c.mu.Lock()
|
|
defer c.mu.Unlock()
|
|
|
|
c.poolCount = len(pools)
|
|
c.datasetCount = len(datasets)
|
|
c.zvolCount = len(zvols)
|
|
c.snapshotCount = len(snapshots)
|
|
|
|
c.totalCapacity = 0
|
|
c.totalAllocated = 0
|
|
c.totalFree = 0
|
|
|
|
for _, pool := range pools {
|
|
c.totalCapacity += pool.Size
|
|
c.totalAllocated += pool.Allocated
|
|
c.totalFree += pool.Free
|
|
}
|
|
|
|
c.lastUpdate = time.Now()
|
|
}
|
|
|
|
// UpdateServiceMetrics updates storage service metrics
|
|
func (c *Collector) UpdateServiceMetrics(smbShares, nfsExports, iscsiTargets int, smbStatus, nfsStatus, iscsiStatus bool) {
|
|
c.mu.Lock()
|
|
defer c.mu.Unlock()
|
|
|
|
c.smbSharesCount = smbShares
|
|
c.nfsExportsCount = nfsExports
|
|
c.iscsiTargetsCount = iscsiTargets
|
|
|
|
if smbStatus {
|
|
c.smbServiceStatus = 1
|
|
} else {
|
|
c.smbServiceStatus = 0
|
|
}
|
|
|
|
if nfsStatus {
|
|
c.nfsServiceStatus = 1
|
|
} else {
|
|
c.nfsServiceStatus = 0
|
|
}
|
|
|
|
if iscsiStatus {
|
|
c.iscsiServiceStatus = 1
|
|
} else {
|
|
c.iscsiServiceStatus = 0
|
|
}
|
|
|
|
c.lastUpdate = time.Now()
|
|
}
|
|
|
|
// UpdateJobMetrics updates job-related metrics
|
|
func (c *Collector) UpdateJobMetrics(total, running, completed, failed int) {
|
|
c.mu.Lock()
|
|
defer c.mu.Unlock()
|
|
|
|
c.jobsTotal = total
|
|
c.jobsRunning = running
|
|
c.jobsCompleted = completed
|
|
c.jobsFailed = failed
|
|
|
|
c.lastUpdate = time.Now()
|
|
}
|
|
|
|
// SetUptime sets the system uptime
|
|
func (c *Collector) SetUptime(seconds int64) {
|
|
c.mu.Lock()
|
|
defer c.mu.Unlock()
|
|
c.uptimeSeconds = seconds
|
|
}
|
|
|
|
// Collect returns metrics in Prometheus format
|
|
func (c *Collector) Collect() string {
|
|
c.mu.RLock()
|
|
defer c.mu.RUnlock()
|
|
|
|
var output string
|
|
|
|
// Build info
|
|
output += "# HELP atlas_build_info Build information\n"
|
|
output += "# TYPE atlas_build_info gauge\n"
|
|
output += `atlas_build_info{version="v0.1.0-dev"} 1` + "\n\n"
|
|
|
|
// System uptime
|
|
output += "# HELP atlas_uptime_seconds System uptime in seconds\n"
|
|
output += "# TYPE atlas_uptime_seconds gauge\n"
|
|
output += fmt.Sprintf("atlas_uptime_seconds %d\n\n", c.uptimeSeconds)
|
|
|
|
// ZFS metrics
|
|
output += "# HELP atlas_zfs_pools_total Total number of ZFS pools\n"
|
|
output += "# TYPE atlas_zfs_pools_total gauge\n"
|
|
output += fmt.Sprintf("atlas_zfs_pools_total %d\n\n", c.poolCount)
|
|
|
|
output += "# HELP atlas_zfs_datasets_total Total number of ZFS datasets\n"
|
|
output += "# TYPE atlas_zfs_datasets_total gauge\n"
|
|
output += fmt.Sprintf("atlas_zfs_datasets_total %d\n\n", c.datasetCount)
|
|
|
|
output += "# HELP atlas_zfs_zvols_total Total number of ZFS ZVOLs\n"
|
|
output += "# TYPE atlas_zfs_zvols_total gauge\n"
|
|
output += fmt.Sprintf("atlas_zfs_zvols_total %d\n\n", c.zvolCount)
|
|
|
|
output += "# HELP atlas_zfs_snapshots_total Total number of ZFS snapshots\n"
|
|
output += "# TYPE atlas_zfs_snapshots_total gauge\n"
|
|
output += fmt.Sprintf("atlas_zfs_snapshots_total %d\n\n", c.snapshotCount)
|
|
|
|
output += "# HELP atlas_zfs_capacity_bytes Total ZFS pool capacity in bytes\n"
|
|
output += "# TYPE atlas_zfs_capacity_bytes gauge\n"
|
|
output += fmt.Sprintf("atlas_zfs_capacity_bytes %d\n\n", c.totalCapacity)
|
|
|
|
output += "# HELP atlas_zfs_allocated_bytes Total ZFS pool allocated space in bytes\n"
|
|
output += "# TYPE atlas_zfs_allocated_bytes gauge\n"
|
|
output += fmt.Sprintf("atlas_zfs_allocated_bytes %d\n\n", c.totalAllocated)
|
|
|
|
output += "# HELP atlas_zfs_free_bytes Total ZFS pool free space in bytes\n"
|
|
output += "# TYPE atlas_zfs_free_bytes gauge\n"
|
|
output += fmt.Sprintf("atlas_zfs_free_bytes %d\n\n", c.totalFree)
|
|
|
|
// Service metrics
|
|
output += "# HELP atlas_smb_shares_total Total number of SMB shares\n"
|
|
output += "# TYPE atlas_smb_shares_total gauge\n"
|
|
output += fmt.Sprintf("atlas_smb_shares_total %d\n\n", c.smbSharesCount)
|
|
|
|
output += "# HELP atlas_nfs_exports_total Total number of NFS exports\n"
|
|
output += "# TYPE atlas_nfs_exports_total gauge\n"
|
|
output += fmt.Sprintf("atlas_nfs_exports_total %d\n\n", c.nfsExportsCount)
|
|
|
|
output += "# HELP atlas_iscsi_targets_total Total number of iSCSI targets\n"
|
|
output += "# TYPE atlas_iscsi_targets_total gauge\n"
|
|
output += fmt.Sprintf("atlas_iscsi_targets_total %d\n\n", c.iscsiTargetsCount)
|
|
|
|
output += "# HELP atlas_smb_service_status SMB service status (1=running, 0=stopped)\n"
|
|
output += "# TYPE atlas_smb_service_status gauge\n"
|
|
output += fmt.Sprintf("atlas_smb_service_status %d\n\n", c.smbServiceStatus)
|
|
|
|
output += "# HELP atlas_nfs_service_status NFS service status (1=running, 0=stopped)\n"
|
|
output += "# TYPE atlas_nfs_service_status gauge\n"
|
|
output += fmt.Sprintf("atlas_nfs_service_status %d\n\n", c.nfsServiceStatus)
|
|
|
|
output += "# HELP atlas_iscsi_service_status iSCSI service status (1=running, 0=stopped)\n"
|
|
output += "# TYPE atlas_iscsi_service_status gauge\n"
|
|
output += fmt.Sprintf("atlas_iscsi_service_status %d\n\n", c.iscsiServiceStatus)
|
|
|
|
// Job metrics
|
|
output += "# HELP atlas_jobs_total Total number of jobs\n"
|
|
output += "# TYPE atlas_jobs_total gauge\n"
|
|
output += fmt.Sprintf("atlas_jobs_total %d\n\n", c.jobsTotal)
|
|
|
|
output += "# HELP atlas_jobs_running Number of running jobs\n"
|
|
output += "# TYPE atlas_jobs_running gauge\n"
|
|
output += fmt.Sprintf("atlas_jobs_running %d\n\n", c.jobsRunning)
|
|
|
|
output += "# HELP atlas_jobs_completed_total Total number of completed jobs\n"
|
|
output += "# TYPE atlas_jobs_completed_total counter\n"
|
|
output += fmt.Sprintf("atlas_jobs_completed_total %d\n\n", c.jobsCompleted)
|
|
|
|
output += "# HELP atlas_jobs_failed_total Total number of failed jobs\n"
|
|
output += "# TYPE atlas_jobs_failed_total counter\n"
|
|
output += fmt.Sprintf("atlas_jobs_failed_total %d\n\n", c.jobsFailed)
|
|
|
|
// API status
|
|
output += "# HELP atlas_up Whether the atlas-api process is up\n"
|
|
output += "# TYPE atlas_up gauge\n"
|
|
output += "atlas_up 1\n"
|
|
|
|
return output
|
|
}
|