This commit is contained in:
217
internal/metrics/collector.go
Normal file
217
internal/metrics/collector.go
Normal file
@@ -0,0 +1,217 @@
|
||||
package metrics
|
||||
|
||||
import (
	"fmt"
	"strings"
	"sync"
	"time"

	"gitea.avt.data-center.id/othman.suseno/atlas/internal/models"
)
|
||||
|
||||
// Collector gathers system metrics
|
||||
type Collector struct {
|
||||
mu sync.RWMutex
|
||||
|
||||
// ZFS metrics
|
||||
poolCount int
|
||||
datasetCount int
|
||||
zvolCount int
|
||||
snapshotCount int
|
||||
totalCapacity uint64
|
||||
totalAllocated uint64
|
||||
totalFree uint64
|
||||
|
||||
// Service metrics
|
||||
smbSharesCount int
|
||||
nfsExportsCount int
|
||||
iscsiTargetsCount int
|
||||
smbServiceStatus int // 1 = running, 0 = stopped
|
||||
nfsServiceStatus int
|
||||
iscsiServiceStatus int
|
||||
|
||||
// Job metrics
|
||||
jobsTotal int
|
||||
jobsRunning int
|
||||
jobsCompleted int
|
||||
jobsFailed int
|
||||
|
||||
// System metrics
|
||||
uptimeSeconds int64
|
||||
lastUpdate time.Time
|
||||
}
|
||||
|
||||
// NewCollector creates a new metrics collector
|
||||
func NewCollector() *Collector {
|
||||
return &Collector{
|
||||
lastUpdate: time.Now(),
|
||||
}
|
||||
}
|
||||
|
||||
// UpdateZFSMetrics updates ZFS-related metrics
|
||||
func (c *Collector) UpdateZFSMetrics(pools []models.Pool, datasets []models.Dataset, zvols []models.ZVOL, snapshots []models.Snapshot) {
|
||||
c.mu.Lock()
|
||||
defer c.mu.Unlock()
|
||||
|
||||
c.poolCount = len(pools)
|
||||
c.datasetCount = len(datasets)
|
||||
c.zvolCount = len(zvols)
|
||||
c.snapshotCount = len(snapshots)
|
||||
|
||||
c.totalCapacity = 0
|
||||
c.totalAllocated = 0
|
||||
c.totalFree = 0
|
||||
|
||||
for _, pool := range pools {
|
||||
c.totalCapacity += pool.Size
|
||||
c.totalAllocated += pool.Allocated
|
||||
c.totalFree += pool.Free
|
||||
}
|
||||
|
||||
c.lastUpdate = time.Now()
|
||||
}
|
||||
|
||||
// UpdateServiceMetrics updates storage service metrics
|
||||
func (c *Collector) UpdateServiceMetrics(smbShares, nfsExports, iscsiTargets int, smbStatus, nfsStatus, iscsiStatus bool) {
|
||||
c.mu.Lock()
|
||||
defer c.mu.Unlock()
|
||||
|
||||
c.smbSharesCount = smbShares
|
||||
c.nfsExportsCount = nfsExports
|
||||
c.iscsiTargetsCount = iscsiTargets
|
||||
|
||||
if smbStatus {
|
||||
c.smbServiceStatus = 1
|
||||
} else {
|
||||
c.smbServiceStatus = 0
|
||||
}
|
||||
|
||||
if nfsStatus {
|
||||
c.nfsServiceStatus = 1
|
||||
} else {
|
||||
c.nfsServiceStatus = 0
|
||||
}
|
||||
|
||||
if iscsiStatus {
|
||||
c.iscsiServiceStatus = 1
|
||||
} else {
|
||||
c.iscsiServiceStatus = 0
|
||||
}
|
||||
|
||||
c.lastUpdate = time.Now()
|
||||
}
|
||||
|
||||
// UpdateJobMetrics updates job-related metrics
|
||||
func (c *Collector) UpdateJobMetrics(total, running, completed, failed int) {
|
||||
c.mu.Lock()
|
||||
defer c.mu.Unlock()
|
||||
|
||||
c.jobsTotal = total
|
||||
c.jobsRunning = running
|
||||
c.jobsCompleted = completed
|
||||
c.jobsFailed = failed
|
||||
|
||||
c.lastUpdate = time.Now()
|
||||
}
|
||||
|
||||
// SetUptime sets the system uptime
|
||||
func (c *Collector) SetUptime(seconds int64) {
|
||||
c.mu.Lock()
|
||||
defer c.mu.Unlock()
|
||||
c.uptimeSeconds = seconds
|
||||
}
|
||||
|
||||
// Collect returns metrics in Prometheus format
|
||||
func (c *Collector) Collect() string {
|
||||
c.mu.RLock()
|
||||
defer c.mu.RUnlock()
|
||||
|
||||
var output string
|
||||
|
||||
// Build info
|
||||
output += "# HELP atlas_build_info Build information\n"
|
||||
output += "# TYPE atlas_build_info gauge\n"
|
||||
output += `atlas_build_info{version="v0.1.0-dev"} 1` + "\n\n"
|
||||
|
||||
// System uptime
|
||||
output += "# HELP atlas_uptime_seconds System uptime in seconds\n"
|
||||
output += "# TYPE atlas_uptime_seconds gauge\n"
|
||||
output += fmt.Sprintf("atlas_uptime_seconds %d\n\n", c.uptimeSeconds)
|
||||
|
||||
// ZFS metrics
|
||||
output += "# HELP atlas_zfs_pools_total Total number of ZFS pools\n"
|
||||
output += "# TYPE atlas_zfs_pools_total gauge\n"
|
||||
output += fmt.Sprintf("atlas_zfs_pools_total %d\n\n", c.poolCount)
|
||||
|
||||
output += "# HELP atlas_zfs_datasets_total Total number of ZFS datasets\n"
|
||||
output += "# TYPE atlas_zfs_datasets_total gauge\n"
|
||||
output += fmt.Sprintf("atlas_zfs_datasets_total %d\n\n", c.datasetCount)
|
||||
|
||||
output += "# HELP atlas_zfs_zvols_total Total number of ZFS ZVOLs\n"
|
||||
output += "# TYPE atlas_zfs_zvols_total gauge\n"
|
||||
output += fmt.Sprintf("atlas_zfs_zvols_total %d\n\n", c.zvolCount)
|
||||
|
||||
output += "# HELP atlas_zfs_snapshots_total Total number of ZFS snapshots\n"
|
||||
output += "# TYPE atlas_zfs_snapshots_total gauge\n"
|
||||
output += fmt.Sprintf("atlas_zfs_snapshots_total %d\n\n", c.snapshotCount)
|
||||
|
||||
output += "# HELP atlas_zfs_capacity_bytes Total ZFS pool capacity in bytes\n"
|
||||
output += "# TYPE atlas_zfs_capacity_bytes gauge\n"
|
||||
output += fmt.Sprintf("atlas_zfs_capacity_bytes %d\n\n", c.totalCapacity)
|
||||
|
||||
output += "# HELP atlas_zfs_allocated_bytes Total ZFS pool allocated space in bytes\n"
|
||||
output += "# TYPE atlas_zfs_allocated_bytes gauge\n"
|
||||
output += fmt.Sprintf("atlas_zfs_allocated_bytes %d\n\n", c.totalAllocated)
|
||||
|
||||
output += "# HELP atlas_zfs_free_bytes Total ZFS pool free space in bytes\n"
|
||||
output += "# TYPE atlas_zfs_free_bytes gauge\n"
|
||||
output += fmt.Sprintf("atlas_zfs_free_bytes %d\n\n", c.totalFree)
|
||||
|
||||
// Service metrics
|
||||
output += "# HELP atlas_smb_shares_total Total number of SMB shares\n"
|
||||
output += "# TYPE atlas_smb_shares_total gauge\n"
|
||||
output += fmt.Sprintf("atlas_smb_shares_total %d\n\n", c.smbSharesCount)
|
||||
|
||||
output += "# HELP atlas_nfs_exports_total Total number of NFS exports\n"
|
||||
output += "# TYPE atlas_nfs_exports_total gauge\n"
|
||||
output += fmt.Sprintf("atlas_nfs_exports_total %d\n\n", c.nfsExportsCount)
|
||||
|
||||
output += "# HELP atlas_iscsi_targets_total Total number of iSCSI targets\n"
|
||||
output += "# TYPE atlas_iscsi_targets_total gauge\n"
|
||||
output += fmt.Sprintf("atlas_iscsi_targets_total %d\n\n", c.iscsiTargetsCount)
|
||||
|
||||
output += "# HELP atlas_smb_service_status SMB service status (1=running, 0=stopped)\n"
|
||||
output += "# TYPE atlas_smb_service_status gauge\n"
|
||||
output += fmt.Sprintf("atlas_smb_service_status %d\n\n", c.smbServiceStatus)
|
||||
|
||||
output += "# HELP atlas_nfs_service_status NFS service status (1=running, 0=stopped)\n"
|
||||
output += "# TYPE atlas_nfs_service_status gauge\n"
|
||||
output += fmt.Sprintf("atlas_nfs_service_status %d\n\n", c.nfsServiceStatus)
|
||||
|
||||
output += "# HELP atlas_iscsi_service_status iSCSI service status (1=running, 0=stopped)\n"
|
||||
output += "# TYPE atlas_iscsi_service_status gauge\n"
|
||||
output += fmt.Sprintf("atlas_iscsi_service_status %d\n\n", c.iscsiServiceStatus)
|
||||
|
||||
// Job metrics
|
||||
output += "# HELP atlas_jobs_total Total number of jobs\n"
|
||||
output += "# TYPE atlas_jobs_total gauge\n"
|
||||
output += fmt.Sprintf("atlas_jobs_total %d\n\n", c.jobsTotal)
|
||||
|
||||
output += "# HELP atlas_jobs_running Number of running jobs\n"
|
||||
output += "# TYPE atlas_jobs_running gauge\n"
|
||||
output += fmt.Sprintf("atlas_jobs_running %d\n\n", c.jobsRunning)
|
||||
|
||||
output += "# HELP atlas_jobs_completed_total Total number of completed jobs\n"
|
||||
output += "# TYPE atlas_jobs_completed_total counter\n"
|
||||
output += fmt.Sprintf("atlas_jobs_completed_total %d\n\n", c.jobsCompleted)
|
||||
|
||||
output += "# HELP atlas_jobs_failed_total Total number of failed jobs\n"
|
||||
output += "# TYPE atlas_jobs_failed_total counter\n"
|
||||
output += fmt.Sprintf("atlas_jobs_failed_total %d\n\n", c.jobsFailed)
|
||||
|
||||
// API status
|
||||
output += "# HELP atlas_up Whether the atlas-api process is up\n"
|
||||
output += "# TYPE atlas_up gauge\n"
|
||||
output += "atlas_up 1\n"
|
||||
|
||||
return output
|
||||
}
|
||||
Reference in New Issue
Block a user