package storage

import (
	"context"
	"database/sql"
	"errors"
	"fmt"
	"math"
	"os/exec"
	"regexp"
	"strconv"
	"strings"
	"sync"
	"time"

	"github.com/atlasos/calypso/internal/common/database"
	"github.com/atlasos/calypso/internal/common/logger"
)

// zfsPoolMonitorDefaultInterval is the sync period used when the monitor is
// started with a non-positive interval, which time.NewTicker would otherwise
// reject with a panic.
const zfsPoolMonitorDefaultInterval = 2 * time.Minute

// zpoolSizePattern matches an upper-cased size such as "95.5G", "1.2T",
// "512M" or a plain byte count such as "1024". It is compiled once at package
// scope instead of on every parseSize call.
var zpoolSizePattern = regexp.MustCompile(`^([\d.]+)([KMGTPE]?)$`)

// ZFSPoolMonitor handles periodic ZFS pool status monitoring and sync to database.
type ZFSPoolMonitor struct {
	zfsService *ZFSService
	logger     *logger.Logger
	interval   time.Duration
	stopCh     chan struct{}
	stopOnce   sync.Once // makes Stop safe to call more than once
}

// NewZFSPoolMonitor creates a new ZFS pool monitor service that syncs pool
// state into db every interval once Start is called.
func NewZFSPoolMonitor(db *database.DB, log *logger.Logger, interval time.Duration) *ZFSPoolMonitor {
	return &ZFSPoolMonitor{
		zfsService: NewZFSService(db, log),
		logger:     log,
		interval:   interval,
		stopCh:     make(chan struct{}),
	}
}

// Start runs the ZFS pool monitor loop. It performs one sync immediately and
// then one per interval, and blocks until ctx is cancelled or Stop is called,
// so callers normally run it in its own goroutine.
func (m *ZFSPoolMonitor) Start(ctx context.Context) {
	interval := m.interval
	if interval <= 0 {
		// time.NewTicker panics on a non-positive duration; fall back instead
		// of crashing the process on a missing or zero configuration value.
		m.logger.Warn("Invalid ZFS pool monitor interval, using default",
			"interval", m.interval, "default", zfsPoolMonitorDefaultInterval)
		interval = zfsPoolMonitorDefaultInterval
	}

	m.logger.Info("Starting ZFS pool monitor service", "interval", interval)

	ticker := time.NewTicker(interval)
	defer ticker.Stop()

	// Run initial sync immediately rather than waiting for the first tick.
	m.syncPools(ctx)

	for {
		select {
		case <-ctx.Done():
			m.logger.Info("ZFS pool monitor service stopped")
			return
		case <-m.stopCh:
			m.logger.Info("ZFS pool monitor service stopped")
			return
		case <-ticker.C:
			m.syncPools(ctx)
		}
	}
}

// Stop stops the ZFS pool monitor service. It is idempotent: calling it more
// than once does not panic on a double close of the stop channel.
func (m *ZFSPoolMonitor) Stop() {
	m.stopOnce.Do(func() {
		close(m.stopCh)
	})
}

// syncPools syncs ZFS pool status from the system to the database. Errors are
// logged rather than returned so that one failed sync does not stop the
// monitor loop.
func (m *ZFSPoolMonitor) syncPools(ctx context.Context) {
	m.logger.Debug("Running periodic ZFS pool sync")

	// If the system pool list cannot be obtained, bail out before the
	// reconciliation step: an unknown pool list must not be treated as an
	// empty one, or every database record would be removed.
	systemPools, err := m.getSystemPools(ctx)
	if err != nil {
		m.logger.Error("Failed to get system pools", "error", err)
		return
	}

	m.logger.Debug("Found pools in system", "count", len(systemPools))

	// Update each pool in database.
	for poolName, poolInfo := range systemPools {
		if err := m.updatePoolStatus(ctx, poolName, poolInfo); err != nil {
			m.logger.Error("Failed to update pool status", "pool", poolName, "error", err)
		}
	}

	// Reconcile database records for pools that no longer exist in the system.
	if err := m.markMissingPoolsOffline(ctx, systemPools); err != nil {
		m.logger.Error("Failed to mark missing pools offline", "error", err)
	}

	m.logger.Debug("ZFS pool sync completed")
}

// PoolInfo represents pool information from system.
type PoolInfo struct {
	Name      string
	SizeBytes int64
	UsedBytes int64
	Health    string // online, degraded, faulted, offline, unavailable, removed
}

// getSystemPools runs `zpool list` and returns the reported pools keyed by
// pool name. An error is returned only when the command itself fails.
//
// A pool whose size or allocation cannot be parsed is still included, with
// the affected value left at zero. Dropping it from the map would make
// markMissingPoolsOffline treat it as destroyed and delete its database
// record.
func (m *ZFSPoolMonitor) getSystemPools(ctx context.Context) (map[string]PoolInfo, error) {
	pools := make(map[string]PoolInfo)

	// -H: no header, tab-separated output. -p: exact (parsable) byte values
	// instead of rounded human-readable ones such as "95.5G".
	cmd := exec.CommandContext(ctx, "zpool", "list", "-H", "-p", "-o", "name,size,alloc,free,health")
	output, err := cmd.Output()
	if err != nil {
		return nil, fmt.Errorf("running zpool list: %w", err)
	}

	for _, line := range strings.Split(string(output), "\n") {
		// Columns: name, size, alloc, free, health. Blank and short lines are
		// skipped.
		fields := strings.Fields(line)
		if len(fields) < 5 {
			continue
		}

		poolName := fields[0]
		sizeStr := fields[1]
		allocStr := fields[2]
		health := fields[4]

		// parseSize returns 0 alongside any error, so a failed parse simply
		// leaves the value at zero after the warning.
		sizeBytes, err := parseSize(sizeStr)
		if err != nil {
			m.logger.Warn("Failed to parse pool size", "pool", poolName, "size", sizeStr, "error", err)
		}

		usedBytes, err := parseSize(allocStr)
		if err != nil {
			m.logger.Warn("Failed to parse pool used size", "pool", poolName, "alloc", allocStr, "error", err)
		}

		pools[poolName] = PoolInfo{
			Name:      poolName,
			SizeBytes: sizeBytes,
			UsedBytes: usedBytes,
			Health:    strings.ToLower(health), // normalize health status to lowercase
		}
	}

	return pools, nil
}

// parseSize converts a zpool size string to bytes. It accepts a plain byte
// count ("1024") or a decimal value followed by a binary unit suffix K, M, G,
// T, P or E in either case ("95.5G", "1.2t"); units are powers of 1024.
//
// The placeholder "-", which zpool prints when a value is unavailable, yields
// 0 with a nil error. Any other unrecognized input, or a value too large for
// int64, yields 0 and a non-nil error.
func parseSize(sizeStr string) (int64, error) {
	normalized := strings.ToUpper(strings.TrimSpace(sizeStr))
	if normalized == "-" {
		return 0, nil
	}

	matches := zpoolSizePattern.FindStringSubmatch(normalized)
	if matches == nil {
		return 0, fmt.Errorf("unrecognized size %q", sizeStr)
	}

	value, err := strconv.ParseFloat(matches[1], 64)
	if err != nil {
		return 0, fmt.Errorf("parsing size %q: %w", sizeStr, err)
	}

	var multiplier float64 = 1
	switch matches[2] {
	case "K":
		multiplier = 1 << 10
	case "M":
		multiplier = 1 << 20
	case "G":
		multiplier = 1 << 30
	case "T":
		multiplier = 1 << 40
	case "P":
		multiplier = 1 << 50
	case "E":
		multiplier = 1 << 60
	}

	total := value * multiplier
	// Converting an out-of-range float to int64 gives an unspecified result,
	// so reject it explicitly.
	if total >= math.MaxInt64 {
		return 0, fmt.Errorf("size %q overflows int64", sizeStr)
	}

	return int64(total), nil
}

// updatePoolStatus writes the size, used bytes and health from poolInfo to
// the zfs_pools row named poolName. A pool with no matching row is skipped
// and nil is returned; any other lookup or update failure is returned.
func (m *ZFSPoolMonitor) updatePoolStatus(ctx context.Context, poolName string, poolInfo PoolInfo) error {
	// Get pool from database by name.
	var poolID string
	err := m.zfsService.db.QueryRowContext(ctx,
		"SELECT id FROM zfs_pools WHERE name = $1",
		poolName,
	).Scan(&poolID)
	if errors.Is(err, sql.ErrNoRows) {
		// Pool not in database, skip (might be created outside of Calypso).
		m.logger.Debug("Pool not found in database, skipping", "pool", poolName)
		return nil
	}
	if err != nil {
		// A real failure (lost connection, cancelled context, ...) must not
		// be mistaken for "pool not tracked".
		return fmt.Errorf("looking up pool %q: %w", poolName, err)
	}

	// Update pool status, size, and used bytes.
	_, err = m.zfsService.db.ExecContext(ctx, `
		UPDATE zfs_pools SET
			size_bytes = $1,
			used_bytes = $2,
			health_status = $3,
			updated_at = NOW()
		WHERE id = $4
	`, poolInfo.SizeBytes, poolInfo.UsedBytes, poolInfo.Health, poolID)
	if err != nil {
		return err
	}

	m.logger.Debug("Updated pool status",
		"pool", poolName,
		"health", poolInfo.Health,
		"size", poolInfo.SizeBytes,
		"used", poolInfo.UsedBytes)

	return nil
}

// zfsPoolRef identifies a zfs_pools row by its id and pool name.
type zfsPoolRef struct {
	id   string
	name string
}

// markMissingPoolsOffline reconciles active database pools against
// systemPools. Despite its name it does not flag rows as offline: every
// active zfs_pools row whose name is absent from systemPools is deleted, on
// the assumption that the pool was destroyed.
//
// Individual delete failures are logged and do not stop the pass. The
// returned error is a failure to query or iterate the database rows.
func (m *ZFSPoolMonitor) markMissingPoolsOffline(ctx context.Context, systemPools map[string]PoolInfo) error {
	// The candidates are collected first so the SELECT cursor is closed
	// before any DELETE runs; otherwise each delete needs a second pooled
	// connection while the first is held by the open result set.
	missing, iterErr := m.findMissingPools(ctx, systemPools)

	for _, pool := range missing {
		// Pool doesn't exist in system - delete from database (pool was destroyed).
		m.logger.Info("Pool not found in system, removing from database", "pool", pool.name)
		if _, err := m.zfsService.db.ExecContext(ctx, "DELETE FROM zfs_pools WHERE id = $1", pool.id); err != nil {
			m.logger.Warn("Failed to delete missing pool from database", "pool", pool.name, "error", err)
			continue
		}
		m.logger.Info("Removed missing pool from database", "pool", pool.name)
	}

	return iterErr
}

// findMissingPools returns the active zfs_pools rows whose name is not a key
// of systemPools. Rows that fail to scan are logged and skipped. If iteration
// ends with an error, the rows gathered so far are returned alongside it.
func (m *ZFSPoolMonitor) findMissingPools(ctx context.Context, systemPools map[string]PoolInfo) ([]zfsPoolRef, error) {
	rows, err := m.zfsService.db.QueryContext(ctx, "SELECT id, name FROM zfs_pools WHERE is_active = true")
	if err != nil {
		return nil, err
	}
	defer rows.Close()

	var missing []zfsPoolRef
	for rows.Next() {
		var pool zfsPoolRef
		if err := rows.Scan(&pool.id, &pool.name); err != nil {
			m.logger.Warn("Failed to scan pool row", "error", err)
			continue
		}
		if _, exists := systemPools[pool.name]; !exists {
			missing = append(missing, pool)
		}
	}

	return missing, rows.Err()
}