package monitoring import ( "context" "time" "github.com/atlasos/calypso/internal/common/database" "github.com/atlasos/calypso/internal/common/logger" ) // HealthStatus represents the health status of a component type HealthStatus string const ( HealthStatusHealthy HealthStatus = "healthy" HealthStatusDegraded HealthStatus = "degraded" HealthStatusUnhealthy HealthStatus = "unhealthy" HealthStatusUnknown HealthStatus = "unknown" ) // ComponentHealth represents the health of a system component type ComponentHealth struct { Name string `json:"name"` Status HealthStatus `json:"status"` Message string `json:"message,omitempty"` Timestamp time.Time `json:"timestamp"` } // EnhancedHealth represents enhanced health check response type EnhancedHealth struct { Status string `json:"status"` Service string `json:"service"` Version string `json:"version,omitempty"` Uptime int64 `json:"uptime_seconds"` Components []ComponentHealth `json:"components"` Timestamp time.Time `json:"timestamp"` } // HealthService provides enhanced health checking type HealthService struct { db *database.DB logger *logger.Logger startTime time.Time metricsService *MetricsService } // NewHealthService creates a new health service func NewHealthService(db *database.DB, log *logger.Logger, metricsService *MetricsService) *HealthService { return &HealthService{ db: db, logger: log, startTime: time.Now(), metricsService: metricsService, } } // CheckHealth performs a comprehensive health check func (s *HealthService) CheckHealth(ctx context.Context) *EnhancedHealth { health := &EnhancedHealth{ Status: string(HealthStatusHealthy), Service: "calypso-api", Uptime: int64(time.Since(s.startTime).Seconds()), Timestamp: time.Now(), Components: []ComponentHealth{}, } // Check database dbHealth := s.checkDatabase(ctx) health.Components = append(health.Components, dbHealth) // Check storage storageHealth := s.checkStorage(ctx) health.Components = append(health.Components, storageHealth) // Check SCST scstHealth := s.checkSCST(ctx) health.Components = append(health.Components, scstHealth) // Determine overall status hasUnhealthy := false hasDegraded := false for _, comp := range health.Components { if comp.Status == HealthStatusUnhealthy { hasUnhealthy = true } else if comp.Status == HealthStatusDegraded { hasDegraded = true } } if hasUnhealthy { health.Status = string(HealthStatusUnhealthy) } else if hasDegraded { health.Status = string(HealthStatusDegraded) } return health } // checkDatabase checks database health func (s *HealthService) checkDatabase(ctx context.Context) ComponentHealth { ctx, cancel := context.WithTimeout(ctx, 5*time.Second) defer cancel() if err := s.db.PingContext(ctx); err != nil { return ComponentHealth{ Name: "database", Status: HealthStatusUnhealthy, Message: "Database connection failed: " + err.Error(), Timestamp: time.Now(), } } // Check if we can query var count int if err := s.db.QueryRowContext(ctx, "SELECT 1").Scan(&count); err != nil { return ComponentHealth{ Name: "database", Status: HealthStatusDegraded, Message: "Database query failed: " + err.Error(), Timestamp: time.Now(), } } return ComponentHealth{ Name: "database", Status: HealthStatusHealthy, Timestamp: time.Now(), } } // checkStorage checks storage component health func (s *HealthService) checkStorage(ctx context.Context) ComponentHealth { // Check if we have any active repositories var count int if err := s.db.QueryRowContext(ctx, "SELECT COUNT(*) FROM disk_repositories WHERE is_active = true").Scan(&count); err != nil { return ComponentHealth{ Name: "storage", Status: HealthStatusDegraded, Message: "Failed to query storage repositories", Timestamp: time.Now(), } } if count == 0 { return ComponentHealth{ Name: "storage", Status: HealthStatusDegraded, Message: "No active storage repositories configured", Timestamp: time.Now(), } } // Check repository capacity var usagePercent float64 query := ` SELECT COALESCE( SUM(used_bytes)::float / NULLIF(SUM(total_bytes), 0) * 100, 0 ) FROM disk_repositories WHERE is_active = true ` if err := s.db.QueryRowContext(ctx, query).Scan(&usagePercent); err == nil { if usagePercent > 95 { return ComponentHealth{ Name: "storage", Status: HealthStatusDegraded, Message: "Storage repositories are nearly full", Timestamp: time.Now(), } } } return ComponentHealth{ Name: "storage", Status: HealthStatusHealthy, Timestamp: time.Now(), } } // checkSCST checks SCST component health func (s *HealthService) checkSCST(ctx context.Context) ComponentHealth { // Check if SCST targets exist var count int if err := s.db.QueryRowContext(ctx, "SELECT COUNT(*) FROM scst_targets").Scan(&count); err != nil { return ComponentHealth{ Name: "scst", Status: HealthStatusUnknown, Message: "Failed to query SCST targets", Timestamp: time.Now(), } } // SCST is healthy if we can query it (even if no targets exist) return ComponentHealth{ Name: "scst", Status: HealthStatusHealthy, Timestamp: time.Now(), } }