11"""Health models and status definitions for service health checks."""
22
33from enum import StrEnum
4- from typing import ClassVar , Self
4+ from typing import Any , ClassVar , Self
55
66from pydantic import BaseModel , Field , model_validator
77
88
9- class _HealthStatus (StrEnum ):
9+ class HealthStatus (StrEnum ):
10+ """Health status enumeration for service health checks.
11+
12+ Values:
13+ UP: Service is operating normally
14+ DEGRADED: Service is operational but with reduced functionality
15+ DOWN: Service is not operational
16+ """
17+
1018 UP = "UP"
19+ DEGRADED = "DEGRADED"
1120 DOWN = "DOWN"
1221
1322
1423class Health (BaseModel ):
1524 """Represents the health status of a service with optional components and failure reasons.
1625
1726 - A health object can have child components, i.e. health forms a tree.
18- - Any node in the tree can set itself to DOWN. In this case the node is required
19- to set the reason attribute. If reason is not set when DOWN,
20- automatic model validation of the tree will fail.
21- - DOWN'ness is propagated to parent health objects. I.e. the health of a parent
22- node is automatically set to DOWN if any of its child components are DOWN. The
23- child components leading to this will be listed in the reason.
24- - The root of the health tree is computed in the system module. The health of other
25- modules is automatically picked up by the system module.
27+ - Any node in the tree can set itself to DOWN or DEGRADED. If DOWN, the node is required
28+ to set the reason attribute. If reason is not set when DOWN, automatic model validation fails.
29+ - DOWN trumps DEGRADED, DEGRADED trumps UP. If any child is DOWN, parent is DOWN.
30+ If none are DOWN but any are DEGRADED, parent is DEGRADED.
31+ - The root of the health tree is computed in the system module.
32+ The health of other modules is automatically picked up by the system module.
2633 """
2734
28- Code : ClassVar [type [_HealthStatus ]] = _HealthStatus
29- status : _HealthStatus
35+ Code : ClassVar [type [HealthStatus ]] = HealthStatus
36+ status : HealthStatus
3037 reason : str | None = None
3138 components : dict [str , "Health" ] = Field (default_factory = dict )
39+ uptime_statistics : dict [str , dict [str , Any ]] | None = None # Optional uptime stats
3240
def compute_health_from_components(self) -> Self:
    """Recursively derive this node's health status from its components.

    Every component is recomputed first (depth-first); then the worst
    component status is propagated to this node. DOWN trumps DEGRADED and
    DEGRADED trumps UP.

    - If this node is already DOWN, it is returned unchanged with its
      original reason; its components are not visited.
    - If this node has no components, it is returned unchanged.
    - If any component is DOWN, this node becomes DOWN with a reason listing
      every DOWN component together with that component's reason.
    - Otherwise, if any component is DEGRADED, this node becomes DEGRADED
      with a reason listing every DEGRADED component and its reason.
    - In both propagation cases, any reason previously set on this node is
      overwritten by the generated one.

    Returns:
        Self: This instance, updated in place.
    """
    # Skip recomputation if already known to be DOWN
    if self.status == HealthStatus.DOWN:
        return self

    # No components means we keep the existing status
    if not self.components:
        return self

    # Collect (name, reason) pairs per non-UP status; UP components are ignored.
    affected: dict[HealthStatus, list[tuple[str, str | None]]] = {
        HealthStatus.DOWN: [],
        HealthStatus.DEGRADED: [],
    }
    for component_name, component in self.components.items():
        # Recursively compute health for each component before inspecting it
        component.compute_health_from_components()
        if component.status in affected:
            affected[component.status].append((component_name, component.reason))

    # Check in order of severity so that DOWN wins over DEGRADED.
    for status in (HealthStatus.DOWN, HealthStatus.DEGRADED):
        entries = affected[status]
        if not entries:
            continue
        self.status = status
        if len(entries) == 1:
            name, reason = entries[0]
            self.reason = f"Component '{name}' is {status.value} ({reason})"
        else:
            component_list = ", ".join(f"'{name}' ({reason})" for name, reason in entries)
            self.reason = f"Components {component_list} are {status.value}"
        break

    return self
6991
@@ -73,7 +95,7 @@ def validate_health_state(self) -> Self:
7395
7496 - Compute overall health based on component health
7597 - Ensure UP status has no associated reason
76- - Ensure DOWN status always has a reason
98+ - Ensure DOWN and DEGRADED status always have a reason
7799
78100 Returns:
79101 Self: The validated model instance with correct health status.
@@ -85,31 +107,31 @@ def validate_health_state(self) -> Self:
85107 self .compute_health_from_components ()
86108
87109 # Validate that UP status has no reason
88- if (self .status == _HealthStatus .UP ) and self .reason :
110+ if (self .status == HealthStatus .UP ) and self .reason :
89111 msg = f"Health { self .status } must not have reason"
90112 raise ValueError (msg )
91113
92- # Validate that DOWN status always has a reason
93- if (self .status == _HealthStatus .DOWN ) and not self .reason :
94- msg = "Health DOWN must have a reason"
114+ # Validate that DOWN and DEGRADED status always have a reason
115+ if (self .status in { HealthStatus .DOWN , HealthStatus . DEGRADED } ) and not self .reason :
116+ msg = f "Health { self . status } must have a reason"
95117 raise ValueError (msg )
96118
97119 return self
98120
def __str__(self) -> str:
    """Return the status as a string, with the reason appended when not UP.

    Returns:
        str: ``"<STATUS>: <reason>"`` if the status is DOWN or DEGRADED and a
            non-empty reason is set; otherwise just the status value.
    """
    if self.status in {HealthStatus.DOWN, HealthStatus.DEGRADED} and self.reason:
        return f"{self.status.value}: {self.reason}"
    return self.status.value
108130
def __bool__(self) -> bool:
    """Convert health status to a boolean value.

    A DEGRADED service is still operational, so it is truthy; only DOWN
    evaluates to False.

    Returns:
        bool: True if the status is UP or DEGRADED, False otherwise.
    """
    return self.status in {HealthStatus.UP, HealthStatus.DEGRADED}
0 commit comments