diff --git a/litellm/integrations/SlackAlerting/slack_alerting.py b/litellm/integrations/SlackAlerting/slack_alerting.py
index 7da38e193b69..3efe58737865 100644
--- a/litellm/integrations/SlackAlerting/slack_alerting.py
+++ b/litellm/integrations/SlackAlerting/slack_alerting.py
@@ -134,6 +134,27 @@ def update_values(
         if llm_router is not None:
             self.llm_router = llm_router
 
+    def _prepare_outage_value_for_cache(self, outage_value: Union[dict, ProviderRegionOutageModel, OutageModel]) -> dict:
+        """
+        Return a shallow copy of `outage_value` that can be JSON-serialized for Redis caching.
+        Only a `deployment_ids` set is converted (set -> list); the caller's object is not modified.
+        """
+        # Copy first so the set the caller keeps working with is left untouched
+        cache_value = dict(outage_value)
+
+        if "deployment_ids" in cache_value and isinstance(cache_value["deployment_ids"], set):
+            cache_value["deployment_ids"] = list(cache_value["deployment_ids"])
+        return cache_value
+
+    def _restore_outage_value_from_cache(self, outage_value: Optional[dict]) -> Optional[dict]:
+        """
+        Undo `_prepare_outage_value_for_cache`: turn a cached `deployment_ids` list back into a set.
+        The dict is updated in place and returned; None or an empty dict is returned unchanged.
+        """
+        if outage_value and isinstance(outage_value.get("deployment_ids"), list):
+            outage_value["deployment_ids"] = set(outage_value["deployment_ids"])
+        return outage_value
+
     async def deployment_in_cooldown(self):
         pass
 
@@ -809,6 +830,10 @@ async def region_outage_alerts(
             ProviderRegionOutageModel
         ] = await self.internal_usage_cache.async_get_cache(key=cache_key)
 
+        # Convert deployment_ids back to set if it was stored as a list
+        if outage_value is not None:
+            outage_value = self._restore_outage_value_from_cache(outage_value)  # type: ignore
+
         if (
             getattr(exception, "status_code", None) is None
             or (
@@ -832,9 +857,11 @@ async def region_outage_alerts(
             )
 
             ## add to cache ##
+            # Convert set to list for JSON serialization
+            cache_value = self._prepare_outage_value_for_cache(outage_value)
             await self.internal_usage_cache.async_set_cache(
                 key=cache_key,
-                value=outage_value,
+                value=cache_value,
                 ttl=self.alerting_args.region_outage_alert_ttl,
             )
             return
@@ -900,8 +927,10 @@ async def region_outage_alerts(
             outage_value["major_alert_sent"] = True
 
         ## update cache ##
+        # Convert set to list for JSON serialization
+        cache_value = self._prepare_outage_value_for_cache(outage_value)
         await self.internal_usage_cache.async_set_cache(
-            key=cache_key, value=outage_value
+            key=cache_key, value=cache_value
         )
 
     async def outage_alerts(
@@ -1025,8 +1054,10 @@ async def outage_alerts(
                 outage_value["major_alert_sent"] = True
 
             ## update cache ##
+            # Convert set to list for JSON serialization
+            cache_value = self._prepare_outage_value_for_cache(outage_value)
             await self.internal_usage_cache.async_set_cache(
-                key=deployment_id, value=outage_value
+                key=deployment_id, value=cache_value
             )
         except Exception:
             pass
diff --git a/tests/test_litellm/integrations/SlackAlerting/test_slack_alerting.py b/tests/test_litellm/integrations/SlackAlerting/test_slack_alerting.py
index 9cccdb517995..128a88a0f129 100644
--- a/tests/test_litellm/integrations/SlackAlerting/test_slack_alerting.py
+++ b/tests/test_litellm/integrations/SlackAlerting/test_slack_alerting.py
@@ -196,3 +196,70 @@ def test_alert_type_in_formatted_message(self, mock_datetime):
         self.assertIn("Level: `Medium`", formatted_message)
         self.assertIn("Timestamp: `12:34:56`", formatted_message)
         self.assertIn("Message: Test alert message", formatted_message)
+
+    def test_original_redis_error_reproduction(self):
+        """Test that reproduces the original Redis serialization error."""
+        # This test verifies that the original error would occur without our fix
+        outage_value = {
+            "alerts": [408],
+            "deployment_ids": {"zapier-multi-provider-gemini-2.5-flash-1ite-vertex"},
+            "last_updated_at": 1760601633.6620142,
+            "major_alert_sent": False,
+            "minor_alert_sent": False,
+            "provider_region_id": "vertex_aius-east1"
+        }
+
+        # This should raise a TypeError due to set not being JSON serializable
+        with self.assertRaises(TypeError) as context:
+            json.dumps(outage_value)
+
+        # Verify the specific error message
+        self.assertIn("Object of type set is not JSON serializable", str(context.exception))
+
+    def test_fixed_redis_serialization(self):
+        """Test that our fix resolves the Redis serialization error."""
+        # Same data that caused the original error
+        outage_value = {
+            "alerts": [408],
+            "deployment_ids": {"zapier-multi-provider-gemini-2.5-flash-1ite-vertex"},
+            "last_updated_at": 1760601633.6620142,
+            "major_alert_sent": False,
+            "minor_alert_sent": False,
+            "provider_region_id": "vertex_aius-east1"
+        }
+
+        # Apply our fix
+        cache_value = self.slack_alerting._prepare_outage_value_for_cache(outage_value)
+
+        # This should now work without errors
+        json_str = json.dumps(cache_value)
+        self.assertIsInstance(json_str, str)
+
+        # Verify the data is correct
+        parsed_data = json.loads(json_str)
+        self.assertEqual(parsed_data["deployment_ids"], ["zapier-multi-provider-gemini-2.5-flash-1ite-vertex"])
+        self.assertEqual(parsed_data["alerts"], [408])
+        self.assertEqual(parsed_data["provider_region_id"], "vertex_aius-east1")
+
+    def test_restore_outage_value_from_cache_round_trip(self):
+        """Test that a value read back from the cache has `deployment_ids` as a set again."""
+        outage_value = {
+            "alerts": [408],
+            "deployment_ids": {"deployment-a", "deployment-b"},
+            "last_updated_at": 1760601633.6620142,
+            "major_alert_sent": False,
+            "minor_alert_sent": False,
+            "provider_region_id": "vertex_aius-east1",
+        }
+
+        # Simulate the Redis round trip: JSON-encode on write, JSON-decode on read
+        prepared = self.slack_alerting._prepare_outage_value_for_cache(outage_value)
+        cached = json.loads(json.dumps(prepared))
+        restored = self.slack_alerting._restore_outage_value_from_cache(cached)
+
+        self.assertEqual(restored, outage_value)
+        self.assertIsInstance(restored["deployment_ids"], set)
+        # Preparing for the cache must not mutate the caller's in-memory value
+        self.assertIsInstance(outage_value["deployment_ids"], set)
+        # A cache miss (None) is passed through unchanged
+        self.assertIsNone(self.slack_alerting._restore_outage_value_from_cache(None))