Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
37 changes: 34 additions & 3 deletions litellm/integrations/SlackAlerting/slack_alerting.py
Original file line number Diff line number Diff line change
Expand Up @@ -134,6 +134,27 @@ def update_values(
if llm_router is not None:
self.llm_router = llm_router

def _prepare_outage_value_for_cache(
    self,
    outage_value: Union[dict, ProviderRegionOutageModel, OutageModel],
) -> dict:
    """
    Build a JSON-serializable copy of an outage record for caching.

    The standard JSON encoder rejects ``set`` and ``frozenset`` objects, so
    when the record carries a set-like ``deployment_ids`` entry it is
    replaced by a list in the returned copy.

    Args:
        outage_value: Outage record as a mapping. It is not modified.

    Returns:
        A new shallow ``dict`` copy of the record whose ``deployment_ids``
        value (if present and set-like) is a list. Element order of that
        list follows set iteration order and is therefore unspecified.
    """
    # Shallow copy: the caller's record keeps its set for in-process use.
    cache_value = dict(outage_value)

    deployment_ids = cache_value.get("deployment_ids")
    # frozenset fails JSON encoding exactly like set, so convert both.
    if isinstance(deployment_ids, (set, frozenset)):
        cache_value["deployment_ids"] = list(deployment_ids)
    return cache_value

def _restore_outage_value_from_cache(self, outage_value: Optional[dict]) -> Optional[dict]:
    """
    Undo the list conversion applied to an outage record before caching.

    If the record is a dict whose ``deployment_ids`` entry is a list, that
    entry is replaced in place by a set built from the list.

    Args:
        outage_value: Value read back from the cache; may be ``None``.

    Returns:
        The same object that was passed in. Dict records are updated in
        place; ``None`` and any non-dict value are returned untouched.
    """
    # Guard first: calling .get on a truthy non-dict value would raise
    # AttributeError, so anything that is not a dict is passed through.
    if not isinstance(outage_value, dict):
        return outage_value

    deployment_ids = outage_value.get("deployment_ids")
    if isinstance(deployment_ids, list):
        outage_value["deployment_ids"] = set(deployment_ids)
    return outage_value

async def deployment_in_cooldown(self):
    """No-op placeholder: awaiting it performs no work and yields ``None``."""
    return None

Expand Down Expand Up @@ -809,6 +830,10 @@ async def region_outage_alerts(
ProviderRegionOutageModel
] = await self.internal_usage_cache.async_get_cache(key=cache_key)

# Convert deployment_ids back to set if it was stored as a list
if outage_value is not None:
outage_value = self._restore_outage_value_from_cache(outage_value) # type: ignore

if (
getattr(exception, "status_code", None) is None
or (
Expand All @@ -832,9 +857,11 @@ async def region_outage_alerts(
)

## add to cache ##
# Convert set to list for JSON serialization
cache_value = self._prepare_outage_value_for_cache(outage_value)
await self.internal_usage_cache.async_set_cache(
key=cache_key,
value=outage_value,
value=cache_value,
ttl=self.alerting_args.region_outage_alert_ttl,
)
return
Expand Down Expand Up @@ -900,8 +927,10 @@ async def region_outage_alerts(
outage_value["major_alert_sent"] = True

## update cache ##
# Convert set to list for JSON serialization
cache_value = self._prepare_outage_value_for_cache(outage_value)
await self.internal_usage_cache.async_set_cache(
key=cache_key, value=outage_value
key=cache_key, value=cache_value
)

async def outage_alerts(
Expand Down Expand Up @@ -1025,8 +1054,10 @@ async def outage_alerts(
outage_value["major_alert_sent"] = True

## update cache ##
# Convert set to list for JSON serialization
cache_value = self._prepare_outage_value_for_cache(outage_value)
await self.internal_usage_cache.async_set_cache(
key=deployment_id, value=outage_value
key=deployment_id, value=cache_value
)
except Exception:
pass
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -196,3 +196,47 @@ def test_alert_type_in_formatted_message(self, mock_datetime):
self.assertIn("Level: `Medium`", formatted_message)
self.assertIn("Timestamp: `12:34:56`", formatted_message)
self.assertIn("Message: Test alert message", formatted_message)

def test_original_redis_error_reproduction(self):
    """Show that an outage record holding a set cannot be JSON-encoded as-is."""
    # Same record shape as the fixed-path test, with deployment_ids left as a set.
    unserializable_record = dict(
        alerts=[408],
        deployment_ids={"zapier-multi-provider-gemini-2.5-flash-1ite-vertex"},
        last_updated_at=1760601633.6620142,
        major_alert_sent=False,
        minor_alert_sent=False,
        provider_region_id="vertex_aius-east1",
    )

    # Encoding must fail because json has no representation for set.
    with self.assertRaises(TypeError) as raised:
        json.dumps(unserializable_record)

    # Pin the exact failure so an unrelated TypeError cannot satisfy the test.
    failure_text = str(raised.exception)
    self.assertIn("Object of type set is not JSON serializable", failure_text)

def test_fixed_redis_serialization(self):
    """Check that the cache-preparation helper makes the record JSON-encodable."""
    deployment_id = "zapier-multi-provider-gemini-2.5-flash-1ite-vertex"
    region_id = "vertex_aius-east1"

    # Record with deployment_ids as a set, which json.dumps rejects unprepared.
    record = {
        "alerts": [408],
        "deployment_ids": {deployment_id},
        "last_updated_at": 1760601633.6620142,
        "major_alert_sent": False,
        "minor_alert_sent": False,
        "provider_region_id": region_id,
    }

    # Run the record through the helper under test.
    prepared = self.slack_alerting._prepare_outage_value_for_cache(record)

    # Encoding the prepared copy must succeed and yield text.
    encoded = json.dumps(prepared)
    self.assertIsInstance(encoded, str)

    # Round-trip and confirm the payload survived, with the set now a list.
    decoded = json.loads(encoded)
    self.assertEqual(decoded["deployment_ids"], [deployment_id])
    self.assertEqual(decoded["alerts"], [408])
    self.assertEqual(decoded["provider_region_id"], region_id)
Loading