diff --git a/posthog/ai/openai/openai_converter.py b/posthog/ai/openai/openai_converter.py index 778478ff..1af072af 100644 --- a/posthog/ai/openai/openai_converter.py +++ b/posthog/ai/openai/openai_converter.py @@ -18,6 +18,60 @@ ) +def _format_tool_calls_to_content(tool_calls: List[Any]) -> List[FormattedFunctionCall]: + """ + Helper function to format tool calls into standardized content items. + + Handles both object-based tool calls (from API responses) and dict-based + tool calls (from input messages). + + Args: + tool_calls: List of tool call objects or dicts from OpenAI API + + Returns: + List of formatted function call content items + """ + formatted: List[FormattedFunctionCall] = [] + for tool_call in tool_calls: + # Safely extract id - handle both object and dict formats + if hasattr(tool_call, "id"): + tool_id = tool_call.id + elif isinstance(tool_call, dict): + tool_id = tool_call.get("id", "") + else: + tool_id = "" + + # Safely extract function name and arguments - handle both formats + if hasattr(tool_call, "function"): + func_name = getattr(tool_call.function, "name", "") + func_args = getattr(tool_call.function, "arguments", "") + elif isinstance(tool_call, dict): + function_data = tool_call.get("function", {}) + func_name = ( + function_data.get("name", "") if isinstance(function_data, dict) else "" + ) + func_args = ( + function_data.get("arguments", "") + if isinstance(function_data, dict) + else "" + ) + else: + func_name = "" + func_args = "" + + formatted.append( + { + "type": "function", + "id": tool_id, + "function": { + "name": func_name, + "arguments": func_args, + }, + } + ) + return formatted + + def format_openai_response(response: Any) -> List[FormattedMessage]: """ Format an OpenAI response into standardized message format. @@ -55,17 +109,9 @@ def format_openai_response(response: Any) -> List[FormattedMessage]: ) if hasattr(choice.message, "tool_calls") and choice.message.tool_calls: - for tool_call in choice.message.tool_calls: - content.append( - { - "type": "function", - "id": tool_call.id, - "function": { - "name": tool_call.function.name, - "arguments": tool_call.function.arguments, - }, - } - ) + content.extend( + _format_tool_calls_to_content(choice.message.tool_calls) + ) if content: output.append( @@ -160,10 +206,45 @@ def format_openai_input( # Handle Chat Completions API format if messages is not None: for msg in messages: + role = msg.get("role", "user") + content_value = msg.get("content", "") + tool_calls = msg.get("tool_calls") + tool_call_id = msg.get("tool_call_id") + + # Validate tool_calls is a non-empty list + has_tool_calls = ( + tool_calls is not None + and isinstance(tool_calls, list) + and len(tool_calls) > 0 + ) + + # Simple string content (with optional tool_call_id for tool messages) + if isinstance(content_value, str) and not has_tool_calls: + formatted_msg: FormattedMessage = { + "role": role, + "content": content_value, + } + # Preserve tool_call_id for tool role messages + if tool_call_id: + formatted_msg["tool_call_id"] = tool_call_id + formatted_messages.append(formatted_msg) + continue + + # Complex content (multimodal or tool calls) + content_items: List[FormattedContentItem] = [] + + if isinstance(content_value, str) and content_value: + content_items.append({"type": "text", "text": content_value}) + elif isinstance(content_value, list): + content_items.extend(content_value) + + if has_tool_calls: + content_items.extend(_format_tool_calls_to_content(tool_calls)) + formatted_messages.append( { - "role": msg.get("role", "user"), - "content": msg.get("content", ""), + "role": role, + "content": content_items, } ) diff --git a/posthog/test/ai/openai/test_openai.py b/posthog/test/ai/openai/test_openai.py index cc4d6aea..e06c9da0 100644 --- a/posthog/test/ai/openai/test_openai.py +++ b/posthog/test/ai/openai/test_openai.py @@ -1173,3 +1173,78 @@ def test_tool_definition(mock_client, mock_openai_response): assert isinstance(props["$ai_latency"], float) # Verify that tools are captured in the $ai_tools property assert props["$ai_tools"] == tools + + +def test_input_messages_with_tool_calls(mock_client, mock_openai_response): + """Test that tool calls in input messages are preserved and not dropped""" + with patch( + "openai.resources.chat.completions.Completions.create", + return_value=mock_openai_response, + ): + client = OpenAI(api_key="test-key", posthog_client=mock_client) + + # Simulate a conversation with tool calls from a previous assistant message + messages = [ + {"role": "user", "content": "What's the weather in San Francisco?"}, + { + "role": "assistant", + "content": "Let me check that for you.", + "tool_calls": [ + { + "id": "call_abc123", + "type": "function", + "function": { + "name": "get_weather", + "arguments": '{"location": "San Francisco"}', + }, + } + ], + }, + { + "role": "tool", + "tool_call_id": "call_abc123", + "content": "72 degrees and sunny", + }, + ] + + response = client.chat.completions.create( + model="gpt-4", + messages=messages, + posthog_distinct_id="test-id", + ) + + assert response == mock_openai_response + assert mock_client.capture.call_count == 1 + + call_args = mock_client.capture.call_args[1] + props = call_args["properties"] + + # Verify the input contains the tool calls + assert len(props["$ai_input"]) == 3 + + # First message should be simple string + assert props["$ai_input"][0] == { + "role": "user", + "content": "What's the weather in San Francisco?", + } + + # Second message should have both content and tool calls + assert props["$ai_input"][1]["role"] == "assistant" + assert props["$ai_input"][1]["content"] == [ + {"type": "text", "text": "Let me check that for you."}, + { + "type": "function", + "id": "call_abc123", + "function": { + "name": "get_weather", + "arguments": '{"location": "San Francisco"}', + }, + }, + ] + + # Third message should be the tool response + assert props["$ai_input"][2] == { + "role": "tool", + "tool_call_id": "call_abc123", + "content": "72 degrees and sunny", + }