107 changes: 94 additions & 13 deletions posthog/ai/openai/openai_converter.py
@@ -18,6 +18,60 @@
)


def _format_tool_calls_to_content(tool_calls: List[Any]) -> List[FormattedFunctionCall]:
"""
Helper function to format tool calls into standardized content items.

Handles both object-based tool calls (from API responses) and dict-based
tool calls (from input messages).

Args:
tool_calls: List of tool call objects or dicts from OpenAI API

Returns:
List of formatted function call content items
"""
formatted: List[FormattedFunctionCall] = []
for tool_call in tool_calls:
# Safely extract id - handle both object and dict formats
if hasattr(tool_call, "id"):
tool_id = tool_call.id
elif isinstance(tool_call, dict):
tool_id = tool_call.get("id", "")
else:
tool_id = ""

# Safely extract function name and arguments - handle both formats
if hasattr(tool_call, "function"):
func_name = getattr(tool_call.function, "name", "")
func_args = getattr(tool_call.function, "arguments", "")
elif isinstance(tool_call, dict):
function_data = tool_call.get("function", {})
func_name = (
function_data.get("name", "") if isinstance(function_data, dict) else ""
)
func_args = (
function_data.get("arguments", "")
if isinstance(function_data, dict)
else ""
)
else:
func_name = ""
func_args = ""

formatted.append(
{
"type": "function",
"id": tool_id,
"function": {
"name": func_name,
"arguments": func_args,
},
}
)
return formatted
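
A quick usage sketch for the helper above (illustrative only, not part of the diff; the SimpleNamespace objects stand in for the SDK's tool-call objects, and the expected output follows from the function body):

from types import SimpleNamespace

# dict-based tool call, as it appears in input messages
dict_call = {
    "id": "call_1",
    "function": {"name": "get_weather", "arguments": '{"location": "SF"}'},
}

# object-based tool call, as it appears on API response objects
obj_call = SimpleNamespace(
    id="call_2",
    function=SimpleNamespace(name="get_weather", arguments='{"location": "NYC"}'),
)

_format_tool_calls_to_content([dict_call, obj_call])
# -> [{"type": "function", "id": "call_1", "function": {"name": "get_weather", "arguments": '{"location": "SF"}'}},
#     {"type": "function", "id": "call_2", "function": {"name": "get_weather", "arguments": '{"location": "NYC"}'}}]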


def format_openai_response(response: Any) -> List[FormattedMessage]:
"""
Format an OpenAI response into standardized message format.
@@ -55,17 +109,9 @@ def format_openai_response(response: Any) -> List[FormattedMessage]:
)

if hasattr(choice.message, "tool_calls") and choice.message.tool_calls:
-                for tool_call in choice.message.tool_calls:
-                    content.append(
-                        {
-                            "type": "function",
-                            "id": tool_call.id,
-                            "function": {
-                                "name": tool_call.function.name,
-                                "arguments": tool_call.function.arguments,
-                            },
-                        }
-                    )
content.extend(
_format_tool_calls_to_content(choice.message.tool_calls)
Review comment (Member): This fixes it for the chat completions response API format. What about the responses API format?

)
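
For comparison, a minimal sketch of how the same mapping might look for Responses API output. This is not part of this PR; it assumes function calls arrive as output items with top-level call_id/name/arguments rather than a nested function object, and the helper name is hypothetical:

def _format_responses_function_call(item: Any) -> FormattedFunctionCall:
    # Assumed Responses API shape: "function_call" output items expose
    # call_id, name and arguments directly on the item.
    return {
        "type": "function",
        "id": getattr(item, "call_id", "") or getattr(item, "id", ""),
        "function": {
            "name": getattr(item, "name", ""),
            "arguments": getattr(item, "arguments", ""),
        },
    }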

if content:
output.append(
@@ -160,10 +206,45 @@ def format_openai_input(
# Handle Chat Completions API format
if messages is not None:
for msg in messages:
role = msg.get("role", "user")
content_value = msg.get("content", "")
tool_calls = msg.get("tool_calls")
tool_call_id = msg.get("tool_call_id")

# Validate tool_calls is a non-empty list
has_tool_calls = (
tool_calls is not None
and isinstance(tool_calls, list)
and len(tool_calls) > 0
)

# Simple string content (with optional tool_call_id for tool messages)
if isinstance(content_value, str) and not has_tool_calls:
formatted_msg: FormattedMessage = {
"role": role,
"content": content_value,
}
# Preserve tool_call_id for tool role messages
if tool_call_id:
Review comment (Member): Can we have a tool_call_id if we don't have any tool_calls?

formatted_msg["tool_call_id"] = tool_call_id
formatted_messages.append(formatted_msg)
continue
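
For reference, the shape the tool_call_id branch above preserves: a Chat Completions tool-role message carries a tool_call_id and plain string content but no tool_calls (same data as the test at the bottom of this diff):

{
    "role": "tool",
    "tool_call_id": "call_abc123",
    "content": "72 degrees and sunny",
}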

# Complex content (multimodal or tool calls)
content_items: List[FormattedContentItem] = []

if isinstance(content_value, str) and content_value:
content_items.append({"type": "text", "text": content_value})
elif isinstance(content_value, list):
content_items.extend(content_value)

if has_tool_calls:
content_items.extend(_format_tool_calls_to_content(tool_calls))

formatted_messages.append(
{
"role": msg.get("role", "user"),
"content": msg.get("content", ""),
"role": role,
"content": content_items,
}
)

Review comment (Member): Do we need to change anything for the responses API format?
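
For context, a rough sketch of how the same conversation would be carried as Responses API input. This is illustrative only; the item shapes (function_call and function_call_output with a top-level call_id) reflect my understanding of that API and are not code from this PR:

responses_api_input = [
    {"role": "user", "content": "What's the weather in San Francisco?"},
    {
        "type": "function_call",
        "call_id": "call_abc123",
        "name": "get_weather",
        "arguments": '{"location": "San Francisco"}',
    },
    {
        "type": "function_call_output",
        "call_id": "call_abc123",
        "output": "72 degrees and sunny",
    },
]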

75 changes: 75 additions & 0 deletions posthog/test/ai/openai/test_openai.py
@@ -1173,3 +1173,78 @@ def test_tool_definition(mock_client, mock_openai_response):
assert isinstance(props["$ai_latency"], float)
# Verify that tools are captured in the $ai_tools property
assert props["$ai_tools"] == tools


def test_input_messages_with_tool_calls(mock_client, mock_openai_response):
Review comment (Member): Do you think that there are other tests that are testing things with tool_calls? Maybe we can just put some extra assertions there?
We should probably also test some of the edge cases that you have in your code

"""Test that tool calls in input messages are preserved and not dropped"""
with patch(
"openai.resources.chat.completions.Completions.create",
return_value=mock_openai_response,
):
client = OpenAI(api_key="test-key", posthog_client=mock_client)

# Simulate a conversation with tool calls from a previous assistant message
messages = [
{"role": "user", "content": "What's the weather in San Francisco?"},
{
"role": "assistant",
"content": "Let me check that for you.",
"tool_calls": [
{
"id": "call_abc123",
"type": "function",
"function": {
"name": "get_weather",
"arguments": '{"location": "San Francisco"}',
},
}
],
},
{
"role": "tool",
"tool_call_id": "call_abc123",
"content": "72 degrees and sunny",
},
]

response = client.chat.completions.create(
model="gpt-4",
messages=messages,
posthog_distinct_id="test-id",
)

assert response == mock_openai_response
assert mock_client.capture.call_count == 1

call_args = mock_client.capture.call_args[1]
props = call_args["properties"]

# Verify the input contains the tool calls
assert len(props["$ai_input"]) == 3

# First message should be simple string
assert props["$ai_input"][0] == {
"role": "user",
"content": "What's the weather in San Francisco?",
}

# Second message should have both content and tool calls
assert props["$ai_input"][1]["role"] == "assistant"
assert props["$ai_input"][1]["content"] == [
{"type": "text", "text": "Let me check that for you."},
{
"type": "function",
"id": "call_abc123",
"function": {
"name": "get_weather",
"arguments": '{"location": "San Francisco"}',
},
},
]

# Third message should be the tool response
assert props["$ai_input"][2] == {
"role": "tool",
"tool_call_id": "call_abc123",
"content": "72 degrees and sunny",
}
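
Picking up the edge-case suggestion above, a minimal sketch of one such test. It reuses the existing fixtures and patch target, and the expected fallback (empty name and arguments for a tool call with no "function" key) is inferred from _format_tool_calls_to_content:

def test_input_tool_call_missing_function_key(mock_client, mock_openai_response):
    """Sketch: a dict tool call without a "function" key should fall back to empty strings"""
    with patch(
        "openai.resources.chat.completions.Completions.create",
        return_value=mock_openai_response,
    ):
        client = OpenAI(api_key="test-key", posthog_client=mock_client)

        client.chat.completions.create(
            model="gpt-4",
            messages=[
                {
                    "role": "assistant",
                    "content": "",
                    "tool_calls": [{"id": "call_broken"}],  # no "function" key
                }
            ],
            posthog_distinct_id="test-id",
        )

        props = mock_client.capture.call_args[1]["properties"]
        assert props["$ai_input"][0]["content"] == [
            {
                "type": "function",
                "id": "call_broken",
                "function": {"name": "", "arguments": ""},
            }
        ]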