benchmark/llm_client.py (22 changes: 18 additions & 4 deletions)
@@ -35,6 +35,8 @@ def __init__(
         self.top_p = top_p
         self.effort = effort
         self.reasoning = reasoning
+        # Responses API only when an effort is set for OpenAI/AzureOpenAI endpoints.
+        self.use_responses_api = bool(effort) and self.provider in ("openai", "azureopenai")
         self.llm = None

         self._initialize_llm()
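For clarity, the gating rule introduced here can be restated as a standalone predicate. A minimal sketch; the helper name is hypothetical, not part of the benchmark code:

# Standalone restatement of the new gating rule; `should_use_responses_api`
# is a hypothetical name, not an identifier from this PR.
def should_use_responses_api(effort, provider):
    return bool(effort) and provider in ("openai", "azureopenai")

assert should_use_responses_api("medium", "openai") is True
assert should_use_responses_api(None, "openai") is False   # no effort -> Chat Completions path
assert should_use_responses_api("high", "google") is False  # non-OpenAI providers never switch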
@@ -65,11 +67,18 @@ def _initialize_llm(self):
         elif self.provider == "openai":
             from langchain_openai import ChatOpenAI

+            model_kwargs = {}
+            if self.top_p is not None:
+                model_kwargs["top_p"] = self.top_p
+
             self.llm = ChatOpenAI(
                 model=self.model,
                 openai_api_key=self.api_key,
                 temperature=self.temperature,
                 max_tokens=self.max_tokens,
+                model_kwargs=model_kwargs,
+                use_responses_api=self.use_responses_api,
+                reasoning_effort=self.effort,
             )
         elif self.provider == "google":
             from langchain_google_genai import ChatGoogleGenerativeAI
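For reference, a hedged sketch of the call this branch now makes for a reasoning model. All values are placeholders, and it assumes a langchain-openai version recent enough to expose `use_responses_api` and `reasoning_effort` as constructor arguments:

from langchain_openai import ChatOpenAI

# Placeholder values throughout; mirrors the updated "openai" branch above.
llm = ChatOpenAI(
    model="o4-mini",            # placeholder reasoning model
    openai_api_key="sk-...",    # placeholder key
    temperature=1,
    max_tokens=4096,
    model_kwargs={},            # top_p deliberately left unset here
    use_responses_api=True,     # route the request through the Responses API
    reasoning_effort="medium",  # effort hint forwarded to the reasoning model
)
print(llm.invoke("Say hi").content)

When no effort is configured, `reasoning_effort=None` is accepted, so the non-reasoning path through this constructor is unchanged.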
@@ -97,8 +106,6 @@ def _initialize_llm(self):
             model_kwargs = {}
             if self.top_p is not None:
                 model_kwargs["top_p"] = self.top_p
-            if self.effort is not None:
-                model_kwargs["reasoning_effort"] = self.effort

             self.llm = AzureChatOpenAI(
                 azure_endpoint=self.custom_api_endpoint,
@@ -107,7 +114,9 @@ def _initialize_llm(self):
                 azure_deployment=self.model,
                 temperature=self.temperature,
                 max_completion_tokens=self.max_tokens,
-                model_kwargs=model_kwargs  # In GPT-5.X this is a first class parameter, but passing this way is also allowed.
+                model_kwargs=model_kwargs,
+                use_responses_api=self.use_responses_api,  # Required for some reasoning model configurations
+                reasoning_effort=self.effort,
             )
         elif self.provider == "vllm" or self.provider == "openrouter":
             from langchain_openai import ChatOpenAI
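The deletion in the previous hunk pairs with this one: `reasoning_effort` moves out of `model_kwargs` and becomes a first-class constructor argument, matching the OpenAI branch. A minimal sketch of the equivalent Azure call, with placeholder endpoint, key, API version, and deployment (the real values come from collapsed lines of this file):

from langchain_openai import AzureChatOpenAI

# Placeholder values throughout; mirrors the updated AzureChatOpenAI call above.
llm = AzureChatOpenAI(
    azure_endpoint="https://example.openai.azure.com/",  # placeholder
    api_key="...",                                       # placeholder
    api_version="2025-03-01-preview",                    # placeholder preview version
    azure_deployment="o4-mini",                          # placeholder deployment name
    temperature=1,
    max_completion_tokens=4096,
    model_kwargs={},
    use_responses_api=True,
    reasoning_effort="medium",
)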
@@ -200,6 +209,10 @@ def _convert_mcp_tools_to_langchain(
                     "parameters": cleaned_schema,
                 },
             }
+            # Responses API defaults to strict mode, which forces the model to fill every
+            # optional param with hallucinated values. Explicit strict=False avoids this.
+            if self.provider in ("openai", "azureopenai") and self.use_responses_api:
+                tool_def["function"]["strict"] = False
             langchain_tools.append(tool_def)

         return langchain_tools
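A stand-in example of what a converted tool definition looks like on the Responses path; the name and schema are illustrative, and only the "strict" key is set by the new code above. Under strict mode OpenAI requires every property to appear in "required", which is why `strict=False` is what keeps `units` genuinely optional:

# Illustrative converted tool; values are invented for the example.
tool_def = {
    "type": "function",
    "function": {
        "name": "get_weather",
        "description": "Return the current weather for a city.",
        "parameters": {
            "type": "object",
            "properties": {
                "city": {"type": "string"},
                "units": {"type": "string"},  # optional parameter
            },
            "required": ["city"],  # strict mode would force `units` in here too
        },
        "strict": False,  # opt out of strict mode so optional params stay optional
    },
}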
@@ -314,7 +327,8 @@ async def invoke_with_tools(
         # Convert MCP tools to LangChain format
         langchain_tools = self._convert_mcp_tools_to_langchain(tools)

-        # Bind tools to LLM
+        # Bind tools to LLM (the strict=False flag for OpenAI providers is
+        # set on each tool dict in _convert_mcp_tools_to_langchain).
         llm_with_tools = self.llm.bind_tools(langchain_tools)
         llm_with_retry = llm_with_tools.with_retry(
             retry_if_exception_type=(
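The retry wrapper is cut off above. For reference, a typical `with_retry` configuration on a LangChain runnable looks like the following; the exception tuple here is illustrative, not the PR's exact list:

from langchain_openai import ChatOpenAI
from openai import APIConnectionError, APITimeoutError, RateLimitError

llm = ChatOpenAI(model="gpt-4o-mini")  # placeholder model
llm_with_retry = llm.with_retry(
    retry_if_exception_type=(RateLimitError, APITimeoutError, APIConnectionError),
    wait_exponential_jitter=True,  # exponential backoff with jitter between attempts
    stop_after_attempt=3,          # give up after the third failure
)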