diff --git a/benchmark/llm_client.py b/benchmark/llm_client.py
index 46d64e5..b393c00 100644
--- a/benchmark/llm_client.py
+++ b/benchmark/llm_client.py
@@ -35,6 +35,8 @@ def __init__(
         self.top_p = top_p
         self.effort = effort
         self.reasoning = reasoning
+        # Responses API only when an effort is set for OpenAI/AzureOpenAI endpoints.
+        self.use_responses_api = bool(effort) and self.provider in ("openai", "azureopenai")
         self.llm = None
         self._initialize_llm()

@@ -65,11 +67,18 @@ def _initialize_llm(self):
         elif self.provider == "openai":
             from langchain_openai import ChatOpenAI

+            model_kwargs = {}
+            if self.top_p is not None:
+                model_kwargs["top_p"] = self.top_p
+
             self.llm = ChatOpenAI(
                 model=self.model,
                 openai_api_key=self.api_key,
                 temperature=self.temperature,
                 max_tokens=self.max_tokens,
+                model_kwargs=model_kwargs,
+                use_responses_api=self.use_responses_api,
+                reasoning_effort=self.effort,
             )
         elif self.provider == "google":
             from langchain_google_genai import ChatGoogleGenerativeAI
@@ -97,8 +106,6 @@ def _initialize_llm(self):
             model_kwargs = {}
             if self.top_p is not None:
                 model_kwargs["top_p"] = self.top_p
-            if self.effort is not None:
-                model_kwargs["reasoning_effort"] = self.effort

             self.llm = AzureChatOpenAI(
                 azure_endpoint=self.custom_api_endpoint,
@@ -107,7 +114,9 @@
                 azure_deployment=self.model,
                 temperature=self.temperature,
                 max_completion_tokens=self.max_tokens,
-                model_kwargs=model_kwargs # In GPT-5.X this is a first class parameter, but passing this way is also allowed.
+                model_kwargs=model_kwargs,
+                use_responses_api=self.use_responses_api,  # Required for some reasoning model configurations
+                reasoning_effort=self.effort,
             )
         elif self.provider == "vllm" or self.provider == "openrouter":
             from langchain_openai import ChatOpenAI
@@ -200,6 +209,10 @@ def _convert_mcp_tools_to_langchain(
                     "parameters": cleaned_schema,
                 },
             }
+            # Responses API defaults to strict mode, which forces the model to fill every
+            # optional param with hallucinated values. Explicit strict=False avoids this.
+            if self.provider in ("openai", "azureopenai") and self.use_responses_api:
+                tool_def["function"]["strict"] = False
             langchain_tools.append(tool_def)

         return langchain_tools
@@ -314,7 +327,8 @@ async def invoke_with_tools(
         # Convert MCP tools to LangChain format
         langchain_tools = self._convert_mcp_tools_to_langchain(tools)

-        # Bind tools to LLM
+        # Bind tools to LLM (the strict=False flag for OpenAI providers is
+        # set on each tool dict in _convert_mcp_tools_to_langchain).
         llm_with_tools = self.llm.bind_tools(langchain_tools)
         llm_with_retry = llm_with_tools.with_retry(
             retry_if_exception_type=(
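Below is a minimal, standalone sketch (not part of the patch) of the tool shape that _convert_mcp_tools_to_langchain produces for OpenAI/Azure OpenAI providers once the Responses API is enabled. The "get_weather" name, its schema, and the outer "type": "function" wrapper are illustrative assumptions; only the strict=False gating mirrors the diff above.

# Hypothetical example: everything except the strict=False gating is assumed.
provider = "azureopenai"
use_responses_api = True          # i.e. an effort level was configured

cleaned_schema = {                # stand-in for a cleaned MCP input schema
    "type": "object",
    "properties": {
        "city": {"type": "string"},
        "units": {"type": "string"},   # optional parameter
    },
    "required": ["city"],
}

tool_def = {
    "type": "function",           # assumed wrapper; not shown in the diff
    "function": {
        "name": "get_weather",
        "description": "Illustrative tool, not from the benchmark",
        "parameters": cleaned_schema,
    },
}

# Mirrors the new gating: without strict=False, the Responses API's strict
# mode would push the model to supply a value for the optional "units".
if provider in ("openai", "azureopenai") and use_responses_api:
    tool_def["function"]["strict"] = False

These dicts are what later flow into self.llm.bind_tools(langchain_tools) in invoke_with_tools, unchanged apart from the strict flag.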