Skip to content
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
57 changes: 56 additions & 1 deletion vllm/tool_parsers/minimax_m2_tool_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -417,7 +417,6 @@
request: ChatCompletionRequest,
) -> DeltaMessage | None:
"""Extract tool calls from streaming model output."""

# Store request for type conversion
if not previous_text or self.tool_call_start_token in delta_text:
self._reset_streaming_state()
Expand Down Expand Up @@ -489,9 +488,40 @@
):
# We just ended a tool call, skip whitespace
return None

# Normal content, no tool call
return DeltaMessage(content=delta_text)

# Check whether the tool call block has ended and plain content should be
# allowed again. This handles the gap between [/TOOL_CALL] and the next
# [TOOL_CALL]: if the last invoke end token appears after the last invoke
# start, we are between calls.

Check failure on line 497 in vllm/tool_parsers/minimax_m2_tool_parser.py

View workflow job for this annotation

GitHub Actions / pre-commit

Ruff (E501)

vllm/tool_parsers/minimax_m2_tool_parser.py:497:89: E501 Line too long (102 > 88)
last_invoke_pos = current_text.rfind(self.invoke_start_prefix)
last_invoke_end_pos = current_text.rfind(self.invoke_end_token)
has_ended = last_invoke_end_pos > last_invoke_pos if last_invoke_pos != -1 else False

Check failure on line 500 in vllm/tool_parsers/minimax_m2_tool_parser.py

View workflow job for this annotation

GitHub Actions / pre-commit

Ruff (E501)

vllm/tool_parsers/minimax_m2_tool_parser.py:500:89: E501 Line too long (93 > 88)
if has_ended and self.invoke_start_prefix not in delta_text:
# Before ending, send closing } if JSON was started but not closed
if self.json_started and not self.json_closed:
self.json_started = False
self.json_closed = True
self.in_function = False
return DeltaMessage(
tool_calls=[
DeltaToolCall(
index=self.current_tool_index,
function=DeltaFunctionCall(arguments="}"),
)
]
)
# We've ended a tool call block and no new one started yet
self.is_tool_call_started = False
self.current_tool_index = 0
self.header_sent = False
self.in_function = False
self.json_started = False
self.json_closed = False
# Now process as content
return DeltaMessage(content=delta_text)

# Check if we're between tool calls (waiting for next one)
invoke_starts_count = current_text.count(self.invoke_start_prefix)
if self.current_tool_index >= invoke_starts_count:
Expand Down Expand Up @@ -588,6 +618,17 @@
# Make sure json_started is set if we're processing parameters
if not self.json_started:
self.json_started = True
# Update streamed_args_for_tool for opening brace
if self.current_tool_index < len(self.streamed_args_for_tool):
self.streamed_args_for_tool[self.current_tool_index] += "{"
return DeltaMessage(
tool_calls=[
DeltaToolCall(
index=self.current_tool_index,
function=DeltaFunctionCall(arguments="{"),
)
]
)

# Check for function end in accumulated text
if not self.json_closed and self.invoke_end_token in tool_text:
Expand Down Expand Up @@ -652,6 +693,20 @@
return None

# Look for parameters
# First, ensure opening brace is sent if not already
if not self.json_started:
self.json_started = True
if self.current_tool_index < len(self.streamed_args_for_tool):
self.streamed_args_for_tool[self.current_tool_index] += "{"
return DeltaMessage(
tool_calls=[
DeltaToolCall(
index=self.current_tool_index,
function=DeltaFunctionCall(arguments="{"),
)
]
)

# Find all parameter starts
param_starts = []
idx = 0
Expand Down
Loading