-
-
Notifications
You must be signed in to change notification settings - Fork 15.8k
[Mistral Grammar] Fix tool and reasoning parsing #39217
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
8584c41
85d0c82
3ccef94
960003b
2ffcd83
b5ecc59
69466bf
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Large diffs are not rendered by default.
Large diffs are not rendered by default.
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -73,7 +73,10 @@ | |
| from vllm.renderers import ChatParams | ||
| from vllm.sampling_params import BeamSearchParams, SamplingParams | ||
| from vllm.tokenizers import TokenizerLike | ||
| from vllm.tool_parsers.mistral_tool_parser import MistralToolCall | ||
| from vllm.tool_parsers.mistral_tool_parser import ( | ||
| MistralToolCall, | ||
| MistralToolParser, | ||
| ) | ||
| from vllm.tool_parsers.utils import partial_json_loads | ||
| from vllm.utils.collection_utils import as_list | ||
| from vllm.utils.mistral import is_mistral_tokenizer | ||
|
|
@@ -140,6 +143,12 @@ def __init__( | |
| enable_auto_tools=enable_auto_tools, | ||
| model_name=self.model_config.model, | ||
| ) | ||
| _is_mistral_tool_parser = self.tool_parser is not None and issubclass( | ||
| self.tool_parser, MistralToolParser | ||
| ) | ||
| if _is_mistral_tool_parser and self.reasoning_parser_cls is not None: | ||
| MistralToolParser.model_can_reason = True | ||
|
Comment on lines
+146
to
+150
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Setting a class attribute
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. yeah so indeed this is not clean but was discussed in previous PR. I don't know how else we should do this 😄
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I may not have seen all the previous discussion - did you consider just looking at the
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. actually using reasoning_effort does not help because it:
Collaborator
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. That's reasonable. To avoid mutating global state, can we just set this on the instance of the tool parser as opposed to on the module as a global mutation? I think it's just changing this line to That makes it set per instance of tool parser as opposed to globally. |
||
|
|
||
| self.exclude_tools_when_tool_choice_none = exclude_tools_when_tool_choice_none | ||
|
|
||
| self.enable_prompt_tokens_details = enable_prompt_tokens_details | ||
|
|
@@ -310,6 +319,11 @@ async def create_chat_completion( | |
| else: | ||
| if not request.include_reasoning: | ||
| reasoning_ended = True | ||
| elif request._grammar_from_tool_parser: | ||
| # The Mistral grammar already includes an optional | ||
| # `think?` rule that handles both reasoning and | ||
| # non-reasoning outputs. | ||
| reasoning_ended = True | ||
| elif reasoning_parser: | ||
| reasoning_ended = reasoning_parser.is_reasoning_end( | ||
| prompt_token_ids or [] | ||
|
|
@@ -530,6 +544,8 @@ async def chat_completion_stream_generator( | |
| harmony_tools_streamed = [False] * num_choices | ||
| tools_streamed = [False] * num_choices | ||
|
|
||
| is_mistral_grammar_path = request._grammar_from_tool_parser | ||
|
|
||
| if isinstance(request.tool_choice, ChatCompletionNamedToolChoiceParam): | ||
| tool_choice_function_name = request.tool_choice.function.name | ||
| else: | ||
|
|
@@ -553,7 +569,7 @@ async def chat_completion_stream_generator( | |
|
|
||
| # Only one of these will be used, thus previous_texts and | ||
| # all_previous_token_ids will not be used twice in the same iteration. | ||
| if tool_choice_auto or reasoning_parser: | ||
| if is_mistral_grammar_path or tool_choice_auto or reasoning_parser: | ||
| # These are only required in "auto" tool choice case | ||
| all_previous_token_ids = [[] for _ in range(num_choices)] | ||
| reasoning_end_arr = [False] * num_choices | ||
|
|
@@ -748,7 +764,7 @@ async def chat_completion_stream_generator( | |
| delta_message: DeltaMessage | None | ||
|
|
||
| # just update previous_texts and previous_token_ids | ||
| if tool_choice_auto or reasoning_parser: | ||
| if is_mistral_grammar_path or tool_choice_auto or reasoning_parser: | ||
| assert previous_texts is not None | ||
| assert all_previous_token_ids is not None | ||
| previous_text = previous_texts[i] | ||
|
|
@@ -772,6 +788,30 @@ async def chat_completion_stream_generator( | |
| ) | ||
| ) | ||
| harmony_tools_streamed[i] |= tools_streamed_flag | ||
| # Mistral grammar path: combined reasoning + tool streaming | ||
| elif is_mistral_grammar_path: | ||
| assert tool_parser is not None | ||
| assert isinstance(tool_parser, MistralToolParser) | ||
| assert reasoning_end_arr is not None | ||
| output_token_ids = as_list(output.token_ids) | ||
| result = tool_parser.extract_maybe_reasoning_and_tool_streaming( | ||
| reasoning_parser=reasoning_parser, | ||
| previous_text=previous_text, | ||
| current_text=current_text, | ||
| delta_text=delta_text, | ||
| previous_token_ids=previous_token_ids, | ||
| current_token_ids=current_token_ids, | ||
| output_token_ids=output_token_ids, | ||
| reasoning_ended=reasoning_end_arr[i], | ||
| prompt_is_reasoning_end=(prompt_is_reasoning_end_arr[i]), | ||
| request=request, | ||
| ) | ||
| delta_message = result.delta_message | ||
| reasoning_end_arr[i] = result.reasoning_ended | ||
| current_text = result.current_text | ||
| current_token_ids = result.current_token_ids | ||
| if result.tools_called: | ||
| tools_streamed[i] = True | ||
| # handle streaming deltas for tools with named tool_choice | ||
| elif tool_choice_function_name: | ||
| # When encountering think end id in prompt_token_ids | ||
|
|
@@ -925,7 +965,9 @@ async def chat_completion_stream_generator( | |
| delta_message = DeltaMessage(content=delta_text) | ||
|
|
||
| # update the previous values for the next iteration | ||
| if (tool_choice_auto or reasoning_parser) and not self.use_harmony: | ||
| if ( | ||
| is_mistral_grammar_path or tool_choice_auto or reasoning_parser | ||
| ) and not self.use_harmony: | ||
| assert previous_texts is not None | ||
| assert all_previous_token_ids is not None | ||
| previous_texts[i] = current_text | ||
|
|
@@ -1312,7 +1354,24 @@ async def chat_completion_full_generator( | |
| tool_call_class = ( | ||
| MistralToolCall if is_mistral_tokenizer(tokenizer) else ToolCall | ||
| ) | ||
| if (not self.enable_auto_tools or not self.tool_parser) and ( | ||
|
|
||
| use_mistral_tool_parser = request._grammar_from_tool_parser | ||
| if use_mistral_tool_parser: | ||
| tool_call_items = MistralToolParser.build_non_streaming_tool_calls( | ||
| tool_calls | ||
| ) | ||
| if tool_call_items: | ||
| auto_tools_called = ( | ||
| request.tool_choice is None or request.tool_choice == "auto" | ||
| ) | ||
| message = ChatMessage( | ||
| role=role, | ||
| reasoning=reasoning, | ||
| content=content, | ||
| tool_calls=tool_call_items, | ||
| ) | ||
|
|
||
| elif (not self.enable_auto_tools or not self.tool_parser) and ( | ||
| not isinstance(request.tool_choice, ChatCompletionNamedToolChoiceParam) | ||
| and request.tool_choice != "required" | ||
| ): | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This was introduced by #36238 but it was a bad idea because sometimes the model might want to try to reason so it forces it to be OOD.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Agree thanks!