diff --git a/requirements/common.txt b/requirements/common.txt
index 652738eebe74..8edea85957c7 100644
--- a/requirements/common.txt
+++ b/requirements/common.txt
@@ -31,7 +31,7 @@ partial-json-parser # used for parsing partial JSON outputs
 pyzmq >= 25.0.0
 msgspec
 gguf >= 0.17.0
-mistral_common[image] >= 1.11.0
+mistral_common[image] >= 1.11.2
 opencv-python-headless >= 4.13.0 # required for video IO
 pyyaml
 six>=1.16.0; python_version > '3.11' # transitive dependency of pandas that needs to be the latest version for python 3.12
diff --git a/requirements/test/cuda.in b/requirements/test/cuda.in
index 60be48b00be5..71e496ccf650 100644
--- a/requirements/test/cuda.in
+++ b/requirements/test/cuda.in
@@ -31,7 +31,7 @@ torchaudio==2.11.0
 torchvision==0.26.0
 transformers_stream_generator # required for qwen-vl test
 matplotlib # required for qwen-vl test
-mistral_common[image,audio] >= 1.11.0 # required for voxtral test
+mistral_common[image,audio] >= 1.11.2 # required for voxtral test
 num2words # required for smolvlm test
 open_clip_torch==2.32.0 # Required for nemotron_vl test, Nemotron Parse in test_common.py
 opencv-python-headless >= 4.13.0 # required for video test
diff --git a/requirements/test/cuda.txt b/requirements/test/cuda.txt
index 5c2a26d3dc92..8544f7b70969 100644
--- a/requirements/test/cuda.txt
+++ b/requirements/test/cuda.txt
@@ -412,7 +412,7 @@ mbstrdecoder==1.1.3
     #   typepy
 mdurl==0.1.2
     # via markdown-it-py
-mistral-common==1.11.0
+mistral-common==1.11.2
     # via
     #   -c requirements/common.txt
     #   -r requirements/test/cuda.in
diff --git a/requirements/test/nightly-torch.txt b/requirements/test/nightly-torch.txt
index 0c34cf012031..75928e088dab 100644
--- a/requirements/test/nightly-torch.txt
+++ b/requirements/test/nightly-torch.txt
@@ -23,7 +23,7 @@ jiwer # required for audio tests
 timm # required for internvl test
 transformers_stream_generator # required for qwen-vl test
 matplotlib # required for qwen-vl test
-mistral_common[image,audio] >= 1.11.0 # required for voxtral test
+mistral_common[image,audio] >= 1.11.2 # required for voxtral test
 num2words # required for smolvlm test
 opencv-python-headless >= 4.13.0 # required for video test
 datamodel_code_generator # required for minicpm3 test
diff --git a/requirements/test/rocm.in b/requirements/test/rocm.in
index cc04eabf09b2..105c9c3527d2 100644
--- a/requirements/test/rocm.in
+++ b/requirements/test/rocm.in
@@ -30,7 +30,7 @@ tblib # for pickling test exceptions
 timm>=1.0.17 # required for internvl and gemma3n-mm test
 transformers_stream_generator # required for qwen-vl test
 matplotlib # required for qwen-vl test
-mistral_common[image,audio]>=1.11.0 # required for voxtral test
+mistral_common[image,audio]>=1.11.2 # required for voxtral test
 num2words # required for smolvlm test
 open_clip_torch==2.32.0 # Required for nemotron_vl test, Nemotron Parse in test_common.py
 opencv-python-headless>=4.13.0 # required for video test
diff --git a/requirements/test/rocm.txt b/requirements/test/rocm.txt
index 8445634ded40..6c0f8accdeb1 100644
--- a/requirements/test/rocm.txt
+++ b/requirements/test/rocm.txt
@@ -509,7 +509,7 @@ mcp==1.27.0
     # via -r requirements/test/../common.txt
 mdurl==0.1.2
     # via markdown-it-py
-mistral-common==1.11.0
+mistral-common==1.11.2
     # via
     #   -c requirements/common.txt
     #   -r requirements/test/../common.txt
diff --git a/requirements/test/xpu.txt b/requirements/test/xpu.txt
index 601838f843f9..80b0c148116d 100644
--- a/requirements/test/xpu.txt
+++ b/requirements/test/xpu.txt
@@ -266,7 +266,7 @@ mbstrdecoder==1.1.4
     #   typepy
 mdurl==0.1.2
     # via markdown-it-py
-mistral-common==1.11.0
+mistral-common==1.11.2
     # via
     #   -c requirements/common.txt
     #   -r requirements/test/xpu.in
diff --git a/tests/tokenizers_/test_mistral.py b/tests/tokenizers_/test_mistral.py
index 2b101e8f98d9..2023337e8577 100644
--- a/tests/tokenizers_/test_mistral.py
+++ b/tests/tokenizers_/test_mistral.py
@@ -1,6 +1,7 @@
 # SPDX-License-Identifier: Apache-2.0
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
 
+import copy
 from typing import Any
 
 import llguidance
@@ -11,353 +12,34 @@
 from vllm.tokenizers.mistral import (
     MistralTokenizer,
-    _prepare_apply_chat_template_tools_and_messages,
+    _validate_apply_chat_template_args,
 )
 
 
-@pytest.mark.parametrize(
-    "openai_request,expected_mistral_output",
-    [
-        (
-            {
-                "messages": [
-                    {
-                        "role": "user",
-                        "content": "What is the current local date and time?",
-                    }
-                ],
-                "tools": [
-                    {
-                        "type": "function",
-                        "function": {
-                            "description": "Fetch the current local date and time.",
-                            "name": "get_current_time",
-                        },
-                    }
-                ],
-            },
-            (
-                [
-                    {
-                        "role": "user",
-                        "content": "What is the current local date and time?",
-                    }
-                ],
-                [
-                    {
-                        "type": "function",
-                        "function": {
-                            "description": "Fetch the current local date and time.",
-                            "name": "get_current_time",
-                            "parameters": {},
-                        },
-                    }
-                ],
-            ),
-        ),
-        (
-            {
-                "messages": [
-                    {
-                        "role": "user",
-                        "content": "What is the current local date and time?",
-                    }
-                ],
-                "tools": [
-                    {
-                        "type": "function",
-                        "function": {
-                            "description": "Fetch the current local date and time.",
-                            "name": "get_current_time",
-                            "parameters": {},
-                        },
-                    }
-                ],
-            },
-            (
-                [
-                    {
-                        "role": "user",
-                        "content": "What is the current local date and time?",
-                    }
-                ],
-                [
-                    {
-                        "type": "function",
-                        "function": {
-                            "description": "Fetch the current local date and time.",
-                            "name": "get_current_time",
-                            "parameters": {},
-                        },
-                    }
-                ],
-            ),
-        ),
-        (
-            {
-                "messages": [
-                    {
-                        "role": "user",
-                        "content": "What is the current local date and time?",
-                    }
-                ],
-                "tools": [
-                    {
-                        "type": "function",
-                        "function": {
-                            "description": "Fetch the current local date and time.",
-                            "unsupported_field": False,
-                            "name": "get_current_time",
-                            "parameters": {},
-                        },
-                    },
-                    {
-                        "type": "function",
-                        "function": {
-                            "description": "Fetch the current local date and time.",
-                            "unsupported_field2": False,
-                            "name": "get_current_time",
-                            "parameters": {},
-                        },
-                    },
-                ],
-            },
-            (
-                [
-                    {
-                        "role": "user",
-                        "content": "What is the current local date and time?",
-                    }
-                ],
-                [
-                    {
-                        "type": "function",
-                        "function": {
-                            "description": "Fetch the current local date and time.",
-                            "name": "get_current_time",
-                            "parameters": {},
-                        },
-                    },
-                    {
-                        "type": "function",
-                        "function": {
-                            "description": "Fetch the current local date and time.",
-                            "name": "get_current_time",
-                            "parameters": {},
-                        },
-                    },
-                ],
-            ),
-        ),
-        (
-            {
-                "messages": [
-                    {
-                        "role": "user",
-                        "content": "What is the current local date and time?",
-                    }
-                ],
-                "tools": [
-                    {
-                        "type": "function",
-                        "unsupported_field": False,
-                        "function": {
-                            "description": "Fetch the current local date and time.",
-                            "name": "get_current_time",
-                            "parameters": {},
-                        },
-                    },
-                    {
-                        "type": "function",
-                        "unsupported_field2": False,
-                        "function": {
-                            "description": "Fetch the current local date and time 2.",
-                            "name": "get_current_time2",
-                            "parameters": {"a": "1"},
-                        },
-                    },
-                ],
-            },
-            (
-                [
-                    {
-                        "role": "user",
-                        "content": "What is the current local date and time?",
-                    }
-                ],
-                [
-                    {
-                        "type": "function",
-                        "function": {
-                            "description": "Fetch the current local date and time.",
-                            "name": "get_current_time",
-                            "parameters": {},
-                        },
-                    },
-                    {
-                        "type": "function",
-                        "function": {
-                            "description": "Fetch the current local date and time 2.",
-                            "name": "get_current_time2",
-                            "parameters": {"a": "1"},
-                        },
-                    },
-                ],
-            ),
-        ),
-    ],
-)
-def test_prepare_apply_chat_template_tools_and_messages(
-    openai_request, expected_mistral_output
-):
-    actual_request = _prepare_apply_chat_template_tools_and_messages(
-        openai_request["messages"], openai_request["tools"]
-    )
-    assert actual_request == expected_mistral_output
-
-
-# Tool use with list content and reasoning
-@pytest.mark.parametrize(
-    "openai_request,expected_mistral_output",
-    [
-        (
-            {
-                "messages": [
-                    {
-                        "role": "user",
-                        "content": "What's the weather in Paris?",
-                    },
-                    {
-                        "role": "assistant",
-                        "reasoning": None,
-                        "content": None,
-                        "tool_calls": [
-                            {
-                                "id": "call123",
-                                "type": "function",
-                                "function": {
-                                    "name": "get_weather",
-                                    "arguments": '{"city": "Paris"}',
-                                },
-                            }
-                        ],
-                    },
-                    {
-                        "role": "tool",
-                        "content": [{"type": "text", "text": "Rainy"}],
-                        "name": "get_weather",
-                        "tool_call_id": "call123",
-                    },
-                ],
-                "tools": [
-                    {
-                        "type": "function",
-                        "function": {
-                            "name": "get_weather",
-                            "description": "Gets the current weather in a city.",
-                            "parameters": {
-                                "type": "object",
-                                "properties": {
-                                    "city": {
-                                        "type": "string",
-                                        "description": "The city name",
-                                    }
-                                },
-                                "required": ["city"],
-                            },
-                        },
-                    }
-                ],
-            },
-            (
-                [
-                    {
-                        "role": "user",
-                        "content": "What's the weather in Paris?",
-                    },
-                    {
-                        "role": "assistant",
-                        "content": None,
-                        "tool_calls": [
-                            {
-                                "id": "call123",
-                                "type": "function",
-                                "function": {
-                                    "name": "get_weather",
-                                    "arguments": '{"city": "Paris"}',
-                                },
-                            }
-                        ],
-                    },
-                    {
-                        "role": "tool",
-                        "content": [{"type": "text", "text": "Rainy"}],
-                        "name": "get_weather",
-                        "tool_call_id": "call123",
-                    },
-                ],
-                [
-                    {
-                        "type": "function",
-                        "function": {
-                            "name": "get_weather",
-                            "description": "Gets the current weather in a city.",
-                            "parameters": {
-                                "type": "object",
-                                "properties": {
-                                    "city": {
-                                        "type": "string",
-                                        "description": "The city name",
-                                    }
-                                },
-                                "required": ["city"],
-                            },
-                        },
-                    }
-                ],
-            ),
-        )
-    ],
-)
-def test_prepare_apply_chat_template_tools_and_messages_list_content(
-    openai_request, expected_mistral_output
-):
-    actual_request = _prepare_apply_chat_template_tools_and_messages(
-        openai_request["messages"], openai_request["tools"]
-    )
-    assert actual_request == expected_mistral_output
-
-
-def test_prepare_apply_chat_template_generation_prompt_and_continue():
+def test_validate_apply_chat_template_args():
+    # add_generation_prompt with assistant last message → error
     messages = [{"role": "assistant", "content": "Hello"}]
-    tools: list[dict[str, Any]] = []
     with pytest.raises(ValueError):
-        _prepare_apply_chat_template_tools_and_messages(
-            messages, tools, add_generation_prompt=True
-        )
+        _validate_apply_chat_template_args(messages, add_generation_prompt=True)
 
+    # add_generation_prompt with user last message → ok
     messages = [{"role": "user", "content": "Hello"}]
-    out_messages, _ = _prepare_apply_chat_template_tools_and_messages(
-        messages, tools, add_generation_prompt=True
-    )
-    assert out_messages == [{"role": "user", "content": "Hello"}]
+    _validate_apply_chat_template_args(messages, add_generation_prompt=True)
 
+    # both add_generation_prompt and continue_final_message → error
     with pytest.raises(ValueError):
-        _prepare_apply_chat_template_tools_and_messages(
-            messages, tools, add_generation_prompt=True, continue_final_message=True
+        _validate_apply_chat_template_args(
+            messages, add_generation_prompt=True, continue_final_message=True
         )
 
+    # continue_final_message with assistant last message → ok
     messages = [{"role": "assistant", "content": "Hello"}]
-    out_messages, _ = _prepare_apply_chat_template_tools_and_messages(
-        messages, tools, add_generation_prompt=False, continue_final_message=True
-    )
-    assert out_messages == [{"role": "assistant", "content": "Hello"}]
+    _validate_apply_chat_template_args(messages, continue_final_message=True)
 
+    # continue_final_message with user last message → error
     messages = [{"role": "user", "content": "Hello"}]
     with pytest.raises(ValueError):
-        _prepare_apply_chat_template_tools_and_messages(
-            messages, tools, add_generation_prompt=False, continue_final_message=True
-        )
+        _validate_apply_chat_template_args(messages, continue_final_message=True)
 
 
 @pytest.fixture(scope="module")
@@ -2435,3 +2117,120 @@ def test_llg_tokenizer(self, mistral_tokenizer: MistralTokenizer) -> None:
         # Test caching
         llg_tokenizer_2 = mistral_tokenizer.llg_tokenizer
         assert llg_tokenizer is llg_tokenizer_2
+
+    @pytest.mark.parametrize(
+        "messages,tools,tekken_expected_substrings,spm_expected_substrings",
+        [
+            pytest.param(
+                [{"role": "user", "content": "Hello"}],
+                [{"type": "function", "function": {"name": "do_nothing"}}],
+                ["do_nothing", '"description": ""', '"parameters": {}'],
+                ["do_nothing", '"description":▁""', '"parameters":▁{}'],
+                id="tool_without_description_and_parameters",
+            ),
+            pytest.param(
+                [
+                    {"role": "user", "content": "Do nothing"},
+                    {
+                        "role": "assistant",
+                        "content": "",
+                        "tool_calls": [
+                            {
+                                "id": "123456789",
+                                "type": "function",
+                                "function": {
+                                    "name": "do_nothing",
+                                    "arguments": None,
+                                },
+                            }
+                        ],
+                    },
+                    {
+                        "role": "tool",
+                        "tool_call_id": "123456789",
+                        "content": "done",
+                    },
+                ],
+                [{"type": "function", "function": {"name": "do_nothing"}}],
+                ["do_nothing"],
+                ["do_nothing"],
+                id="tool_call_with_none_arguments",
+            ),
+        ],
+    )
+    def test_apply_chat_template_tool_optional_fields(
+        self,
+        mistral_tokenizer: MistralTokenizer,
+        messages: list[dict[str, Any]],
+        tools: list[dict[str, Any]],
+        tekken_expected_substrings: list[str],
+        spm_expected_substrings: list[str],
+    ) -> None:
+        output = mistral_tokenizer.apply_chat_template(
+            messages, tools=tools, add_generation_prompt=True
+        )
+        decoded = mistral_tokenizer.tokenizer.decode(output, SpecialTokenPolicy.KEEP)
+
+        expected = (
+            tekken_expected_substrings
+            if mistral_tokenizer.is_tekken
+            else spm_expected_substrings
+        )
+        for substring in expected:
+            assert substring in decoded
+
+    def test_apply_chat_template_tools_not_mutated(
+        self, mistral_tokenizer: MistralTokenizer
+    ) -> None:
+        messages: list[dict[str, Any]] = [
+            {"role": "user", "content": "Hello"},
+        ]
+        tools: list[dict[str, Any]] = [
+            {
+                "type": "function",
+                "function": {
+                    "name": "get_weather",
+                    "description": "Gets weather.",
+                    "parameters": {
+                        "type": "object",
+                        "properties": {
+                            "city": {"type": "string"},
+                        },
+                    },
+                },
+            },
+        ]
+        original_tools = copy.deepcopy(tools)
+
+        mistral_tokenizer.apply_chat_template(
+            messages, tools=tools, add_generation_prompt=True
+        )
+
+        assert tools == original_tools
+
+    @pytest.mark.parametrize(
+        "reasoning_key",
+        ["reasoning", "reasoning_content"],
+    )
+    def test_apply_chat_template_reasoning_assistant(
+        self, mistral_tokenizer: MistralTokenizer, reasoning_key: str
+    ) -> None:
+        if not mistral_tokenizer.is_tekken:
+            pytest.skip("Reasoning tokens only supported on tekken tokenizers")
+
+        messages: list[dict[str, Any]] = [
+            {"role": "user", "content": "What is 2+2?"},
+            {
+                "role": "assistant",
+                "content": "4",
+                reasoning_key: "2+2 equals 4",
+            },
+            {"role": "user", "content": "Are you sure?"},
+        ]
+
+        output = mistral_tokenizer.apply_chat_template(
+            messages, add_generation_prompt=True
+        )
+        decoded = mistral_tokenizer.tokenizer.decode(output, SpecialTokenPolicy.KEEP)
+
+        assert "[THINK]2+2 equals 4[/THINK]" in decoded
diff --git a/vllm/tokenizers/mistral.py b/vllm/tokenizers/mistral.py
index ef58b1b75d68..8fce690433ef 100644
--- a/vllm/tokenizers/mistral.py
+++ b/vllm/tokenizers/mistral.py
@@ -13,7 +13,6 @@
 from mistral_common.protocol.instruct.request import (
     ReasoningEffort,
 )
-from mistral_common.protocol.instruct.tool_calls import Function, Tool
 from mistral_common.protocol.instruct.validator import ValidationMode
 from mistral_common.tokens.tokenizers.base import (
     SpecialTokenPolicy,
@@ -68,36 +67,6 @@ def _pop_unallowed_keys_and_warn(
     )
 
 
-# TODO(juliendenize): remove this once OpenAI API is better supported by
-# `mistral-common`.
-def adapt_inplace_to_mistral_tool(
-    tool: dict[str, Any],
-) -> dict[str, Any]:
-    tools_fields = set(Tool.model_fields.keys())
-    function_fields = set(Function.model_fields.keys())
-
-    # The Mistral client, in comparison to the OpenAI client, requires the
-    # "parameters" dict and the "description" string to be present
-    # even if they are empty.
-    if function := tool.get("function"):
-        if function.get("parameters") is None:
-            function["parameters"] = {}
-        if function.get("description") is None:
-            function["description"] = ""
-
-        _pop_unallowed_keys_and_warn(
-            dictionary=function,
-            allowed_keys=function_fields,
-            err_dict_name="function",
-        )
-
-    _pop_unallowed_keys_and_warn(
-        dictionary=tool, allowed_keys=tools_fields, err_dict_name="tools"
-    )
-
-    return tool
-
-
 def maybe_serialize_tool_calls(request: "MistralChatCompletionRequest"):
     # SEE: https://github.com/vllm-project/vllm/pull/9951
     # Credits go to: @gcalmettes
@@ -167,12 +136,11 @@ def truncate_tool_call_ids(request: "MistralChatCompletionRequest"):
             request.messages[i]["tool_call_id"] = tool_call_id
 
 
-def _prepare_apply_chat_template_tools_and_messages(
+def _validate_apply_chat_template_args(
     messages: list["ChatCompletionMessageParam"],
-    tools: list[dict[str, Any]] | None = None,
     continue_final_message: bool = False,
     add_generation_prompt: bool = False,
-) -> tuple[list["ChatCompletionMessageParam"], list[dict[str, Any]] | None]:
+) -> None:
     if add_generation_prompt and continue_final_message:
         raise ValueError(
             "Cannot set both `add_generation_prompt` and "
@@ -196,21 +164,6 @@ def _prepare_apply_chat_template_tools_and_messages(
             "the last message is not from the assistant."
         )
 
-    # mistral-common requires AssistantMessage content to be string [1].
-    #
-    # [1]: https://github.com/mistralai/mistral-common/blob/f4a06998b75ed78bbf5aaf569590b772ea26c9f6/src/mistral_common/protocol/instruct/messages.py#L80
-    for message in messages:
-        # Remove reasoning as unsupported by Mistral
-        _ = message.pop("reasoning", None)  # type: ignore
-
-    tools = (
-        [adapt_inplace_to_mistral_tool(tool=tool) for tool in tools]
-        if tools is not None
-        else None
-    )
-
-    return messages, tools
-
 
 def validate_request_params(request: "ChatCompletionRequest"):
     if request.chat_template is not None or request.chat_template_kwargs is not None:
@@ -449,8 +402,8 @@ def apply_chat_template(
         if self.version >= 15:
             version_kwargs["reasoning_effort"] = kwargs.get("reasoning_effort")
 
-        messages, tools = _prepare_apply_chat_template_tools_and_messages(
-            messages, tools, continue_final_message, add_generation_prompt
+        _validate_apply_chat_template_args(
+            messages, continue_final_message, add_generation_prompt
         )
 
         return self.transformers_tokenizer.apply_chat_template(
diff --git a/vllm/tool_parsers/mistral_tool_parser.py b/vllm/tool_parsers/mistral_tool_parser.py
index 945820ed9ae6..0a057a3af468 100644
--- a/vllm/tool_parsers/mistral_tool_parser.py
+++ b/vllm/tool_parsers/mistral_tool_parser.py
@@ -43,7 +43,7 @@
 from vllm.reasoning.mistral_reasoning_parser import MistralReasoningParser
 from vllm.sampling_params import StructuredOutputsParams
 from vllm.tokenizers import TokenizerLike
-from vllm.tokenizers.mistral import MistralTokenizer, adapt_inplace_to_mistral_tool
+from vllm.tokenizers.mistral import MistralTokenizer
 from vllm.tool_parsers.abstract_tool_parser import (
     Tool,
     ToolParser,
@@ -241,12 +241,7 @@ def adjust_request(
         )
 
         mistral_tools = (
-            [
-                MistralTool.model_validate(
-                    adapt_inplace_to_mistral_tool(tool.model_dump())
-                )
-                for tool in request.tools
-            ]
+            [MistralTool.from_openai(tool.model_dump()) for tool in request.tools]
             if request.tools is not None
             else None
         )