diff --git a/vllm/entrypoints/openai/chat_completion/serving.py b/vllm/entrypoints/openai/chat_completion/serving.py
index 62a0192e7b7..a005fff46c2 100644
--- a/vllm/entrypoints/openai/chat_completion/serving.py
+++ b/vllm/entrypoints/openai/chat_completion/serving.py
@@ -565,7 +565,7 @@ async def chat_completion_stream_generator(
)
tool_parsers: list[ToolParser | None] = [
- self.tool_parser(tokenizer)
+ self.tool_parser(tokenizer, request.tools)
] * num_choices
else:
tool_parsers = [None] * num_choices
@@ -1331,7 +1331,7 @@ async def chat_completion_full_generator(
"Tokenizer not available when `skip_tokenizer_init=True`"
)
- tool_parser = self.tool_parser(tokenizer)
+ tool_parser = self.tool_parser(tokenizer, request.tools)
# NOTE: We use token_ids for openai tool parser
tool_call_info = tool_parser.extract_tool_calls(
"",
diff --git a/vllm/entrypoints/openai/engine/serving.py b/vllm/entrypoints/openai/engine/serving.py
index c19910c51cc..5fcd9919f49 100644
--- a/vllm/entrypoints/openai/engine/serving.py
+++ b/vllm/entrypoints/openai/engine/serving.py
@@ -929,7 +929,7 @@ def _parse_tool_calls_from_content(
# Automatic Tool Call Parsing
try:
- tool_parser = tool_parser_cls(tokenizer)
+ tool_parser = tool_parser_cls(tokenizer, request.tools)
except RuntimeError as e:
logger.exception("Error in tool parser creation.")
raise e
diff --git a/vllm/entrypoints/openai/parser/responses_parser.py b/vllm/entrypoints/openai/parser/responses_parser.py
index e3d7c588acb..a31f20501e0 100644
--- a/vllm/entrypoints/openai/parser/responses_parser.py
+++ b/vllm/entrypoints/openai/parser/responses_parser.py
@@ -52,7 +52,7 @@ def __init__(
self.reasoning_parser_instance = reasoning_parser_cls(tokenizer)
self.tool_parser_instance = None
if tool_parser_cls is not None:
- self.tool_parser_instance = tool_parser_cls(tokenizer)
+ self.tool_parser_instance = tool_parser_cls(tokenizer, request.tools)
# Store the last finish_reason to determine response status
self.finish_reason: str | None = None
diff --git a/vllm/entrypoints/openai/responses/serving.py b/vllm/entrypoints/openai/responses/serving.py
index a130d3686c8..98ff3199f59 100644
--- a/vllm/entrypoints/openai/responses/serving.py
+++ b/vllm/entrypoints/openai/responses/serving.py
@@ -1348,7 +1348,7 @@ async def _process_simple_streaming_events(
reasoning_parser = self.parser.reasoning_parser_cls(tokenizer)
tool_parser = None
if self.parser and self.parser.tool_parser_cls:
- tool_parser = self.parser.tool_parser_cls(tokenizer)
+ tool_parser = self.parser.tool_parser_cls(tokenizer, request.tools)
reasoning_ended = False
tool_call_text_started = False
previous_text = ""
diff --git a/vllm/entrypoints/serve/render/serving.py b/vllm/entrypoints/serve/render/serving.py
index a6d2f5040de..7f590a897f8 100644
--- a/vllm/entrypoints/serve/render/serving.py
+++ b/vllm/entrypoints/serve/render/serving.py
@@ -544,6 +544,8 @@ async def preprocess_chat(
)
raise NotImplementedError(msg)
tokenizer = renderer.get_tokenizer()
- request = tool_parser(tokenizer).adjust_request(request=request) # type: ignore[arg-type]
+ request = tool_parser(tokenizer, request.tools).adjust_request(
+ request=request # type: ignore[arg-type]
+ )
return conversation, [engine_prompt]
diff --git a/vllm/tool_parsers/abstract_tool_parser.py b/vllm/tool_parsers/abstract_tool_parser.py
index a2c2f062788..dcfe45d388f 100644
--- a/vllm/tool_parsers/abstract_tool_parser.py
+++ b/vllm/tool_parsers/abstract_tool_parser.py
@@ -5,13 +5,18 @@
import os
from collections.abc import Callable, Sequence
from functools import cached_property
+from typing import TypeAlias
from openai.types.responses import (
ResponseFormatTextJSONSchemaConfig,
ResponseTextConfig,
)
+from openai.types.responses.tool import Tool as ResponsesTool
-from vllm.entrypoints.openai.chat_completion.protocol import ChatCompletionRequest
+from vllm.entrypoints.openai.chat_completion.protocol import (
+ ChatCompletionRequest,
+ ChatCompletionToolsParam,
+)
from vllm.entrypoints.openai.engine.protocol import (
DeltaMessage,
ExtractedToolCallInformation,
@@ -30,6 +35,8 @@
logger = init_logger(__name__)
+Tool: TypeAlias = ChatCompletionToolsParam | ResponsesTool
+
class ToolParser:
"""
@@ -38,7 +45,11 @@ class ToolParser:
derived classes.
"""
- def __init__(self, tokenizer: TokenizerLike):
+ def __init__(
+ self,
+ tokenizer: TokenizerLike,
+ tools: list[Tool] | None = None,
+ ):
self.prev_tool_call_arr: list[dict] = []
# the index of the tool call that is currently being parsed
self.current_tool_id: int = -1
@@ -46,6 +57,7 @@ def __init__(self, tokenizer: TokenizerLike):
self.streamed_args_for_tool: list[str] = []
self.model_tokenizer = tokenizer
+ self.tools = tools
@cached_property
def vocab(self) -> dict[str, int]:
diff --git a/vllm/tool_parsers/deepseekv31_tool_parser.py b/vllm/tool_parsers/deepseekv31_tool_parser.py
index ad42bb7713c..e4ade3aae98 100644
--- a/vllm/tool_parsers/deepseekv31_tool_parser.py
+++ b/vllm/tool_parsers/deepseekv31_tool_parser.py
@@ -19,14 +19,14 @@
)
from vllm.logger import init_logger
from vllm.tokenizers import TokenizerLike
-from vllm.tool_parsers.abstract_tool_parser import ToolParser
+from vllm.tool_parsers.abstract_tool_parser import Tool, ToolParser
logger = init_logger(__name__)
class DeepSeekV31ToolParser(ToolParser):
- def __init__(self, tokenizer: TokenizerLike):
- super().__init__(tokenizer)
+ def __init__(self, tokenizer: TokenizerLike, tools: list[Tool] | None = None):
+ super().__init__(tokenizer, tools)
self.current_tool_name_sent: bool = False
self.prev_tool_call_arr: list[dict] = []
diff --git a/vllm/tool_parsers/deepseekv32_tool_parser.py b/vllm/tool_parsers/deepseekv32_tool_parser.py
index cb39a16fd92..e8692994479 100644
--- a/vllm/tool_parsers/deepseekv32_tool_parser.py
+++ b/vllm/tool_parsers/deepseekv32_tool_parser.py
@@ -22,6 +22,7 @@
from vllm.logger import init_logger
from vllm.tokenizers import TokenizerLike
from vllm.tool_parsers.abstract_tool_parser import (
+ Tool,
ToolParser,
)
@@ -43,8 +44,8 @@ class DeepSeekV32ToolParser(ToolParser):
|DSML|function_calls>
"""
- def __init__(self, tokenizer: TokenizerLike):
- super().__init__(tokenizer)
+ def __init__(self, tokenizer: TokenizerLike, tools: list[Tool] | None = None):
+ super().__init__(tokenizer, tools)
self.prev_tool_call_arr: list[dict] = []
diff --git a/vllm/tool_parsers/deepseekv3_tool_parser.py b/vllm/tool_parsers/deepseekv3_tool_parser.py
index 83bba1c878e..e92af87e604 100644
--- a/vllm/tool_parsers/deepseekv3_tool_parser.py
+++ b/vllm/tool_parsers/deepseekv3_tool_parser.py
@@ -20,6 +20,7 @@
from vllm.logger import init_logger
from vllm.tokenizers import TokenizerLike
from vllm.tool_parsers.abstract_tool_parser import (
+ Tool,
ToolParser,
)
@@ -27,8 +28,8 @@
class DeepSeekV3ToolParser(ToolParser):
- def __init__(self, tokenizer: TokenizerLike):
- super().__init__(tokenizer)
+ def __init__(self, tokenizer: TokenizerLike, tools: list[Tool] | None = None):
+ super().__init__(tokenizer, tools)
self.current_tool_name_sent: bool = False
self.prev_tool_call_arr: list[dict] = []
diff --git a/vllm/tool_parsers/ernie45_tool_parser.py b/vllm/tool_parsers/ernie45_tool_parser.py
index d5dc7a3da3c..9722dddf734 100644
--- a/vllm/tool_parsers/ernie45_tool_parser.py
+++ b/vllm/tool_parsers/ernie45_tool_parser.py
@@ -20,6 +20,7 @@
from vllm.logger import init_logger
from vllm.tokenizers import TokenizerLike
from vllm.tool_parsers.abstract_tool_parser import (
+ Tool,
ToolParser,
)
@@ -27,12 +28,12 @@
class Ernie45ToolParser(ToolParser):
- def __init__(self, tokenizer: TokenizerLike):
+ def __init__(self, tokenizer: TokenizerLike, tools: list[Tool] | None = None):
"""
Ernie thinking model format:
abc\n\n\n\n\ndef\n\n
"""
- super().__init__(tokenizer)
+ super().__init__(tokenizer, tools)
self.current_tool_name_sent = False
self.prev_tool_call_arr: list[dict] = []
self.current_tool_id = -1
diff --git a/vllm/tool_parsers/functiongemma_tool_parser.py b/vllm/tool_parsers/functiongemma_tool_parser.py
index 599019b1b29..dfd91d97431 100644
--- a/vllm/tool_parsers/functiongemma_tool_parser.py
+++ b/vllm/tool_parsers/functiongemma_tool_parser.py
@@ -20,7 +20,7 @@
)
from vllm.logger import init_logger
from vllm.tokenizers import TokenizerLike
-from vllm.tool_parsers.abstract_tool_parser import ToolParser
+from vllm.tool_parsers.abstract_tool_parser import Tool, ToolParser
logger = init_logger(__name__)
@@ -33,8 +33,8 @@ class FunctionGemmaToolParser(ToolParser):
call:func_name{param:value}
"""
- def __init__(self, tokenizer: TokenizerLike):
- super().__init__(tokenizer)
+ def __init__(self, tokenizer: TokenizerLike, tools: list[Tool] | None = None):
+ super().__init__(tokenizer, tools)
# Streaming state
self.current_tool_name_sent: bool = False
diff --git a/vllm/tool_parsers/gigachat3_tool_parser.py b/vllm/tool_parsers/gigachat3_tool_parser.py
index 90928f9aefe..f470f6a5b28 100644
--- a/vllm/tool_parsers/gigachat3_tool_parser.py
+++ b/vllm/tool_parsers/gigachat3_tool_parser.py
@@ -20,7 +20,7 @@
)
from vllm.logger import init_logger
from vllm.tokenizers import TokenizerLike
-from vllm.tool_parsers.abstract_tool_parser import ToolParser
+from vllm.tool_parsers.abstract_tool_parser import Tool, ToolParser
logger = init_logger(__name__)
@@ -46,8 +46,8 @@
class GigaChat3ToolParser(ToolParser):
- def __init__(self, tokenizer: TokenizerLike):
- super().__init__(tokenizer)
+ def __init__(self, tokenizer: TokenizerLike, tools: list[Tool] | None = None):
+ super().__init__(tokenizer, tools)
self.tool_started: bool = False
self.tool_name_sent: bool = False
self.tool_id: str | None = None
diff --git a/vllm/tool_parsers/glm47_moe_tool_parser.py b/vllm/tool_parsers/glm47_moe_tool_parser.py
index 8c72342d713..765d6d37de1 100644
--- a/vllm/tool_parsers/glm47_moe_tool_parser.py
+++ b/vllm/tool_parsers/glm47_moe_tool_parser.py
@@ -16,14 +16,15 @@
from vllm.logger import init_logger
from vllm.tokenizers import TokenizerLike
+from vllm.tool_parsers.abstract_tool_parser import Tool
from vllm.tool_parsers.glm4_moe_tool_parser import Glm4MoeModelToolParser
logger = init_logger(__name__)
class Glm47MoeModelToolParser(Glm4MoeModelToolParser):
- def __init__(self, tokenizer: TokenizerLike):
- super().__init__(tokenizer)
+ def __init__(self, tokenizer: TokenizerLike, tools: list[Tool] | None = None):
+ super().__init__(tokenizer, tools)
# GLM-4.7 format: func_name[...]*
# The function name can be followed by a newline, whitespace, or
# directly by tags (no separator). The arg section is
diff --git a/vllm/tool_parsers/glm4_moe_tool_parser.py b/vllm/tool_parsers/glm4_moe_tool_parser.py
index 28d86b68bec..fc718921d5c 100644
--- a/vllm/tool_parsers/glm4_moe_tool_parser.py
+++ b/vllm/tool_parsers/glm4_moe_tool_parser.py
@@ -21,7 +21,6 @@
from vllm.entrypoints.chat_utils import make_tool_call_id
from vllm.entrypoints.openai.chat_completion.protocol import (
ChatCompletionRequest,
- ChatCompletionToolsParam,
)
from vllm.entrypoints.openai.engine.protocol import (
DeltaFunctionCall,
@@ -34,6 +33,7 @@
from vllm.logger import init_logger
from vllm.tokenizers import TokenizerLike
from vllm.tool_parsers.abstract_tool_parser import (
+ Tool,
ToolParser,
)
@@ -48,8 +48,8 @@ class Glm4MoeModelToolParser(ToolParser):
rather than waiting for the complete tag.
"""
- def __init__(self, tokenizer: TokenizerLike):
- super().__init__(tokenizer)
+ def __init__(self, tokenizer: TokenizerLike, tools: list[Tool] | None = None):
+ super().__init__(tokenizer, tools)
# Stateful streaming fields
self.current_tool_name_sent: bool = False
self.prev_tool_call_arr: list[dict[str, Any]] = []
@@ -122,7 +122,7 @@ def _json_escape_string_content(s: str) -> str:
def _is_string_type(
tool_name: str,
arg_name: str,
- tools: list[ChatCompletionToolsParam] | None,
+ tools: list[Tool] | None,
) -> bool:
if tools is None:
return False
diff --git a/vllm/tool_parsers/granite4_tool_parser.py b/vllm/tool_parsers/granite4_tool_parser.py
index 693c4dc8f34..3d58690f592 100644
--- a/vllm/tool_parsers/granite4_tool_parser.py
+++ b/vllm/tool_parsers/granite4_tool_parser.py
@@ -22,6 +22,7 @@
from vllm.logger import init_logger
from vllm.tokenizers import TokenizerLike
from vllm.tool_parsers.abstract_tool_parser import (
+ Tool,
ToolParser,
)
@@ -43,8 +44,8 @@ def __init__(self, *, name: str, arguments: str | None): ...
class Granite4ToolParser(ToolParser):
- def __init__(self, tokenizer: TokenizerLike):
- super().__init__(tokenizer)
+ def __init__(self, tokenizer: TokenizerLike, tools: list[Tool] | None = None):
+ super().__init__(tokenizer, tools)
self.prev_tool_call_arr: list[dict] = []
self.current_tool_id: int = -1
diff --git a/vllm/tool_parsers/granite_20b_fc_tool_parser.py b/vllm/tool_parsers/granite_20b_fc_tool_parser.py
index 7fe3c39f70c..6d217a03056 100644
--- a/vllm/tool_parsers/granite_20b_fc_tool_parser.py
+++ b/vllm/tool_parsers/granite_20b_fc_tool_parser.py
@@ -24,6 +24,7 @@
from vllm.logger import init_logger
from vllm.tokenizers import TokenizerLike
from vllm.tool_parsers.abstract_tool_parser import (
+ Tool,
ToolParser,
)
from vllm.tool_parsers.utils import (
@@ -46,8 +47,8 @@ class Granite20bFCToolParser(ToolParser):
are all set
"""
- def __init__(self, tokenizer: TokenizerLike):
- super().__init__(tokenizer)
+ def __init__(self, tokenizer: TokenizerLike, tools: list[Tool] | None = None):
+ super().__init__(tokenizer, tools)
self.bot_token = ""
self.tool_start_token = self.bot_token
diff --git a/vllm/tool_parsers/granite_tool_parser.py b/vllm/tool_parsers/granite_tool_parser.py
index 7cad01e1643..d586db32670 100644
--- a/vllm/tool_parsers/granite_tool_parser.py
+++ b/vllm/tool_parsers/granite_tool_parser.py
@@ -22,6 +22,7 @@
from vllm.logger import init_logger
from vllm.tokenizers import TokenizerLike
from vllm.tool_parsers.abstract_tool_parser import (
+ Tool,
ToolParser,
)
from vllm.tool_parsers.utils import (
@@ -44,8 +45,8 @@ class GraniteToolParser(ToolParser):
are all set
"""
- def __init__(self, tokenizer: TokenizerLike):
- super().__init__(tokenizer)
+ def __init__(self, tokenizer: TokenizerLike, tools: list[Tool] | None = None):
+ super().__init__(tokenizer, tools)
# for granite 3.0, the token `<|tool_call|>`
self.bot_token = "<|tool_call|>"
# for granite 3.1, the string `<tool_call>`
diff --git a/vllm/tool_parsers/hermes_tool_parser.py b/vllm/tool_parsers/hermes_tool_parser.py
index 5bde5b2c07a..cca2bf9a059 100644
--- a/vllm/tool_parsers/hermes_tool_parser.py
+++ b/vllm/tool_parsers/hermes_tool_parser.py
@@ -23,6 +23,7 @@
from vllm.logger import init_logger
from vllm.tokenizers import TokenizerLike
from vllm.tool_parsers.abstract_tool_parser import (
+ Tool,
ToolParser,
)
from vllm.utils.mistral import is_mistral_tokenizer
@@ -31,8 +32,8 @@
class Hermes2ProToolParser(ToolParser):
- def __init__(self, tokenizer: TokenizerLike):
- super().__init__(tokenizer)
+ def __init__(self, tokenizer: TokenizerLike, tools: list[Tool] | None = None):
+ super().__init__(tokenizer, tools)
if is_mistral_tokenizer(tokenizer):
logger.error("Detected Mistral tokenizer when using a Hermes model")
diff --git a/vllm/tool_parsers/hunyuan_a13b_tool_parser.py b/vllm/tool_parsers/hunyuan_a13b_tool_parser.py
index 4f446bfcce9..29b2a5eae27 100644
--- a/vllm/tool_parsers/hunyuan_a13b_tool_parser.py
+++ b/vllm/tool_parsers/hunyuan_a13b_tool_parser.py
@@ -22,6 +22,7 @@
from vllm.logger import init_logger
from vllm.tokenizers import TokenizerLike
from vllm.tool_parsers.abstract_tool_parser import (
+ Tool,
ToolParser,
)
from vllm.tool_parsers.utils import consume_space
@@ -31,8 +32,8 @@
class HunyuanA13BToolParser(ToolParser):
- def __init__(self, tokenizer: TokenizerLike):
- super().__init__(tokenizer)
+ def __init__(self, tokenizer: TokenizerLike, tools: list[Tool] | None = None):
+ super().__init__(tokenizer, tools)
# Initialize state for streaming mode
self.prev_tool_calls: list[dict] = []
diff --git a/vllm/tool_parsers/internlm2_tool_parser.py b/vllm/tool_parsers/internlm2_tool_parser.py
index 3b858f34c20..fc7c44cff9e 100644
--- a/vllm/tool_parsers/internlm2_tool_parser.py
+++ b/vllm/tool_parsers/internlm2_tool_parser.py
@@ -22,6 +22,7 @@
from vllm.logger import init_logger
from vllm.tokenizers import TokenizerLike
from vllm.tool_parsers.abstract_tool_parser import (
+ Tool,
ToolParser,
)
from vllm.tool_parsers.utils import extract_intermediate_diff
@@ -30,8 +31,8 @@
class Internlm2ToolParser(ToolParser):
- def __init__(self, tokenizer: TokenizerLike):
- super().__init__(tokenizer)
+ def __init__(self, tokenizer: TokenizerLike, tools: list[Tool] | None = None):
+ super().__init__(tokenizer, tools)
self.position = 0
def adjust_request(self, request: ChatCompletionRequest) -> ChatCompletionRequest:
diff --git a/vllm/tool_parsers/jamba_tool_parser.py b/vllm/tool_parsers/jamba_tool_parser.py
index 98293a4c17c..5a9af99109c 100644
--- a/vllm/tool_parsers/jamba_tool_parser.py
+++ b/vllm/tool_parsers/jamba_tool_parser.py
@@ -22,7 +22,7 @@
)
from vllm.logger import init_logger
from vllm.tokenizers import TokenizerLike
-from vllm.tool_parsers import ToolParser
+from vllm.tool_parsers.abstract_tool_parser import Tool, ToolParser
from vllm.tool_parsers.utils import extract_intermediate_diff
from vllm.utils.mistral import is_mistral_tokenizer
@@ -30,8 +30,8 @@
class JambaToolParser(ToolParser):
- def __init__(self, tokenizer: TokenizerLike):
- super().__init__(tokenizer)
+ def __init__(self, tokenizer: TokenizerLike, tools: list[Tool] | None = None):
+ super().__init__(tokenizer, tools)
if is_mistral_tokenizer(self.model_tokenizer):
raise ValueError(
diff --git a/vllm/tool_parsers/kimi_k2_tool_parser.py b/vllm/tool_parsers/kimi_k2_tool_parser.py
index ed479521523..bc995319e51 100644
--- a/vllm/tool_parsers/kimi_k2_tool_parser.py
+++ b/vllm/tool_parsers/kimi_k2_tool_parser.py
@@ -20,6 +20,7 @@
from vllm.logger import init_logger
from vllm.tokenizers import TokenizerLike
from vllm.tool_parsers.abstract_tool_parser import (
+ Tool,
ToolParser,
)
@@ -27,8 +28,8 @@
class KimiK2ToolParser(ToolParser):
- def __init__(self, tokenizer: TokenizerLike):
- super().__init__(tokenizer)
+ def __init__(self, tokenizer: TokenizerLike, tools: list[Tool] | None = None):
+ super().__init__(tokenizer, tools)
self.current_tool_name_sent: bool = False
self.prev_tool_call_arr: list[dict] = []
self.current_tool_id: int = -1
diff --git a/vllm/tool_parsers/llama4_pythonic_tool_parser.py b/vllm/tool_parsers/llama4_pythonic_tool_parser.py
index 93807196dd6..1921187e935 100644
--- a/vllm/tool_parsers/llama4_pythonic_tool_parser.py
+++ b/vllm/tool_parsers/llama4_pythonic_tool_parser.py
@@ -17,6 +17,7 @@
)
from vllm.logger import init_logger
from vllm.tool_parsers.abstract_tool_parser import (
+ Tool,
ToolParser,
)
from vllm.tool_parsers.utils import (
@@ -47,8 +48,12 @@ class Llama4PythonicToolParser(ToolParser):
re.DOTALL,
)
- def __init__(self, tokenizer: PreTrainedTokenizerBase):
- super().__init__(tokenizer)
+ def __init__(
+ self,
+ tokenizer: PreTrainedTokenizerBase,
+ tools: list[Tool] | None = None,
+ ):
+ super().__init__(tokenizer, tools)
# Rename for readability. This is NOT a tool id.
@property
diff --git a/vllm/tool_parsers/llama_tool_parser.py b/vllm/tool_parsers/llama_tool_parser.py
index 527d3f7358e..be3d47acd97 100644
--- a/vllm/tool_parsers/llama_tool_parser.py
+++ b/vllm/tool_parsers/llama_tool_parser.py
@@ -24,6 +24,7 @@
)
from vllm.logger import init_logger
from vllm.tool_parsers.abstract_tool_parser import (
+ Tool,
ToolParser,
)
from vllm.tool_parsers.utils import (
@@ -44,8 +45,12 @@ class Llama3JsonToolParser(ToolParser):
llama4_json are set.
"""
- def __init__(self, tokenizer: PreTrainedTokenizerBase):
- super().__init__(tokenizer)
+ def __init__(
+ self,
+ tokenizer: PreTrainedTokenizerBase,
+ tools: list[Tool] | None = None,
+ ):
+ super().__init__(tokenizer, tools)
# initialize properties used for state when parsing tool calls in
# streaming mode
diff --git a/vllm/tool_parsers/longcat_tool_parser.py b/vllm/tool_parsers/longcat_tool_parser.py
index 72f13559a92..0304f452e07 100644
--- a/vllm/tool_parsers/longcat_tool_parser.py
+++ b/vllm/tool_parsers/longcat_tool_parser.py
@@ -4,12 +4,13 @@
import regex as re
from vllm.tokenizers import TokenizerLike
+from vllm.tool_parsers.abstract_tool_parser import Tool
from vllm.tool_parsers.hermes_tool_parser import Hermes2ProToolParser
class LongcatFlashToolParser(Hermes2ProToolParser):
- def __init__(self, tokenizer: TokenizerLike):
- super().__init__(tokenizer)
+ def __init__(self, tokenizer: TokenizerLike, tools: list[Tool] | None = None):
+ super().__init__(tokenizer, tools)
self.tool_call_start_token: str = ""
self.tool_call_end_token: str = ""
diff --git a/vllm/tool_parsers/minimax_m2_tool_parser.py b/vllm/tool_parsers/minimax_m2_tool_parser.py
index a9291adc123..6c75e009947 100644
--- a/vllm/tool_parsers/minimax_m2_tool_parser.py
+++ b/vllm/tool_parsers/minimax_m2_tool_parser.py
@@ -22,6 +22,7 @@
from vllm.logger import init_logger
from vllm.tokenizers import TokenizerLike
from vllm.tool_parsers.abstract_tool_parser import (
+ Tool,
ToolParser,
)
@@ -29,8 +30,8 @@
class MinimaxM2ToolParser(ToolParser):
- def __init__(self, tokenizer: TokenizerLike):
- super().__init__(tokenizer)
+ def __init__(self, tokenizer: TokenizerLike, tools: list[Tool] | None = None):
+ super().__init__(tokenizer, tools)
self.prev_tool_call_arr: list[dict] = []
diff --git a/vllm/tool_parsers/minimax_tool_parser.py b/vllm/tool_parsers/minimax_tool_parser.py
index cb5610fc750..2a2baa03b0e 100644
--- a/vllm/tool_parsers/minimax_tool_parser.py
+++ b/vllm/tool_parsers/minimax_tool_parser.py
@@ -22,6 +22,7 @@
from vllm.logger import init_logger
from vllm.tokenizers import TokenizerLike
from vllm.tool_parsers.abstract_tool_parser import (
+ Tool,
ToolParser,
)
from vllm.tool_parsers.utils import extract_intermediate_diff
@@ -30,8 +31,8 @@
class MinimaxToolParser(ToolParser):
- def __init__(self, tokenizer: TokenizerLike):
- super().__init__(tokenizer)
+ def __init__(self, tokenizer: TokenizerLike, tools: list[Tool] | None = None):
+ super().__init__(tokenizer, tools)
# Initialize streaming state for tracking tool call progress
self.streaming_state: dict[str, Any] = {
diff --git a/vllm/tool_parsers/mistral_tool_parser.py b/vllm/tool_parsers/mistral_tool_parser.py
index 56ba245ceda..153c6ed32c4 100644
--- a/vllm/tool_parsers/mistral_tool_parser.py
+++ b/vllm/tool_parsers/mistral_tool_parser.py
@@ -26,6 +26,7 @@
from vllm.logger import init_logger
from vllm.tokenizers import TokenizerLike
from vllm.tool_parsers.abstract_tool_parser import (
+ Tool,
ToolParser,
)
from vllm.utils.mistral import is_mistral_tokenizer
@@ -78,8 +79,8 @@ class MistralToolParser(ToolParser):
Used when --enable-auto-tool-choice --tool-call-parser mistral are all set
"""
- def __init__(self, tokenizer: TokenizerLike):
- super().__init__(tokenizer)
+ def __init__(self, tokenizer: TokenizerLike, tools: list[Tool] | None = None):
+ super().__init__(tokenizer, tools)
if not is_mistral_tokenizer(self.model_tokenizer):
logger.info("Non-Mistral tokenizer detected when using a Mistral model...")
diff --git a/vllm/tool_parsers/olmo3_tool_parser.py b/vllm/tool_parsers/olmo3_tool_parser.py
index dd63b108635..dcbf0a67376 100644
--- a/vllm/tool_parsers/olmo3_tool_parser.py
+++ b/vllm/tool_parsers/olmo3_tool_parser.py
@@ -17,6 +17,7 @@
)
from vllm.logger import init_logger
from vllm.tool_parsers.abstract_tool_parser import (
+ Tool,
ToolParser,
)
from vllm.tool_parsers.utils import (
@@ -51,8 +52,12 @@ class Olmo3PythonicToolParser(ToolParser):
re.DOTALL,
)
- def __init__(self, tokenizer: PreTrainedTokenizerBase):
- super().__init__(tokenizer)
+ def __init__(
+ self,
+ tokenizer: PreTrainedTokenizerBase,
+ tools: list[Tool] | None = None,
+ ):
+ super().__init__(tokenizer, tools)
# Rename for readability. This is NOT a tool id.
@property
diff --git a/vllm/tool_parsers/openai_tool_parser.py b/vllm/tool_parsers/openai_tool_parser.py
index 76f7a49dfae..ee6dd70718b 100644
--- a/vllm/tool_parsers/openai_tool_parser.py
+++ b/vllm/tool_parsers/openai_tool_parser.py
@@ -16,6 +16,7 @@
from vllm.entrypoints.openai.parser.harmony_utils import parse_output_into_messages
from vllm.logger import init_logger
from vllm.tool_parsers.abstract_tool_parser import (
+ Tool,
ToolParser,
)
@@ -28,8 +29,8 @@
class OpenAIToolParser(ToolParser):
- def __init__(self, tokenizer: "TokenizerLike"):
- super().__init__(tokenizer)
+ def __init__(self, tokenizer: "TokenizerLike", tools: list[Tool] | None = None):
+ super().__init__(tokenizer, tools)
def extract_tool_calls(
self,
diff --git a/vllm/tool_parsers/phi4mini_tool_parser.py b/vllm/tool_parsers/phi4mini_tool_parser.py
index f222cffd61d..2dc262bba2e 100644
--- a/vllm/tool_parsers/phi4mini_tool_parser.py
+++ b/vllm/tool_parsers/phi4mini_tool_parser.py
@@ -20,6 +20,7 @@
)
from vllm.logger import init_logger
from vllm.tool_parsers.abstract_tool_parser import (
+ Tool,
ToolParser,
)
@@ -35,8 +36,12 @@ class Phi4MiniJsonToolParser(ToolParser):
are all set
"""
- def __init__(self, tokenizer: PreTrainedTokenizerBase) -> None:
- super().__init__(tokenizer)
+ def __init__(
+ self,
+ tokenizer: PreTrainedTokenizerBase,
+ tools: list[Tool] | None = None,
+ ) -> None:
+ super().__init__(tokenizer, tools)
# initialize properties used for state when parsing tool calls in
# streaming mode
diff --git a/vllm/tool_parsers/pythonic_tool_parser.py b/vllm/tool_parsers/pythonic_tool_parser.py
index 9c9f3e183d3..540a65024f5 100644
--- a/vllm/tool_parsers/pythonic_tool_parser.py
+++ b/vllm/tool_parsers/pythonic_tool_parser.py
@@ -17,6 +17,7 @@
)
from vllm.logger import init_logger
from vllm.tool_parsers.abstract_tool_parser import (
+ Tool,
ToolParser,
)
from vllm.tool_parsers.utils import (
@@ -49,8 +50,12 @@ class PythonicToolParser(ToolParser):
re.DOTALL,
)
- def __init__(self, tokenizer: PreTrainedTokenizerBase):
- super().__init__(tokenizer)
+ def __init__(
+ self,
+ tokenizer: PreTrainedTokenizerBase,
+ tools: list[Tool] | None = None,
+ ):
+ super().__init__(tokenizer, tools)
# Rename for readability. This is NOT a tool id.
@property
diff --git a/vllm/tool_parsers/qwen3coder_tool_parser.py b/vllm/tool_parsers/qwen3coder_tool_parser.py
index 216ae163b77..f9b406b53ec 100644
--- a/vllm/tool_parsers/qwen3coder_tool_parser.py
+++ b/vllm/tool_parsers/qwen3coder_tool_parser.py
@@ -10,7 +10,6 @@
from vllm.entrypoints.openai.chat_completion.protocol import (
ChatCompletionRequest,
- ChatCompletionToolsParam,
)
from vllm.entrypoints.openai.engine.protocol import (
DeltaFunctionCall,
@@ -23,6 +22,7 @@
from vllm.logger import init_logger
from vllm.tokenizers import TokenizerLike
from vllm.tool_parsers.abstract_tool_parser import (
+ Tool,
ToolParser,
)
@@ -30,8 +30,8 @@
class Qwen3CoderToolParser(ToolParser):
- def __init__(self, tokenizer: TokenizerLike):
- super().__init__(tokenizer)
+ def __init__(self, tokenizer: TokenizerLike, tools: list[Tool] | None = None):
+ super().__init__(tokenizer, tools)
self.current_tool_name_sent: bool = False
self.prev_tool_call_arr: list[dict] = []
@@ -109,9 +109,7 @@ def _reset_streaming_state(self):
self.accumulated_params = {}
self.streaming_request = None
- def _get_arguments_config(
- self, func_name: str, tools: list[ChatCompletionToolsParam] | None
- ) -> dict:
+ def _get_arguments_config(self, func_name: str, tools: list[Tool] | None) -> dict:
"""Extract argument configuration for a function."""
if tools is None:
return {}
@@ -246,7 +244,7 @@ def _convert_param_value(
return param_value
def _parse_xml_function_call(
- self, function_call_str: str, tools: list[ChatCompletionToolsParam] | None
+ self, function_call_str: str, tools: list[Tool] | None
) -> ToolCall | None:
# Extract function name
end_index = function_call_str.find(">")
diff --git a/vllm/tool_parsers/qwen3xml_tool_parser.py b/vllm/tool_parsers/qwen3xml_tool_parser.py
index f7dcf20abb7..23778091ee9 100644
--- a/vllm/tool_parsers/qwen3xml_tool_parser.py
+++ b/vllm/tool_parsers/qwen3xml_tool_parser.py
@@ -11,7 +11,6 @@
from vllm.entrypoints.chat_utils import make_tool_call_id
from vllm.entrypoints.openai.chat_completion.protocol import (
ChatCompletionRequest,
- ChatCompletionToolsParam,
)
from vllm.entrypoints.openai.engine.protocol import (
DeltaFunctionCall,
@@ -24,6 +23,7 @@
from vllm.logger import init_logger
from vllm.tokenizers import TokenizerLike
from vllm.tool_parsers.abstract_tool_parser import (
+ Tool,
ToolParser,
)
@@ -40,7 +40,7 @@ def __init__(self):
self.reset_streaming_state()
# Tool configuration information
- self.tools: list[ChatCompletionToolsParam] | None = None
+ self.tools: list[Tool] | None = None
self.tool_call_start_token: str = ""
self.tool_call_end_token: str = ""
self.function_start_token: str = " ToolCall | None:
def get_arguments_config(func_name: str) -> dict:
if tools is None:
diff --git a/vllm/tool_parsers/step3_tool_parser.py b/vllm/tool_parsers/step3_tool_parser.py
index 8e6f27907c9..a9c5695876f 100644
--- a/vllm/tool_parsers/step3_tool_parser.py
+++ b/vllm/tool_parsers/step3_tool_parser.py
@@ -22,6 +22,7 @@
from vllm.logger import init_logger
from vllm.tokenizers import TokenizerLike
from vllm.tool_parsers.abstract_tool_parser import (
+ Tool,
ToolParser,
)
from vllm.utils import random_uuid
@@ -43,8 +44,8 @@ class Step3ToolParser(ToolParser):
TOOL_SEP = "<|tool_sep|>"
SPECIAL_TOKENS = [TOOL_CALLS_BEGIN, TOOL_CALLS_END, TOOL_CALL_BEGIN, TOOL_CALL_END]
- def __init__(self, tokenizer: TokenizerLike):
- super().__init__(tokenizer)
+ def __init__(self, tokenizer: TokenizerLike, tools: list[Tool] | None = None):
+ super().__init__(tokenizer, tools)
self.position = 0
# Explicit state flags for robust streaming
self.tool_block_started = False
diff --git a/vllm/tool_parsers/step3p5_tool_parser.py b/vllm/tool_parsers/step3p5_tool_parser.py
index 4441cd74e09..25b310f2af6 100644
--- a/vllm/tool_parsers/step3p5_tool_parser.py
+++ b/vllm/tool_parsers/step3p5_tool_parser.py
@@ -11,7 +11,6 @@
from vllm.entrypoints.chat_utils import make_tool_call_id
from vllm.entrypoints.openai.chat_completion.protocol import (
ChatCompletionRequest,
- ChatCompletionToolsParam,
)
from vllm.entrypoints.openai.engine.protocol import (
DeltaFunctionCall,
@@ -23,7 +22,7 @@
)
from vllm.logger import init_logger
from vllm.tokenizers import TokenizerLike
-from vllm.tool_parsers.abstract_tool_parser import ToolParser
+from vllm.tool_parsers.abstract_tool_parser import Tool, ToolParser
logger = init_logger(__name__)
@@ -38,7 +37,7 @@ def __init__(self):
self.reset_streaming_state()
# Tool configuration information
- self.tools: list[ChatCompletionToolsParam] | None = None
+ self.tools: list[Tool] | None = None
self.tool_call_start_token: str = ""
self.tool_call_end_token: str = ""
self.function_start_token: str = "