Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 19 additions & 0 deletions vllm/config/structured_outputs.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,9 @@ class StructuredOutputsConfig:
reasoning_parser: str = ""
"""Select the reasoning parser depending on the model that you're using.
This is used to parse the reasoning content into OpenAI API format."""
reasoning_parser_plugin: str = ""
"""Path to a reasoning parser plugin that can be dynamically loaded and
registered."""
enable_in_reasoning: bool = False
"""Whether to use structured input for reasoning."""

Expand All @@ -60,6 +63,22 @@ def compute_hash(self) -> str:

@model_validator(mode="after")
def _validate_structured_output_config(self) -> Self:
# Import here to avoid circular import
from vllm.reasoning.abs_reasoning_parsers import ReasoningParserManager

if self.reasoning_parser_plugin and len(self.reasoning_parser_plugin) > 3:
ReasoningParserManager.import_reasoning_parser(self.reasoning_parser_plugin)

valid_reasoning_parsers = ReasoningParserManager.list_registered()
if (
self.reasoning_parser != ""
and self.reasoning_parser not in valid_reasoning_parsers
):
raise ValueError(
f"invalid reasoning parser: {self.reasoning_parser} "
f"(chose from {{ {','.join(valid_reasoning_parsers)} }})"
)

if self.disable_any_whitespace and self.backend not in ("xgrammar", "guidance"):
raise ValueError(
"disable_any_whitespace is only supported for "
Expand Down
15 changes: 11 additions & 4 deletions vllm/engine/arg_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -80,7 +80,6 @@
from vllm.platforms import CpuArchEnum, current_platform
from vllm.plugins import load_general_plugins
from vllm.ray.lazy_utils import is_in_ray_actor, is_ray_initialized
from vllm.reasoning import ReasoningParserManager
from vllm.transformers_utils.config import (
get_model_path,
is_interleaved,
Expand Down Expand Up @@ -495,7 +494,7 @@ class EngineArgs:
VllmConfig, "structured_outputs_config"
)
reasoning_parser: str = StructuredOutputsConfig.reasoning_parser

reasoning_parser_plugin: str | None = None
# Deprecated guided decoding fields
guided_decoding_backend: str | None = None
guided_decoding_disable_fallback: bool | None = None
Expand Down Expand Up @@ -707,10 +706,13 @@ def add_cli_args(parser: FlexibleArgumentParser) -> FlexibleArgumentParser:
)
structured_outputs_group.add_argument(
"--reasoning-parser",
# This choice is a special case because it's not static
choices=list(ReasoningParserManager.list_registered()),
# Choices need to be validated after parsing to include plugins
Copy link
Copy Markdown
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It might have been nicer if `choices` had been changed to `metavar`, so that users running the CLI with `--help` can still see the built-in options without the passed value being strictly checked. You could add something like `<plugin>` so that users know the list in the metavar is not exhaustive.

@walterbm could this be done in a follow up?

Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

@hmellor good idea! yes I can do that

**structured_outputs_kwargs["reasoning_parser"],
)
structured_outputs_group.add_argument(
"--reasoning-parser-plugin",
**structured_outputs_kwargs["reasoning_parser_plugin"],
)
# Deprecated guided decoding arguments
for arg, type in [
("--guided-decoding-backend", str),
Expand Down Expand Up @@ -1629,6 +1631,11 @@ def create_engine_config(
if self.reasoning_parser:
self.structured_outputs_config.reasoning_parser = self.reasoning_parser

if self.reasoning_parser_plugin:
self.structured_outputs_config.reasoning_parser_plugin = (
self.reasoning_parser_plugin
)

# Forward the deprecated CLI args to the StructuredOutputsConfig
so_config = self.structured_outputs_config
if self.guided_decoding_backend is not None:
Expand Down
12 changes: 9 additions & 3 deletions vllm/entrypoints/openai/api_server.py
Original file line number Diff line number Diff line change
Expand Up @@ -1944,13 +1944,13 @@ def validate_api_server_args(args):
f"(chose from {{ {','.join(valid_tool_parses)} }})"
)

valid_reasoning_parses = ReasoningParserManager.list_registered()
valid_reasoning_parsers = ReasoningParserManager.list_registered()
if (
reasoning_parser := args.structured_outputs_config.reasoning_parser
) and reasoning_parser not in valid_reasoning_parses:
) and reasoning_parser not in valid_reasoning_parsers:
raise KeyError(
f"invalid reasoning parser: {reasoning_parser} "
f"(chose from {{ {','.join(valid_reasoning_parses)} }})"
f"(chose from {{ {','.join(valid_reasoning_parsers)} }})"
)


Expand All @@ -1964,6 +1964,9 @@ def setup_server(args):
if args.tool_parser_plugin and len(args.tool_parser_plugin) > 3:
ToolParserManager.import_tool_parser(args.tool_parser_plugin)

if args.reasoning_parser_plugin and len(args.reasoning_parser_plugin) > 3:
ReasoningParserManager.import_reasoning_parser(args.reasoning_parser_plugin)

validate_api_server_args(args)

# workaround to make sure that we bind the port before the engine is set up.
Expand Down Expand Up @@ -2013,6 +2016,9 @@ async def run_server_worker(
if args.tool_parser_plugin and len(args.tool_parser_plugin) > 3:
ToolParserManager.import_tool_parser(args.tool_parser_plugin)

if args.reasoning_parser_plugin and len(args.reasoning_parser_plugin) > 3:
ReasoningParserManager.import_reasoning_parser(args.reasoning_parser_plugin)

# Load logging config for uvicorn if specified
log_config = load_log_config(args.log_config_file)
if log_config is not None:
Expand Down
6 changes: 3 additions & 3 deletions vllm/entrypoints/openai/run_batch.py
Original file line number Diff line number Diff line change
Expand Up @@ -334,13 +334,13 @@ async def run_request(


def validate_run_batch_args(args):
valid_reasoning_parses = ReasoningParserManager.list_registered()
valid_reasoning_parsers = ReasoningParserManager.list_registered()
if (
reasoning_parser := args.structured_outputs_config.reasoning_parser
) and reasoning_parser not in valid_reasoning_parses:
) and reasoning_parser not in valid_reasoning_parsers:
raise KeyError(
f"invalid reasoning parser: {reasoning_parser} "
f"(chose from {{ {','.join(valid_reasoning_parses)} }})"
f"(chose from {{ {','.join(valid_reasoning_parsers)} }})"
)


Expand Down
16 changes: 11 additions & 5 deletions vllm/reasoning/basic_parsers.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,15 +3,21 @@

from abc import abstractmethod
from collections.abc import Sequence
from typing import TYPE_CHECKING, Any

from vllm.entrypoints.openai.protocol import (
ChatCompletionRequest,
DeltaMessage,
ResponsesRequest,
)
from vllm.entrypoints.openai.protocol import DeltaMessage
from vllm.reasoning.abs_reasoning_parsers import ReasoningParser
from vllm.transformers_utils.tokenizer import AnyTokenizer

if TYPE_CHECKING:
from vllm.entrypoints.openai.protocol import (
ChatCompletionRequest,
ResponsesRequest,
)
else:
ChatCompletionRequest = Any
ResponsesRequest = Any


class BaseThinkingReasoningParser(ReasoningParser):
"""
Expand Down
9 changes: 9 additions & 0 deletions vllm/v1/structured_output/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,15 @@ def __init__(self, vllm_config: VllmConfig):
self.tokenizer = init_tokenizer_from_configs(
model_config=self.vllm_config.model_config
)
reasoning_parser = (
self.vllm_config.structured_outputs_config.reasoning_parser
)
reasoning_parser_plugin = (
self.vllm_config.structured_outputs_config.reasoning_parser_plugin
)
if reasoning_parser_plugin and len(reasoning_parser_plugin) > 3:
ReasoningParserManager.import_reasoning_parser(reasoning_parser_plugin)

reasoning_parser = (
self.vllm_config.structured_outputs_config.reasoning_parser
)
Expand Down