Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
48 changes: 47 additions & 1 deletion tests/entrypoints/openai/chat_completion/test_chat_error.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,13 @@
from unittest.mock import AsyncMock, MagicMock, patch

import pytest
from pydantic import ValidationError

from vllm.config.multimodal import MultiModalConfig
from vllm.entrypoints.openai.chat_completion.protocol import ChatCompletionRequest
from vllm.entrypoints.openai.chat_completion.protocol import (
BatchChatCompletionRequest,
ChatCompletionRequest,
)
from vllm.entrypoints.openai.chat_completion.serving import OpenAIServingChat
from vllm.entrypoints.openai.engine.protocol import GenerationError
from vllm.entrypoints.openai.models.protocol import BaseModelPath
Expand Down Expand Up @@ -444,3 +448,45 @@ def test_json_schema_response_format_missing_schema():
messages=[{"role": "user", "content": "hello"}],
response_format={"type": "json_schema"},
)


@pytest.mark.parametrize("format_value", [None, {}])
def test_structural_tag_response_format_invalid(format_value):
    """A chat request with a malformed structural_tag format must not validate."""
    # Assemble the request arguments up front so that the ``raises`` block
    # contains only the constructor call under test.
    request_kwargs = {
        "model": MODEL_NAME,
        "messages": [{"role": "user", "content": "hello"}],
        "response_format": {"type": "structural_tag", "format": format_value},
    }
    expected_error = pytest.raises(
        ValidationError,
        match="Invalid response_format structural_tag",
    )
    with expected_error:
        ChatCompletionRequest(**request_kwargs)


@pytest.mark.parametrize("format_value", [None, {}])
def test_batch_structural_tag_response_format_invalid(format_value):
    """A batch chat request with a malformed structural_tag must not validate."""
    # Batch requests take a list of conversations, hence the nested list.
    single_conversation = [{"role": "user", "content": "hello"}]
    malformed_format = {"type": "structural_tag", "format": format_value}

    with pytest.raises(
        ValidationError, match="Invalid response_format structural_tag"
    ):
        BatchChatCompletionRequest(
            model=MODEL_NAME,
            messages=[single_conversation],
            response_format=malformed_format,
        )


@pytest.mark.parametrize("structural_tag", ["not json", ""])
def test_structured_outputs_structural_tag_invalid(structural_tag):
    """A bad structural_tag given directly in structured_outputs must not validate."""
    user_turn = {"role": "user", "content": "hello"}
    direct_constraint = {"structural_tag": structural_tag}

    with pytest.raises(
        ValidationError, match="Invalid structured_outputs structural_tag"
    ):
        ChatCompletionRequest(
            model=MODEL_NAME,
            messages=[user_turn],
            structured_outputs=direct_constraint,
        )
31 changes: 31 additions & 0 deletions tests/entrypoints/openai/completion/test_completion_error.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
from unittest.mock import AsyncMock, MagicMock

import pytest
from pydantic import ValidationError

from vllm.config.multimodal import MultiModalConfig
from vllm.entrypoints.openai.completion.protocol import CompletionRequest
Expand Down Expand Up @@ -302,6 +303,36 @@ def test_json_schema_response_format_missing_schema():
)


@pytest.mark.parametrize("format_value", [None, {}])
def test_structural_tag_response_format_invalid(format_value):
    """A completion request with a malformed structural_tag format must not validate."""
    # Assemble the request arguments up front so that the ``raises`` block
    # contains only the constructor call under test.
    request_kwargs = {
        "model": MODEL_NAME,
        "prompt": "Test prompt",
        "max_tokens": 10,
        "response_format": {"type": "structural_tag", "format": format_value},
    }
    expected_error = pytest.raises(
        ValidationError,
        match="Invalid response_format structural_tag",
    )
    with expected_error:
        CompletionRequest(**request_kwargs)


@pytest.mark.parametrize("structural_tag", ["not json", ""])
def test_structured_outputs_structural_tag_invalid(structural_tag):
    """A bad structural_tag given directly in structured_outputs must not validate."""
    direct_constraint = {"structural_tag": structural_tag}

    with pytest.raises(
        ValidationError, match="Invalid structured_outputs structural_tag"
    ):
        CompletionRequest(
            model=MODEL_NAME,
            prompt="Test prompt",
            max_tokens=10,
            structured_outputs=direct_constraint,
        )


def test_negative_prompt_token_ids_nested():
"""Negative token IDs in prompt (nested list) should raise validation error."""
with pytest.raises(Exception, match="greater than or equal to 0"):
Expand Down
28 changes: 26 additions & 2 deletions tests/tool_parsers/test_mistral_tool_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -1382,7 +1382,20 @@ def test_adjust_request_non_mistral_tokenizer(
[
{"regex": r"\d+"},
{"choice": ["a", "b"]},
{"structural_tag": '{"key": "value"}'},
{
"structural_tag": json.dumps(
{
"structures": [
{
"begin": "<tool>",
"schema": {"type": "object"},
"end": "</tool>",
}
],
"triggers": ["<tool>"],
}
)
},
{"grammar": "start: 'hello'"},
],
ids=["regex", "choice", "structural_tag", "grammar"],
Expand All @@ -1404,7 +1417,18 @@ def test_adjust_request_unsupported_response_format(
) -> None:
request = _make_request(
response_format=StructuralTagResponseFormat(
type="structural_tag", format={"some": "config"}
type="structural_tag",
format={
"type": "triggered_tags",
"tags": [
{
"begin": "<tool>",
"content": {"type": "any_text"},
"end": "</tool>",
}
],
"triggers": ["<tool>"],
},
),
)
result = mistral_tool_parser.adjust_request(request)
Expand Down
16 changes: 16 additions & 0 deletions vllm/entrypoints/openai/chat_completion/protocol.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,8 @@
StructuralTagResponseFormat,
ToolCall,
UsageInfo,
validate_structural_tag_response_format,
validate_structured_outputs_structural_tag,
)
from vllm.exceptions import VLLMValidationError
from vllm.logger import init_logger
Expand Down Expand Up @@ -671,6 +673,9 @@ def validate_response_format(cls, data):
parameter="response_format",
)

if rf_type == "structural_tag":
validate_structural_tag_response_format(response_format)

return data

@model_validator(mode="before")
Expand Down Expand Up @@ -754,6 +759,7 @@ def check_structured_outputs_count(cls, data):
"You can only either use constraints for structured outputs "
"or tools, not both.",
)
validate_structured_outputs_structural_tag(structured_outputs_kwargs)
return data

@model_validator(mode="before")
Expand Down Expand Up @@ -979,6 +985,16 @@ def check_batch_mode(cls, data: Any) -> Any:
"Batch chat completions do not support beam search. "
"Please set `use_beam_search` to False."
)
response_format = data.get("response_format")
rf_type = (
response_format.get("type")
if isinstance(response_format, dict)
else getattr(response_format, "type", None)
)
if rf_type == "structural_tag":
validate_structural_tag_response_format(response_format)
if (structured_outputs := data.get("structured_outputs")) is not None:
validate_structured_outputs_structural_tag(structured_outputs)
n = data.get("n", 1)
if n is not None and n != 1:
raise ValueError(
Expand Down
6 changes: 6 additions & 0 deletions vllm/entrypoints/openai/completion/protocol.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,8 @@
StreamOptions,
StructuralTagResponseFormat,
UsageInfo,
validate_structural_tag_response_format,
validate_structured_outputs_structural_tag,
)
from vllm.exceptions import VLLMValidationError
from vllm.logger import init_logger
Expand Down Expand Up @@ -370,6 +372,9 @@ def validate_response_format(cls, data):
parameter="response_format",
)

if rf_type == "structural_tag":
validate_structural_tag_response_format(response_format)

return data

@model_validator(mode="before")
Expand Down Expand Up @@ -397,6 +402,7 @@ def check_structured_outputs_count(cls, data):
"outputs ('json', 'regex' or 'choice').",
parameter="structured_outputs",
)
validate_structured_outputs_structural_tag(structured_outputs_kwargs)
return data

@model_validator(mode="before")
Expand Down
75 changes: 75 additions & 0 deletions vllm/entrypoints/openai/engine/protocol.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
)

from vllm.entrypoints.chat_utils import make_tool_call_id
from vllm.exceptions import VLLMValidationError
from vllm.logger import init_logger
from vllm.utils import random_uuid
from vllm.utils.import_utils import resolve_obj_by_qualname
Expand Down Expand Up @@ -158,6 +159,80 @@ class ResponseFormat(OpenAIBaseModel):
)


def validate_structural_tag_response_format(
    response_format: AnyStructuralTagResponseFormat | dict[str, Any],
) -> None:
    """Check a structural-tag ``response_format`` while the request is parsed.

    Without this check, a malformed structural tag is only detected by the
    engine and is reported as a generation failure; the OpenAI request layer
    should instead classify it as a bad request.

    Args:
        response_format: Either an already-parsed structural-tag response
            format model, or the raw dict form, which is first coerced to
            ``AnyStructuralTagResponseFormat``.

    Raises:
        VLLMValidationError: If the dict cannot be parsed into the model, or
            if serialising or validating the resulting payload raises
            ``TypeError`` or ``ValueError``.
    """
    import json

    from pydantic import TypeAdapter, ValidationError

    def _rejection() -> VLLMValidationError:
        # Both failure paths report the same message and parameter name.
        return VLLMValidationError(
            "Invalid response_format structural_tag specification.",
            parameter="response_format",
        )

    parsed = response_format
    if isinstance(parsed, dict):
        try:
            adapter = TypeAdapter(AnyStructuralTagResponseFormat)
            parsed = adapter.validate_python(parsed)
        except ValidationError as exc:
            raise _rejection() from exc

    try:
        # Serialise with field aliases, then validate the resulting JSON string.
        dumped = parsed.model_dump(by_alias=True)
        serialized = json.dumps(dumped)
        validate_structural_tag_payload(serialized, parameter="response_format")
    except (TypeError, ValueError) as exc:
        raise _rejection() from exc


def validate_structural_tag_payload(payload: Any, *, parameter: str) -> None:
    """Validate a structural-tag payload with the xgrammar grammar validator.

    Args:
        payload: The structural-tag value to check. It is wrapped in
            ``StructuredOutputsParams`` and ``SamplingParams`` and passed to
            ``validate_xgrammar_grammar``.
        parameter: Name of the request parameter the payload came from; it is
            used in the error message and attached to the raised error.

    Raises:
        VLLMValidationError: If ``payload`` is an empty string, or if building
            the sampling parameters or validating them raises ``TypeError`` or
            ``ValueError``.
    """
    from vllm.sampling_params import SamplingParams, StructuredOutputsParams
    from vllm.v1.structured_output.backend_xgrammar import validate_xgrammar_grammar

    message = f"Invalid {parameter} structural_tag specification."

    # An empty string is rejected up front, before the validator is involved.
    is_empty_string = isinstance(payload, str) and not payload
    if is_empty_string:
        raise VLLMValidationError(message, parameter=parameter)

    try:
        constraint = StructuredOutputsParams(structural_tag=payload)
        sampling_params = SamplingParams(structured_outputs=constraint)
        validate_xgrammar_grammar(sampling_params)
    except (TypeError, ValueError) as exc:
        raise VLLMValidationError(message, parameter=parameter) from exc


def validate_structured_outputs_structural_tag(structured_outputs: Any) -> None:
    """Validate the ``structural_tag`` carried by a ``structured_outputs`` value.

    Args:
        structured_outputs: A ``StructuredOutputsParams`` instance or a dict.
            Any other type is ignored, as is a missing or ``None`` tag.

    Raises:
        VLLMValidationError: Propagated from
            ``validate_structural_tag_payload`` when the tag is invalid.
    """
    from vllm.sampling_params import StructuredOutputsParams

    # Pull the tag out of whichever representation was supplied.
    if isinstance(structured_outputs, StructuredOutputsParams):
        tag = structured_outputs.structural_tag
    elif isinstance(structured_outputs, dict):
        tag = structured_outputs.get("structural_tag")
    else:
        tag = None

    if tag is None:
        return

    validate_structural_tag_payload(tag, parameter="structured_outputs")


class StreamOptions(OpenAIBaseModel):
include_usage: bool | None = False
continuous_usage_stats: bool | None = False
Expand Down
Loading