diff --git a/tests/entrypoints/openai/test_openai_schema.py b/tests/entrypoints/openai/test_openai_schema.py index 083290ed5b3a..7d17eb9d4569 100644 --- a/tests/entrypoints/openai/test_openai_schema.py +++ b/tests/entrypoints/openai/test_openai_schema.py @@ -1,18 +1,13 @@ # SPDX-License-Identifier: Apache-2.0 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project import json -from http import HTTPStatus from typing import Final import pytest import schemathesis -from httpx import URL -from hypothesis import settings +from hypothesis import HealthCheck, settings from schemathesis import GenerationConfig -from schemathesis.checks import not_a_server_error -from schemathesis.internal.checks import CheckContext from schemathesis.models import Case -from schemathesis.transports.responses import GenericResponse from vllm.platforms import current_platform @@ -65,20 +60,10 @@ def before_generate_case(context: schemathesis.hooks.HookContext, strategy): def no_invalid_types(case: schemathesis.models.Case): """ - This filter skips test cases with invalid data that schemathesis - incorrectly generates due to permissive schema configurations. - - 1. Skips `POST /tokenize` endpoint cases with `"type": "file"` in - message content, which isn't implemented. - - 2. Skips tool_calls with `"type": "custom"` which schemathesis - incorrectly generates instead of the valid `"type": "function"`. - - Example test cases that are skipped: - curl -X POST -H 'Content-Type: application/json' \ - -d '{"messages": [{"content": [{"file": {}, "type": "file"}], "role": "user"}]}' \ - http://localhost:8000/tokenize + Skips tool_calls with `"type": "custom"` which schemathesis incorrectly + generates instead of the valid `"type": "function"`. + Example test case that is skipped: curl -X POST -H 'Content-Type: application/json' \ -d '{"messages": [{"role": "assistant", "tool_calls": [{"custom": {"input": "", "name": ""}, "id": "", "type": "custom"}]}]}' \ http://localhost:8000/v1/chat/completions @@ -93,20 +78,6 @@ def no_invalid_types(case: schemathesis.models.Case): if not isinstance(message, dict): continue - # Check for invalid file type in tokenize endpoint - if op.method.lower() == "post" and op.path == "/tokenize": - content = message.get("content", []) - if ( - isinstance(content, list) - and len(content) > 0 - and any( - isinstance(item, dict) and item.get("type") == "file" - for item in content - ) - ): - return False - - # Check for invalid tool_calls with non-function types tool_calls = message.get("tool_calls", []) if isinstance(tool_calls, list): for tool_call in tool_calls: @@ -136,24 +107,19 @@ def no_invalid_types(case: schemathesis.models.Case): return strategy.filter(no_invalid_types) -def customized_not_a_server_error( - ctx: CheckContext, response: GenericResponse, case: Case -) -> bool | None: - try: - return not_a_server_error(ctx, response, case) - except Exception: - if ( - URL(response.request.url).path - in ["/v1/chat/completions/render", "/v1/chat/completions"] - and response.status_code == HTTPStatus.NOT_IMPLEMENTED.value - ): - return True - raise - - @schema.parametrize() @schema.override(headers={"Content-Type": "application/json"}) -@settings(deadline=LONG_TIMEOUT_SECONDS * 1000, max_examples=50) +@settings( + deadline=LONG_TIMEOUT_SECONDS * 1000, + max_examples=50, + # Under CI's derandomized hypothesis seed, the schemathesis strategy + # for /v1/chat/completions/batch's nested-message body, combined with + # the no_invalid_types filter (notably the grammar=="" rule), exceeds + # the default filtered-vs-good ratio. The filter is intentional, so + # suppress the health check rather than drop the filter — dropping it + # exposes pre-existing server bugs out of scope here. + suppress_health_check=[HealthCheck.filter_too_much], +) def test_openapi_stateless(case: Case): key = ( case.operation.method.upper(), @@ -180,9 +146,4 @@ def test_openapi_stateless(case: Case): }.get(key, DEFAULT_TIMEOUT_SECONDS) # No need to verify SSL certificate for localhost - case.call_and_validate( - verify=False, - timeout=timeout, - additional_checks=(customized_not_a_server_error,), - excluded_checks=(not_a_server_error,), - ) + case.call_and_validate(verify=False, timeout=timeout) diff --git a/vllm/entrypoints/chat_utils.py b/vllm/entrypoints/chat_utils.py index bd4f29a00410..73a32033aa48 100644 --- a/vllm/entrypoints/chat_utils.py +++ b/vllm/entrypoints/chat_utils.py @@ -40,6 +40,7 @@ from vllm import envs from vllm.config import ModelConfig +from vllm.exceptions import VLLMValidationError from vllm.inputs import MultiModalDataDict, MultiModalUUIDDict from vllm.logger import init_logger from vllm.model_executor.models import SupportsMultiModal @@ -1501,7 +1502,13 @@ def _parse_chat_message_content_part( mm_parser.parse_video(str_content, uuid) modality = "video" else: - raise NotImplementedError(f"Unknown part type: {part_type}") + supported = sorted(MM_PARSER_MAP.keys() | set(PART_TYPES_TO_SKIP_NONE_CONTENT)) + raise VLLMValidationError( + f"Unsupported chat content part type: {part_type!r}. " + f"Supported types: {', '.join(supported)}.", + parameter="type", + value=part_type, + ) if wrap_dicts: return {"type": modality}