vllm-project · chaunceyjiang · Apr 29, 2026 · Apr 28, 2026 · Apr 28, 2026
@@ -1,18 +1,13 @@
 # SPDX-License-Identifier: Apache-2.0
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
 import json
-from http import HTTPStatus
 from typing import Final
 
 import pytest
 import schemathesis
-from httpx import URL
-from hypothesis import settings
+from hypothesis import HealthCheck, settings
 from schemathesis import GenerationConfig
-from schemathesis.checks import not_a_server_error
-from schemathesis.internal.checks import CheckContext
 from schemathesis.models import Case
-from schemathesis.transports.responses import GenericResponse
 
 from vllm.platforms import current_platform
 
@@ -65,20 +60,10 @@ def before_generate_case(context: schemathesis.hooks.HookContext, strategy):
 
     def no_invalid_types(case: schemathesis.models.Case):
         """
-        This filter skips test cases with invalid data that schemathesis
-        incorrectly generates due to permissive schema configurations.
-
-        1. Skips `POST /tokenize` endpoint cases with `"type": "file"` in 
-           message content, which isn't implemented.
-
-        2. Skips tool_calls with `"type": "custom"` which schemathesis 
-           incorrectly generates instead of the valid `"type": "function"`.
-
-        Example test cases that are skipped:
-        curl -X POST -H 'Content-Type: application/json' \
-            -d '{"messages": [{"content": [{"file": {}, "type": "file"}], "role": "user"}]}' \
-            http://localhost:8000/tokenize
+        Skips tool_calls with `"type": "custom"` which schemathesis incorrectly
+        generates instead of the valid `"type": "function"`.
 
+        Example test case that is skipped:
         curl -X POST -H 'Content-Type: application/json' \
             -d '{"messages": [{"role": "assistant", "tool_calls": [{"custom": {"input": "", "name": ""}, "id": "", "type": "custom"}]}]}' \
             http://localhost:8000/v1/chat/completions
@@ -93,20 +78,6 @@ def no_invalid_types(case: schemathesis.models.Case):
                     if not isinstance(message, dict):
                         continue
 
-                    # Check for invalid file type in tokenize endpoint
-                    if op.method.lower() == "post" and op.path == "/tokenize":
-                        content = message.get("content", [])
-                        if (
-                            isinstance(content, list)
-                            and len(content) > 0
-                            and any(
-                                isinstance(item, dict) and item.get("type") == "file"
-                                for item in content
-                            )
-                        ):
-                            return False
-
-                    # Check for invalid tool_calls with non-function types
                     tool_calls = message.get("tool_calls", [])
                     if isinstance(tool_calls, list):
                         for tool_call in tool_calls:
@@ -136,24 +107,19 @@ def no_invalid_types(case: schemathesis.models.Case):
     return strategy.filter(no_invalid_types)
 
 
-def customized_not_a_server_error(
-    ctx: CheckContext, response: GenericResponse, case: Case
-) -> bool | None:
-    try:
-        return not_a_server_error(ctx, response, case)
-    except Exception:
-        if (
-            URL(response.request.url).path
-            in ["/v1/chat/completions/render", "/v1/chat/completions"]
-            and response.status_code == HTTPStatus.NOT_IMPLEMENTED.value
-        ):
-            return True
-        raise
-
-
 @schema.parametrize()
 @schema.override(headers={"Content-Type": "application/json"})
-@settings(deadline=LONG_TIMEOUT_SECONDS * 1000, max_examples=50)
+@settings(
+    deadline=LONG_TIMEOUT_SECONDS * 1000,
+    max_examples=50,
+    # Under CI's derandomized hypothesis seed, the schemathesis strategy
+    # for /v1/chat/completions/batch's nested-message body, combined with
+    # the no_invalid_types filter (notably the grammar=="" rule), exceeds
+    # the default filtered-vs-good ratio. The filter is intentional, so
+    # suppress the health check rather than drop the filter — dropping it
+    # exposes pre-existing server bugs out of scope here.
+    suppress_health_check=[HealthCheck.filter_too_much],
+)
 def test_openapi_stateless(case: Case):
     key = (
         case.operation.method.upper(),
@@ -180,9 +146,4 @@ def test_openapi_stateless(case: Case):
     }.get(key, DEFAULT_TIMEOUT_SECONDS)
 
     # No need to verify SSL certificate for localhost
-    case.call_and_validate(
-        verify=False,
-        timeout=timeout,
-        additional_checks=(customized_not_a_server_error,),
-        excluded_checks=(not_a_server_error,),
-    )
+    case.call_and_validate(verify=False, timeout=timeout)
@@ -40,6 +40,7 @@
 
 from vllm import envs
 from vllm.config import ModelConfig
+from vllm.exceptions import VLLMValidationError
 from vllm.inputs import MultiModalDataDict, MultiModalUUIDDict
 from vllm.logger import init_logger
 from vllm.model_executor.models import SupportsMultiModal
@@ -1501,7 +1502,13 @@ def _parse_chat_message_content_part(
         mm_parser.parse_video(str_content, uuid)
         modality = "video"
     else:
-        raise NotImplementedError(f"Unknown part type: {part_type}")
+        supported = sorted(MM_PARSER_MAP.keys() | set(PART_TYPES_TO_SKIP_NONE_CONTENT))
+        raise VLLMValidationError(
+            f"Unsupported chat content part type: {part_type!r}. "
+            f"Supported types: {', '.join(supported)}.",
+            parameter="type",
+            value=part_type,
+        )
 
     if wrap_dicts:
         return {"type": modality}