From d1831c19a017d57009cb0ea97b48d0a117191b16 Mon Sep 17 00:00:00 2001
From: Varun Chawla <varun_6april@hotmail.com>
Date: Mon, 16 Feb 2026 11:30:07 -0800
Subject: [PATCH 1/3] fix: reject non-text content in system/developer messages

Per the OpenAI API spec, system and developer messages accept only
text content. Add validation to the existing per-part parser
(_parse_chat_message_content_part) to reject multimodal content
(image_url, audio_url, video_url, input_audio, etc.) for these roles.

The check handles both parts with an explicit "type" field and parts
whose type is inferred from their dict keys, so the simplified
(type-less) multimodal formats cannot be used to bypass it.

Fixes #33925

Signed-off-by: Varun Chawla <varun_6april@hotmail.com>
---
 tests/entrypoints/test_chat_utils.py | 154 +++++++++++++++++++++++++++
 vllm/entrypoints/chat_utils.py       |  57 ++++++++++
 2 files changed, 211 insertions(+)

diff --git a/tests/entrypoints/test_chat_utils.py b/tests/entrypoints/test_chat_utils.py
index 36e8b0c0b540..31a0807c04cd 100644
--- a/tests/entrypoints/test_chat_utils.py
+++ b/tests/entrypoints/test_chat_utils.py
@@ -2682,3 +2682,157 @@ async def test_parse_chat_messages_video_vision_chunk_with_uuid_async(
     assert conversation == expected_conversation
     _assert_mm_data_is_vision_chunk_input(mm_data, 1)
     _assert_mm_uuids(mm_uuids, 1, expected_uuids=[video_uuid], modality="vision_chunk")
+
+
+# -- Tests for system/developer message content validation (issue #33925) -----
+
+
+@pytest.mark.parametrize("role", ["system", "developer"])
+@pytest.mark.parametrize(
+    ("part", "part_type_label"),
+    [
+        (
+            {"type": "image_url", "image_url": {"url": "https://example.com/img.png"}},
+            "image_url",
+        ),
+        (
+            {"type": "input_audio", "input_audio": {"data": "abc", "format": "wav"}},
+            "input_audio",
+        ),
+        (
+            {"type": "video_url", "video_url": {"url": "https://example.com/vid.mp4"}},
+            "video_url",
+        ),
+    ],
+    ids=["image_url", "input_audio", "video_url"],
+)
+def test_system_message_rejects_non_text_content(
+    phi3v_model_config, role, part, part_type_label
+):
+    """System and developer messages must only accept text content.
+
+    Per the OpenAI API specification, sending multimodal content (e.g.
+    ``image_url``) in a system message should be rejected with an error.
+    See https://github.com/vllm-project/vllm/issues/33925
+    """
+    messages = [
+        {
+            "role": role,
+            "content": [part],
+        },
+        {
+            "role": "user",
+            "content": "Hello",
+        },
+    ]
+
+    with pytest.raises(ValueError, match=f"'{part_type_label}' is not supported"):
+        parse_chat_messages(
+            messages,
+            phi3v_model_config,
+            content_format="string",
+        )
+
+
+@pytest.mark.parametrize("role", ["system", "developer"])
+@pytest.mark.parametrize(
+    ("part", "part_type_label"),
+    [
+        (
+            {"image_url": "https://example.com/img.png"},
+            "image_url",
+        ),
+        (
+            {"audio_url": "https://example.com/audio.mp3"},
+            "audio_url",
+        ),
+        (
+            {"video_url": "https://example.com/vid.mp4"},
+            "video_url",
+        ),
+        (
+            {"input_audio": {"data": "abc", "format": "wav"}},
+            "input_audio",
+        ),
+    ],
+    ids=[
+        "image_url_no_type",
+        "audio_url_no_type",
+        "video_url_no_type",
+        "input_audio_no_type",
+    ],
+)
+def test_system_message_rejects_mm_content_without_type_key(
+    phi3v_model_config, role, part, part_type_label
+):
+    """Parts without an explicit ``type`` field but with a multimodal key
+    (e.g. ``{"image_url": "..."}`` ) must also be rejected for text-only
+    roles.
+
+    See https://github.com/vllm-project/vllm/issues/33925
+    """
+    messages = [
+        {
+            "role": role,
+            "content": [part],
+        },
+        {
+            "role": "user",
+            "content": "Hello",
+        },
+    ]
+
+    with pytest.raises(ValueError, match=f"'{part_type_label}' is not supported"):
+        parse_chat_messages(
+            messages,
+            phi3v_model_config,
+            content_format="string",
+        )
+
+
+@pytest.mark.parametrize("role", ["system", "developer"])
+def test_system_message_accepts_text_content(phi3v_model_config, role):
+    """System and developer messages with text-only content should work."""
+    messages = [
+        {
+            "role": role,
+            "content": [{"type": "text", "text": "You are helpful."}],
+        },
+        {
+            "role": "user",
+            "content": "Hello",
+        },
+    ]
+
+    # Should not raise
+    conversation, _, _ = parse_chat_messages(
+        messages,
+        phi3v_model_config,
+        content_format="string",
+    )
+    assert conversation[0]["role"] == role
+    assert conversation[0]["content"] == "You are helpful."
+
+
+@pytest.mark.parametrize("role", ["system", "developer"])
+def test_system_message_accepts_string_content(phi3v_model_config, role):
+    """System and developer messages with plain string content should work."""
+    messages = [
+        {
+            "role": role,
+            "content": "You are helpful.",
+        },
+        {
+            "role": "user",
+            "content": "Hello",
+        },
+    ]
+
+    # Should not raise
+    conversation, _, _ = parse_chat_messages(
+        messages,
+        phi3v_model_config,
+        content_format="string",
+    )
+    assert conversation[0]["role"] == role
+    assert conversation[0]["content"] == "You are helpful."
diff --git a/vllm/entrypoints/chat_utils.py b/vllm/entrypoints/chat_utils.py
index c48d7bea983c..e013889443b5 100644
--- a/vllm/entrypoints/chat_utils.py
+++ b/vllm/entrypoints/chat_utils.py
@@ -1335,6 +1335,7 @@ def _parse_chat_message_content_parts(
         parse_res = _parse_chat_message_content_part(
             part,
             mm_parser,
+            role=role,
             wrap_dicts=wrap_dicts,
             interleave_strings=interleave_strings,
         )
@@ -1360,6 +1361,7 @@ def _parse_chat_message_content_part(
     part: ChatCompletionContentPartParam,
     mm_parser: BaseMultiModalContentParser,
     *,
+    role: str,
     wrap_dicts: bool,
     interleave_strings: bool,
 ) -> _ContentPart | None:
@@ -1372,6 +1374,30 @@ def _parse_chat_message_content_part(
     """
     if isinstance(part, str):  # Handle plain text parts
         return part
+
+    # Validate text-only roles (system / developer) before doing any
+    # multimodal parsing.  This covers both parts with an explicit ``type``
+    # field and parts that only carry a multimodal key (e.g.
+    # ``{"image_url": "..."}`` without ``"type"``).
+    if role in _ROLES_TEXT_ONLY:
+        part_type_raw = part.get("type")
+        has_explicit_mm_type = (
+            isinstance(part_type_raw, str) and part_type_raw not in _TEXT_CONTENT_TYPES
+        )
+        has_mm_key = bool(set(part.keys()) & _MULTIMODAL_CONTENT_KEYS)
+        if has_explicit_mm_type or has_mm_key:
+            # Build a descriptive label for the error message.
+            label = (
+                part_type_raw
+                if has_explicit_mm_type
+                else next(k for k in part if k in _MULTIMODAL_CONTENT_KEYS)
+            )
+            raise ValueError(
+                f"Content part type '{label}' is not supported "
+                f"in '{role}' messages. Only text content is accepted "
+                f"for '{role}' role messages."
+            )
+
     # Handle structured dictionary parts
     part_type, content = _parse_chat_message_content_mm_part(part)
     # if part_type is text/refusal/image_url/audio_url/video_url/input_audio but
@@ -1442,6 +1468,36 @@ def _parse_chat_message_content_part(
 _ToolParser = partial(cast, ChatCompletionToolMessageParam)
 
 
+_ROLES_TEXT_ONLY = frozenset({"system", "developer"})
+"""Roles whose ``content`` field must only contain text parts,
+per the OpenAI Chat Completion API specification."""
+
+_TEXT_CONTENT_TYPES = frozenset(
+    {
+        "text",
+        "input_text",
+        "output_text",
+        "thinking",
+        "refusal",
+    }
+)
+"""Content part types that are considered text-only."""
+
+_MULTIMODAL_CONTENT_KEYS = frozenset(
+    {
+        "image_url",
+        "image_pil",
+        "image_embeds",
+        "audio_url",
+        "input_audio",
+        "audio_embeds",
+        "video_url",
+    }
+)
+"""Keys whose presence in a content part dict indicates multimodal content,
+even when an explicit ``type`` field is absent."""
+
+
 def _parse_chat_message_content(
     message: ChatCompletionMessageParam,
     mm_tracker: BaseMultiModalItemTracker,
@@ -1456,6 +1512,7 @@ def _parse_chat_message_content(
         content = []
     elif isinstance(content, str):
         content = [ChatCompletionContentPartTextParam(type="text", text=content)]
+
     result = _parse_chat_message_content_parts(
         role,
         content,  # type: ignore

From 5b44ab9417601d9f69230f3ba4fb53db06954294 Mon Sep 17 00:00:00 2001
From: Varun Chawla <varun_6april@hotmail.com>
Date: Thu, 19 Feb 2026 21:58:40 -0800
Subject: [PATCH 2/3] Change non-text content validation from error to warning

Instead of raising a ValueError when a system/developer message
contains a non-text content part, log a warning via logger.warning
and skip that part. This is consistent with the decision in #34072.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 tests/entrypoints/test_chat_utils.py | 39 ++++++++++++++++++----------
 vllm/entrypoints/chat_utils.py       | 10 ++++---
 2 files changed, 32 insertions(+), 17 deletions(-)

diff --git a/tests/entrypoints/test_chat_utils.py b/tests/entrypoints/test_chat_utils.py
index 31a0807c04cd..57a3c8672a09 100644
--- a/tests/entrypoints/test_chat_utils.py
+++ b/tests/entrypoints/test_chat_utils.py
@@ -2706,13 +2706,14 @@ async def test_parse_chat_messages_video_vision_chunk_with_uuid_async(
     ],
     ids=["image_url", "input_audio", "video_url"],
 )
-def test_system_message_rejects_non_text_content(
-    phi3v_model_config, role, part, part_type_label
+def test_system_message_warns_on_non_text_content(
+    phi3v_model_config, role, part, part_type_label, caplog
 ):
-    """System and developer messages must only accept text content.
+    """System and developer messages should warn on multimodal content.
 
-    Per the OpenAI API specification, sending multimodal content (e.g.
-    ``image_url``) in a system message should be rejected with an error.
+    Per the decision in https://github.com/vllm-project/vllm/pull/34072,
+    sending multimodal content (e.g. ``image_url``) in a system message
+    should issue a warning and skip the part rather than raising an error.
     See https://github.com/vllm-project/vllm/issues/33925
     """
     messages = [
@@ -2726,13 +2727,19 @@ def test_system_message_rejects_non_text_content(
         },
     ]
 
-    with pytest.raises(ValueError, match=f"'{part_type_label}' is not supported"):
-        parse_chat_messages(
+    import logging
+    with caplog.at_level(logging.WARNING, logger="vllm.entrypoints.chat_utils"):
+        conversation, _, _ = parse_chat_messages(
             messages,
             phi3v_model_config,
             content_format="string",
         )
 
+    assert any(
+        f"'{part_type_label}' is not supported" in record.message
+        for record in caplog.records
+    ), f"Expected warning about '{part_type_label}' not found in logs"
+
 
 @pytest.mark.parametrize("role", ["system", "developer"])
 @pytest.mark.parametrize(
@@ -2762,12 +2769,12 @@ def test_system_message_rejects_non_text_content(
         "input_audio_no_type",
     ],
 )
-def test_system_message_rejects_mm_content_without_type_key(
-    phi3v_model_config, role, part, part_type_label
+def test_system_message_warns_on_mm_content_without_type_key(
+    phi3v_model_config, role, part, part_type_label, caplog
 ):
     """Parts without an explicit ``type`` field but with a multimodal key
-    (e.g. ``{"image_url": "..."}`` ) must also be rejected for text-only
-    roles.
+    (e.g. ``{"image_url": "..."}`` ) should issue a warning for text-only
+    roles and skip the part.
 
     See https://github.com/vllm-project/vllm/issues/33925
     """
@@ -2782,13 +2789,19 @@ def test_system_message_rejects_mm_content_without_type_key(
         },
     ]
 
-    with pytest.raises(ValueError, match=f"'{part_type_label}' is not supported"):
-        parse_chat_messages(
+    import logging
+    with caplog.at_level(logging.WARNING, logger="vllm.entrypoints.chat_utils"):
+        conversation, _, _ = parse_chat_messages(
             messages,
             phi3v_model_config,
             content_format="string",
         )
 
+    assert any(
+        f"'{part_type_label}' is not supported" in record.message
+        for record in caplog.records
+    ), f"Expected warning about '{part_type_label}' not found in logs"
+
 
 @pytest.mark.parametrize("role", ["system", "developer"])
 def test_system_message_accepts_text_content(phi3v_model_config, role):
diff --git a/vllm/entrypoints/chat_utils.py b/vllm/entrypoints/chat_utils.py
index e013889443b5..f63aaaeb520c 100644
--- a/vllm/entrypoints/chat_utils.py
+++ b/vllm/entrypoints/chat_utils.py
@@ -1392,11 +1392,13 @@ def _parse_chat_message_content_part(
                 if has_explicit_mm_type
                 else next(k for k in part if k in _MULTIMODAL_CONTENT_KEYS)
             )
-            raise ValueError(
-                f"Content part type '{label}' is not supported "
-                f"in '{role}' messages. Only text content is accepted "
-                f"for '{role}' role messages."
+            logger.warning(
+                "Content part type '%s' is not supported "
+                "in '%s' messages. Only text content is accepted "
+                "for '%s' role messages. Skipping this content part.",
+                label, role, role,
             )
+            return None
 
     # Handle structured dictionary parts
     part_type, content = _parse_chat_message_content_mm_part(part)

From 584013b8891b4872c965e439d4b40f0cd803f95a Mon Sep 17 00:00:00 2001
From: Varun Chawla <varun_6april@hotmail.com>
Date: Thu, 19 Feb 2026 22:17:59 -0800
Subject: [PATCH 3/3] style: fix pre-commit formatting issues

Add the missing blank line after each local `import logging` in the
tests and put each `logger.warning` argument on its own line, per the
project style guide.

Signed-off-by: Varun Chawla <varun_6april@hotmail.com>
---
 tests/entrypoints/test_chat_utils.py | 2 ++
 vllm/entrypoints/chat_utils.py       | 4 +++-
 2 files changed, 5 insertions(+), 1 deletion(-)

diff --git a/tests/entrypoints/test_chat_utils.py b/tests/entrypoints/test_chat_utils.py
index 57a3c8672a09..103ffa2dbcbc 100644
--- a/tests/entrypoints/test_chat_utils.py
+++ b/tests/entrypoints/test_chat_utils.py
@@ -2728,6 +2728,7 @@ def test_system_message_warns_on_non_text_content(
     ]
 
     import logging
+
     with caplog.at_level(logging.WARNING, logger="vllm.entrypoints.chat_utils"):
         conversation, _, _ = parse_chat_messages(
             messages,
@@ -2790,6 +2791,7 @@ def test_system_message_warns_on_mm_content_without_type_key(
     ]
 
     import logging
+
     with caplog.at_level(logging.WARNING, logger="vllm.entrypoints.chat_utils"):
         conversation, _, _ = parse_chat_messages(
             messages,
diff --git a/vllm/entrypoints/chat_utils.py b/vllm/entrypoints/chat_utils.py
index f63aaaeb520c..2a5e46b491fa 100644
--- a/vllm/entrypoints/chat_utils.py
+++ b/vllm/entrypoints/chat_utils.py
@@ -1396,7 +1396,9 @@ def _parse_chat_message_content_part(
                 "Content part type '%s' is not supported "
                 "in '%s' messages. Only text content is accepted "
                 "for '%s' role messages. Skipping this content part.",
-                label, role, role,
+                label,
+                role,
+                role,
             )
             return None