diff --git a/.gitignore b/.gitignore
index d62536cfb91d..8b38903cd999 100644
--- a/.gitignore
+++ b/.gitignore
@@ -239,5 +239,5 @@ vllm/grpc/vllm_engine_pb2.py
 vllm/grpc/vllm_engine_pb2_grpc.py
 vllm/grpc/vllm_engine_pb2.pyi
 
-# Ignore generated cpu headers 
+# Ignore generated cpu headers
 csrc/cpu/cpu_attn_dispatch_generated.h
diff --git a/tests/entrypoints/openai/parser/test_harmony_utils.py b/tests/entrypoints/openai/parser/test_harmony_utils.py
index 21b53dff1507..6b8e9c26e365 100644
--- a/tests/entrypoints/openai/parser/test_harmony_utils.py
+++ b/tests/entrypoints/openai/parser/test_harmony_utils.py
@@ -1,15 +1,19 @@
 # SPDX-License-Identifier: Apache-2.0
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
 
+from unittest.mock import patch
+
 import pytest
 from openai_harmony import Message, Role
 
 from tests.entrypoints.openai.utils import verify_harmony_messages
 from vllm.entrypoints.openai.parser.harmony_utils import (
     auto_drop_analysis_messages,
+    get_developer_message,
     get_encoding,
     get_system_message,
     has_custom_tools,
+    inject_response_formats,
     parse_chat_input_to_harmony_message,
     parse_chat_output,
 )
@@ -928,3 +932,99 @@ def test_reasoning_with_empty_content_returns_none(self):
         msg = response_input_to_harmony(item, prev_responses=[])
 
         assert msg is None
+
+
+class TestInjectResponseFormats:
+    def test_appends_to_existing_instructions(self):
+        prompt = inject_response_formats("You are helpful.", {"type": "object"})
+        # The original instructions lead, followed by heading and compact schema.
+        assert prompt.startswith("You are helpful.")
+        assert "# Response Formats" in prompt
+        assert '{"type":"object"}' in prompt
+
+    def test_none_instructions_creates_section(self):
+        prompt = inject_response_formats(None, {"type": "object"})
+        assert prompt.startswith("# Response Formats")
+        assert '{"type":"object"}' in prompt
+
+    def test_custom_format_name(self):
+        prompt = inject_response_formats(None, {"type": "object"}, format_name="order")
+        assert "## order" in prompt
+
+    def test_compact_json_no_spaces(self):
+        nested = {"type": "object", "properties": {"name": {"type": "string"}}}
+        compact = '{"type":"object","properties":{"name":{"type":"string"}}}'
+        assert compact in inject_response_formats(None, nested)
+
+    def test_section_separated_by_blank_lines(self):
+        prompt = inject_response_formats("Instructions here.", {"type": "object"})
+        assert "\n\n# Response Formats\n\n## structured_output\n\n" in prompt
+
+
+class TestGetDeveloperMessageResponseFormats:
+    """Tests for response_format_section parameter in get_developer_message."""
+
+    ENV_VAR = (
+        "vllm.entrypoints.openai.parser.harmony_utils"
+        ".envs.VLLM_GPT_OSS_HARMONY_SYSTEM_INSTRUCTIONS"
+    )
+
+    def _extract_instructions_text(self, dev_msg: Message) -> str | None:
+        """Return the first non-None ``instructions`` among the content items."""
+        candidates = (
+            getattr(content_item, "instructions", None)
+            for content_item in dev_msg.content
+        )
+        return next((text for text in candidates if text is not None), None)
+
+    def test_response_format_preserved_with_system_instructions(self):
+        """With VLLM_GPT_OSS_HARMONY_SYSTEM_INSTRUCTIONS set to True the
+        user instructions are left out of the developer message, while
+        the response format section is still included."""
+        section = "# Response Formats\n\n## structured_output\n\n{}"
+        with patch(self.ENV_VAR, new=True):
+            message = get_developer_message(
+                instructions="Be concise.",
+                response_format_section=section,
+            )
+        rendered = self._extract_instructions_text(message)
+        assert rendered is not None
+        assert "# Response Formats" in rendered
+        # The user-supplied instructions must be absent.
+        assert "Be concise." not in rendered
+
+    def test_response_format_and_instructions_without_system_instructions(self):
+        """With VLLM_GPT_OSS_HARMONY_SYSTEM_INSTRUCTIONS set to False the
+        instructions and the response format section are both included."""
+        section = "# Response Formats\n\n## structured_output\n\n{}"
+        with patch(self.ENV_VAR, new=False):
+            message = get_developer_message(
+                instructions="Be concise.",
+                response_format_section=section,
+            )
+        rendered = self._extract_instructions_text(message)
+        assert rendered is not None
+        assert "Be concise." in rendered
+        assert "# Response Formats" in rendered
+
+    def test_response_format_only_no_instructions(self):
+        """With instructions=None, the response format section is still rendered."""
+        section = "# Response Formats\n\n## structured_output\n\n{}"
+        with patch(self.ENV_VAR, new=False):
+            message = get_developer_message(
+                instructions=None,
+                response_format_section=section,
+            )
+        rendered = self._extract_instructions_text(message)
+        assert rendered is not None
+        assert "# Response Formats" in rendered
+
+    def test_backward_compat_no_response_format(self):
+        """Omitting response_format_section keeps the instructions-only output."""
+        with patch(self.ENV_VAR, new=False):
+            message = get_developer_message(
+                instructions="Be concise.",
+            )
+        rendered = self._extract_instructions_text(message)
+        assert rendered is not None
+        assert "Be concise." in rendered
+        assert "# Response Formats" not in rendered
diff --git a/tests/entrypoints/openai/responses/test_response_formats.py b/tests/entrypoints/openai/responses/test_response_formats.py
new file mode 100644
index 000000000000..9c688f9bbd4b
--- /dev/null
+++ b/tests/entrypoints/openai/responses/test_response_formats.py
@@ -0,0 +1,90 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+
+"""Tests for response format schema extraction and developer message injection.
+
+These tests verify that structured output schemas are correctly extracted from
+ResponsesRequest and injected into the Harmony developer message per the
+Harmony cookbook specification.
+"""
+
+from openai.types.responses.response_format_text_json_schema_config import (
+    ResponseFormatTextJSONSchemaConfig,
+)
+
+from vllm.entrypoints.openai.responses.protocol import (
+    ResponsesRequest,
+    ResponseTextConfig,
+)
+from vllm.entrypoints.openai.responses.serving import (
+    _extract_response_format_schema,
+)
+from vllm.sampling_params import StructuredOutputsParams
+
+
+def _make_json_schema_text_config(schema: dict) -> ResponseTextConfig:
+    """Build a text config whose format is a json_schema named test_schema."""
+    json_schema_format = ResponseFormatTextJSONSchemaConfig(
+        type="json_schema", name="test_schema", schema=schema
+    )
+    config = ResponseTextConfig()
+    config.format = json_schema_format
+    return config
+
+
+class TestExtractResponseFormatSchema:
+    def test_extracts_from_text_format_json_schema(self):
+        """A json_schema text format yields its schema."""
+        expected = {"type": "object", "properties": {"name": {"type": "string"}}}
+        req = ResponsesRequest(
+            model="test-model",
+            input="test",
+            text=_make_json_schema_text_config(expected),
+        )
+        assert _extract_response_format_schema(req) == expected
+
+    def test_extracts_from_structured_outputs_json(self):
+        """structured_outputs.json is used when no text format is given."""
+        expected = {"type": "object", "properties": {"id": {"type": "integer"}}}
+        req = ResponsesRequest(
+            model="test-model",
+            input="test",
+            structured_outputs=StructuredOutputsParams(json=expected),
+        )
+        assert _extract_response_format_schema(req) == expected
+
+    def test_returns_none_for_text_format(self):
+        """A plain text format carries no schema."""
+        req = ResponsesRequest(
+            model="test-model",
+            input="test",
+            text=ResponseTextConfig(format={"type": "text"}),
+        )
+        assert _extract_response_format_schema(req) is None
+
+    def test_returns_none_for_no_format(self):
+        """A request without any format settings carries no schema."""
+        req = ResponsesRequest(
+            model="test-model",
+            input="test",
+        )
+        assert _extract_response_format_schema(req) is None
+
+    def test_text_format_takes_precedence(self):
+        """text.format.json_schema is checked before structured_outputs."""
+        from_text = {
+            "type": "object",
+            "properties": {"a": {"type": "string"}},
+        }
+        from_structured_outputs = {
+            "type": "object",
+            "properties": {"b": {"type": "string"}},
+        }
+        # Both sources are populated; the text.format schema must be returned.
+        req = ResponsesRequest(
+            model="test-model",
+            input="test",
+            text=_make_json_schema_text_config(from_text),
+            structured_outputs=StructuredOutputsParams(json=from_structured_outputs),
+        )
+        assert _extract_response_format_schema(req) == from_text
diff --git a/tests/entrypoints/openai/responses/test_sampling_params.py b/tests/entrypoints/openai/responses/test_sampling_params.py
index 87910271dd75..7509489ca3c4 100644
--- a/tests/entrypoints/openai/responses/test_sampling_params.py
+++ b/tests/entrypoints/openai/responses/test_sampling_params.py
@@ -132,6 +132,25 @@ def test_structured_outputs_passed_through(self):
         assert sampling_params.structured_outputs is not None
         assert sampling_params.structured_outputs.grammar == "root ::= 'hello'"
 
+    def test_json_object_format_produces_structured_outputs(self):
+        """A json_object text format is mapped to json_object structured outputs."""
+        from openai.types.shared.response_format_json_object import (
+            ResponseFormatJSONObject,
+        )
+
+        json_object_config = ResponseTextConfig()
+        json_object_config.format = ResponseFormatJSONObject(type="json_object")
+        params = ResponsesRequest(
+            model="test-model",
+            input="test input",
+            text=json_object_config,
+        ).to_sampling_params(default_max_tokens=1000)
+
+        # to_sampling_params must have produced a constraint with json_object set.
+        constraint = params.structured_outputs
+        assert constraint is not None
+        assert constraint.json_object is True
+
     def test_structured_outputs_and_json_schema_conflict(self):
         """Test that specifying both structured_outputs and json_schema raises."""
         structured_outputs = StructuredOutputsParams(grammar="root ::= 'hello'")
diff --git a/tests/entrypoints/openai/responses/test_structured_output.py b/tests/entrypoints/openai/responses/test_structured_output.py
index db8b87768e44..4d092b7e7154 100644
--- a/tests/entrypoints/openai/responses/test_structured_output.py
+++ b/tests/entrypoints/openai/responses/test_structured_output.py
@@ -1,11 +1,18 @@
 # SPDX-License-Identifier: Apache-2.0
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+"""End-to-end and unit tests for structured output in the Responses API."""
+
 import json
 
 import openai
 import pytest
 from pydantic import BaseModel
 
+from vllm.entrypoints.openai.responses.serving import (
+    _constraint_to_content_format,
+)
+from vllm.sampling_params import StructuredOutputsParams
+
 
 @pytest.mark.asyncio
 async def test_structured_output(client: openai.AsyncOpenAI):
@@ -76,3 +83,22 @@ class CalendarEvent(BaseModel):
     assert len(participants) == 2
     assert participants[0] == "Alice"
     assert participants[1] == "Bob"
+
+
+class TestConstraintToContentFormat:
+    """Unit tests for the _constraint_to_content_format helper."""
+
+    def test_json_schema_string_is_parsed(self):
+        """A schema supplied as a JSON string comes back as a dict."""
+        age_schema = {"type": "object", "properties": {"age": {"type": "integer"}}}
+        as_string = StructuredOutputsParams(json=json.dumps(age_schema))
+
+        content_format = _constraint_to_content_format(as_string)
+        assert content_format == {"type": "json_schema", "json_schema": age_schema}
+
+    def test_structural_tag_only_returns_none(self):
+        """A structural_tag alone is no content constraint, so None is returned."""
+        tag_only = StructuredOutputsParams(structural_tag='{"type": "structural_tag"}')
+        content_format = _constraint_to_content_format(tag_only)
+
+        assert content_format is None
diff --git a/tests/v1/structured_output/test_gptoss_structural_tags.py b/tests/v1/structured_output/test_gptoss_structural_tags.py
new file mode 100644
index 000000000000..aa0fbe15f119
--- /dev/null
+++ b/tests/v1/structured_output/test_gptoss_structural_tags.py
@@ -0,0 +1,245 @@
+# SPDX-License-Identifier: Apache-2.0
+# SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+
+"""Unit tests for GPT-OSS structural tag support in reasoning (PR #25515)."""
+
+import json
+from unittest.mock import Mock
+
+import pytest
+
+from vllm.entrypoints.mcp.tool_server import ToolServer
+from vllm.reasoning.gptoss_reasoning_parser import (
+    GptOssReasoningParser,
+    from_builtin_tool_to_tag,
+    no_func_reasoning_tag,
+    tag_with_builtin_funcs,
+)
+
+
+class TestGptOssReasoningParser:
+    """Test cases for GptOssReasoningParser structural tag functionality."""
+
+    @pytest.fixture
+    def mock_tokenizer(self):
+        """Create a mock tokenizer for testing."""
+        tokenizer = Mock()
+        tokenizer.encode = Mock(return_value=[1, 2, 3, 4, 5])
+        tokenizer.vocab = {"<|end|>": 6}
+        tokenizer.get_vocab = Mock(return_value={"<|end|>": 6})
+        return tokenizer
+
+    @pytest.fixture
+    def reasoning_parser(self, mock_tokenizer):
+        """Create a GptOssReasoningParser instance."""
+        return GptOssReasoningParser(mock_tokenizer)
+
+    @pytest.fixture
+    def mock_tool_server_empty(self):
+        """Create a mock ToolServer with no tools."""
+        tool_server = Mock(spec=ToolServer)
+        tool_server.has_tool = Mock(return_value=False)
+        return tool_server
+
+    @pytest.fixture
+    def mock_tool_server_with_browser(self):
+        """Create a mock ToolServer with browser tool."""
+        tool_server = Mock(spec=ToolServer)
+        tool_server.has_tool = Mock(side_effect=lambda tool: tool == "browser")
+        return tool_server
+
+    @pytest.fixture
+    def mock_tool_server_with_all_tools(self):
+        """Create a mock ToolServer with all builtin tools."""
+        tool_server = Mock(spec=ToolServer)
+        tool_server.has_tool = Mock(
+            side_effect=lambda tool: tool in ["browser", "python", "container"]
+        )
+        return tool_server
+
+    def test_prepare_structured_tag_no_tool_server(self, reasoning_parser):
+        """Test prepare_structured_tag with no tool server."""
+        result = reasoning_parser.prepare_structured_tag(None, None)
+        expected = json.dumps(no_func_reasoning_tag)
+
+        assert result == expected
+
+        # Verify the structure is correct
+        parsed = json.loads(result)
+        assert parsed["type"] == "structural_tag"
+        assert parsed["format"]["type"] == "triggered_tags"
+        assert len(parsed["format"]["tags"]) == 1
+        assert parsed["format"]["tags"][0]["begin"] == "<|channel|>analysis<|message|>"
+        assert parsed["format"]["triggers"] == ["<|channel|>analysis"]
+
+    def test_prepare_structured_tag_with_all_tools(
+        self, reasoning_parser, mock_tool_server_with_all_tools
+    ):
+        """Test prepare_structured_tag with all builtin tools."""
+        result = reasoning_parser.prepare_structured_tag(
+            None, mock_tool_server_with_all_tools
+        )
+        parsed = json.loads(result)
+
+        # Should have analysis tag + tags for all 3 tools (2 tags each)
+        assert len(parsed["format"]["tags"]) == 7  # 1 analysis + 6 tool tags
+
+        # Check all tool tags are present
+        tag_begins = [tag["begin"] for tag in parsed["format"]["tags"]]
+        for tool in ["browser", "python", "container"]:
+            assert f"<|channel|>commentary to={tool}" in tag_begins
+            assert f"<|channel|>analysis to={tool}" in tag_begins
+
+    def test_prepare_structured_tag_with_original_tag(self, reasoning_parser):
+        """Test prepare_structured_tag when original_tag is provided."""
+        original_tag = '{"custom": "tag"}'
+        result = reasoning_parser.prepare_structured_tag(original_tag, None)
+
+        # Should return the original tag unchanged
+        assert result == original_tag
+
+    def test_from_builtin_tool_to_tag(self):
+        """Test from_builtin_tool_to_tag function."""
+        tags = from_builtin_tool_to_tag("python")
+
+        assert len(tags) == 2
+        assert tags[0]["begin"] == "<|channel|>commentary to=python"
+        assert tags[0]["content"]["type"] == "any_text"
+        assert tags[0]["end"] == "<|end|>"
+
+        assert tags[1]["begin"] == "<|channel|>analysis to=python"
+        assert tags[1]["content"]["type"] == "any_text"
+        assert tags[1]["end"] == "<|end|>"
+
+    def test_tag_with_builtin_funcs(self):
+        """Test tag_with_builtin_funcs function."""
+        builtin_tools = ["browser", "python"]
+        result = tag_with_builtin_funcs(no_func_reasoning_tag, builtin_tools)
+
+        assert result["type"] == "structural_tag"
+        # Should have original analysis tag + 2 tags per tool
+        assert len(result["format"]["tags"]) == 5  # 1 + 2*2
+
+        # Should have added commentary trigger
+        assert "<|channel|>commentary to=" in result["format"]["triggers"]
+        assert "<|channel|>analysis" in result["format"]["triggers"]
+
+    def test_tag_structure_invariants(self):
+        """Test that the basic tag structure follows expected format."""
+        # Test the base no_func_reasoning_tag structure
+        assert no_func_reasoning_tag["type"] == "structural_tag"
+        assert no_func_reasoning_tag["format"]["type"] == "triggered_tags"
+        assert no_func_reasoning_tag["format"]["stop_after_first"] is False
+
+        # Verify analysis tag structure
+        analysis_tag = no_func_reasoning_tag["format"]["tags"][0]
+        assert analysis_tag["begin"] == "<|channel|>analysis<|message|>"
+        assert analysis_tag["content"]["type"] == "any_text"
+        assert analysis_tag["end"] == "<|end|>"
+
+    def test_json_serialization_valid(
+        self, reasoning_parser, mock_tool_server_empty, mock_tool_server_with_all_tools
+    ):
+        """Test that all generated tags produce valid JSON.
+
+        Covers no tool server, an empty tool server and all builtin tools."""
+        # Test with no tool server
+        result1 = reasoning_parser.prepare_structured_tag(None, None)
+        json.loads(result1)  # Should not raise
+
+        # Test with empty tool server (shared fixture instead of an ad-hoc Mock)
+        result2 = reasoning_parser.prepare_structured_tag(None, mock_tool_server_empty)
+        json.loads(result2)  # Should not raise
+
+        # Test with tools
+        result3 = reasoning_parser.prepare_structured_tag(
+            None, mock_tool_server_with_all_tools
+        )
+        json.loads(result3)  # Should not raise
+
+    @pytest.mark.parametrize("tool_name", ["browser", "python", "container"])
+    def test_single_tool_integration(self, reasoning_parser, tool_name):
+        """Test integration with individual tools."""
+        tool_server = Mock(spec=ToolServer)
+        tool_server.has_tool = Mock(side_effect=lambda tool: tool == tool_name)
+
+        result = reasoning_parser.prepare_structured_tag(None, tool_server)
+        parsed = json.loads(result)
+
+        # Should have 1 analysis + 2 tool-specific tags
+        assert len(parsed["format"]["tags"]) == 3
+
+        tag_begins = [tag["begin"] for tag in parsed["format"]["tags"]]
+        assert f"<|channel|>commentary to={tool_name}" in tag_begins
+        assert f"<|channel|>analysis to={tool_name}" in tag_begins
+
+    def test_prepare_structured_tag_with_json_schema(self, reasoning_parser):
+        """Test that final channel tag has json_schema content constraint."""
+        content_format = {
+            "type": "json_schema",
+            "json_schema": {
+                "type": "object",
+                "properties": {"name": {"type": "string"}},
+            },
+        }
+        result = reasoning_parser.prepare_structured_tag(
+            None, None, final_content_format=content_format
+        )
+        parsed = json.loads(result)
+
+        # Should have analysis tag + final channel tag
+        assert len(parsed["format"]["tags"]) == 2
+
+        # Verify analysis tag is unchanged
+        assert parsed["format"]["tags"][0]["begin"] == "<|channel|>analysis<|message|>"
+        assert parsed["format"]["tags"][0]["content"]["type"] == "any_text"
+
+        # Verify final channel tag has the json_schema content constraint
+        final_tag = parsed["format"]["tags"][1]
+        assert final_tag["begin"] == "<|channel|>final<|message|>"
+        assert final_tag["end"] == "<|end|>"
+        assert final_tag["content"] == content_format
+
+        # Verify triggers include both analysis and final
+        assert "<|channel|>analysis" in parsed["format"]["triggers"]
+        assert "<|channel|>final" in parsed["format"]["triggers"]
+
+    def test_prepare_structured_tag_original_tag_ignores_constraint(
+        self, reasoning_parser
+    ):
+        """When original_tag is provided, final_content_format is ignored."""
+        original_tag = '{"custom": "tag"}'
+        content_format = {"type": "json_schema", "json_schema": {"type": "object"}}
+        result = reasoning_parser.prepare_structured_tag(
+            original_tag, None, final_content_format=content_format
+        )
+
+        # Should return the original tag unchanged
+        assert result == original_tag
+
+    def test_prepare_structured_tag_with_tools_and_constraint(
+        self, reasoning_parser, mock_tool_server_with_browser
+    ):
+        """Test that tools and content constraint coexist in the tag."""
+        content_format = {"type": "json_schema", "json_schema": {"type": "object"}}
+        result = reasoning_parser.prepare_structured_tag(
+            None, mock_tool_server_with_browser, final_content_format=content_format
+        )
+        parsed = json.loads(result)
+
+        # Should have analysis + 2 browser tags + final channel tag = 4
+        assert len(parsed["format"]["tags"]) == 4
+
+        tag_begins = [tag["begin"] for tag in parsed["format"]["tags"]]
+        assert "<|channel|>analysis<|message|>" in tag_begins
+        assert "<|channel|>commentary to=browser" in tag_begins
+        assert "<|channel|>analysis to=browser" in tag_begins
+        assert "<|channel|>final<|message|>" in tag_begins
+
+        # Verify final tag has the constraint
+        final_tag = next(
+            t
+            for t in parsed["format"]["tags"]
+            if t["begin"] == "<|channel|>final<|message|>"
+        )
+        assert final_tag["content"] == content_format
diff --git a/vllm/entrypoints/openai/parser/harmony_utils.py b/vllm/entrypoints/openai/parser/harmony_utils.py
index 9b4264456c51..9004045f30e7 100644
--- a/vllm/entrypoints/openai/parser/harmony_utils.py
+++ b/vllm/entrypoints/openai/parser/harmony_utils.py
@@ -2,6 +2,7 @@
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
 
 import datetime
+import json
 from collections.abc import Iterable, Sequence
 from typing import Literal
 
@@ -121,10 +122,16 @@ def create_tool_definition(tool: ChatCompletionToolsParam | Tool):
 def get_developer_message(
     instructions: str | None = None,
     tools: list[Tool | ChatCompletionToolsParam] | None = None,
+    response_format_section: str | None = None,
 ) -> Message:
     dev_msg_content = DeveloperContent.new()
+    parts: list[str] = []
     if instructions is not None and not envs.VLLM_GPT_OSS_HARMONY_SYSTEM_INSTRUCTIONS:
-        dev_msg_content = dev_msg_content.with_instructions(instructions)
+        parts.append(instructions)
+    if response_format_section is not None:
+        parts.append(response_format_section)
+    if parts:
+        dev_msg_content = dev_msg_content.with_instructions("\n\n".join(parts))
     if tools is not None:
         function_tools: list[Tool | ChatCompletionToolsParam] = []
         for tool in tools:
@@ -150,6 +157,25 @@ def get_developer_message(
     return dev_msg
 
 
+def inject_response_formats(
+    instructions: str | None,
+    schema: dict,
+    format_name: str = "structured_output",
+) -> str:
+    """Append a Harmony cookbook ``# Response Formats`` section.
+
+    Per the cookbook, structured output schemas belong in the developer
+    message under this heading; this complements grammar enforcement via
+    structural tags.  The schema is emitted as compact JSON.  With ``None``
+    or empty ``instructions`` only the section itself is returned.
+    """
+    # ensure_ascii=False: keep non-ASCII schema text (descriptions, enum
+    # values) literal in the prompt rather than as ``\uXXXX`` escapes.
+    schema_json = json.dumps(schema, separators=(",", ":"), ensure_ascii=False)
+    section = f"# Response Formats\n\n## {format_name}\n\n{schema_json}"
+    return f"{instructions}\n\n{section}" if instructions else section
+
+
 def get_user_message(content: str) -> Message:
     return Message.from_role_and_content(Role.USER, content)
 
diff --git a/vllm/entrypoints/openai/responses/protocol.py b/vllm/entrypoints/openai/responses/protocol.py
index a5f62bdd8c39..831fb1077243 100644
--- a/vllm/entrypoints/openai/responses/protocol.py
+++ b/vllm/entrypoints/openai/responses/protocol.py
@@ -346,6 +346,10 @@ def to_sampling_params(
                     # --follow-imports skip hides the class definition but also hides
                     # multiple third party conflicts, so best of both evils
                 )
+            elif response_format.type == "json_object":
+                structured_outputs = StructuredOutputsParams(
+                    json_object=True  # type: ignore[call-arg]
+                )
 
         stop = self.stop if self.stop else []
         if isinstance(stop, str):
diff --git a/vllm/entrypoints/openai/responses/serving.py b/vllm/entrypoints/openai/responses/serving.py
index 574282c4cdc6..064ec5ae74c1 100644
--- a/vllm/entrypoints/openai/responses/serving.py
+++ b/vllm/entrypoints/openai/responses/serving.py
@@ -2,6 +2,7 @@
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
 
 import asyncio
+import json as json_mod
 import time
 import uuid
 from collections import deque
@@ -66,6 +67,7 @@
     get_system_message,
     get_user_message,
     has_custom_tools,
+    inject_response_formats,
     render_for_completion,
 )
 from vllm.entrypoints.openai.responses.context import (
@@ -126,6 +128,26 @@
 logger = init_logger(__name__)
 
 
+def _extract_response_format_schema(request: ResponsesRequest) -> dict | None:
+    """Return the JSON schema the request constrains its output to, if any.
+
+    ``text.format`` of type ``json_schema`` is consulted first, then
+    ``structured_outputs.json`` (decoded when given as a JSON string).
+    """
+    text_format = None if request.text is None else request.text.format
+    if text_format is not None and text_format.type == "json_schema":
+        text_schema = text_format.schema_
+        if text_schema is not None:
+            return text_schema
+    structured = request.structured_outputs
+    if structured is None or structured.json is None:
+        return None
+    so_schema = structured.json
+    if isinstance(so_schema, str):
+        return json_mod.loads(so_schema)
+    return so_schema
+
+
 def _extract_allowed_tools_from_mcp_requests(
     tools: list[Tool],
 ) -> dict[str, list[str] | None]:
@@ -165,6 +187,32 @@ def _extract_allowed_tools_from_mcp_requests(
     return allowed_tools_map
 
 
+def _constraint_to_content_format(
+    params: StructuredOutputsParams,
+) -> dict | None:
+    """Convert a StructuredOutputsParams constraint into an xgrammar
+    content format dict suitable for embedding in a structural tag.
+
+    Checked in order: json, json_object, regex, grammar, choice.
+    Returns ``None`` when none of them is set."""
+    raw_schema = params.json
+    if raw_schema is not None:
+        # A schema given as a JSON string is decoded; a dict is used as-is.
+        if not isinstance(raw_schema, dict):
+            raw_schema = json_mod.loads(raw_schema)
+        return {"type": "json_schema", "json_schema": raw_schema}
+    if params.json_object:
+        return {"type": "json_schema", "json_schema": {"type": "object"}}
+    if params.regex is not None:
+        return {"type": "regex", "pattern": params.regex}
+    if params.grammar is not None:
+        return {"type": "grammar", "grammar": params.grammar}
+    if params.choice is None:
+        return None
+    options = [{"type": "const_string", "value": c} for c in params.choice]
+    return {"type": "or", "elements": options}
+
+
 class OpenAIServingResponses(OpenAIServing):
     def __init__(
         self,
@@ -411,83 +459,126 @@ async def create_responses(
         else:
             assert len(builtin_tool_list) == 0
             available_tools = []
-        tokenizer = self.renderer.get_tokenizer()
-
-        for engine_prompt in engine_prompts:
-            maybe_error = self._validate_generator_input(engine_prompt)
-            if maybe_error is not None:
-                return maybe_error
-
-            default_max_tokens = get_max_tokens(
-                max_model_len,
-                request.max_output_tokens,
-                self._extract_prompt_len(engine_prompt),
-                self.default_sampling_params,
-                self.override_max_tokens,
-            )
+        try:
+            tokenizer = self.renderer.get_tokenizer()
 
-            sampling_params = request.to_sampling_params(
-                default_max_tokens, self.default_sampling_params
-            )
+            for engine_prompt in engine_prompts:
+                maybe_error = self._validate_generator_input(engine_prompt)
+                if maybe_error is not None:
+                    return maybe_error
 
-            trace_headers = (
-                None
-                if raw_request is None
-                else await self._get_trace_headers(raw_request.headers)
-            )
+                default_max_tokens = get_max_tokens(
+                    max_model_len,
+                    request.max_output_tokens,
+                    self._extract_prompt_len(engine_prompt),
+                    self.default_sampling_params,
+                    self.override_max_tokens,
+                )
 
-            context: ConversationContext
-            if self.use_harmony:
-                if request.stream:
-                    context = StreamingHarmonyContext(messages, available_tools)
-                else:
-                    context = HarmonyContext(messages, available_tools)
-            else:
-                if envs.VLLM_USE_EXPERIMENTAL_PARSER_CONTEXT:
-                    # This is a feature in development for parsing
-                    # tokens during generation instead of at the end
-                    context = ParsableContext(
-                        response_messages=messages,
-                        tokenizer=tokenizer,
-                        reasoning_parser_cls=self.parser.reasoning_parser_cls
-                        if self.parser
-                        else None,
-                        request=request,
-                        tool_parser_cls=self.parser.tool_parser_cls
-                        if self.parser
-                        else None,
-                        available_tools=available_tools,
-                        chat_template=self.chat_template,
-                        chat_template_content_format=self.chat_template_content_format,
-                    )
-                else:
-                    context = SimpleContext()
+                sampling_params = request.to_sampling_params(
+                    default_max_tokens, self.default_sampling_params
+                )
 
-            if self.parser and self.parser.reasoning_parser_cls is not None:
-                reasoning_parser = self.parser.reasoning_parser_cls(tokenizer)
-                if (
-                    isinstance(
-                        struct_out := sampling_params.structured_outputs,
-                        StructuredOutputsParams,
-                    )
-                    and struct_out.all_non_structural_tag_constraints_none()
-                ):
-                    sampling_params.structured_outputs = replace(
-                        struct_out,
-                        structural_tag=reasoning_parser.prepare_structured_tag(
-                            struct_out.structural_tag, self.tool_server
-                        ),
-                    )
-            generator = self._generate_with_builtin_tools(
-                request_id=request.request_id,
-                engine_prompt=engine_prompt,
-                sampling_params=sampling_params,
-                context=context,
-                lora_request=lora_request,
-                priority=request.priority,
-                trace_headers=trace_headers,
-            )
-            generators.append(generator)
+                trace_headers = (
+                    None
+                    if raw_request is None
+                    else await self._get_trace_headers(raw_request.headers)
+                )
+
+                context: ConversationContext
+                if self.use_harmony:
+                    if request.stream:
+                        context = StreamingHarmonyContext(messages, available_tools)
+                    else:
+                        context = HarmonyContext(messages, available_tools)
+                else:
+                    if envs.VLLM_USE_EXPERIMENTAL_PARSER_CONTEXT:
+                        # This is a feature in development for parsing
+                        # tokens during generation instead of at the end
+                        context = ParsableContext(
+                            response_messages=messages,
+                            tokenizer=tokenizer,
+                            reasoning_parser_cls=self.parser.reasoning_parser_cls
+                            if self.parser
+                            else None,
+                            request=request,
+                            tool_parser_cls=self.parser.tool_parser_cls
+                            if self.parser
+                            else None,
+                            available_tools=available_tools,
+                            chat_template=self.chat_template,
+                            chat_template_content_format=self.chat_template_content_format,
+                        )
+                    else:
+                        context = SimpleContext()
+
+                if self.parser and self.parser.reasoning_parser_cls is not None:
+                    reasoning_parser = self.parser.reasoning_parser_cls(tokenizer)
+                    struct_out = sampling_params.structured_outputs
+
+                    if isinstance(struct_out, StructuredOutputsParams):
+                        if struct_out.all_non_structural_tag_constraints_none():
+                            # No content constraint — just apply reasoning
+                            # channel tags
+                            sampling_params.structured_outputs = replace(
+                                struct_out,
+                                structural_tag=(
+                                    reasoning_parser.prepare_structured_tag(
+                                        struct_out.structural_tag,
+                                        self.tool_server,
+                                    )
+                                ),
+                            )
+                        else:
+                            # Content constraint present (json, regex,
+                            # grammar, choice, json_object). Embed it in the
+                            # final channel tag within the structural tag.
+                            content_fmt = _constraint_to_content_format(struct_out)
+                            if content_fmt is not None:
+                                structural_tag = (
+                                    reasoning_parser.prepare_structured_tag(
+                                        None,
+                                        self.tool_server,
+                                        final_content_format=content_fmt,
+                                    )
+                                )
+                                if structural_tag is not None:
+                                    # Clear content constraints, set
+                                    # structural_tag, but preserve options
+                                    # like disable_any_whitespace.
+                                    sampling_params.structured_outputs = replace(
+                                        struct_out,
+                                        json=None,
+                                        regex=None,
+                                        choice=None,
+                                        grammar=None,
+                                        json_object=None,
+                                        structural_tag=structural_tag,
+                                    )
+                    elif struct_out is None:
+                        # No structured output requested, but still need
+                        # reasoning channel tags
+                        tag = reasoning_parser.prepare_structured_tag(
+                            None, self.tool_server
+                        )
+                        if tag is not None:
+                            sampling_params.structured_outputs = (
+                                StructuredOutputsParams(
+                                    structural_tag=tag  # type: ignore[call-arg]
+                                )
+                            )
+                generator = self._generate_with_builtin_tools(
+                    request_id=request.request_id,
+                    engine_prompt=engine_prompt,
+                    sampling_params=sampling_params,
+                    context=context,
+                    lora_request=lora_request,
+                    priority=request.priority,
+                    trace_headers=trace_headers,
+                )
+                generators.append(generator)
+        except ValueError as e:
+            return self.create_error_response(e)
 
         assert len(generators) == 1
         (result_generator,) = generators
@@ -1136,9 +1227,23 @@ def _construct_input_messages_with_harmony(
                 request, with_custom_tools, tool_types
             )
             messages.append(sys_msg)
-            if with_custom_tools:
+
+            # Determine if we need a developer message.
+            # Per Harmony cookbook: developer message holds instructions,
+            # function tools, AND response format schemas.
+            response_format_schema = _extract_response_format_schema(request)
+            needs_dev_msg = with_custom_tools or response_format_schema is not None
+
+            if needs_dev_msg:
+                response_format_text = None
+                if response_format_schema is not None:
+                    response_format_text = inject_response_formats(
+                        None, response_format_schema
+                    )
                 dev_msg = get_developer_message(
-                    instructions=request.instructions, tools=request.tools
+                    instructions=request.instructions,
+                    tools=request.tools if with_custom_tools else None,
+                    response_format_section=response_format_text,
                 )
                 messages.append(dev_msg)
             messages += construct_harmony_previous_input_messages(request)
@@ -1978,7 +2083,7 @@ def _increment_sequence_number_and_return(
                 output=[],
                 status="in_progress",
                 usage=None,
-            ).model_dump()
+            )
             yield _increment_sequence_number_and_return(
                 ResponseCreatedEvent(
                     type="response.created",
diff --git a/vllm/reasoning/abs_reasoning_parsers.py b/vllm/reasoning/abs_reasoning_parsers.py
index 5271a307075e..29ee6d33be0c 100644
--- a/vllm/reasoning/abs_reasoning_parsers.py
+++ b/vllm/reasoning/abs_reasoning_parsers.py
@@ -154,10 +154,18 @@ def prepare_structured_tag(
         self,
         original_tag: str | None,
         tool_server: ToolServer | None,
+        final_content_format: dict | None = None,
     ) -> str | None:
         """
-        Instance method that is implemented for preparing the structured tag
-        Otherwise, None is returned
+        Prepare the structural tag for this parser. This base implementation
+        returns None; parsers that support structural tags override it.
+
+        Args:
+            original_tag: An existing structural tag string, if any.
+            tool_server: The tool server for builtin tool support.
+            final_content_format: Optional xgrammar content format dict
+                (e.g. json_schema, regex) to embed in the <|channel|>final
+                tag for constraining the model's final output region.
         """
         return None
 
@@ -298,7 +306,8 @@ def _decorator(obj: type[ReasoningParser]) -> type[ReasoningParser]:
             if isinstance(name, str):
                 names = [name]
             elif is_list_of(name, str):
-                names = name
+                assert name is not None
+                names = list(name)
             else:
                 names = [class_name]
 
diff --git a/vllm/reasoning/gptoss_reasoning_parser.py b/vllm/reasoning/gptoss_reasoning_parser.py
index 89299d4b12b8..4dba7eaa3f93 100644
--- a/vllm/reasoning/gptoss_reasoning_parser.py
+++ b/vllm/reasoning/gptoss_reasoning_parser.py
@@ -1,8 +1,9 @@
 # SPDX-License-Identifier: Apache-2.0
 # SPDX-FileCopyrightText: Copyright contributors to the vLLM project
+import copy
 import json
 from collections.abc import Sequence
-from typing import TYPE_CHECKING
+from typing import TYPE_CHECKING, Any
 
 from transformers import PreTrainedTokenizerBase
 
@@ -158,30 +159,71 @@ def extract_reasoning(
 
     # This function prepares the structural tag to format reasoning output
     def prepare_structured_tag(
-        self, original_tag: str | None, tool_server: ToolServer | None
+        self,
+        original_tag: str | None,
+        tool_server: ToolServer | None,
+        final_content_format: dict | None = None,
     ) -> str | None:
-        if original_tag is None:
-            if tool_server is None:
-                return json.dumps(no_func_reasoning_tag)
-            else:
-                builtin_tool_list: list[str] = []
-                if tool_server.has_tool("browser"):
-                    builtin_tool_list.append("browser")
-                if tool_server.has_tool("python"):
-                    builtin_tool_list.append("python")
-                if tool_server.has_tool("container"):
-                    builtin_tool_list.append("container")
-
-                if len(builtin_tool_list) > 0:
-                    logger.info("Builtin_tool_list: %s", builtin_tool_list)
-                    func_tag = json.dumps(
-                        tag_with_builtin_funcs(no_func_reasoning_tag, builtin_tool_list)
-                    )
-                else:
-                    logger.info("Builtin_tool_list is empty")
-                    func_tag = json.dumps(no_func_reasoning_tag)
-
-                return func_tag
-        else:
+        """Return the structural tag for this request as a JSON string.
+
+        Args:
+            original_tag: Structural tag supplied by the caller. When it is
+                not None it is returned unchanged.
+            tool_server: Queried for the "browser", "python" and "container"
+                builtin tools; any that are available are passed to
+                tag_with_builtin_funcs.
+            final_content_format: Optional content format dict used as the
+                content of an extra <|channel|>final tag. It is ignored when
+                original_tag is given.
+
+        Returns:
+            original_tag when it is not None, otherwise the JSON-encoded tag.
+        """
+        if original_tag is not None:
+            if final_content_format is not None:
+                # The caller-supplied tag is returned untouched, so the
+                # requested final-channel constraint cannot be honoured here;
+                # report that instead of dropping it silently.
+                logger.warning(
+                    "final_content_format is ignored because an explicit "
+                    "structural tag was provided"
+                )
             # There is potential risk for appending the tag to the original tag
             return original_tag
+
+        tag: dict[str, Any]
+        if tool_server is None:
+            tag = copy.deepcopy(no_func_reasoning_tag)
+        else:
+            builtin_tool_list: list[str] = []
+            if tool_server.has_tool("browser"):
+                builtin_tool_list.append("browser")
+            if tool_server.has_tool("python"):
+                builtin_tool_list.append("python")
+            if tool_server.has_tool("container"):
+                builtin_tool_list.append("container")
+
+            if len(builtin_tool_list) > 0:
+                logger.info("Builtin_tool_list: %s", builtin_tool_list)
+                tag = tag_with_builtin_funcs(no_func_reasoning_tag, builtin_tool_list)
+            else:
+                logger.info("Builtin_tool_list is empty")
+                tag = copy.deepcopy(no_func_reasoning_tag)
+
+        # If a content constraint is requested for the final channel,
+        # add a triggered tag for <|channel|>final with that constraint.
+        # This ensures grammar enforcement only applies within the final
+        # output region, not during reasoning.
+        # NOTE(review): this tag closes on <|end|>; confirm that is the token
+        # the model emits to finish a final-channel message.
+        if final_content_format is not None:
+            tag["format"]["triggers"].append("<|channel|>final")
+            tag["format"]["tags"].append(
+                {
+                    "begin": "<|channel|>final<|message|>",
+                    "content": final_content_format,
+                    "end": "<|end|>",
+                }
+            )
+
+        return json.dumps(tag)