PrefectHQ · voidborne-d · Apr 1, 2026 · Apr 1, 2026 · Apr 1, 2026
diff --git a/src/fastmcp/server/middleware/response_limiting.py b/src/fastmcp/server/middleware/response_limiting.py
@@ -3,6 +3,7 @@
 from __future__ import annotations
 
 import logging
+from typing import Any
 
 import mcp.types as mt
 import pydantic_core
@@ -67,12 +68,31 @@ def __init__(
         self.truncation_suffix = truncation_suffix
         self.tools = set(tools) if tools is not None else None
 
-    def _truncate_to_result(self, text: str) -> ToolResult:
-        """Truncate text to fit within max_size and wrap in ToolResult."""
+    def _truncate_to_result(
+        self,
+        text: str,
+        structured_content: dict[str, Any] | None = None,
+    ) -> ToolResult:
+        """Truncate text to fit within max_size and wrap in ToolResult.
+
+        Args:
+            text: The text content to truncate.
+            structured_content: Optional structured content to preserve.
+                When present, its serialized size is subtracted from the
+                budget available for text.
+        """
         suffix_bytes = len(self.truncation_suffix.encode("utf-8"))
         # Account for JSON wrapper overhead: {"content":[{"type":"text","text":"..."}]}
         overhead = 50
-        target_size = self.max_size - suffix_bytes - overhead
+
+        # Reserve space for structured_content when present
+        structured_bytes = 0
+        if structured_content is not None:
+            structured_bytes = len(
+                pydantic_core.to_json(structured_content, fallback=str)
+            )
+
+        target_size = self.max_size - suffix_bytes - overhead - structured_bytes
 
         if target_size <= 0:
             # Edge case: max_size too small for even the suffix
@@ -88,7 +108,10 @@ def _truncate_to_result(self, text: str) -> ToolResult:
                     + self.truncation_suffix
                 )
 
-        return ToolResult(content=[TextContent(type="text", text=truncated)])
+        return ToolResult(
+            content=[TextContent(type="text", text=truncated)],
+            structured_content=structured_content,
+        )
 
     async def on_call_tool(
         self,
@@ -122,4 +145,6 @@ async def on_call_tool(
             else serialized.decode("utf-8", errors="replace")
         )
 
-        return self._truncate_to_result(text)
+        return self._truncate_to_result(
+            text, structured_content=result.structured_content
+        )
diff --git a/tests/server/middleware/test_response_limiting.py b/tests/server/middleware/test_response_limiting.py
@@ -2,6 +2,7 @@
 
 import pytest
 from mcp.types import ImageContent, TextContent
+from pydantic import BaseModel
 
 from fastmcp import Client, FastMCP
 from fastmcp.server.middleware.response_limiting import ResponseLimitingMiddleware
@@ -153,3 +154,57 @@ def test_utf8_truncation_preserves_characters(self):
         content = result.content[0]
         assert isinstance(content, TextContent)
         content.text.encode("utf-8")
+
+    async def test_structured_content_preserved_on_truncation(
+        self, mcp_server: FastMCP
+    ):
+        """Truncating the text block must leave structured_content intact.
+
+        Covers the regression in #3717: a tool that declares an outputSchema
+        still has to deliver structured_content when its text is cut short.
+        """
+        payload = "x" * 2_000
+        mcp_server.add_middleware(ResponseLimitingMiddleware(max_size=1_000))
+
+        class Answer(BaseModel):
+            text: str
+
+        @mcp_server.tool()
+        def big_answer() -> Answer:
+            return Answer(text=payload)
+
+        async with Client(mcp_server) as client:
+            outcome = await client.call_tool("big_answer")
+            leading = outcome.content[0]
+            # Text carries the truncation marker; structured data stays whole.
+            assert isinstance(leading, TextContent)
+            assert "[Response truncated" in leading.text
+            assert outcome.structured_content is not None
+            assert outcome.structured_content["text"] == payload
+
+    async def test_truncation_without_structured_content_still_works(
+        self, mcp_server: FastMCP
+    ):
+        """Truncation still applies when a tool returns no structured content."""
+        mcp_server.add_middleware(ResponseLimitingMiddleware(max_size=500))
+
+        @mcp_server.tool()
+        def plain_tool() -> ToolResult:
+            return ToolResult(content=[TextContent(text="y" * 10_000, type="text")])
+
+        async with Client(mcp_server) as client:
+            outcome = await client.call_tool("plain_tool", {})
+            leading = outcome.content[0]
+            assert isinstance(leading, TextContent)
+            assert "[Response truncated" in leading.text
+            assert outcome.structured_content is None
+
+    def test_truncate_to_result_preserves_structured_content(self):
+        """Unit-level check: structured_content is returned unchanged."""
+        payload = {"key": "value", "count": 42}
+        limiter = ResponseLimitingMiddleware(max_size=500)
+        outcome = limiter._truncate_to_result("a" * 1000, structured_content=payload)
+        leading = outcome.content[0]
+        assert isinstance(leading, TextContent)
+        assert "[Response truncated" in leading.text
+        assert outcome.structured_content == payload