Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -168,6 +168,24 @@ def _extract_signature_from_tool_use_content(
return provider_specific_fields.get("signature")
return None

def _add_cache_control_if_applicable(
    self,
    source: Dict[str, Any],
    target: Dict[str, Any],
    model: Optional[str],
) -> None:
    """
    Copy the ``cache_control`` entry from ``source`` into ``target`` when
    prompt caching should be preserved.

    The field is carried over only when all three hold: ``source`` actually
    has a truthy ``cache_control`` value, a model name was supplied, and the
    model is an Anthropic Claude model (per ``self.is_anthropic_claude_model``).
    Otherwise ``target`` is left untouched.

    Args:
        source: Dict that may carry a ``cache_control`` field.
        target: Dict that receives the ``cache_control`` field.
        model: Model name used to decide whether caching is preserved.
    """
    # Bail out early unless both pieces of required input are present.
    if not model:
        return
    cache_control = source.get("cache_control")
    if not cache_control:
        return
    if self.is_anthropic_claude_model(model):
        target["cache_control"] = cache_control

def translatable_anthropic_params(self) -> List:
"""
Which anthropic params, we need to translate to the openai format.
Expand Down Expand Up @@ -202,15 +220,11 @@ def translate_anthropic_messages_to_openai( # noqa: PLR0915
elif message_content and isinstance(message_content, list):
for content in message_content:
if content.get("type") == "text":
text_obj = ChatCompletionTextObject(
text_obj: Dict[str, Any] = ChatCompletionTextObject(
type="text", text=content.get("text", "")
)
# Preserve cache_control if present (for prompt caching)
# Only for Anthropic models that support prompt caching
cache_control = content.get("cache_control")
if cache_control and model and self.is_anthropic_claude_model(model):
text_obj["cache_control"] = cache_control # type: ignore
new_user_content_list.append(text_obj)
self._add_cache_control_if_applicable(content, text_obj, model)
new_user_content_list.append(text_obj) # type: ignore
elif content.get("type") == "image":
# Convert Anthropic image format to OpenAI format
source = content.get("source", {})
Expand All @@ -222,25 +236,44 @@ def translate_anthropic_messages_to_openai( # noqa: PLR0915
image_url_obj = ChatCompletionImageUrlObject(
url=openai_image_url
)
image_obj = ChatCompletionImageObject(
image_obj: Dict[str, Any] = ChatCompletionImageObject(
type="image_url", image_url=image_url_obj
)
new_user_content_list.append(image_obj)
self._add_cache_control_if_applicable(content, image_obj, model)
new_user_content_list.append(image_obj) # type: ignore
elif content.get("type") == "document":
# Convert Anthropic document format (PDF, etc.) to OpenAI format
source = content.get("source", {})
openai_image_url = (
self._translate_anthropic_image_to_openai(source)
)

if openai_image_url:
image_url_obj = ChatCompletionImageUrlObject(
url=openai_image_url
)
doc_obj: Dict[str, Any] = ChatCompletionImageObject(
type="image_url", image_url=image_url_obj
)
self._add_cache_control_if_applicable(content, doc_obj, model)
new_user_content_list.append(doc_obj) # type: ignore
elif content.get("type") == "tool_result":
if "content" not in content:
tool_result = ChatCompletionToolMessage(
tool_result: Dict[str, Any] = ChatCompletionToolMessage(
role="tool",
tool_call_id=content.get("tool_use_id", ""),
content="",
)
tool_message_list.append(tool_result)
self._add_cache_control_if_applicable(content, tool_result, model)
tool_message_list.append(tool_result) # type: ignore[arg-type]
elif isinstance(content.get("content"), str):
tool_result = ChatCompletionToolMessage(
role="tool",
tool_call_id=content.get("tool_use_id", ""),
content=str(content.get("content", "")),
)
tool_message_list.append(tool_result)
self._add_cache_control_if_applicable(content, tool_result, model)
tool_message_list.append(tool_result) # type: ignore[arg-type]
elif isinstance(content.get("content"), list):
# Combine all content items into a single tool message
# to avoid creating multiple tool_result blocks with the same ID
Expand All @@ -256,7 +289,8 @@ def translate_anthropic_messages_to_openai( # noqa: PLR0915
tool_call_id=content.get("tool_use_id", ""),
content=c,
)
tool_message_list.append(tool_result)
self._add_cache_control_if_applicable(content, tool_result, model)
tool_message_list.append(tool_result) # type: ignore[arg-type]
elif isinstance(c, dict):
if c.get("type") == "text":
tool_result = ChatCompletionToolMessage(
Expand All @@ -266,7 +300,8 @@ def translate_anthropic_messages_to_openai( # noqa: PLR0915
),
content=c.get("text", ""),
)
tool_message_list.append(tool_result)
self._add_cache_control_if_applicable(content, tool_result, model)
tool_message_list.append(tool_result) # type: ignore[arg-type]
elif c.get("type") == "image":
source = c.get("source", {})
openai_image_url = (
Expand All @@ -282,7 +317,8 @@ def translate_anthropic_messages_to_openai( # noqa: PLR0915
),
content=openai_image_url,
)
tool_message_list.append(tool_result)
self._add_cache_control_if_applicable(content, tool_result, model)
tool_message_list.append(tool_result) # type: ignore[arg-type]
else:
# For multiple content items, combine into a single tool message
# with list content to preserve all items while having one tool_use_id
Expand Down Expand Up @@ -331,7 +367,8 @@ def translate_anthropic_messages_to_openai( # noqa: PLR0915
tool_call_id=content.get("tool_use_id", ""),
content=combined_content_parts, # type: ignore
)
tool_message_list.append(tool_result)
self._add_cache_control_if_applicable(content, tool_result, model)
tool_message_list.append(tool_result) # type: ignore[arg-type]

if len(tool_message_list) > 0:
new_messages.extend(tool_message_list)
Expand All @@ -344,7 +381,9 @@ def translate_anthropic_messages_to_openai( # noqa: PLR0915

## ASSISTANT MESSAGE ##
assistant_message_str: Optional[str] = None
tool_calls: List[ChatCompletionAssistantToolCall] = []
assistant_content_list: List[Dict[str, Any]] = [] # For content blocks with cache_control
has_cache_control_in_text = False
tool_calls: List[Dict[str, Any]] = []
thinking_blocks: List[
Union[ChatCompletionThinkingBlock, ChatCompletionRedactedThinkingBlock]
] = []
Expand All @@ -357,10 +396,14 @@ def translate_anthropic_messages_to_openai( # noqa: PLR0915
assistant_message_str = str(content)
elif isinstance(content, dict):
if content.get("type") == "text":
if assistant_message_str is None:
assistant_message_str = content.get("text", "")
else:
assistant_message_str += content.get("text", "")
text_block: Dict[str, Any] = {
"type": "text",
"text": content.get("text", ""),
}
self._add_cache_control_if_applicable(content, text_block, model)
if "cache_control" in text_block:
has_cache_control_in_text = True
assistant_content_list.append(text_block)
elif content.get("type") == "tool_use":
function_chunk: ChatCompletionToolCallFunctionChunk = {
"name": content.get("name", ""),
Expand All @@ -384,13 +427,13 @@ def translate_anthropic_messages_to_openai( # noqa: PLR0915
provider_specific_fields
)

tool_calls.append(
ChatCompletionAssistantToolCall(
id=content.get("id", ""),
type="function",
function=function_chunk,
)
tool_call: Dict[str, Any] = ChatCompletionAssistantToolCall(
id=content.get("id", ""),
type="function",
function=function_chunk,
)
self._add_cache_control_if_applicable(content, tool_call, model)
tool_calls.append(tool_call)
elif content.get("type") == "thinking":
thinking_block = ChatCompletionThinkingBlock(
type="thinking",
Expand All @@ -411,18 +454,30 @@ def translate_anthropic_messages_to_openai( # noqa: PLR0915

if (
assistant_message_str is not None
or len(assistant_content_list) > 0
or len(tool_calls) > 0
or len(thinking_blocks) > 0
):
# Use list format if any text block has cache_control, otherwise use string
if has_cache_control_in_text and len(assistant_content_list) > 0:
assistant_content: Any = assistant_content_list
elif len(assistant_content_list) > 0 and not has_cache_control_in_text:
# Concatenate text blocks into string when no cache_control
assistant_content = "".join(
block.get("text", "") for block in assistant_content_list
)
else:
assistant_content = assistant_message_str

assistant_message = ChatCompletionAssistantMessage(
role="assistant",
content=assistant_message_str,
content=assistant_content,
thinking_blocks=(
thinking_blocks if len(thinking_blocks) > 0 else None
),
)
if len(tool_calls) > 0:
assistant_message["tool_calls"] = tool_calls
assistant_message["tool_calls"] = tool_calls # type: ignore
if len(thinking_blocks) > 0:
assistant_message["thinking_blocks"] = thinking_blocks # type: ignore
new_messages.append(assistant_message)
Expand Down Expand Up @@ -532,10 +587,10 @@ def translate_anthropic_tool_choice_to_openai(
)

def translate_anthropic_tools_to_openai(
self, tools: List[AllAnthropicToolsValues]
self, tools: List[AllAnthropicToolsValues], model: Optional[str] = None
) -> List[ChatCompletionToolParam]:
new_tools: List[ChatCompletionToolParam] = []
mapped_tool_params = ["name", "input_schema", "description"]
mapped_tool_params = ["name", "input_schema", "description", "cache_control"]
for tool in tools:
function_chunk = ChatCompletionToolParamFunctionChunk(
name=tool["name"],
Expand All @@ -548,11 +603,11 @@ def translate_anthropic_tools_to_openai(
for k, v in tool.items():
if k not in mapped_tool_params: # pass additional computer kwargs
function_chunk.setdefault("parameters", {}).update({k: v})
new_tools.append(
ChatCompletionToolParam(type="function", function=function_chunk)
)
tool_param: Dict[str, Any] = ChatCompletionToolParam(type="function", function=function_chunk)
self._add_cache_control_if_applicable(tool, tool_param, model)
new_tools.append(tool_param) # type: ignore[arg-type]

return new_tools
return new_tools # type: ignore[return-value]

def translate_anthropic_output_format_to_openai(
self, output_format: Any
Expand Down Expand Up @@ -621,10 +676,29 @@ def translate_anthropic_to_openai(
if "system" in anthropic_message_request:
system_content = anthropic_message_request["system"]
if system_content:
new_messages.insert(
0,
ChatCompletionSystemMessage(role="system", content=system_content),
)
# Handle system as string or array of content blocks
if isinstance(system_content, str):
new_messages.insert(
0,
ChatCompletionSystemMessage(role="system", content=system_content),
)
elif isinstance(system_content, list):
# Convert Anthropic system content blocks to OpenAI format
openai_system_content: List[Dict[str, Any]] = []
model_name = anthropic_message_request.get("model", "")
for block in system_content:
if isinstance(block, dict) and block.get("type") == "text":
text_block: Dict[str, Any] = {
"type": "text",
"text": block.get("text", ""),
}
self._add_cache_control_if_applicable(block, text_block, model_name)
openai_system_content.append(text_block)
if openai_system_content:
new_messages.insert(
0,
ChatCompletionSystemMessage(role="system", content=openai_system_content), # type: ignore
)

new_kwargs: ChatCompletionRequest = {
"model": anthropic_message_request["model"],
Expand Down Expand Up @@ -655,7 +729,8 @@ def translate_anthropic_to_openai(
tools = anthropic_message_request["tools"]
if tools:
new_kwargs["tools"] = self.translate_anthropic_tools_to_openai(
tools=cast(List[AllAnthropicToolsValues], tools)
tools=cast(List[AllAnthropicToolsValues], tools),
model=new_kwargs.get("model"),
)

## CONVERT THINKING
Expand Down Expand Up @@ -827,7 +902,7 @@ def translate_openai_response_to_anthropic(
)
# extract usage
usage: Usage = getattr(response, "usage")
anthropic_usage = AnthropicUsage(
anthropic_usage: Dict[str, Any] = AnthropicUsage(
input_tokens=usage.prompt_tokens or 0,
output_tokens=usage.completion_tokens or 0,
)
Expand All @@ -843,7 +918,7 @@ def translate_openai_response_to_anthropic(
role="assistant",
model=response.model or "unknown-model",
stop_sequence=None,
usage=anthropic_usage,
usage=anthropic_usage, # type: ignore
content=anthropic_content, # type: ignore
stop_reason=anthropic_finish_reason,
)
Expand Down Expand Up @@ -980,7 +1055,7 @@ def translate_streaming_openai_response_to_anthropic(
else:
litellm_usage_chunk = None
if litellm_usage_chunk is not None:
usage_delta = UsageDelta(
usage_delta: Dict[str, Any] = UsageDelta(
input_tokens=litellm_usage_chunk.prompt_tokens or 0,
output_tokens=litellm_usage_chunk.completion_tokens or 0,
)
Expand All @@ -992,7 +1067,7 @@ def translate_streaming_openai_response_to_anthropic(
else:
usage_delta = UsageDelta(input_tokens=0, output_tokens=0)
return MessageBlockDelta(
type="message_delta", delta=delta, usage=usage_delta
type="message_delta", delta=delta, usage=usage_delta # type: ignore
)
(
type_of_content,
Expand Down
Loading
Loading