Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
41 changes: 41 additions & 0 deletions docs/my-website/docs/proxy/call_hooks.md
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ import Image from '@theme/IdealImage';
| `async_post_call_success_hook` | Modify outgoing response (non-streaming) | After successful LLM API call, for non-streaming responses |
| `async_post_call_failure_hook` | Transform error responses sent to clients | After failed LLM API call |
| `async_post_call_streaming_hook` | Modify outgoing response (streaming) | After successful LLM API call, for streaming responses |
| `async_post_call_response_headers_hook` | Inject custom HTTP response headers | After LLM API call (both success and failure) |

See a complete example with our [parallel request rate limiter](https://github.com/BerriAI/litellm/blob/main/litellm/proxy/hooks/parallel_request_limiter.py)

Expand Down Expand Up @@ -115,6 +116,18 @@ class MyCustomHandler(CustomLogger): # https://docs.litellm.ai/docs/observabilit
async for item in response:
yield item

async def async_post_call_response_headers_hook(
self,
data: dict,
user_api_key_dict: UserAPIKeyAuth,
response: Any,
request_headers: Optional[Dict[str, str]] = None,
) -> Optional[Dict[str, str]]:
"""
Inject custom headers into HTTP response (runs for both success and failure).
"""
return {"x-custom-header": "custom-value"}

proxy_handler_instance = MyCustomHandler()
```

Expand Down Expand Up @@ -389,3 +402,31 @@ proxy_handler_instance = MyErrorTransformer()
```

**Result:** Clients receive `"Your prompt is too long..."` instead of `"ContextWindowExceededError: Prompt exceeds context window"`.

## Advanced - Inject Custom HTTP Response Headers

Use `async_post_call_response_headers_hook` to inject custom HTTP headers into responses. This hook runs for **both successful and failed** LLM API calls.

```python
from litellm.integrations.custom_logger import CustomLogger
from litellm.proxy.proxy_server import UserAPIKeyAuth
from typing import Any, Dict, Optional

class CustomHeaderLogger(CustomLogger):
def __init__(self):
super().__init__()

async def async_post_call_response_headers_hook(
self,
data: dict,
user_api_key_dict: UserAPIKeyAuth,
response: Any,
request_headers: Optional[Dict[str, str]] = None,
) -> Optional[Dict[str, str]]:
"""
Inject custom headers into all responses (success and failure).
"""
return {"x-custom-header": "custom-value"}

proxy_handler_instance = CustomHeaderLogger()
```
22 changes: 22 additions & 0 deletions litellm/integrations/custom_logger.py
Original file line number Diff line number Diff line change
Expand Up @@ -371,6 +371,28 @@ async def async_pre_call_hook(
]: # raise exception if invalid, return a str for the user to receive - if rejected, or return a modified dictionary for passing into litellm
pass

async def async_post_call_response_headers_hook(
    self,
    data: dict,
    user_api_key_dict: UserAPIKeyAuth,
    response: Any,
    request_headers: Optional[Dict[str, str]] = None,
) -> Optional[Dict[str, str]]:
    """
    Hook invoked after every LLM API call — success or failure — so a custom
    logger can attach extra HTTP headers to the proxy's outgoing response.

    Args:
        data: The incoming request payload.
        user_api_key_dict: Auth/metadata for the calling API key.
        response: The LLM response object; ``None`` when the call failed.
        request_headers: Headers from the original client request, if available.

    Returns:
        A mapping of header name -> value to merge into the HTTP response,
        or ``None`` to leave the response headers untouched.
    """
    # Base implementation is a deliberate no-op; subclasses override this
    # to inject headers.
    return None

async def async_post_call_failure_hook(
self,
request_data: dict,
Expand Down
24 changes: 19 additions & 5 deletions litellm/model_prices_and_context_window_backup.json
Original file line number Diff line number Diff line change
Expand Up @@ -3726,9 +3726,9 @@
"cache_read_input_token_cost": 1.75e-07,
"input_cost_per_token": 1.75e-06,
"litellm_provider": "azure",
"max_input_tokens": 128000,
"max_output_tokens": 16384,
"max_tokens": 16384,
"max_input_tokens": 272000,
"max_output_tokens": 128000,
"max_tokens": 128000,
"mode": "responses",
"output_cost_per_token": 1.4e-05,
"supported_endpoints": [
Expand Down Expand Up @@ -18799,7 +18799,7 @@
"input_cost_per_token": 1.75e-06,
"input_cost_per_token_priority": 3.5e-06,
"litellm_provider": "openai",
"max_input_tokens": 400000,
"max_input_tokens": 272000,
"max_output_tokens": 128000,
"max_tokens": 128000,
"mode": "responses",
Expand Down Expand Up @@ -23790,6 +23790,20 @@
"output_cost_per_token": 6.5e-07,
"supports_tool_choice": true
},
"openrouter/moonshotai/kimi-k2.5": {
"cache_read_input_token_cost": 1e-07,
"input_cost_per_token": 6e-07,
"litellm_provider": "openrouter",
"max_input_tokens": 262144,
"max_output_tokens": 262144,
"max_tokens": 262144,
"mode": "chat",
"output_cost_per_token": 3e-06,
"source": "https://openrouter.ai/moonshotai/kimi-k2.5",
"supports_function_calling": true,
"supports_tool_choice": true,
"supports_vision": true
},
"openrouter/nousresearch/nous-hermes-llama2-13b": {
"input_cost_per_token": 2e-07,
"litellm_provider": "openrouter",
Expand Down Expand Up @@ -24003,7 +24017,7 @@
"cache_read_input_token_cost": 1.75e-07,
"input_cost_per_token": 1.75e-06,
"litellm_provider": "openrouter",
"max_input_tokens": 400000,
"max_input_tokens": 272000,
"max_output_tokens": 128000,
"max_tokens": 128000,
"mode": "responses",
Expand Down
31 changes: 31 additions & 0 deletions litellm/proxy/common_request_processing.py
Original file line number Diff line number Diff line change
Expand Up @@ -804,6 +804,15 @@ async def base_process_llm_request(
**additional_headers,
)

# Call response headers hook for streaming success
callback_headers = await proxy_logging_obj.post_call_response_headers_hook(
data=self.data,
user_api_key_dict=user_api_key_dict,
response=response,
)
if callback_headers:
custom_headers.update(callback_headers)

# Preserve the original client-requested model (pre-alias mapping) for downstream
# streaming generators. Pre-call processing can rewrite `self.data["model"]` for
# aliasing/routing, but the OpenAI-compatible response `model` field should reflect
Expand Down Expand Up @@ -900,6 +909,16 @@ async def base_process_llm_request(
**additional_headers,
)
)

# Call response headers hook for non-streaming success
callback_headers = await proxy_logging_obj.post_call_response_headers_hook(
data=self.data,
user_api_key_dict=user_api_key_dict,
response=response,
)
if callback_headers:
fastapi_response.headers.update(callback_headers)

await check_response_size_is_safe(response=response)

return response
Expand Down Expand Up @@ -1058,6 +1077,18 @@ async def _handle_llm_api_exception(
headers = get_response_headers(dict(_response_headers))
headers.update(custom_headers)

# Call response headers hook for failure
try:
callback_headers = await proxy_logging_obj.post_call_response_headers_hook(
data=self.data,
user_api_key_dict=user_api_key_dict,
response=None,
)
if callback_headers:
headers.update(callback_headers)
except Exception:
pass

if isinstance(e, HTTPException):
raise ProxyException(
message=getattr(e, "detail", str(e)),
Expand Down
40 changes: 40 additions & 0 deletions litellm/proxy/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -1808,6 +1808,46 @@ async def post_call_success_hook(
raise e
return response

async def post_call_response_headers_hook(
    self,
    data: dict,
    user_api_key_dict: UserAPIKeyAuth,
    response: Any,
    request_headers: Optional[Dict[str, str]] = None,
) -> Dict[str, str]:
    """
    Calls async_post_call_response_headers_hook on all CustomLogger callbacks.
    Merges all returned header dicts (later callbacks override earlier ones).

    A failure in one callback is logged and skipped, so it cannot prevent the
    remaining callbacks from contributing their headers.

    Args:
        data: The request data.
        user_api_key_dict: The user API key dictionary.
        response: The response object (None for failure cases).
        request_headers: The original client request headers, if available.

    Returns:
        Dict[str, str]: Merged headers from all callbacks.
    """
    merged_headers: Dict[str, str] = {}
    for callback in litellm.callbacks:
        # Exception handling is per-callback (not around the whole loop) so a
        # single misbehaving hook does not drop headers from later callbacks.
        try:
            _callback: Optional[CustomLogger] = None
            if isinstance(callback, str):
                # String entries (e.g. "langfuse") resolve to their
                # CustomLogger-compatible class instance.
                _callback = litellm.litellm_core_utils.litellm_logging.get_custom_logger_compatible_class(
                    cast(_custom_logger_compatible_callbacks_literal, callback)
                )
            else:
                _callback = callback  # type: ignore

            if _callback is not None and isinstance(_callback, CustomLogger):
                result = await _callback.async_post_call_response_headers_hook(
                    data=data,
                    user_api_key_dict=user_api_key_dict,
                    response=response,
                    request_headers=request_headers,
                )
                if result is not None:
                    merged_headers.update(result)
        except Exception as e:
            verbose_proxy_logger.exception(
                "Error in post_call_response_headers_hook: %s", str(e)
            )
    return merged_headers

async def async_post_call_streaming_hook(
self,
data: dict,
Expand Down
Loading
Loading