From 6b73c3c69c57a1179c6dc0caa594a0959a6f4732 Mon Sep 17 00:00:00 2001
From: michelligabriele <gabriele.michelli@icloud.com>
Date: Tue, 17 Feb 2026 16:59:40 +0100
Subject: [PATCH 1/4] fix(proxy): pass request_headers to response headers hook
 + fix guardrail gap
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The async_post_call_response_headers_hook receives request_headers as a
parameter but it was never populated — always None. This prevented use cases
like echoing an API gateway request ID (e.g., APIGEE) from the incoming
request into the response headers.

Changes:
- Capture request headers from the FastAPI Request object at the start of
  base_process_llm_request and pass them to all three hook call sites
  (streaming success, non-streaming success, failure)
- Add missing post_call_response_headers_hook call in the /responses
  endpoint's ModifyResponseException handler, so custom headers are
  injected even when a guardrail blocks the request
- Update E2E demo to show APIGEE request ID echoing across all endpoints
- Update docs with request_headers usage example
- Add unit tests for request_headers forwarding and guardrail exception path

Closes #19646
---
 docs/my-website/docs/proxy/call_hooks.md      | 22 ++++-
 e2e_demo_response_headers_callback.py         | 90 +++++++++++++++++++
 litellm/proxy/common_request_processing.py    |  6 ++
 .../proxy/response_api_endpoints/endpoints.py | 10 +++
 tests/e2e_demo_response_headers_callback.py   | 90 +++++++++++++++++++
 .../test_post_call_response_headers_hook.py   | 63 +++++++++++++
 ...response_headers_on_guardrail_exception.py | 72 +++++++++++++++
 7 files changed, 349 insertions(+), 4 deletions(-)
 create mode 100644 e2e_demo_response_headers_callback.py
 create mode 100644 tests/e2e_demo_response_headers_callback.py
 create mode 100644 tests/test_litellm/proxy/response_api_endpoints/test_response_headers_on_guardrail_exception.py

diff --git a/docs/my-website/docs/proxy/call_hooks.md b/docs/my-website/docs/proxy/call_hooks.md
index 17354725fd5..d0a6aa779dc 100644
--- a/docs/my-website/docs/proxy/call_hooks.md
+++ b/docs/my-website/docs/proxy/call_hooks.md
@@ -413,9 +413,6 @@ from litellm.proxy.proxy_server import UserAPIKeyAuth
 from typing import Any, Dict, Optional
 
 class CustomHeaderLogger(CustomLogger):
-    def __init__(self):
-        super().__init__()
-
     async def async_post_call_response_headers_hook(
         self,
         data: dict,
@@ -425,8 +422,25 @@ class CustomHeaderLogger(CustomLogger):
     ) -> Optional[Dict[str, str]]:
         """
         Inject custom headers into all responses (success and failure).
+        Works for /chat/completions, /embeddings, and /responses.
+
+        Use request_headers to echo incoming headers (e.g., API gateway request IDs).
         """
-        return {"x-custom-header": "custom-value"}
+        headers = {"x-custom-header": "custom-value"}
+
+        # Echo an incoming gateway request ID into the response
+        if request_headers:
+            gateway_id = request_headers.get("x-gateway-request-id")
+            if gateway_id:
+                headers["x-gateway-request-id"] = gateway_id
+
+        return headers
 
 proxy_handler_instance = CustomHeaderLogger()
 ```
+
+:::tip
+This hook works for **all proxy endpoints**: `/chat/completions`, `/embeddings`, `/responses` (streaming and non-streaming), and failure responses.
+
+The `request_headers` parameter contains the original HTTP request headers, allowing you to echo incoming headers (e.g., API gateway request IDs) into the response.
+:::
diff --git a/e2e_demo_response_headers_callback.py b/e2e_demo_response_headers_callback.py
new file mode 100644
index 00000000000..aabe0008a99
--- /dev/null
+++ b/e2e_demo_response_headers_callback.py
@@ -0,0 +1,90 @@
+"""
+Demo CustomLogger that injects custom response headers.
+
+Shows how to:
+1. Echo an incoming request header (e.g., APIGEE request ID) into the response
+2. Inject headers on both success and failure paths
+3. Works for /chat/completions, /embeddings, and /responses
+
+Usage:
+    litellm --config tests/e2e_demo_response_headers_config.yaml
+
+Test commands:
+    # /chat/completions (non-streaming)
+    curl -s -D- http://localhost:4000/chat/completions \
+      -H "Authorization: Bearer sk-1234" \
+      -H "x-apigee-request-id: apigee-req-001" \
+      -d '{"model":"gpt-4o-mini","messages":[{"role":"user","content":"hi"}]}'
+
+    # /chat/completions (streaming)
+    curl -s -D- http://localhost:4000/chat/completions \
+      -H "Authorization: Bearer sk-1234" \
+      -H "x-apigee-request-id: apigee-req-002" \
+      -d '{"model":"gpt-4o-mini","messages":[{"role":"user","content":"hi"}],"stream":true}'
+
+    # /embeddings
+    curl -s -D- http://localhost:4000/embeddings \
+      -H "Authorization: Bearer sk-1234" \
+      -H "x-apigee-request-id: apigee-req-003" \
+      -d '{"model":"text-embedding-3-small","input":"hello"}'
+
+    # /v1/responses (non-streaming)
+    curl -s -D- http://localhost:4000/v1/responses \
+      -H "Authorization: Bearer sk-1234" \
+      -H "x-apigee-request-id: apigee-req-004" \
+      -d '{"model":"gpt-4o-mini","input":"hi"}'
+
+    # /v1/responses (streaming)
+    curl -s -D- http://localhost:4000/v1/responses \
+      -H "Authorization: Bearer sk-1234" \
+      -H "x-apigee-request-id: apigee-req-005" \
+      -d '{"model":"gpt-4o-mini","input":"hi","stream":true}'
+
+    # Failure path (bad model → headers still injected)
+    curl -s -D- http://localhost:4000/chat/completions \
+      -H "Authorization: Bearer sk-1234" \
+      -H "x-apigee-request-id: apigee-req-006" \
+      -d '{"model":"nonexistent-model","messages":[{"role":"user","content":"hi"}]}'
+
+Expected: All responses contain x-apigee-request-id, x-custom-header, and x-litellm-hook-model.
+"""
+
+from typing import Any, Dict, Optional
+
+from litellm.integrations.custom_logger import CustomLogger
+from litellm.proxy._types import UserAPIKeyAuth
+
+
+class ResponseHeaderInjector(CustomLogger):
+    """
+    Demonstrates injecting custom HTTP response headers via the proxy hook.
+
+    Key features:
+    - Echoes the incoming x-apigee-request-id header back in the response
+    - Adds a static custom header and the model name
+    - Works for success (streaming + non-streaming) and failure responses
+    - Works for all endpoints: /chat/completions, /embeddings, /responses
+    """
+
+    async def async_post_call_response_headers_hook(
+        self,
+        data: dict,
+        user_api_key_dict: UserAPIKeyAuth,
+        response: Any,
+        request_headers: Optional[Dict[str, str]] = None,
+    ) -> Optional[Dict[str, str]]:
+        headers: Dict[str, str] = {
+            "x-custom-header": "hello-from-hook",
+            "x-litellm-hook-model": data.get("model", "unknown"),
+        }
+
+        # Echo the APIGEE request ID from the incoming request into the response
+        if request_headers:
+            apigee_id = request_headers.get("x-apigee-request-id")
+            if apigee_id:
+                headers["x-apigee-request-id"] = apigee_id
+
+        return headers
+
+
+response_header_injector = ResponseHeaderInjector()
diff --git a/litellm/proxy/common_request_processing.py b/litellm/proxy/common_request_processing.py
index 7dfa3bb239f..7f7af506a07 100644
--- a/litellm/proxy/common_request_processing.py
+++ b/litellm/proxy/common_request_processing.py
@@ -353,6 +353,7 @@ def _get_cost_breakdown_from_logging_obj(
 class ProxyBaseLLMRequestProcessing:
     def __init__(self, data: dict):
         self.data = data
+        self._request_headers: Optional[Dict[str, str]] = None
 
     @staticmethod
     def get_custom_headers(
@@ -749,6 +750,8 @@ async def base_process_llm_request(
         """
         Common request processing logic for both chat completions and responses API endpoints
         """
+        self._request_headers = dict(request.headers)
+
         requested_model_from_client: Optional[str] = (
             self.data.get("model") if isinstance(self.data.get("model"), str) else None
         )
@@ -859,6 +862,7 @@ async def base_process_llm_request(
                 data=self.data,
                 user_api_key_dict=user_api_key_dict,
                 response=response,
+                request_headers=self._request_headers,
             )
             if callback_headers:
                 custom_headers.update(callback_headers)
@@ -967,6 +971,7 @@ async def base_process_llm_request(
             data=self.data,
             user_api_key_dict=user_api_key_dict,
             response=response,
+            request_headers=self._request_headers,
         )
         if callback_headers:
             fastapi_response.headers.update(callback_headers)
@@ -1135,6 +1140,7 @@ async def _handle_llm_api_exception(
                 data=self.data,
                 user_api_key_dict=user_api_key_dict,
                 response=None,
+                request_headers=self._request_headers,
             )
             if callback_headers:
                 headers.update(callback_headers)
diff --git a/litellm/proxy/response_api_endpoints/endpoints.py b/litellm/proxy/response_api_endpoints/endpoints.py
index 44e8c42b2c1..970819cc5ea 100644
--- a/litellm/proxy/response_api_endpoints/endpoints.py
+++ b/litellm/proxy/response_api_endpoints/endpoints.py
@@ -226,6 +226,16 @@ async def responses_api(
             request_data=_data,
         )
 
+        # Call response headers hook for guardrail failure path
+        callback_headers = await proxy_logging_obj.post_call_response_headers_hook(
+            data=_data,
+            user_api_key_dict=user_api_key_dict,
+            response=None,
+            request_headers=dict(request.headers),
+        )
+        if callback_headers:
+            fastapi_response.headers.update(callback_headers)
+
         violation_text = e.message
         response_obj = ResponsesAPIResponse(
             id=f"resp_{uuid4()}",
diff --git a/tests/e2e_demo_response_headers_callback.py b/tests/e2e_demo_response_headers_callback.py
new file mode 100644
index 00000000000..aabe0008a99
--- /dev/null
+++ b/tests/e2e_demo_response_headers_callback.py
@@ -0,0 +1,90 @@
+"""
+Demo CustomLogger that injects custom response headers.
+
+Shows how to:
+1. Echo an incoming request header (e.g., APIGEE request ID) into the response
+2. Inject headers on both success and failure paths
+3. Works for /chat/completions, /embeddings, and /responses
+
+Usage:
+    litellm --config tests/e2e_demo_response_headers_config.yaml
+
+Test commands:
+    # /chat/completions (non-streaming)
+    curl -s -D- http://localhost:4000/chat/completions \
+      -H "Authorization: Bearer sk-1234" \
+      -H "x-apigee-request-id: apigee-req-001" \
+      -d '{"model":"gpt-4o-mini","messages":[{"role":"user","content":"hi"}]}'
+
+    # /chat/completions (streaming)
+    curl -s -D- http://localhost:4000/chat/completions \
+      -H "Authorization: Bearer sk-1234" \
+      -H "x-apigee-request-id: apigee-req-002" \
+      -d '{"model":"gpt-4o-mini","messages":[{"role":"user","content":"hi"}],"stream":true}'
+
+    # /embeddings
+    curl -s -D- http://localhost:4000/embeddings \
+      -H "Authorization: Bearer sk-1234" \
+      -H "x-apigee-request-id: apigee-req-003" \
+      -d '{"model":"text-embedding-3-small","input":"hello"}'
+
+    # /v1/responses (non-streaming)
+    curl -s -D- http://localhost:4000/v1/responses \
+      -H "Authorization: Bearer sk-1234" \
+      -H "x-apigee-request-id: apigee-req-004" \
+      -d '{"model":"gpt-4o-mini","input":"hi"}'
+
+    # /v1/responses (streaming)
+    curl -s -D- http://localhost:4000/v1/responses \
+      -H "Authorization: Bearer sk-1234" \
+      -H "x-apigee-request-id: apigee-req-005" \
+      -d '{"model":"gpt-4o-mini","input":"hi","stream":true}'
+
+    # Failure path (bad model → headers still injected)
+    curl -s -D- http://localhost:4000/chat/completions \
+      -H "Authorization: Bearer sk-1234" \
+      -H "x-apigee-request-id: apigee-req-006" \
+      -d '{"model":"nonexistent-model","messages":[{"role":"user","content":"hi"}]}'
+
+Expected: All responses contain x-apigee-request-id, x-custom-header, and x-litellm-hook-model.
+"""
+
+from typing import Any, Dict, Optional
+
+from litellm.integrations.custom_logger import CustomLogger
+from litellm.proxy._types import UserAPIKeyAuth
+
+
+class ResponseHeaderInjector(CustomLogger):
+    """
+    Demonstrates injecting custom HTTP response headers via the proxy hook.
+
+    Key features:
+    - Echoes the incoming x-apigee-request-id header back in the response
+    - Adds a static custom header and the model name
+    - Works for success (streaming + non-streaming) and failure responses
+    - Works for all endpoints: /chat/completions, /embeddings, /responses
+    """
+
+    async def async_post_call_response_headers_hook(
+        self,
+        data: dict,
+        user_api_key_dict: UserAPIKeyAuth,
+        response: Any,
+        request_headers: Optional[Dict[str, str]] = None,
+    ) -> Optional[Dict[str, str]]:
+        headers: Dict[str, str] = {
+            "x-custom-header": "hello-from-hook",
+            "x-litellm-hook-model": data.get("model", "unknown"),
+        }
+
+        # Echo the APIGEE request ID from the incoming request into the response
+        if request_headers:
+            apigee_id = request_headers.get("x-apigee-request-id")
+            if apigee_id:
+                headers["x-apigee-request-id"] = apigee_id
+
+        return headers
+
+
+response_header_injector = ResponseHeaderInjector()
diff --git a/tests/test_litellm/proxy/hooks/test_post_call_response_headers_hook.py b/tests/test_litellm/proxy/hooks/test_post_call_response_headers_hook.py
index 6a12366fdd3..948fa6fcbe8 100644
--- a/tests/test_litellm/proxy/hooks/test_post_call_response_headers_hook.py
+++ b/tests/test_litellm/proxy/hooks/test_post_call_response_headers_hook.py
@@ -195,3 +195,66 @@ async def test_default_hook_returns_none():
         response=None,
     )
     assert result is None
+
+
+@pytest.mark.asyncio
+async def test_response_headers_hook_receives_request_headers():
+    """Test that the hook receives request_headers when provided."""
+    injector = HeaderInjectorLogger(headers={"x-echoed": "yes"})
+    mock_request_headers = {"x-apigee-request-id": "req-abc-123", "authorization": "Bearer sk-xxx"}
+
+    with patch("litellm.callbacks", [injector]):
+        from litellm.proxy.utils import ProxyLogging
+        from litellm.caching.caching import DualCache
+
+        proxy_logging = ProxyLogging(user_api_key_cache=DualCache())
+
+        result = await proxy_logging.post_call_response_headers_hook(
+            data={"model": "test-model"},
+            user_api_key_dict=UserAPIKeyAuth(api_key="test-key"),
+            response={"id": "resp-1"},
+            request_headers=mock_request_headers,
+        )
+
+        assert injector.called is True
+        assert result == {"x-echoed": "yes"}
+
+
+@pytest.mark.asyncio
+async def test_response_headers_hook_request_headers_passed_to_callback():
+    """Test that request_headers are forwarded to the callback and can be used to echo incoming headers."""
+
+    class RequestHeaderAwareLogger(CustomLogger):
+        def __init__(self):
+            self.received_request_headers = None
+
+        async def async_post_call_response_headers_hook(
+            self,
+            data: dict,
+            user_api_key_dict: UserAPIKeyAuth,
+            response: Any,
+            request_headers: Optional[Dict[str, str]] = None,
+        ) -> Optional[Dict[str, str]]:
+            self.received_request_headers = request_headers
+            if request_headers and "x-apigee-request-id" in request_headers:
+                return {"x-apigee-request-id": request_headers["x-apigee-request-id"]}
+            return None
+
+    logger = RequestHeaderAwareLogger()
+    mock_request_headers = {"x-apigee-request-id": "apigee-123"}
+
+    with patch("litellm.callbacks", [logger]):
+        from litellm.proxy.utils import ProxyLogging
+        from litellm.caching.caching import DualCache
+
+        proxy_logging = ProxyLogging(user_api_key_cache=DualCache())
+
+        result = await proxy_logging.post_call_response_headers_hook(
+            data={"model": "test-model"},
+            user_api_key_dict=UserAPIKeyAuth(api_key="test-key"),
+            response=None,
+            request_headers=mock_request_headers,
+        )
+
+        assert logger.received_request_headers == mock_request_headers
+        assert result == {"x-apigee-request-id": "apigee-123"}
diff --git a/tests/test_litellm/proxy/response_api_endpoints/test_response_headers_on_guardrail_exception.py b/tests/test_litellm/proxy/response_api_endpoints/test_response_headers_on_guardrail_exception.py
new file mode 100644
index 00000000000..04b98f6d472
--- /dev/null
+++ b/tests/test_litellm/proxy/response_api_endpoints/test_response_headers_on_guardrail_exception.py
@@ -0,0 +1,72 @@
+"""
+Test that post_call_response_headers_hook is called on ModifyResponseException
+in the /responses endpoint, so custom headers appear even on guardrail failures.
+"""
+
+import os
+import sys
+import pytest
+from typing import Any, Dict, Optional
+from unittest.mock import AsyncMock, MagicMock, patch
+
+sys.path.insert(0, os.path.abspath("../../../.."))
+
+from litellm.integrations.custom_logger import CustomLogger
+from litellm.proxy._types import UserAPIKeyAuth
+
+
+class GuardrailHeaderLogger(CustomLogger):
+    """Logger that injects headers — used to verify hook fires on guardrail path."""
+
+    async def async_post_call_response_headers_hook(
+        self,
+        data: dict,
+        user_api_key_dict: UserAPIKeyAuth,
+        response: Any,
+        request_headers: Optional[Dict[str, str]] = None,
+    ) -> Optional[Dict[str, str]]:
+        return {"x-guardrail-header": "injected"}
+
+
+@pytest.mark.asyncio
+async def test_modify_response_exception_calls_response_headers_hook():
+    """
+    When a guardrail raises ModifyResponseException on /responses,
+    the response should still include custom headers from the hook.
+    """
+    from litellm.integrations.custom_guardrail import ModifyResponseException
+    from litellm.proxy.proxy_server import app
+    from fastapi.testclient import TestClient
+
+    guardrail_logger = GuardrailHeaderLogger()
+
+    with patch("litellm.callbacks", [guardrail_logger]):
+        with patch("litellm.proxy.proxy_server.user_api_key_auth") as mock_auth:
+            mock_auth.return_value = MagicMock(
+                token="test_token",
+                user_id="test_user",
+                team_id=None,
+            )
+
+            # Make base_process_llm_request raise ModifyResponseException
+            with patch(
+                "litellm.proxy.response_api_endpoints.endpoints.ProxyBaseLLMRequestProcessing"
+            ) as MockProcessor:
+                mock_instance = MockProcessor.return_value
+                mock_instance.base_process_llm_request = AsyncMock(
+                    side_effect=ModifyResponseException(
+                        message="Content blocked by guardrail",
+                        model="gpt-4o",
+                        request_data={"model": "gpt-4o"},
+                    )
+                )
+
+                client = TestClient(app)
+                response = client.post(
+                    "/v1/responses",
+                    json={"model": "gpt-4o", "input": "blocked content"},
+                    headers={"Authorization": "Bearer sk-1234"},
+                )
+
+                assert response.status_code == 200
+                assert response.headers.get("x-guardrail-header") == "injected"

From 650acd0fb510bd85f1a140c7d94b2edf260002d0 Mon Sep 17 00:00:00 2001
From: michelligabriele <gabriele.michelli@icloud.com>
Date: Tue, 17 Feb 2026 18:49:10 +0100
Subject: [PATCH 2/4] fix(proxy): filter sensitive headers, fix guardrail hook
 in all endpoints
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Address code review feedback:

1. Filter sensitive request headers (authorization, cookie,
   proxy-authorization) before passing to callbacks to prevent
   credential leaks in third-party loggers.

2. Add post_call_response_headers_hook to ModifyResponseException
   handlers in all four endpoints (/chat/completions, /completions,
   /v1/messages, /responses) — previously only /responses was covered.

3. Remove duplicate e2e demo file from repo root (canonical copy
   lives in tests/).
---
 e2e_demo_response_headers_callback.py         | 90 -------------------
 .../proxy/anthropic_endpoints/endpoints.py    | 10 +++
 litellm/proxy/common_request_processing.py    | 17 +++-
 litellm/proxy/proxy_server.py                 | 21 +++++
 .../proxy/response_api_endpoints/endpoints.py |  2 +-
 5 files changed, 48 insertions(+), 92 deletions(-)
 delete mode 100644 e2e_demo_response_headers_callback.py

diff --git a/e2e_demo_response_headers_callback.py b/e2e_demo_response_headers_callback.py
deleted file mode 100644
index aabe0008a99..00000000000
--- a/e2e_demo_response_headers_callback.py
+++ /dev/null
@@ -1,90 +0,0 @@
-"""
-Demo CustomLogger that injects custom response headers.
-
-Shows how to:
-1. Echo an incoming request header (e.g., APIGEE request ID) into the response
-2. Inject headers on both success and failure paths
-3. Works for /chat/completions, /embeddings, and /responses
-
-Usage:
-    litellm --config tests/e2e_demo_response_headers_config.yaml
-
-Test commands:
-    # /chat/completions (non-streaming)
-    curl -s -D- http://localhost:4000/chat/completions \
-      -H "Authorization: Bearer sk-1234" \
-      -H "x-apigee-request-id: apigee-req-001" \
-      -d '{"model":"gpt-4o-mini","messages":[{"role":"user","content":"hi"}]}'
-
-    # /chat/completions (streaming)
-    curl -s -D- http://localhost:4000/chat/completions \
-      -H "Authorization: Bearer sk-1234" \
-      -H "x-apigee-request-id: apigee-req-002" \
-      -d '{"model":"gpt-4o-mini","messages":[{"role":"user","content":"hi"}],"stream":true}'
-
-    # /embeddings
-    curl -s -D- http://localhost:4000/embeddings \
-      -H "Authorization: Bearer sk-1234" \
-      -H "x-apigee-request-id: apigee-req-003" \
-      -d '{"model":"text-embedding-3-small","input":"hello"}'
-
-    # /v1/responses (non-streaming)
-    curl -s -D- http://localhost:4000/v1/responses \
-      -H "Authorization: Bearer sk-1234" \
-      -H "x-apigee-request-id: apigee-req-004" \
-      -d '{"model":"gpt-4o-mini","input":"hi"}'
-
-    # /v1/responses (streaming)
-    curl -s -D- http://localhost:4000/v1/responses \
-      -H "Authorization: Bearer sk-1234" \
-      -H "x-apigee-request-id: apigee-req-005" \
-      -d '{"model":"gpt-4o-mini","input":"hi","stream":true}'
-
-    # Failure path (bad model → headers still injected)
-    curl -s -D- http://localhost:4000/chat/completions \
-      -H "Authorization: Bearer sk-1234" \
-      -H "x-apigee-request-id: apigee-req-006" \
-      -d '{"model":"nonexistent-model","messages":[{"role":"user","content":"hi"}]}'
-
-Expected: All responses contain x-apigee-request-id, x-custom-header, and x-litellm-hook-model.
-"""
-
-from typing import Any, Dict, Optional
-
-from litellm.integrations.custom_logger import CustomLogger
-from litellm.proxy._types import UserAPIKeyAuth
-
-
-class ResponseHeaderInjector(CustomLogger):
-    """
-    Demonstrates injecting custom HTTP response headers via the proxy hook.
-
-    Key features:
-    - Echoes the incoming x-apigee-request-id header back in the response
-    - Adds a static custom header and the model name
-    - Works for success (streaming + non-streaming) and failure responses
-    - Works for all endpoints: /chat/completions, /embeddings, /responses
-    """
-
-    async def async_post_call_response_headers_hook(
-        self,
-        data: dict,
-        user_api_key_dict: UserAPIKeyAuth,
-        response: Any,
-        request_headers: Optional[Dict[str, str]] = None,
-    ) -> Optional[Dict[str, str]]:
-        headers: Dict[str, str] = {
-            "x-custom-header": "hello-from-hook",
-            "x-litellm-hook-model": data.get("model", "unknown"),
-        }
-
-        # Echo the APIGEE request ID from the incoming request into the response
-        if request_headers:
-            apigee_id = request_headers.get("x-apigee-request-id")
-            if apigee_id:
-                headers["x-apigee-request-id"] = apigee_id
-
-        return headers
-
-
-response_header_injector = ResponseHeaderInjector()
diff --git a/litellm/proxy/anthropic_endpoints/endpoints.py b/litellm/proxy/anthropic_endpoints/endpoints.py
index 77bb1f53e62..df986634199 100644
--- a/litellm/proxy/anthropic_endpoints/endpoints.py
+++ b/litellm/proxy/anthropic_endpoints/endpoints.py
@@ -78,6 +78,16 @@ async def anthropic_response(  # noqa: PLR0915
             request_data=_data,
         )
 
+        # Call response headers hook for guardrail failure path
+        _callback_headers = await proxy_logging_obj.post_call_response_headers_hook(
+            data=_data,
+            user_api_key_dict=user_api_key_dict,
+            response=None,
+            request_headers=ProxyBaseLLMRequestProcessing._filter_sensitive_headers(request.headers),
+        )
+        if _callback_headers:
+            fastapi_response.headers.update(_callback_headers)
+
         # Create Anthropic-formatted response with violation message
         import uuid
 
diff --git a/litellm/proxy/common_request_processing.py b/litellm/proxy/common_request_processing.py
index 7f7af506a07..95a7b2cb945 100644
--- a/litellm/proxy/common_request_processing.py
+++ b/litellm/proxy/common_request_processing.py
@@ -8,6 +8,7 @@
     Any,
     AsyncGenerator,
     Callable,
+    Dict,
     Literal,
     Optional,
     Tuple,
@@ -351,10 +352,24 @@ def _get_cost_breakdown_from_logging_obj(
 
 
 class ProxyBaseLLMRequestProcessing:
+    # Headers excluded from request_headers passed to callbacks to avoid leaking credentials
+    _SENSITIVE_HEADERS = frozenset({"authorization", "cookie", "proxy-authorization"})
+
     def __init__(self, data: dict):
         self.data = data
         self._request_headers: Optional[Dict[str, str]] = None
 
+    @staticmethod
+    def _filter_sensitive_headers(
+        headers: "starlette.datastructures.Headers",
+    ) -> dict:
+        """Return a copy of request headers with sensitive values removed."""
+        return {
+            k: v
+            for k, v in headers.items()
+            if k.lower() not in ProxyBaseLLMRequestProcessing._SENSITIVE_HEADERS
+        }
+
     @staticmethod
     def get_custom_headers(
         *,
@@ -750,7 +765,7 @@ async def base_process_llm_request(
         """
         Common request processing logic for both chat completions and responses API endpoints
         """
-        self._request_headers = dict(request.headers)
+        self._request_headers = self._filter_sensitive_headers(request.headers)
 
         requested_model_from_client: Optional[str] = (
             self.data.get("model") if isinstance(self.data.get("model"), str) else None
diff --git a/litellm/proxy/proxy_server.py b/litellm/proxy/proxy_server.py
index 7d1067f7b05..655ff469332 100644
--- a/litellm/proxy/proxy_server.py
+++ b/litellm/proxy/proxy_server.py
@@ -6202,6 +6202,17 @@ async def chat_completion(  # noqa: PLR0915
             original_exception=e,
             request_data=_data,
         )
+
+        # Call response headers hook for guardrail failure path
+        _callback_headers = await proxy_logging_obj.post_call_response_headers_hook(
+            data=_data,
+            user_api_key_dict=user_api_key_dict,
+            response=None,
+            request_headers=ProxyBaseLLMRequestProcessing._filter_sensitive_headers(request.headers),
+        )
+        if _callback_headers:
+            fastapi_response.headers.update(_callback_headers)
+
         _chat_response = litellm.ModelResponse()
         _chat_response.model = e.model  # type: ignore
         _chat_response.choices[0].message.content = e.message  # type: ignore
@@ -6367,6 +6378,16 @@ async def completion(  # noqa: PLR0915
             request_data=_data,
         )
 
+        # Call response headers hook for guardrail failure path
+        _callback_headers = await proxy_logging_obj.post_call_response_headers_hook(
+            data=_data,
+            user_api_key_dict=user_api_key_dict,
+            response=None,
+            request_headers=ProxyBaseLLMRequestProcessing._filter_sensitive_headers(request.headers),
+        )
+        if _callback_headers:
+            fastapi_response.headers.update(_callback_headers)
+
         if _data.get("stream", None) is not None and _data["stream"] is True:
             _text_response = litellm.ModelResponse()
             # Set text attribute dynamically for text completion format
diff --git a/litellm/proxy/response_api_endpoints/endpoints.py b/litellm/proxy/response_api_endpoints/endpoints.py
index 970819cc5ea..3f627f8bb3d 100644
--- a/litellm/proxy/response_api_endpoints/endpoints.py
+++ b/litellm/proxy/response_api_endpoints/endpoints.py
@@ -231,7 +231,7 @@ async def responses_api(
             data=_data,
             user_api_key_dict=user_api_key_dict,
             response=None,
-            request_headers=dict(request.headers),
+            request_headers=ProxyBaseLLMRequestProcessing._filter_sensitive_headers(request.headers),
         )
         if callback_headers:
             fastapi_response.headers.update(callback_headers)

From 631869213c793cd3881eac397a2f7ccac4c6fc61 Mon Sep 17 00:00:00 2001
From: michelligabriele <gabriele.michelli@icloud.com>
Date: Wed, 18 Feb 2026 14:46:34 +0100
Subject: [PATCH 3/4] refactor(proxy): centralize ModifyResponseException
 header logic

Move post_call_failure_hook + post_call_response_headers_hook calls
from 4 separate endpoint handlers into a single
_handle_modify_response_exception method on ProxyBaseLLMRequestProcessing,
mirroring the existing _handle_llm_api_exception pattern.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 .../proxy/anthropic_endpoints/endpoints.py    | 17 +++-------
 litellm/proxy/common_request_processing.py    | 28 +++++++++++++++
 litellm/proxy/proxy_server.py                 | 34 +++++--------------
 .../proxy/response_api_endpoints/endpoints.py | 18 +++-------
 ...response_headers_on_guardrail_exception.py | 34 ++++++++++---------
 5 files changed, 62 insertions(+), 69 deletions(-)

diff --git a/litellm/proxy/anthropic_endpoints/endpoints.py b/litellm/proxy/anthropic_endpoints/endpoints.py
index df986634199..4617f9b865c 100644
--- a/litellm/proxy/anthropic_endpoints/endpoints.py
+++ b/litellm/proxy/anthropic_endpoints/endpoints.py
@@ -72,21 +72,12 @@ async def anthropic_response(  # noqa: PLR0915
     except ModifyResponseException as e:
         # Guardrail flagged content in passthrough mode - return 200 with violation message
         _data = e.request_data
-        await proxy_logging_obj.post_call_failure_hook(
-            user_api_key_dict=user_api_key_dict,
-            original_exception=e,
-            request_data=_data,
-        )
-
-        # Call response headers hook for guardrail failure path
-        _callback_headers = await proxy_logging_obj.post_call_response_headers_hook(
-            data=_data,
+        await base_llm_response_processor._handle_modify_response_exception(
+            e=e,
             user_api_key_dict=user_api_key_dict,
-            response=None,
-            request_headers=ProxyBaseLLMRequestProcessing._filter_sensitive_headers(request.headers),
+            proxy_logging_obj=proxy_logging_obj,
+            fastapi_response=fastapi_response,
         )
-        if _callback_headers:
-            fastapi_response.headers.update(_callback_headers)
 
         # Create Anthropic-formatted response with violation message
         import uuid
diff --git a/litellm/proxy/common_request_processing.py b/litellm/proxy/common_request_processing.py
index 95a7b2cb945..d1b36c07c6f 100644
--- a/litellm/proxy/common_request_processing.py
+++ b/litellm/proxy/common_request_processing.py
@@ -50,6 +50,7 @@
 StreamErrorSerializer = Callable[[ProxyException], str]
 
 if TYPE_CHECKING:
+    from litellm.integrations.custom_guardrail import ModifyResponseException
     from litellm.proxy.proxy_server import ProxyConfig as _ProxyConfig
 
     ProxyConfig = _ProxyConfig
@@ -1085,6 +1086,33 @@ def _is_streaming_request(
             return True
         return False
 
+    async def _handle_modify_response_exception(
+        self,
+        e: "ModifyResponseException",
+        user_api_key_dict: UserAPIKeyAuth,
+        proxy_logging_obj: ProxyLogging,
+        fastapi_response: Response,
+    ):
+        """Centralized handling for ModifyResponseException (guardrail passthrough).
+
+        Calls the failure hook and injects custom response headers — mirrors
+        the pattern in ``_handle_llm_api_exception`` for error responses.
+        """
+        await proxy_logging_obj.post_call_failure_hook(
+            user_api_key_dict=user_api_key_dict,
+            original_exception=e,
+            request_data=e.request_data,
+        )
+
+        callback_headers = await proxy_logging_obj.post_call_response_headers_hook(
+            data=e.request_data,
+            user_api_key_dict=user_api_key_dict,
+            response=None,
+            request_headers=self._request_headers,
+        )
+        if callback_headers:
+            fastapi_response.headers.update(callback_headers)
+
     async def _handle_llm_api_exception(
         self,
         e: Exception,
diff --git a/litellm/proxy/proxy_server.py b/litellm/proxy/proxy_server.py
index 655ff469332..46a75b97123 100644
--- a/litellm/proxy/proxy_server.py
+++ b/litellm/proxy/proxy_server.py
@@ -6197,21 +6197,12 @@ async def chat_completion(  # noqa: PLR0915
     except ModifyResponseException as e:
         # Guardrail flagged content in passthrough mode - return 200 with violation message
         _data = e.request_data
-        await proxy_logging_obj.post_call_failure_hook(
-            user_api_key_dict=user_api_key_dict,
-            original_exception=e,
-            request_data=_data,
-        )
-
-        # Call response headers hook for guardrail failure path
-        _callback_headers = await proxy_logging_obj.post_call_response_headers_hook(
-            data=_data,
+        await base_llm_response_processor._handle_modify_response_exception(
+            e=e,
             user_api_key_dict=user_api_key_dict,
-            response=None,
-            request_headers=ProxyBaseLLMRequestProcessing._filter_sensitive_headers(request.headers),
+            proxy_logging_obj=proxy_logging_obj,
+            fastapi_response=fastapi_response,
         )
-        if _callback_headers:
-            fastapi_response.headers.update(_callback_headers)
 
         _chat_response = litellm.ModelResponse()
         _chat_response.model = e.model  # type: ignore
@@ -6372,21 +6363,12 @@ async def completion(  # noqa: PLR0915
     except ModifyResponseException as e:
         # Guardrail flagged content in passthrough mode - return 200 with violation message
         _data = e.request_data
-        await proxy_logging_obj.post_call_failure_hook(
-            user_api_key_dict=user_api_key_dict,
-            original_exception=e,
-            request_data=_data,
-        )
-
-        # Call response headers hook for guardrail failure path
-        _callback_headers = await proxy_logging_obj.post_call_response_headers_hook(
-            data=_data,
+        await base_llm_response_processor._handle_modify_response_exception(
+            e=e,
             user_api_key_dict=user_api_key_dict,
-            response=None,
-            request_headers=ProxyBaseLLMRequestProcessing._filter_sensitive_headers(request.headers),
+            proxy_logging_obj=proxy_logging_obj,
+            fastapi_response=fastapi_response,
         )
-        if _callback_headers:
-            fastapi_response.headers.update(_callback_headers)
 
         if _data.get("stream", None) is not None and _data["stream"] is True:
             _text_response = litellm.ModelResponse()
diff --git a/litellm/proxy/response_api_endpoints/endpoints.py b/litellm/proxy/response_api_endpoints/endpoints.py
index 3f627f8bb3d..05f187cdf7e 100644
--- a/litellm/proxy/response_api_endpoints/endpoints.py
+++ b/litellm/proxy/response_api_endpoints/endpoints.py
@@ -219,22 +219,12 @@ async def responses_api(
         return response
     except ModifyResponseException as e:
         # Guardrail passthrough: return violation message in Responses API format (200)
-        _data = e.request_data
-        await proxy_logging_obj.post_call_failure_hook(
-            user_api_key_dict=user_api_key_dict,
-            original_exception=e,
-            request_data=_data,
-        )
-
-        # Call response headers hook for guardrail failure path
-        callback_headers = await proxy_logging_obj.post_call_response_headers_hook(
-            data=_data,
+        await processor._handle_modify_response_exception(
+            e=e,
             user_api_key_dict=user_api_key_dict,
-            response=None,
-            request_headers=ProxyBaseLLMRequestProcessing._filter_sensitive_headers(request.headers),
+            proxy_logging_obj=proxy_logging_obj,
+            fastapi_response=fastapi_response,
         )
-        if callback_headers:
-            fastapi_response.headers.update(callback_headers)
 
         violation_text = e.message
         response_obj = ResponsesAPIResponse(
diff --git a/tests/test_litellm/proxy/response_api_endpoints/test_response_headers_on_guardrail_exception.py b/tests/test_litellm/proxy/response_api_endpoints/test_response_headers_on_guardrail_exception.py
index 04b98f6d472..9f8fe4142a1 100644
--- a/tests/test_litellm/proxy/response_api_endpoints/test_response_headers_on_guardrail_exception.py
+++ b/tests/test_litellm/proxy/response_api_endpoints/test_response_headers_on_guardrail_exception.py
@@ -1,6 +1,7 @@
 """
-Test that post_call_response_headers_hook is called on ModifyResponseException
-in the /responses endpoint, so custom headers appear even on guardrail failures.
+Test that _handle_modify_response_exception (centralized in ProxyBaseLLMRequestProcessing)
+is called on ModifyResponseException in the /responses endpoint, so custom headers appear
+even on guardrail failures.
 """
 
 import os
@@ -32,10 +33,12 @@ async def async_post_call_response_headers_hook(
 async def test_modify_response_exception_calls_response_headers_hook():
     """
     When a guardrail raises ModifyResponseException on /responses,
-    the response should still include custom headers from the hook.
+    the response should still include custom headers from the hook
+    via the centralized _handle_modify_response_exception method.
     """
     from litellm.integrations.custom_guardrail import ModifyResponseException
     from litellm.proxy.proxy_server import app
+    from litellm.proxy.common_request_processing import ProxyBaseLLMRequestProcessing
     from fastapi.testclient import TestClient
 
     guardrail_logger = GuardrailHeaderLogger()
@@ -48,19 +51,18 @@ async def test_modify_response_exception_calls_response_headers_hook():
                 team_id=None,
             )
 
-            # Make base_process_llm_request raise ModifyResponseException
-            with patch(
-                "litellm.proxy.response_api_endpoints.endpoints.ProxyBaseLLMRequestProcessing"
-            ) as MockProcessor:
-                mock_instance = MockProcessor.return_value
-                mock_instance.base_process_llm_request = AsyncMock(
-                    side_effect=ModifyResponseException(
-                        message="Content blocked by guardrail",
-                        model="gpt-4o",
-                        request_data={"model": "gpt-4o"},
-                    )
-                )
-
+            # Only mock base_process_llm_request so the real
+            # _handle_modify_response_exception runs and calls the hook.
+            with patch.object(
+                ProxyBaseLLMRequestProcessing,
+                "base_process_llm_request",
+                new_callable=AsyncMock,
+                side_effect=ModifyResponseException(
+                    message="Content blocked by guardrail",
+                    model="gpt-4o",
+                    request_data={"model": "gpt-4o"},
+                ),
+            ):
                 client = TestClient(app)
                 response = client.post(
                     "/v1/responses",

From 36717bda188e91e5371db16f8a2f3781f28e85db Mon Sep 17 00:00:00 2001
From: michelligabriele <gabriele.michelli@icloud.com>
Date: Wed, 18 Feb 2026 14:56:05 +0100
Subject: [PATCH 4/4] fix(proxy): pass custom headers to StreamingResponse on
 guardrail failures

When a streaming request triggered ModifyResponseException, custom
headers set by _handle_modify_response_exception on fastapi_response
were lost because a new StreamingResponse was returned without them.
Pass dict(fastapi_response.headers) to StreamingResponse/create_response
in /chat/completions, /completions, and /v1/messages handlers.

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
---
 litellm/proxy/anthropic_endpoints/endpoints.py | 2 +-
 litellm/proxy/proxy_server.py                  | 2 ++
 2 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/litellm/proxy/anthropic_endpoints/endpoints.py b/litellm/proxy/anthropic_endpoints/endpoints.py
index 4617f9b865c..3d6c58a33ee 100644
--- a/litellm/proxy/anthropic_endpoints/endpoints.py
+++ b/litellm/proxy/anthropic_endpoints/endpoints.py
@@ -111,7 +111,7 @@ async def _passthrough_stream_generator():
             return await create_response(
                 generator=selected_data_generator,
                 media_type="text/event-stream",
-                headers={},
+                headers=dict(fastapi_response.headers),
             )
 
         return _anthropic_response
diff --git a/litellm/proxy/proxy_server.py b/litellm/proxy/proxy_server.py
index 46a75b97123..9f7156273a6 100644
--- a/litellm/proxy/proxy_server.py
+++ b/litellm/proxy/proxy_server.py
@@ -6229,6 +6229,7 @@ async def chat_completion(  # noqa: PLR0915
                 selected_data_generator,
                 media_type="text/event-stream",
                 status_code=200,  # Return 200 for passthrough mode
+                headers=dict(fastapi_response.headers),
             )
         _usage = litellm.Usage(prompt_tokens=0, completion_tokens=0, total_tokens=0)
         _chat_response.usage = _usage  # type: ignore
@@ -6400,6 +6401,7 @@ async def completion(  # noqa: PLR0915
                 selected_data_generator,
                 media_type="text/event-stream",
                 status_code=200,  # Return 200 for passthrough mode
+                headers=dict(fastapi_response.headers),
             )
         else:
             _response = litellm.TextCompletionResponse()