BerriAI · ishaan-jaff · Feb 11, 2026 · Feb 8, 2026 · Feb 11, 2026 · greptile-apps
diff --git a/litellm/proxy/guardrails/guardrail_hooks/presidio.py b/litellm/proxy/guardrails/guardrail_hooks/presidio.py
@@ -38,7 +38,7 @@
 
 from litellm._uuid import uuid
 from litellm.caching.caching import DualCache
-from litellm.exceptions import BlockedPiiEntityError
+from litellm.exceptions import BlockedPiiEntityError, GuardrailRaisedException
 from litellm.integrations.custom_guardrail import (
     CustomGuardrail,
     log_guardrail_information,
@@ -232,6 +232,14 @@ def __del__(self):
         """Cleanup: we try to close, but doing async cleanup in __del__ is risky."""
         pass
 
+    def _has_block_action(self) -> bool:
+        """Return True if pii_entities_config has any BLOCK action (fail-closed on analyzer errors)."""
+        if not self.pii_entities_config:
+            return False
+        return any(
+            action == PiiAction.BLOCK for action in self.pii_entities_config.values()
+        )
+
     def _get_presidio_analyze_request_payload(
         self,
         text: str,
@@ -316,37 +324,64 @@ async def analyze_text(
 
                 # Handle error responses from Presidio (e.g., {'error': 'No text provided'})
                 # Presidio may return a dict instead of a list when errors occur
+                def _fail_on_invalid_response(
+                    reason: str,
+                ) -> List[PresidioAnalyzeResponseItem]:
+                    should_fail_closed = (
+                        bool(self.pii_entities_config)
+                        or self.output_parse_pii
+                        or self.apply_to_output
+                    )
+                    if should_fail_closed:
+                        raise GuardrailRaisedException(
+                            guardrail_name=self.guardrail_name,
+                            message=f"Presidio analyzer returned invalid response; cannot verify PII when PII protection is configured: {reason}",
+                            should_wrap_with_default_message=False,
+                        )
+                    verbose_proxy_logger.warning(
+                        "Presidio analyzer %s, returning empty list", reason
+                    )
+                    return []
+
                 if isinstance(analyze_results, dict):
                     if "error" in analyze_results:
-                        verbose_proxy_logger.warning(
-                            "Presidio analyzer returned error: %s, returning empty list",
-                            analyze_results.get("error"),
+                        return _fail_on_invalid_response(
+                            f"error: {analyze_results.get('error')}"
                         )
-                        return []
                     # If it's a dict but not an error, try to process it as a single item
                     verbose_proxy_logger.debug(
                         "Presidio returned dict (not list), attempting to process as single item"
                     )
                     try:
                         return [PresidioAnalyzeResponseItem(**analyze_results)]
                     except Exception as e:
-                        verbose_proxy_logger.warning(
-                            "Failed to parse Presidio dict response: %s, returning empty list",
-                            e,
+                        return _fail_on_invalid_response(
+                            f"failed to parse dict response: {e}"
                         )
-                        return []
+
+                # Handle unexpected types (str, None, etc.) - e.g. from malformed/error
+                if not isinstance(analyze_results, list):
+                    return _fail_on_invalid_response(
+                        f"unexpected type {type(analyze_results).__name__} (expected list or dict), response: {str(analyze_results)[:200]}"
+                    )
 
                 # Normal case: list of results
                 final_results = []
                 for item in analyze_results:
+                    if not isinstance(item, dict):
+                        verbose_proxy_logger.warning(
+                            "Skipping invalid Presidio result item (expected dict, got %s): %s",
+                            type(item).__name__,
+                            str(item)[:100],
+                        )
+                        continue
                     try:
                         final_results.append(PresidioAnalyzeResponseItem(**item))
-                    except TypeError as te:
-                        # Handle case where item is not a dict (shouldn't happen, but be defensive)
+                    except Exception as e:
                         verbose_proxy_logger.warning(
-                            "Skipping invalid Presidio result item: %s (error: %s)",
+                            "Failed to parse Presidio result item: %s (error: %s)",
                             item,
-                            te,
+                            e,
                         )
                         continue
                 return final_results

diff --git a/tests/test_litellm/proxy/guardrails/guardrail_hooks/test_presidio.py b/tests/test_litellm/proxy/guardrails/guardrail_hooks/test_presidio.py
@@ -6,6 +6,7 @@
 import asyncio
 import os
 import sys
+from contextlib import asynccontextmanager
 from unittest.mock import MagicMock, patch
 
 import pytest
@@ -18,10 +19,41 @@
 from litellm.proxy.guardrails.guardrail_hooks.presidio import (
     _OPTIONAL_PresidioPIIMasking,
 )
+from litellm.exceptions import GuardrailRaisedException
 from litellm.types.guardrails import LitellmParams, PiiAction, PiiEntityType
 from litellm.types.utils import Choices, Message, ModelResponse
 
 
+def _make_mock_session_iterator(json_response):
+    """Create a mock _get_session_iterator that yields a session returning json_response."""
+
+    @asynccontextmanager
+    async def mock_iterator():
+        class MockResponse:
+            async def json(self):
+                return json_response
+
+            async def __aenter__(self):
+                return self
+
+            async def __aexit__(self, *args):
+                pass
+
+        class MockSession:
+            def post(self, *args, **kwargs):
+                return MockResponse()
+
+            async def __aenter__(self):
+                return self
+
+            async def __aexit__(self, *args):
+                pass
+
+        yield MockSession()
+
+    return mock_iterator
+
+
 @pytest.fixture
 def presidio_guardrail():
     """Create a Presidio guardrail instance for testing"""
@@ -889,37 +921,132 @@ async def test_analyze_text_error_dict_handling():
         output_parse_pii=False,
     )
 
-    # Mock the HTTP response to return error dict
-    class MockResponse:
-        async def json(self):
-            return {"error": "No text provided"}
-
-        async def __aenter__(self):
-            return self
+    with patch.object(
+        presidio,
+        "_get_session_iterator",
+        _make_mock_session_iterator({"error": "No text provided"}),
+    ):
+        result = await presidio.analyze_text(
+            text="some text",
+            presidio_config=None,
+            request_data={},
+        )
+    assert result == [], "Error dict should be handled gracefully"
 
-        async def __aexit__(self, *args):
-            pass
+    print("✓ analyze_text error dict handling test passed")
 
-    class MockSession:
-        def post(self, *args, **kwargs):
-            return MockResponse()
 
-        async def __aenter__(self):
-            return self
+@pytest.mark.asyncio
+async def test_analyze_text_string_response_handling():
+    """
+    Test that analyze_text handles string responses from Presidio API.
 
-        async def __aexit__(self, *args):
-            pass
+    When Presidio returns a string (e.g. error message from websearch/hosted models),
+    should handle gracefully instead of crashing with TypeError about mapping vs str.
+    """
+    presidio = _OPTIONAL_PresidioPIIMasking(
+        presidio_analyzer_api_base="http://mock-presidio:5002/",
+        presidio_anonymizer_api_base="http://mock-presidio:5001/",
+        output_parse_pii=False,
+    )
 
-    with patch("aiohttp.ClientSession", return_value=MockSession()):
+    with patch.object(
+        presidio,
+        "_get_session_iterator",
+        _make_mock_session_iterator("Internal Server Error"),
+    ):
         result = await presidio.analyze_text(
             text="some text",
             presidio_config=None,
             request_data={},
         )
-        # Should return empty list when error dict is received
-        assert result == [], "Error dict should be handled gracefully"
+    assert result == [], "String response should be handled gracefully"
 
-    print("✓ analyze_text error dict handling test passed")
+
+@pytest.mark.asyncio
+async def test_analyze_text_invalid_response_raises_when_block_configured():
+    """
+    When pii_entities_config has BLOCK and Presidio returns invalid response,
+    should raise GuardrailRaisedException (fail-closed) rather than silently allowing content.
+    """
+    presidio = _OPTIONAL_PresidioPIIMasking(
+        presidio_analyzer_api_base="http://mock-presidio:5002/",
+        presidio_anonymizer_api_base="http://mock-presidio:5001/",
+        output_parse_pii=False,
+        pii_entities_config={PiiEntityType.CREDIT_CARD: PiiAction.BLOCK},
+    )
+
+    with patch.object(
+        presidio,
+        "_get_session_iterator",
+        _make_mock_session_iterator("Internal Server Error"),
+    ):
+        with pytest.raises(GuardrailRaisedException) as exc_info:
+            await presidio.analyze_text(
+                text="some text",
+                presidio_config=None,
+                request_data={},
+            )
+    assert "BLOCK" in str(exc_info.value) or "Presidio" in str(exc_info.value)
+
+
+@pytest.mark.asyncio
+async def test_analyze_text_invalid_response_raises_when_mask_configured():
+    """
+    When pii_entities_config has MASK and Presidio returns invalid response,
+    should raise GuardrailRaisedException (fail-closed) because PII masking is expected.
+    """
+    presidio = _OPTIONAL_PresidioPIIMasking(
+        presidio_analyzer_api_base="http://mock-presidio:5002/",
+        presidio_anonymizer_api_base="http://mock-presidio:5001/",
+        output_parse_pii=False,
+        pii_entities_config={PiiEntityType.CREDIT_CARD: PiiAction.MASK},
+    )
+
+    with patch.object(
+        presidio,
+        "_get_session_iterator",
+        _make_mock_session_iterator("Internal Server Error"),
+    ):
+        with pytest.raises(GuardrailRaisedException) as exc_info:
+            await presidio.analyze_text(
+                text="some text",
+                presidio_config=None,
+                request_data={},
+            )
+    assert "PII protection is configured" in str(exc_info.value)
+
+
+@pytest.mark.asyncio
+async def test_analyze_text_list_with_non_dict_items():
+    """
+    Test that analyze_text skips non-dict items in the result list.
+
+    When Presidio returns a list containing strings (malformed response),
+    should skip invalid items and return parsed valid ones.
+    """
+    presidio = _OPTIONAL_PresidioPIIMasking(
+        presidio_analyzer_api_base="http://mock-presidio:5002/",
+        presidio_anonymizer_api_base="http://mock-presidio:5001/",
+        output_parse_pii=False,
+    )
+
+    json_response = [
+        {"entity_type": "PERSON", "start": 0, "end": 5, "score": 0.9},
+        "invalid_string_item",
+        {"entity_type": "EMAIL", "start": 10, "end": 25, "score": 0.85},
+    ]
+    with patch.object(
+        presidio, "_get_session_iterator", _make_mock_session_iterator(json_response)
+    ):
+        result = await presidio.analyze_text(
+            text="some text",
+            presidio_config=None,
+            request_data={},
+        )
+    assert len(result) == 2, "Should parse 2 valid dict items and skip the string"
+    assert result[0].get("entity_type") == "PERSON"
+    assert result[1].get("entity_type") == "EMAIL"
 
 
 @pytest.mark.asyncio