diff --git a/litellm/proxy/guardrails/guardrail_hooks/presidio.py b/litellm/proxy/guardrails/guardrail_hooks/presidio.py index d71b8449f94..3984384aae4 100644 --- a/litellm/proxy/guardrails/guardrail_hooks/presidio.py +++ b/litellm/proxy/guardrails/guardrail_hooks/presidio.py @@ -38,7 +38,7 @@ from litellm._uuid import uuid from litellm.caching.caching import DualCache -from litellm.exceptions import BlockedPiiEntityError +from litellm.exceptions import BlockedPiiEntityError, GuardrailRaisedException from litellm.integrations.custom_guardrail import ( CustomGuardrail, log_guardrail_information, @@ -232,6 +232,14 @@ def __del__(self): """Cleanup: we try to close, but doing async cleanup in __del__ is risky.""" pass + def _has_block_action(self) -> bool: + """Return True if pii_entities_config has any BLOCK action (fail-closed on analyzer errors).""" + if not self.pii_entities_config: + return False + return any( + action == PiiAction.BLOCK for action in self.pii_entities_config.values() + ) + def _get_presidio_analyze_request_payload( self, text: str, @@ -316,13 +324,30 @@ async def analyze_text( # Handle error responses from Presidio (e.g., {'error': 'No text provided'}) # Presidio may return a dict instead of a list when errors occur + def _fail_on_invalid_response( + reason: str, + ) -> List[PresidioAnalyzeResponseItem]: + should_fail_closed = ( + bool(self.pii_entities_config) + or self.output_parse_pii + or self.apply_to_output + ) + if should_fail_closed: + raise GuardrailRaisedException( + guardrail_name=self.guardrail_name, + message=f"Presidio analyzer returned invalid response; cannot verify PII when PII protection is configured: {reason}", + should_wrap_with_default_message=False, + ) + verbose_proxy_logger.warning( + "Presidio analyzer %s, returning empty list", reason + ) + return [] + if isinstance(analyze_results, dict): if "error" in analyze_results: - verbose_proxy_logger.warning( - "Presidio analyzer returned error: %s, returning empty list", - analyze_results.get("error"), + return _fail_on_invalid_response( + f"error: {analyze_results.get('error')}" ) - return [] # If it's a dict but not an error, try to process it as a single item verbose_proxy_logger.debug( "Presidio returned dict (not list), attempting to process as single item" @@ -330,23 +355,33 @@ async def analyze_text( try: return [PresidioAnalyzeResponseItem(**analyze_results)] except Exception as e: - verbose_proxy_logger.warning( - "Failed to parse Presidio dict response: %s, returning empty list", - e, + return _fail_on_invalid_response( + f"failed to parse dict response: {e}" ) - return [] + + # Handle unexpected types (str, None, etc.) - e.g. from malformed/error + if not isinstance(analyze_results, list): + return _fail_on_invalid_response( + f"unexpected type {type(analyze_results).__name__} (expected list or dict), response: {str(analyze_results)[:200]}" + ) # Normal case: list of results final_results = [] for item in analyze_results: + if not isinstance(item, dict): + verbose_proxy_logger.warning( + "Skipping invalid Presidio result item (expected dict, got %s): %s", + type(item).__name__, + str(item)[:100], + ) + continue try: final_results.append(PresidioAnalyzeResponseItem(**item)) - except TypeError as te: - # Handle case where item is not a dict (shouldn't happen, but be defensive) + except Exception as e: verbose_proxy_logger.warning( - "Skipping invalid Presidio result item: %s (error: %s)", + "Failed to parse Presidio result item: %s (error: %s)", item, - te, + e, ) continue return final_results diff --git a/tests/test_litellm/proxy/guardrails/guardrail_hooks/test_presidio.py b/tests/test_litellm/proxy/guardrails/guardrail_hooks/test_presidio.py index 5ec9b13408e..f01c23f7116 100644 --- a/tests/test_litellm/proxy/guardrails/guardrail_hooks/test_presidio.py +++ b/tests/test_litellm/proxy/guardrails/guardrail_hooks/test_presidio.py @@ -6,6 +6,7 @@ import asyncio import os import sys +from contextlib import asynccontextmanager from unittest.mock import MagicMock, patch import pytest @@ -18,10 +19,41 @@ from litellm.proxy.guardrails.guardrail_hooks.presidio import ( _OPTIONAL_PresidioPIIMasking, ) +from litellm.exceptions import GuardrailRaisedException from litellm.types.guardrails import LitellmParams, PiiAction, PiiEntityType from litellm.types.utils import Choices, Message, ModelResponse +def _make_mock_session_iterator(json_response): + """Create a mock _get_session_iterator that yields a session returning json_response.""" + + @asynccontextmanager + async def mock_iterator(): + class MockResponse: + async def json(self): + return json_response + + async def __aenter__(self): + return self + + async def __aexit__(self, *args): + pass + + class MockSession: + def post(self, *args, **kwargs): + return MockResponse() + + async def __aenter__(self): + return self + + async def __aexit__(self, *args): + pass + + yield MockSession() + + return mock_iterator + + @pytest.fixture def presidio_guardrail(): """Create a Presidio guardrail instance for testing""" @@ -889,37 +921,132 @@ async def test_analyze_text_error_dict_handling(): output_parse_pii=False, ) - # Mock the HTTP response to return error dict - class MockResponse: - async def json(self): - return {"error": "No text provided"} - - async def __aenter__(self): - return self + with patch.object( + presidio, + "_get_session_iterator", + _make_mock_session_iterator({"error": "No text provided"}), + ): + result = await presidio.analyze_text( + text="some text", + presidio_config=None, + request_data={}, + ) + assert result == [], "Error dict should be handled gracefully" - async def __aexit__(self, *args): - pass + print("✓ analyze_text error dict handling test passed") - class MockSession: - def post(self, *args, **kwargs): - return MockResponse() - async def __aenter__(self): - return self +@pytest.mark.asyncio +async def test_analyze_text_string_response_handling(): + """ + Test that analyze_text handles string responses from Presidio API. - async def __aexit__(self, *args): - pass + When Presidio returns a string (e.g. error message from websearch/hosted models), + should handle gracefully instead of crashing with TypeError about mapping vs str. + """ + presidio = _OPTIONAL_PresidioPIIMasking( + presidio_analyzer_api_base="http://mock-presidio:5002/", + presidio_anonymizer_api_base="http://mock-presidio:5001/", + output_parse_pii=False, + ) - with patch("aiohttp.ClientSession", return_value=MockSession()): + with patch.object( + presidio, + "_get_session_iterator", + _make_mock_session_iterator("Internal Server Error"), + ): result = await presidio.analyze_text( text="some text", presidio_config=None, request_data={}, ) - # Should return empty list when error dict is received - assert result == [], "Error dict should be handled gracefully" + assert result == [], "String response should be handled gracefully" - print("✓ analyze_text error dict handling test passed") + +@pytest.mark.asyncio +async def test_analyze_text_invalid_response_raises_when_block_configured(): + """ + When pii_entities_config has BLOCK and Presidio returns invalid response, + should raise GuardrailRaisedException (fail-closed) rather than silently allowing content. + """ + presidio = _OPTIONAL_PresidioPIIMasking( + presidio_analyzer_api_base="http://mock-presidio:5002/", + presidio_anonymizer_api_base="http://mock-presidio:5001/", + output_parse_pii=False, + pii_entities_config={PiiEntityType.CREDIT_CARD: PiiAction.BLOCK}, + ) + + with patch.object( + presidio, + "_get_session_iterator", + _make_mock_session_iterator("Internal Server Error"), + ): + with pytest.raises(GuardrailRaisedException) as exc_info: + await presidio.analyze_text( + text="some text", + presidio_config=None, + request_data={}, + ) + assert "BLOCK" in str(exc_info.value) or "Presidio" in str(exc_info.value) + + +@pytest.mark.asyncio +async def test_analyze_text_invalid_response_raises_when_mask_configured(): + """ + When pii_entities_config has MASK and Presidio returns invalid response, + should raise GuardrailRaisedException (fail-closed) because PII masking is expected. + """ + presidio = _OPTIONAL_PresidioPIIMasking( + presidio_analyzer_api_base="http://mock-presidio:5002/", + presidio_anonymizer_api_base="http://mock-presidio:5001/", + output_parse_pii=False, + pii_entities_config={PiiEntityType.CREDIT_CARD: PiiAction.MASK}, + ) + + with patch.object( + presidio, + "_get_session_iterator", + _make_mock_session_iterator("Internal Server Error"), + ): + with pytest.raises(GuardrailRaisedException) as exc_info: + await presidio.analyze_text( + text="some text", + presidio_config=None, + request_data={}, + ) + assert "PII protection is configured" in str(exc_info.value) + + +@pytest.mark.asyncio +async def test_analyze_text_list_with_non_dict_items(): + """ + Test that analyze_text skips non-dict items in the result list. + + When Presidio returns a list containing strings (malformed response), + should skip invalid items and return parsed valid ones. + """ + presidio = _OPTIONAL_PresidioPIIMasking( + presidio_analyzer_api_base="http://mock-presidio:5002/", + presidio_anonymizer_api_base="http://mock-presidio:5001/", + output_parse_pii=False, + ) + + json_response = [ + {"entity_type": "PERSON", "start": 0, "end": 5, "score": 0.9}, + "invalid_string_item", + {"entity_type": "EMAIL", "start": 10, "end": 25, "score": 0.85}, + ] + with patch.object( + presidio, "_get_session_iterator", _make_mock_session_iterator(json_response) + ): + result = await presidio.analyze_text( + text="some text", + presidio_config=None, + request_data={}, + ) + assert len(result) == 2, "Should parse 2 valid dict items and skip the string" + assert result[0].get("entity_type") == "PERSON" + assert result[1].get("entity_type") == "EMAIL" @pytest.mark.asyncio