Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
61 changes: 48 additions & 13 deletions litellm/proxy/guardrails/guardrail_hooks/presidio.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@

from litellm._uuid import uuid
from litellm.caching.caching import DualCache
from litellm.exceptions import BlockedPiiEntityError
from litellm.exceptions import BlockedPiiEntityError, GuardrailRaisedException
from litellm.integrations.custom_guardrail import (
CustomGuardrail,
log_guardrail_information,
Expand Down Expand Up @@ -232,6 +232,14 @@ def __del__(self):
"""Cleanup: we try to close, but doing async cleanup in __del__ is risky."""
pass

def _has_block_action(self) -> bool:
"""Return True if pii_entities_config has any BLOCK action (fail-closed on analyzer errors)."""
if not self.pii_entities_config:
return False
return any(
action == PiiAction.BLOCK for action in self.pii_entities_config.values()
)
Comment on lines +235 to +241
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

_has_block_action() is defined but never called anywhere in the codebase. The fail-closed logic in _fail_on_invalid_response uses bool(self.pii_entities_config) instead. Remove this dead code or use it in the should_fail_closed check if the intent was to only fail-closed on BLOCK actions.


def _get_presidio_analyze_request_payload(
self,
text: str,
Expand Down Expand Up @@ -316,37 +324,64 @@ async def analyze_text(

# Handle error responses from Presidio (e.g., {'error': 'No text provided'})
# Presidio may return a dict instead of a list when errors occur
def _fail_on_invalid_response(
reason: str,
) -> List[PresidioAnalyzeResponseItem]:
should_fail_closed = (
bool(self.pii_entities_config)
or self.output_parse_pii
or self.apply_to_output
)
if should_fail_closed:
raise GuardrailRaisedException(
guardrail_name=self.guardrail_name,
message=f"Presidio analyzer returned invalid response; cannot verify PII when PII protection is configured: {reason}",
should_wrap_with_default_message=False,
)
verbose_proxy_logger.warning(
"Presidio analyzer %s, returning empty list", reason
)
return []

if isinstance(analyze_results, dict):
if "error" in analyze_results:
verbose_proxy_logger.warning(
"Presidio analyzer returned error: %s, returning empty list",
analyze_results.get("error"),
return _fail_on_invalid_response(
f"error: {analyze_results.get('error')}"
)
return []
# If it's a dict but not an error, try to process it as a single item
verbose_proxy_logger.debug(
"Presidio returned dict (not list), attempting to process as single item"
)
try:
return [PresidioAnalyzeResponseItem(**analyze_results)]
except Exception as e:
verbose_proxy_logger.warning(
"Failed to parse Presidio dict response: %s, returning empty list",
e,
return _fail_on_invalid_response(
f"failed to parse dict response: {e}"
)
return []

# Handle unexpected types (str, None, etc.) - e.g. from malformed/error
if not isinstance(analyze_results, list):
return _fail_on_invalid_response(
f"unexpected type {type(analyze_results).__name__} (expected list or dict), response: {str(analyze_results)[:200]}"
)

# Normal case: list of results
final_results = []
for item in analyze_results:
if not isinstance(item, dict):
verbose_proxy_logger.warning(
"Skipping invalid Presidio result item (expected dict, got %s): %s",
type(item).__name__,
str(item)[:100],
)
continue
try:
final_results.append(PresidioAnalyzeResponseItem(**item))
except TypeError as te:
# Handle case where item is not a dict (shouldn't happen, but be defensive)
except Exception as e:
verbose_proxy_logger.warning(
"Skipping invalid Presidio result item: %s (error: %s)",
"Failed to parse Presidio result item: %s (error: %s)",
item,
te,
e,
)
continue
return final_results
Expand Down
167 changes: 147 additions & 20 deletions tests/test_litellm/proxy/guardrails/guardrail_hooks/test_presidio.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
import asyncio
import os
import sys
from contextlib import asynccontextmanager
from unittest.mock import MagicMock, patch

import pytest
Expand All @@ -18,10 +19,41 @@
from litellm.proxy.guardrails.guardrail_hooks.presidio import (
_OPTIONAL_PresidioPIIMasking,
)
from litellm.exceptions import GuardrailRaisedException
from litellm.types.guardrails import LitellmParams, PiiAction, PiiEntityType
from litellm.types.utils import Choices, Message, ModelResponse


def _make_mock_session_iterator(json_response):
"""Create a mock _get_session_iterator that yields a session returning json_response."""

@asynccontextmanager
async def mock_iterator():
class MockResponse:
async def json(self):
return json_response

async def __aenter__(self):
return self

async def __aexit__(self, *args):
pass

class MockSession:
def post(self, *args, **kwargs):
return MockResponse()

async def __aenter__(self):
return self

async def __aexit__(self, *args):
pass

yield MockSession()

return mock_iterator


@pytest.fixture
def presidio_guardrail():
"""Create a Presidio guardrail instance for testing"""
Expand Down Expand Up @@ -889,37 +921,132 @@ async def test_analyze_text_error_dict_handling():
output_parse_pii=False,
)

# Mock the HTTP response to return error dict
class MockResponse:
async def json(self):
return {"error": "No text provided"}

async def __aenter__(self):
return self
with patch.object(
presidio,
"_get_session_iterator",
_make_mock_session_iterator({"error": "No text provided"}),
):
result = await presidio.analyze_text(
text="some text",
presidio_config=None,
request_data={},
)
assert result == [], "Error dict should be handled gracefully"

async def __aexit__(self, *args):
pass
print("✓ analyze_text error dict handling test passed")

class MockSession:
def post(self, *args, **kwargs):
return MockResponse()

async def __aenter__(self):
return self
@pytest.mark.asyncio
async def test_analyze_text_string_response_handling():
"""
Test that analyze_text handles string responses from Presidio API.

async def __aexit__(self, *args):
pass
When Presidio returns a string (e.g. error message from websearch/hosted models),
should handle gracefully instead of crashing with TypeError about mapping vs str.
"""
presidio = _OPTIONAL_PresidioPIIMasking(
presidio_analyzer_api_base="http://mock-presidio:5002/",
presidio_anonymizer_api_base="http://mock-presidio:5001/",
output_parse_pii=False,
)

with patch("aiohttp.ClientSession", return_value=MockSession()):
with patch.object(
presidio,
"_get_session_iterator",
_make_mock_session_iterator("Internal Server Error"),
):
result = await presidio.analyze_text(
text="some text",
presidio_config=None,
request_data={},
)
# Should return empty list when error dict is received
assert result == [], "Error dict should be handled gracefully"
assert result == [], "String response should be handled gracefully"

print("✓ analyze_text error dict handling test passed")

@pytest.mark.asyncio
async def test_analyze_text_invalid_response_raises_when_block_configured():
"""
When pii_entities_config has BLOCK and Presidio returns invalid response,
should raise GuardrailRaisedException (fail-closed) rather than silently allowing content.
"""
presidio = _OPTIONAL_PresidioPIIMasking(
presidio_analyzer_api_base="http://mock-presidio:5002/",
presidio_anonymizer_api_base="http://mock-presidio:5001/",
output_parse_pii=False,
pii_entities_config={PiiEntityType.CREDIT_CARD: PiiAction.BLOCK},
)

with patch.object(
presidio,
"_get_session_iterator",
_make_mock_session_iterator("Internal Server Error"),
):
with pytest.raises(GuardrailRaisedException) as exc_info:
await presidio.analyze_text(
text="some text",
presidio_config=None,
request_data={},
)
assert "BLOCK" in str(exc_info.value) or "Presidio" in str(exc_info.value)


@pytest.mark.asyncio
async def test_analyze_text_invalid_response_raises_when_mask_configured():
"""
When pii_entities_config has MASK and Presidio returns invalid response,
should raise GuardrailRaisedException (fail-closed) because PII masking is expected.
"""
presidio = _OPTIONAL_PresidioPIIMasking(
presidio_analyzer_api_base="http://mock-presidio:5002/",
presidio_anonymizer_api_base="http://mock-presidio:5001/",
output_parse_pii=False,
pii_entities_config={PiiEntityType.CREDIT_CARD: PiiAction.MASK},
)

with patch.object(
presidio,
"_get_session_iterator",
_make_mock_session_iterator("Internal Server Error"),
):
with pytest.raises(GuardrailRaisedException) as exc_info:
await presidio.analyze_text(
text="some text",
presidio_config=None,
request_data={},
)
assert "PII protection is configured" in str(exc_info.value)


@pytest.mark.asyncio
async def test_analyze_text_list_with_non_dict_items():
"""
Test that analyze_text skips non-dict items in the result list.

When Presidio returns a list containing strings (malformed response),
should skip invalid items and return parsed valid ones.
"""
presidio = _OPTIONAL_PresidioPIIMasking(
presidio_analyzer_api_base="http://mock-presidio:5002/",
presidio_anonymizer_api_base="http://mock-presidio:5001/",
output_parse_pii=False,
)

json_response = [
{"entity_type": "PERSON", "start": 0, "end": 5, "score": 0.9},
"invalid_string_item",
{"entity_type": "EMAIL", "start": 10, "end": 25, "score": 0.85},
]
with patch.object(
presidio, "_get_session_iterator", _make_mock_session_iterator(json_response)
):
result = await presidio.analyze_text(
text="some text",
presidio_config=None,
request_data={},
)
assert len(result) == 2, "Should parse 2 valid dict items and skip the string"
assert result[0].get("entity_type") == "PERSON"
assert result[1].get("entity_type") == "EMAIL"


@pytest.mark.asyncio
Expand Down
Loading