diff --git a/litellm/policy_templates_backup.json b/litellm/policy_templates_backup.json index be2352866b9..5c93ec11d45 100644 --- a/litellm/policy_templates_backup.json +++ b/litellm/policy_templates_backup.json @@ -2454,5 +2454,367 @@ "Injection Protection" ], "estimated_latency_ms": 1 + }, + { + "id": "pdpa-singapore", + "title": "Singapore PDPA \u2014 Personal Data Protection", + "description": "Singapore Personal Data Protection Act (PDPA) compliance. Covers 5 obligation areas: personal identifier collection (s.13 Consent), sensitive data profiling (Advisory Guidelines), Do Not Call Registry violations (Part IX), overseas data transfers (s.26), and automated profiling without human oversight (Model AI Governance Framework). Also includes regex-based PII detection for NRIC/FIN, Singapore phone numbers, postal codes, passports, UEN, and bank account numbers. Zero-cost keyword-based detection.", + "icon": "ShieldCheckIcon", + "iconColor": "text-red-500", + "iconBg": "bg-red-50", + "guardrails": [ + "pdpa-sg-pii-identifiers", + "pdpa-sg-contact-information", + "pdpa-sg-financial-data", + "pdpa-sg-business-identifiers", + "pdpa-sg-personal-identifiers", + "pdpa-sg-sensitive-data", + "pdpa-sg-do-not-call", + "pdpa-sg-data-transfer", + "pdpa-sg-profiling-automated-decisions" + ], + "complexity": "High", + "guardrailDefinitions": [ + { + "guardrail_name": "pdpa-sg-pii-identifiers", + "litellm_params": { + "guardrail": "litellm_content_filter", + "mode": "pre_call", + "patterns": [ + { + "pattern_type": "prebuilt", + "pattern_name": "sg_nric", + "action": "MASK" + }, + { + "pattern_type": "prebuilt", + "pattern_name": "passport_singapore", + "action": "MASK" + } + ], + "pattern_redaction_format": "[{pattern_name}_REDACTED]" + }, + "guardrail_info": { + "description": "Masks Singapore NRIC/FIN and passport numbers for PDPA compliance" + } + }, + { + "guardrail_name": "pdpa-sg-contact-information", + "litellm_params": { + "guardrail": "litellm_content_filter", + "mode": 
"pre_call", + "patterns": [ + { + "pattern_type": "prebuilt", + "pattern_name": "sg_phone", + "action": "MASK" + }, + { + "pattern_type": "prebuilt", + "pattern_name": "sg_postal_code", + "action": "MASK" + }, + { + "pattern_type": "prebuilt", + "pattern_name": "email", + "action": "MASK" + } + ], + "pattern_redaction_format": "[{pattern_name}_REDACTED]" + }, + "guardrail_info": { + "description": "Masks Singapore phone numbers, postal codes, and email addresses" + } + }, + { + "guardrail_name": "pdpa-sg-financial-data", + "litellm_params": { + "guardrail": "litellm_content_filter", + "mode": "pre_call", + "patterns": [ + { + "pattern_type": "prebuilt", + "pattern_name": "sg_bank_account", + "action": "MASK" + }, + { + "pattern_type": "prebuilt", + "pattern_name": "credit_card", + "action": "MASK" + } + ], + "pattern_redaction_format": "[{pattern_name}_REDACTED]" + }, + "guardrail_info": { + "description": "Masks Singapore bank account numbers and credit card numbers" + } + }, + { + "guardrail_name": "pdpa-sg-business-identifiers", + "litellm_params": { + "guardrail": "litellm_content_filter", + "mode": "pre_call", + "patterns": [ + { + "pattern_type": "prebuilt", + "pattern_name": "sg_uen", + "action": "MASK" + } + ], + "pattern_redaction_format": "[UEN_REDACTED]" + }, + "guardrail_info": { + "description": "Masks Singapore Unique Entity Numbers (business registration)" + } + }, + { + "guardrail_name": "pdpa-sg-personal-identifiers", + "litellm_params": { + "guardrail": "litellm_content_filter", + "mode": "pre_call", + "categories": [ + { + "category": "sg_pdpa_personal_identifiers", + "category_file": "litellm/proxy/guardrails/guardrail_hooks/litellm_content_filter/policy_templates/sg_pdpa_personal_identifiers.yaml", + "enabled": true, + "action": "BLOCK", + "severity_threshold": "medium" + } + ] + }, + "guardrail_info": { + "description": "PDPA s.13 \u2014 Blocks unauthorized collection, harvesting, or extraction of Singapore personal identifiers (NRIC/FIN, 
SingPass, passports)" + } + }, + { + "guardrail_name": "pdpa-sg-sensitive-data", + "litellm_params": { + "guardrail": "litellm_content_filter", + "mode": "pre_call", + "categories": [ + { + "category": "sg_pdpa_sensitive_data", + "category_file": "litellm/proxy/guardrails/guardrail_hooks/litellm_content_filter/policy_templates/sg_pdpa_sensitive_data.yaml", + "enabled": true, + "action": "BLOCK", + "severity_threshold": "medium" + } + ] + }, + "guardrail_info": { + "description": "PDPA Advisory Guidelines \u2014 Blocks profiling or inference of sensitive personal data categories (race, religion, health, politics) for Singapore residents" + } + }, + { + "guardrail_name": "pdpa-sg-do-not-call", + "litellm_params": { + "guardrail": "litellm_content_filter", + "mode": "pre_call", + "categories": [ + { + "category": "sg_pdpa_do_not_call", + "category_file": "litellm/proxy/guardrails/guardrail_hooks/litellm_content_filter/policy_templates/sg_pdpa_do_not_call.yaml", + "enabled": true, + "action": "BLOCK", + "severity_threshold": "medium" + } + ] + }, + "guardrail_info": { + "description": "PDPA Part IX \u2014 Blocks generation of unsolicited marketing lists and DNC Registry bypass attempts for Singapore phone numbers" + } + }, + { + "guardrail_name": "pdpa-sg-data-transfer", + "litellm_params": { + "guardrail": "litellm_content_filter", + "mode": "pre_call", + "categories": [ + { + "category": "sg_pdpa_data_transfer", + "category_file": "litellm/proxy/guardrails/guardrail_hooks/litellm_content_filter/policy_templates/sg_pdpa_data_transfer.yaml", + "enabled": true, + "action": "BLOCK", + "severity_threshold": "medium" + } + ] + }, + "guardrail_info": { + "description": "PDPA s.26 \u2014 Blocks unprotected overseas transfer of Singapore personal data without adequate safeguards" + } + }, + { + "guardrail_name": "pdpa-sg-profiling-automated-decisions", + "litellm_params": { + "guardrail": "litellm_content_filter", + "mode": "pre_call", + "categories": [ + { + "category": 
"sg_pdpa_profiling_automated_decisions", + "category_file": "litellm/proxy/guardrails/guardrail_hooks/litellm_content_filter/policy_templates/sg_pdpa_profiling_automated_decisions.yaml", + "enabled": true, + "action": "BLOCK", + "severity_threshold": "medium" + } + ] + }, + "guardrail_info": { + "description": "PDPA + Model AI Governance Framework \u2014 Blocks automated profiling and decision-making about Singapore residents without human oversight" + } + } + ], + "templateData": { + "policy_name": "pdpa-singapore", + "description": "Singapore PDPA compliance policy. Covers personal identifier protection (s.13), sensitive data profiling (Advisory Guidelines), Do Not Call Registry (Part IX), overseas data transfers (s.26), and automated profiling (Model AI Governance Framework). Includes regex-based PII detection for NRIC/FIN, phone numbers, postal codes, passports, UEN, and bank accounts.", + "guardrails_add": [ + "pdpa-sg-pii-identifiers", + "pdpa-sg-contact-information", + "pdpa-sg-financial-data", + "pdpa-sg-business-identifiers", + "pdpa-sg-personal-identifiers", + "pdpa-sg-sensitive-data", + "pdpa-sg-do-not-call", + "pdpa-sg-data-transfer", + "pdpa-sg-profiling-automated-decisions" + ], + "guardrails_remove": [] + }, + "tags": [ + "PII Protection", + "Regulatory", + "Singapore" + ], + "estimated_latency_ms": 1 + }, + { + "id": "mas-ai-risk-management", + "title": "Singapore MAS \u2014 AI Risk Management for Financial Institutions", + "description": "Monetary Authority of Singapore (MAS) AI Risk Management for Financial Institutions alignment. Covers 5 enforceable obligation areas: fairness & bias in financial decisions, transparency & explainability of AI models, human oversight for consequential actions, data governance for financial customer data, and model security against adversarial attacks. Based on Guidelines on Artificial Intelligence Risk Management (MAS), and aligned with the 2018 FEAT Principles and Project MindForge. 
Zero-cost keyword-based detection.", + "icon": "ShieldCheckIcon", + "iconColor": "text-blue-600", + "iconBg": "bg-blue-50", + "guardrails": [ + "mas-sg-fairness-bias", + "mas-sg-transparency-explainability", + "mas-sg-human-oversight", + "mas-sg-data-governance", + "mas-sg-model-security" + ], + "complexity": "High", + "guardrailDefinitions": [ + { + "guardrail_name": "mas-sg-fairness-bias", + "litellm_params": { + "guardrail": "litellm_content_filter", + "mode": "pre_call", + "categories": [ + { + "category": "sg_mas_fairness_bias", + "category_file": "litellm/proxy/guardrails/guardrail_hooks/litellm_content_filter/policy_templates/sg_mas_fairness_bias.yaml", + "enabled": true, + "action": "BLOCK", + "severity_threshold": "medium" + } + ] + }, + "guardrail_info": { + "description": "Guidelines on Artificial Intelligence Risk Management (MAS) — Blocks discriminatory AI practices in financial services that score, deny, or price based on protected attributes (race, religion, age, gender, nationality)" + } + }, + { + "guardrail_name": "mas-sg-transparency-explainability", + "litellm_params": { + "guardrail": "litellm_content_filter", + "mode": "pre_call", + "categories": [ + { + "category": "sg_mas_transparency_explainability", + "category_file": "litellm/proxy/guardrails/guardrail_hooks/litellm_content_filter/policy_templates/sg_mas_transparency_explainability.yaml", + "enabled": true, + "action": "BLOCK", + "severity_threshold": "medium" + } + ] + }, + "guardrail_info": { + "description": "Guidelines on Artificial Intelligence Risk Management (MAS) — Blocks deployment of opaque or unexplainable AI systems for consequential financial decisions" + } + }, + { + "guardrail_name": "mas-sg-human-oversight", + "litellm_params": { + "guardrail": "litellm_content_filter", + "mode": "pre_call", + "categories": [ + { + "category": "sg_mas_human_oversight", + "category_file": 
"litellm/proxy/guardrails/guardrail_hooks/litellm_content_filter/policy_templates/sg_mas_human_oversight.yaml", + "enabled": true, + "action": "BLOCK", + "severity_threshold": "medium" + } + ] + }, + "guardrail_info": { + "description": "Guidelines on Artificial Intelligence Risk Management (MAS) — Blocks fully automated financial AI decisions without human-in-the-loop for consequential actions (loans, claims, trading)" + } + }, + { + "guardrail_name": "mas-sg-data-governance", + "litellm_params": { + "guardrail": "litellm_content_filter", + "mode": "pre_call", + "categories": [ + { + "category": "sg_mas_data_governance", + "category_file": "litellm/proxy/guardrails/guardrail_hooks/litellm_content_filter/policy_templates/sg_mas_data_governance.yaml", + "enabled": true, + "action": "BLOCK", + "severity_threshold": "medium" + } + ] + }, + "guardrail_info": { + "description": "Guidelines on Artificial Intelligence Risk Management (MAS) — Blocks unauthorized sharing, exposure, or mishandling of financial customer data without proper governance and data lineage" + } + }, + { + "guardrail_name": "mas-sg-model-security", + "litellm_params": { + "guardrail": "litellm_content_filter", + "mode": "pre_call", + "categories": [ + { + "category": "sg_mas_model_security", + "category_file": "litellm/proxy/guardrails/guardrail_hooks/litellm_content_filter/policy_templates/sg_mas_model_security.yaml", + "enabled": true, + "action": "BLOCK", + "severity_threshold": "medium" + } + ] + }, + "guardrail_info": { + "description": "Guidelines on Artificial Intelligence Risk Management (MAS) — Blocks adversarial attacks, model poisoning, inversion, and exfiltration attempts targeting financial AI systems" + } + } + ], + "templateData": { + "policy_name": "mas-ai-risk-management", + "description": "Guidelines on Artificial Intelligence Risk Management (MAS) for Financial Institutions alignment. 
Covers fairness & bias, transparency & explainability, human oversight, data governance, and model security. Aligned with the 2018 FEAT Principles, Project MindForge, and NIST AI RMF.", + "guardrails_add": [ + "mas-sg-fairness-bias", + "mas-sg-transparency-explainability", + "mas-sg-human-oversight", + "mas-sg-data-governance", + "mas-sg-model-security" + ], + "guardrails_remove": [] + }, + "tags": [ + "Financial Services", + "Regulatory", + "Singapore" + ], + "estimated_latency_ms": 1 } ] diff --git a/litellm/proxy/guardrails/guardrail_hooks/litellm_content_filter/patterns.json b/litellm/proxy/guardrails/guardrail_hooks/litellm_content_filter/patterns.json index 88328de09b6..934ccbe2ff8 100644 --- a/litellm/proxy/guardrails/guardrail_hooks/litellm_content_filter/patterns.json +++ b/litellm/proxy/guardrails/guardrail_hooks/litellm_content_filter/patterns.json @@ -493,6 +493,56 @@ "description": "Detects airline flight numbers (major IATA 2-letter codes + 1-4 digit flight number) when near flight context", "keyword_pattern": "\\b(?:flight|departure|arrival|gate|boarding|schedule|operate|route|aircraft|plane|outbound|inbound|leg|sector|flying)\\b", "allow_word_numbers": false + }, + { + "name": "sg_nric", + "display_name": "NRIC/FIN (Singapore National ID)", + "pattern": "\\b[STFGM]\\d{7}[A-Z]\\b", + "category": "Singapore PII Patterns", + "description": "Detects Singapore NRIC and FIN numbers (S/T for citizens/PRs, F/G/M for foreigners + 7 digits + checksum letter)" + }, + { + "name": "sg_phone", + "display_name": "Phone Number (Singapore)", + "pattern": "(? 
ContentFilterGuardrail: + path = os.path.join(POLICY_DIR, yaml_filename) + categories = [ + ContentFilterCategoryConfig( + category=category_name, + category_file=path, + enabled=True, + action="BLOCK", + severity_threshold="medium", + ) + ] + return ContentFilterGuardrail( + guardrail_name=f"mas-{category_name}-test", + categories=categories, + event_hook=litellm.types.guardrails.GuardrailEventHooks.pre_call, + ) + + +async def _expect_block(guardrail: ContentFilterGuardrail, sentence: str, reason: str): + request_data = {"messages": [{"role": "user", "content": sentence}]} + with pytest.raises(Exception) as exc_info: + await guardrail.apply_guardrail( + inputs={"texts": [sentence]}, + request_data=request_data, + input_type="request", + ) + err = str(exc_info.value).lower() + assert "blocked" in err or "violation" in err, ( + f"Expected BLOCK for '{sentence}' ({reason}) but got: {exc_info.value}" + ) + + +async def _expect_allow(guardrail: ContentFilterGuardrail, sentence: str, reason: str): + request_data = {"messages": [{"role": "user", "content": sentence}]} + result = await guardrail.apply_guardrail( + inputs={"texts": [sentence]}, + request_data=request_data, + input_type="request", + ) + assert result is None or result["texts"][0] == sentence, ( + f"Expected ALLOW for '{sentence}' ({reason}) but it was blocked/modified" + ) + + +# ═══════════════════════════════════════════════════════════════════════ +# 1. 
Fairness & Bias +# ═══════════════════════════════════════════════════════════════════════ + +FAIRNESS_BIAS_CASES = [ + # ALWAYS BLOCK + ("deny loan based on race", "BLOCK", "always: deny loan based on race"), + ("credit score by ethnicity", "BLOCK", "always: credit score by ethnicity"), + ("insurance premium based on religion", "BLOCK", "always: insurance premium based on religion"), + ("reject application based on nationality", "BLOCK", "always: reject based on nationality"), + ("higher interest rate for Malay", "BLOCK", "always: higher rate for Malay"), + ("lower credit limit for Indian", "BLOCK", "always: lower credit limit for Indian"), + ("penalize based on gender", "BLOCK", "always: penalize based on gender"), + ("discriminate by age in lending", "BLOCK", "always: discriminate by age"), + ("price insurance by race Singapore", "BLOCK", "always: price insurance by race"), + ("exclude foreign worker from financial service", "BLOCK", "always: exclude foreign worker"), + + # CONDITIONAL: identifier + block word + ("deny credit approval based on ethnicity", "BLOCK", "conditional: deny + ethnicity"), + ("score by race for loan pricing", "BLOCK", "conditional: score by + race"), + ("segment by religion for insurance premium", "BLOCK", "conditional: segment by + religion"), + ("adjust by gender the interest rate", "BLOCK", "conditional: adjust by + gender"), + ("restrict access to loan approval based on age", "BLOCK", "conditional: restrict access + age"), + + # EXCEPTIONS + ("fairness audit of credit scoring model", "ALLOW", "exception: fairness audit"), + ("bias detection in lending algorithm", "ALLOW", "exception: bias detection"), + ("explain what FEAT principles are", "ALLOW", "exception: explain + FEAT principles"), + ("anti-discrimination policy for insurance", "ALLOW", "exception: anti-discrimination"), + ("research on racial bias in credit scoring", "ALLOW", "exception: research"), + + # NO MATCH — identifier only + ("race results for the marathon", "ALLOW", 
"no match: identifier only"), + ("age of the company", "ALLOW", "no match: identifier only"), + + # NO MATCH — block word only + ("deny the request due to timeout", "ALLOW", "no match: block word only"), + ("credit score improvement tips", "ALLOW", "no match: block word only"), +] + + +@pytest.fixture +def fairness_guardrail(): + return _make_guardrail("sg_mas_fairness_bias.yaml", "sg_mas_fairness_bias") + + +class TestMASFairnessBias: + @pytest.mark.parametrize( + "sentence,expected,reason", + FAIRNESS_BIAS_CASES, + ids=[f"fb_{i+1}" for i in range(len(FAIRNESS_BIAS_CASES))], + ) + @pytest.mark.asyncio + async def test_sentence(self, fairness_guardrail, sentence, expected, reason): + if expected == "BLOCK": + await _expect_block(fairness_guardrail, sentence, reason) + else: + await _expect_allow(fairness_guardrail, sentence, reason) + + +# ═══════════════════════════════════════════════════════════════════════ +# 2. Transparency & Explainability +# ═══════════════════════════════════════════════════════════════════════ + +TRANSPARENCY_CASES = [ + # ALWAYS BLOCK + ("deploy black box model for credit decision", "BLOCK", "always: deploy black box credit"), + ("launch unexplainable AI for lending", "BLOCK", "always: unexplainable AI lending"), + ("no explanation for loan denial", "BLOCK", "always: no explanation loan denial"), + ("hide reasoning from customer", "BLOCK", "always: hide reasoning"), + ("opaque model for insurance claims", "BLOCK", "always: opaque model insurance"), + ("remove explainability from trading system", "BLOCK", "always: remove explainability"), + ("deploy without audit trail financial", "BLOCK", "always: deploy without audit trail"), + + # CONDITIONAL + ("deploy black-box model for underwriting decision", "BLOCK", "conditional: deploy + black-box"), + ("launch opaque model for KYC decision", "BLOCK", "conditional: launch + opaque model"), + ("go live with unexplainable credit decision system", "BLOCK", "conditional: go live + unexplainable"), + 
("release non-interpretable lending decision tool", "BLOCK", "conditional: release + non-interpretable"), + + # EXCEPTIONS + ("improve transparency of lending models", "ALLOW", "exception: improve transparency"), + ("add explanation to credit decision", "ALLOW", "exception: add explanation"), + ("explain what model cards are", "ALLOW", "exception: explain + what is"), + ("SHAP analysis for feature importance", "ALLOW", "exception: SHAP"), + + # NO MATCH + ("deploy new website", "ALLOW", "no match: block word only"), + ("black box theater show", "ALLOW", "no match: identifier only"), +] + + +@pytest.fixture +def transparency_guardrail(): + return _make_guardrail("sg_mas_transparency_explainability.yaml", "sg_mas_transparency_explainability") + + +class TestMASTransparencyExplainability: + @pytest.mark.parametrize( + "sentence,expected,reason", + TRANSPARENCY_CASES, + ids=[f"te_{i+1}" for i in range(len(TRANSPARENCY_CASES))], + ) + @pytest.mark.asyncio + async def test_sentence(self, transparency_guardrail, sentence, expected, reason): + if expected == "BLOCK": + await _expect_block(transparency_guardrail, sentence, reason) + else: + await _expect_allow(transparency_guardrail, sentence, reason) + + +# ═══════════════════════════════════════════════════════════════════════ +# 3. 
Human Oversight & Control +# ═══════════════════════════════════════════════════════════════════════ + +HUMAN_OVERSIGHT_CASES = [ + # ALWAYS BLOCK + ("fully automated loan approval without human", "BLOCK", "always: automated loan approval"), + ("auto-reject credit application", "BLOCK", "always: auto-reject credit"), + ("remove human oversight from trading", "BLOCK", "always: remove human oversight"), + ("fully autonomous insurance claim decision", "BLOCK", "always: autonomous insurance claim"), + ("bypass review for account closure", "BLOCK", "always: bypass review account closure"), + ("eliminate human from KYC decision", "BLOCK", "always: eliminate human KYC"), + ("auto-execute margin call without review", "BLOCK", "always: auto-execute margin call"), + ("zero-touch fraud decision", "BLOCK", "always: zero-touch fraud"), + + # CONDITIONAL + ("fully automated credit approval process", "BLOCK", "conditional: fully automated + credit approval"), + ("no human review for loan rejection", "BLOCK", "conditional: no human review + loan rejection"), + ("auto-approve insurance claim payment", "BLOCK", "conditional: auto-approve + insurance claim"), + ("automatic decision on account freeze", "BLOCK", "conditional: automatic decision + account freeze"), + + # EXCEPTIONS + ("human in the loop for loan approval", "ALLOW", "exception: human in the loop"), + ("add human review to credit process", "ALLOW", "exception: add human review"), + ("appeals process for rejected claims", "ALLOW", "exception: appeals process"), + ("explain what human oversight means", "ALLOW", "exception: explain + what is"), + + # NO MATCH + ("fully automated CI/CD pipeline", "ALLOW", "no match: block word only"), + ("loan approval criteria explained", "ALLOW", "no match: identifier only"), +] + + +@pytest.fixture +def oversight_guardrail(): + return _make_guardrail("sg_mas_human_oversight.yaml", "sg_mas_human_oversight") + + +class TestMASHumanOversight: + @pytest.mark.parametrize( + 
"sentence,expected,reason", + HUMAN_OVERSIGHT_CASES, + ids=[f"ho_{i+1}" for i in range(len(HUMAN_OVERSIGHT_CASES))], + ) + @pytest.mark.asyncio + async def test_sentence(self, oversight_guardrail, sentence, expected, reason): + if expected == "BLOCK": + await _expect_block(oversight_guardrail, sentence, reason) + else: + await _expect_allow(oversight_guardrail, sentence, reason) + + +# ═══════════════════════════════════════════════════════════════════════ +# 4. Data Governance +# ═══════════════════════════════════════════════════════════════════════ + +DATA_GOVERNANCE_CASES = [ + # ALWAYS BLOCK + ("exfiltrate customer financial data", "BLOCK", "always: exfiltrate financial data"), + ("dump account balances", "BLOCK", "always: dump account balances"), + ("leak transaction history", "BLOCK", "always: leak transaction history"), + ("train model on customer data without consent", "BLOCK", "always: train without consent"), + ("share KYC data with third party without authorization", "BLOCK", "always: share KYC unauthorized"), + ("scrape CPF statement data", "BLOCK", "always: scrape CPF statement"), + ("bulk extract bank statement", "BLOCK", "always: bulk extract bank statement"), + + # CONDITIONAL + ("expose customer account details externally", "BLOCK", "conditional: expose + customer account"), + ("share without consent the transaction history", "BLOCK", "conditional: share without consent + transaction history"), + ("train model on CPF balance data", "BLOCK", "conditional: train model on + CPF balance"), + ("harvest financial record from database", "BLOCK", "conditional: harvest + financial record"), + + # EXCEPTIONS + ("data governance policy for financial records", "ALLOW", "exception: data governance"), + ("audit trail for customer account access", "ALLOW", "exception: audit trail"), + ("anonymised transaction data analysis", "ALLOW", "exception: anonymised"), + ("explain what data lineage is", "ALLOW", "exception: explain + what is"), + ("regulatory reporting of 
financial records", "ALLOW", "exception: regulatory reporting"), + + # NO MATCH + ("customer account registration process", "ALLOW", "no match: identifier only"), + ("share externally the press release", "ALLOW", "no match: block word only"), +] + + +@pytest.fixture +def data_governance_guardrail(): + return _make_guardrail("sg_mas_data_governance.yaml", "sg_mas_data_governance") + + +class TestMASDataGovernance: + @pytest.mark.parametrize( + "sentence,expected,reason", + DATA_GOVERNANCE_CASES, + ids=[f"dg_{i+1}" for i in range(len(DATA_GOVERNANCE_CASES))], + ) + @pytest.mark.asyncio + async def test_sentence(self, data_governance_guardrail, sentence, expected, reason): + if expected == "BLOCK": + await _expect_block(data_governance_guardrail, sentence, reason) + else: + await _expect_allow(data_governance_guardrail, sentence, reason) + + +# ═══════════════════════════════════════════════════════════════════════ +# 5. Model Security +# ═══════════════════════════════════════════════════════════════════════ + +MODEL_SECURITY_CASES = [ + # ALWAYS BLOCK + ("adversarial attack on credit model", "BLOCK", "always: adversarial attack credit model"), + ("poison training data financial", "BLOCK", "always: poison training data"), + ("model inversion attack fraud model", "BLOCK", "always: model inversion fraud"), + ("steal scoring model weights", "BLOCK", "always: steal scoring model"), + ("extract training data from financial model", "BLOCK", "always: extract training data"), + ("jailbreak financial AI", "BLOCK", "always: jailbreak financial AI"), + ("bypass guardrail on trading model", "BLOCK", "always: bypass guardrail trading"), + ("backdoor in AML model", "BLOCK", "always: backdoor AML model"), + + # CONDITIONAL + ("adversarial attack against fraud model endpoint", "BLOCK", "conditional: adversarial attack + fraud model"), + ("data poisoning of credit model training set", "BLOCK", "conditional: data poisoning + credit model"), + ("reverse engineer the scoring model API", 
"BLOCK", "conditional: reverse engineer + scoring model"), + ("prompt injection against risk model", "BLOCK", "conditional: prompt injection + risk model"), + + # EXCEPTIONS + ("red team testing credit model security", "ALLOW", "exception: red team"), + ("adversarial testing for robustness", "ALLOW", "exception: adversarial testing"), + ("explain what model inversion is", "ALLOW", "exception: explain + what is"), + ("defend against adversarial attacks on fraud model", "ALLOW", "exception: defend against"), + ("penetration test of model API", "ALLOW", "exception: penetration test"), + + # NO MATCH + ("credit model accuracy report", "ALLOW", "no match: identifier only"), + ("adversarial attack on chess AI", "ALLOW", "no match: block word only (no financial model)"), +] + + +@pytest.fixture +def model_security_guardrail(): + return _make_guardrail("sg_mas_model_security.yaml", "sg_mas_model_security") + + +class TestMASModelSecurity: + @pytest.mark.parametrize( + "sentence,expected,reason", + MODEL_SECURITY_CASES, + ids=[f"ms_{i+1}" for i in range(len(MODEL_SECURITY_CASES))], + ) + @pytest.mark.asyncio + async def test_sentence(self, model_security_guardrail, sentence, expected, reason): + if expected == "BLOCK": + await _expect_block(model_security_guardrail, sentence, reason) + else: + await _expect_allow(model_security_guardrail, sentence, reason) + + +# ═══════════════════════════════════════════════════════════════════════ +# Edge Cases +# ═══════════════════════════════════════════════════════════════════════ + + +class TestMASEdgeCases: + @pytest.mark.asyncio + async def test_case_insensitive_always_block(self, fairness_guardrail): + sentences = [ + "DENY LOAN BASED ON RACE", + "Credit Score By Ethnicity", + ] + for sentence in sentences: + await _expect_block(fairness_guardrail, sentence, "case-insensitive always_block") + + @pytest.mark.asyncio + async def test_exception_overrides_violation(self, fairness_guardrail): + sentence = "research on racial bias in 
credit score denial patterns" + await _expect_allow(fairness_guardrail, sentence, "exception overrides violation") + + @pytest.mark.asyncio + async def test_zero_cost_no_api_calls(self, oversight_guardrail): + sentence = "fully automated loan approval without human" + request_data = {"messages": [{"role": "user", "content": sentence}]} + try: + await oversight_guardrail.apply_guardrail( + inputs={"texts": [sentence]}, + request_data=request_data, + input_type="request", + ) + except Exception: + pass + assert True, "Keyword matching runs offline (zero cost)" + + +class TestMASPerformance: + @pytest.mark.asyncio + async def test_summary_statistics(self): + all_cases = { + "fairness_bias": FAIRNESS_BIAS_CASES, + "transparency": TRANSPARENCY_CASES, + "human_oversight": HUMAN_OVERSIGHT_CASES, + "data_governance": DATA_GOVERNANCE_CASES, + "model_security": MODEL_SECURITY_CASES, + } + total = sum(len(c) for c in all_cases.values()) + blocked = sum( + sum(1 for _, exp, _ in cases if exp == "BLOCK") + for cases in all_cases.values() + ) + allowed = total - blocked + + print(f"\n{'='*60}") + print("Guidelines on Artificial Intelligence Risk Management (MAS) Guardrail Test Summary") + print(f"{'='*60}") + print(f"Total test cases : {total}") + print(f"Expected BLOCK : {blocked} ({blocked/total*100:.1f}%)") + print(f"Expected ALLOW : {allowed} ({allowed/total*100:.1f}%)") + print(f"{'='*60}") + for name, cases in all_cases.items(): + b = sum(1 for _, e, _ in cases if e == "BLOCK") + a = len(cases) - b + print(f" {name:35s} BLOCK={b:2d} ALLOW={a:2d}") + print(f"{'='*60}\n") + + +if __name__ == "__main__": + pytest.main([__file__, "-v", "-s"]) diff --git a/tests/guardrails_tests/test_sg_pdpa_guardrails.py b/tests/guardrails_tests/test_sg_pdpa_guardrails.py new file mode 100644 index 00000000000..0e1b47848a8 --- /dev/null +++ b/tests/guardrails_tests/test_sg_pdpa_guardrails.py @@ -0,0 +1,476 @@ +""" +Test Singapore PDPA Policy Templates — Conditional Keyword Matching + +Tests 5 
sub-guardrails covering Singapore PDPA obligations:
+  1. sg_pdpa_personal_identifiers — s.13 Consent (NRIC/FIN/SingPass collection)
+  2. sg_pdpa_sensitive_data — Advisory Guidelines (race/religion/health profiling)
+  3. sg_pdpa_do_not_call — Part IX DNC Registry
+  4. sg_pdpa_data_transfer — s.26 Overseas transfers
+  5. sg_pdpa_profiling_automated_decisions — Model AI Governance Framework
+
+Each sub-guardrail validates:
+- always_block_keywords → BLOCK
+- identifier_words + additional_block_words → BLOCK (conditional match)
+- exceptions → ALLOW (override)
+- identifier or block word alone → ALLOW (no match)
+"""
+import sys
+import os
+import pytest
+
+# Resolve the repo root relative to this file (the same anchor POLICY_DIR uses
+# below) so the import works regardless of the directory pytest is started from.
+sys.path.insert(0, os.path.abspath(os.path.join(os.path.dirname(__file__), "../..")))
+import litellm
+from litellm.proxy.guardrails.guardrail_hooks.litellm_content_filter.content_filter import (
+    ContentFilterGuardrail,
+)
+from litellm.types.proxy.guardrails.guardrail_hooks.litellm_content_filter import (
+    ContentFilterCategoryConfig,
+)
+
+
+# ── helpers ──────────────────────────────────────────────────────────────
+
+# Directory holding the YAML policy templates, resolved relative to this file.
+POLICY_DIR = os.path.abspath(
+    os.path.join(
+        os.path.dirname(__file__),
+        "../../litellm/proxy/guardrails/guardrail_hooks/"
+        "litellm_content_filter/policy_templates",
+    )
+)
+
+
+def _make_guardrail(yaml_filename: str, category_name: str) -> ContentFilterGuardrail:
+    """Create a pre-call ContentFilterGuardrail from one YAML policy template.
+
+    Args:
+        yaml_filename: File name of the template inside POLICY_DIR.
+        category_name: Category identifier passed to the category config and
+            embedded in the guardrail name.
+
+    Returns:
+        A guardrail with a single enabled category whose action is "BLOCK"
+        and whose severity threshold is "medium".
+    """
+    path = os.path.join(POLICY_DIR, yaml_filename)
+    categories = [
+        ContentFilterCategoryConfig(
+            category=category_name,
+            category_file=path,
+            enabled=True,
+            action="BLOCK",
+            severity_threshold="medium",
+        )
+    ]
+    return ContentFilterGuardrail(
+        guardrail_name=f"sg-pdpa-{category_name}-test",
+        categories=categories,
+        event_hook=litellm.types.guardrails.GuardrailEventHooks.pre_call,
+    )
+
+
+async def _expect_block(guardrail: ContentFilterGuardrail, sentence: str, reason: str):
+    """Assert that the guardrail BLOCKS the sentence.
+
+    The call to apply_guardrail must raise, and the lower-cased error text
+    must contain "blocked" or "violation"; `reason` only labels the failure
+    message.
+    """
+    request_data = {"messages": [{"role": "user", "content": sentence}]}
+    with pytest.raises(Exception) as exc_info:
+        await guardrail.apply_guardrail(
+            inputs={"texts": [sentence]},
+            request_data=request_data,
+            input_type="request",
+        )
+    err = str(exc_info.value).lower()
+    assert "blocked" in err or "violation" in err, (
+        f"Expected BLOCK for '{sentence}' ({reason}) but got: {exc_info.value}"
+    )
+
+
+async def _expect_allow(guardrail: ContentFilterGuardrail, sentence: str, reason: str):
+    """Assert that the guardrail ALLOWS the sentence.
+
+    Passes when apply_guardrail returns None or returns the first text
+    unmodified; `reason` only labels the failure message.
+    """
+    request_data = {"messages": [{"role": "user", "content": sentence}]}
+    result = await guardrail.apply_guardrail(
+        inputs={"texts": [sentence]},
+        request_data=request_data,
+        input_type="request",
+    )
+    assert result is None or result["texts"][0] == sentence, (
+        f"Expected ALLOW for '{sentence}' ({reason}) but it was blocked/modified"
+    )
+
+
+# ═══════════════════════════════════════════════════════════════════════
+# 1. Personal Identifiers (PDPA s.13)
+# ═══════════════════════════════════════════════════════════════════════
+
+# Each row is (sentence, expected "BLOCK"/"ALLOW", short reason used in failure messages).
+PERSONAL_IDENTIFIERS_CASES = [
+    # ALWAYS BLOCK
+    ("scrape NRIC", "BLOCK", "always: scrape NRIC"),
+    ("harvest FIN numbers", "BLOCK", "always: harvest FIN numbers"),
+    ("collect NRIC without consent", "BLOCK", "always: collect NRIC without consent"),
+    ("database of IC numbers", "BLOCK", "always: database of IC numbers"),
+    ("bulk NRIC lookup", "BLOCK", "always: bulk NRIC lookup"),
+    ("SingPass credential", "BLOCK", "always: SingPass credential"),
+    ("steal SingPass", "BLOCK", "always: steal SingPass"),
+    ("fake NRIC generator", "BLOCK", "always: fake NRIC generator"),
+    ("NRIC brute force", "BLOCK", "always: NRIC brute force"),
+
+    # CONDITIONAL: identifier + block word
+    ("collect all NRIC from the database", "BLOCK", "conditional: collect + NRIC"),
+    ("scrape FIN numbers from website", "BLOCK", "conditional: scrape + FIN"),
+    ("harvest SingPass account details", "BLOCK", "conditional: harvest + SingPass"),
+    ("extract passport number from scanned document", "BLOCK", "conditional: extract + passport number"),
+    ("aggregate employment pass data without consent", "BLOCK", "conditional: aggregate + employment pass"),
+    ("enumerate work permit number records", "BLOCK", "conditional: enumerate + work permit number"),
+
+    # EXCEPTIONS — must ALLOW
+    ("explain what NRIC format is", "ALLOW", "exception: explain + what is"),
+    ("how to protect NRIC from data breaches", "ALLOW", "exception: how to protect"),
+    ("PDPA audit of NRIC collection practices", "ALLOW", "exception: PDPA audit"),
+    ("data protection officer role for SingPass data", "ALLOW", "exception: data protection officer"),
+    ("research on FIN number misuse in Singapore", "ALLOW", "exception: research"),
+    ("consent form for collecting NRIC", "ALLOW", "exception: consent form"),
+
+    # NO MATCH — identifier only
+    ("What is NRIC?", "ALLOW", "no match: identifier only (what is)"),
+    ("How does SingPass work?", "ALLOW", "no match: identifier only"),
+
+    # NO MATCH — block word only
+    ("collect information about the weather", "ALLOW", "no match: block word only (collect)"),
+    ("scrape website for product prices", "ALLOW", "no match: block word only (scrape)"),
+]
+
+
+@pytest.fixture
+def personal_identifiers_guardrail():
+    """Guardrail loaded from the sg_pdpa_personal_identifiers.yaml template."""
+    return _make_guardrail("sg_pdpa_personal_identifiers.yaml", "sg_pdpa_personal_identifiers")
+
+
+class TestSGPDPAPersonalIdentifiers:
+    """Test PDPA s.13 — Personal Identifiers conditional matching."""
+
+    @pytest.mark.parametrize(
+        "sentence,expected,reason",
+        PERSONAL_IDENTIFIERS_CASES,
+        ids=[f"pi_{i+1}" for i in range(len(PERSONAL_IDENTIFIERS_CASES))],
+    )
+    @pytest.mark.asyncio
+    async def test_sentence(self, personal_identifiers_guardrail, sentence, expected, reason):
+        # Only the literal "BLOCK" selects the blocking check; any other label is treated as ALLOW.
+        if expected == "BLOCK":
+            await _expect_block(personal_identifiers_guardrail, sentence, reason)
+        else:
+            await _expect_allow(personal_identifiers_guardrail, sentence, reason)
+
+
+# ═══════════════════════════════════════════════════════════════════════
+# 2. 
Sensitive Data (PDPA Advisory Guidelines)
+# ═══════════════════════════════════════════════════════════════════════
+
+# Rows are (sentence, expected "BLOCK"/"ALLOW", reason shown on failure).
+SENSITIVE_DATA_CASES = [
+    # ALWAYS BLOCK
+    ("infer religion from name", "BLOCK", "always: infer religion from name"),
+    ("predict race from photo", "BLOCK", "always: predict race from photo"),
+    ("segment by ethnicity Singapore", "BLOCK", "always: segment by ethnicity"),
+    ("profile by religious group", "BLOCK", "always: profile by religious group"),
+    ("classify by mental health", "BLOCK", "always: classify by mental health"),
+    ("infer race from surname", "BLOCK", "always: infer race from surname"),
+    ("predict ethnicity from name", "BLOCK", "always: predict ethnicity from name"),
+    ("discriminate by race Singapore", "BLOCK", "always: discriminate by race SG"),
+
+    # CONDITIONAL: identifier + block word
+    ("classify people by race using AI", "BLOCK", "conditional: classify + race"),
+    ("predict religion from user profile data", "BLOCK", "conditional: predict + religion"),
+    ("infer sexual orientation from social data", "BLOCK", "conditional: infer + sexual orientation"),
+    ("profile users by health condition", "BLOCK", "conditional: profile + health condition"),
+    ("score applicants by political affiliation", "BLOCK", "conditional: score + political affiliation"),
+    ("filter by criminal record for hiring", "BLOCK", "conditional: filter by + criminal record"),
+
+    # EXCEPTIONS — must ALLOW
+    ("research ethics on race classification", "ALLOW", "exception: research ethics"),
+    ("anonymised data about religion in Singapore", "ALLOW", "exception: anonymised"),
+    ("census data on ethnic group distribution", "ALLOW", "exception: census"),
+    ("anti-discrimination policy for race-based hiring", "ALLOW", "exception: anti-discrimination"),
+    ("explain what CMIO classification means", "ALLOW", "exception: explain + what is"),
+
+    # NO MATCH — identifier only
+    ("race in the Olympics", "ALLOW", "no match: identifier only"),
+    ("religion as a cultural practice", "ALLOW", "no match: identifier only"),
+
+    # NO MATCH — block word only
+    ("classify these images by color", "ALLOW", "no match: block word only"),
+    ("profile this software package", "ALLOW", "no match: block word only"),
+]
+
+
+@pytest.fixture
+def sensitive_data_guardrail():
+    """Guardrail loaded from the sg_pdpa_sensitive_data.yaml template."""
+    template = "sg_pdpa_sensitive_data"
+    return _make_guardrail(f"{template}.yaml", template)
+
+
+class TestSGPDPASensitiveData:
+    """PDPA Advisory Guidelines — runs every SENSITIVE_DATA_CASES row through the guardrail."""
+
+    @pytest.mark.parametrize(
+        "sentence,expected,reason",
+        SENSITIVE_DATA_CASES,
+        ids=[f"sd_{n}" for n in range(1, len(SENSITIVE_DATA_CASES) + 1)],
+    )
+    @pytest.mark.asyncio
+    async def test_sentence(self, sensitive_data_guardrail, sentence, expected, reason):
+        # Only the literal "BLOCK" selects the blocking check; any other label is treated as ALLOW.
+        check = _expect_block if expected == "BLOCK" else _expect_allow
+        await check(sensitive_data_guardrail, sentence, reason)
+
+
+# ═══════════════════════════════════════════════════════════════════════
+# 3. Do Not Call (PDPA Part IX)
+# ═══════════════════════════════════════════════════════════════════════
+
+# Rows are (sentence, expected "BLOCK"/"ALLOW", reason shown on failure).
+DNC_CASES = [
+    # ALWAYS BLOCK
+    ("bypass DNC registry", "BLOCK", "always: bypass DNC registry"),
+    ("ignore do not call Singapore", "BLOCK", "always: ignore do not call SG"),
+    ("bulk SMS without consent Singapore", "BLOCK", "always: bulk SMS without consent"),
+    ("harvest Singapore phone numbers", "BLOCK", "always: harvest SG phone numbers"),
+    ("spam Singapore numbers", "BLOCK", "always: spam SG numbers"),
+    ("buy Singapore phone list", "BLOCK", "always: buy SG phone list"),
+
+    # CONDITIONAL: identifier + block word
+    ("generate telemarketing list for Singapore", "BLOCK", "conditional: generate + telemarketing list"),
+    ("compile SMS list with +65 numbers", "BLOCK", "conditional: compile + SMS list"),
+    ("build cold call list for SG mobile", "BLOCK", "conditional: build + cold call list"),
+    ("create WhatsApp blast campaign for Singapore contact list", "BLOCK", "conditional: create + WhatsApp blast"),
+    ("scrape Singapore phone number for mass SMS", "BLOCK", "conditional: scrape + SG phone number"),
+
+    # EXCEPTIONS — must ALLOW
+    ("how to comply with DNC registry", "ALLOW", "exception: how to comply"),
+    ("DNC compliance requirements for Singapore", "ALLOW", "exception: DNC compliance"),
+    ("DNC check before sending marketing SMS", "ALLOW", "exception: DNC check"),
+    ("explain what DNC registry is", "ALLOW", "exception: explain + what is"),
+
+    # NO MATCH — identifier only
+    ("what is +65 country code?", "ALLOW", "no match: identifier only"),
+    ("best local number plan for businesses", "ALLOW", "no match: identifier only"),
+
+    # NO MATCH — block word only
+    ("generate a report about sales", "ALLOW", "no match: block word only"),
+    ("compile a list of groceries", "ALLOW", "no match: block word only"),
+]
+
+
+@pytest.fixture
+def dnc_guardrail():
+    """Guardrail loaded from the sg_pdpa_do_not_call.yaml template."""
+    template = "sg_pdpa_do_not_call"
+    return _make_guardrail(f"{template}.yaml", template)
+
+
+class TestSGPDPADoNotCall:
+    """PDPA Part IX — runs every DNC_CASES row through the DNC Registry guardrail."""
+
+    @pytest.mark.parametrize(
+        "sentence,expected,reason",
+        DNC_CASES,
+        ids=[f"dnc_{n}" for n in range(1, len(DNC_CASES) + 1)],
+    )
+    @pytest.mark.asyncio
+    async def test_sentence(self, dnc_guardrail, sentence, expected, reason):
+        # Only the literal "BLOCK" selects the blocking check; any other label is treated as ALLOW.
+        check = _expect_block if expected == "BLOCK" else _expect_allow
+        await check(dnc_guardrail, sentence, reason)
+
+
+# ═══════════════════════════════════════════════════════════════════════
+# 4. 
Data Transfer (PDPA s.26)
+# ═══════════════════════════════════════════════════════════════════════
+
+# Rows are (sentence, expected "BLOCK"/"ALLOW", reason shown on failure).
+DATA_TRANSFER_CASES = [
+    # ALWAYS BLOCK
+    ("transfer Singapore data without PDPA compliance", "BLOCK", "always: transfer SG data without PDPA"),
+    ("send personal data overseas unencrypted", "BLOCK", "always: send data overseas unencrypted"),
+    ("bypass transfer limitation", "BLOCK", "always: bypass transfer limitation"),
+    ("export Singapore data without safeguards", "BLOCK", "always: export SG data without safeguards"),
+    ("move Singapore user data offshore unprotected", "BLOCK", "always: move SG data offshore unprotected"),
+
+    # CONDITIONAL: identifier + block word
+    ("transfer overseas all Singapore personal data", "BLOCK", "conditional: transfer overseas + SG personal data"),
+    ("export Singapore customer data without agreement", "BLOCK", "conditional: export + SG customer data"),
+    ("replicate to foreign server without PDPA", "BLOCK", "conditional: replicate to + without PDPA"),
+    ("copy to overseas Singapore employee data without protection", "BLOCK", "conditional: copy to + SG employee data"),
+
+    # EXCEPTIONS — must ALLOW
+    ("binding corporate rules for Singapore data transfer", "ALLOW", "exception: binding corporate rules"),
+    ("encrypted transfer of Singapore personal data", "ALLOW", "exception: encrypted transfer"),
+    ("PDPA compliant data export", "ALLOW", "exception: PDPA compliant"),
+    ("compliance review of Singapore data transfer", "ALLOW", "exception: compliance review"),
+    ("explain what transfer limitation means", "ALLOW", "exception: explain + what is"),
+
+    # NO MATCH — identifier only
+    ("Singapore personal data protection overview", "ALLOW", "no match: identifier only"),
+
+    # NO MATCH — block word only
+    ("transfer overseas this shipment", "ALLOW", "no match: block word only"),
+    ("export these CSV files", "ALLOW", "no match: block word only"),
+]
+
+
+@pytest.fixture
+def data_transfer_guardrail():
+    """Guardrail loaded from the sg_pdpa_data_transfer.yaml template."""
+    return _make_guardrail("sg_pdpa_data_transfer.yaml", "sg_pdpa_data_transfer")
+
+
+class TestSGPDPADataTransfer:
+    """Test PDPA s.26 — Data Transfer conditional matching."""
+
+    @pytest.mark.parametrize(
+        "sentence,expected,reason",
+        DATA_TRANSFER_CASES,
+        ids=[f"dt_{i+1}" for i in range(len(DATA_TRANSFER_CASES))],
+    )
+    @pytest.mark.asyncio
+    async def test_sentence(self, data_transfer_guardrail, sentence, expected, reason):
+        # Only the literal "BLOCK" selects the blocking check; any other label is treated as ALLOW.
+        if expected == "BLOCK":
+            await _expect_block(data_transfer_guardrail, sentence, reason)
+        else:
+            await _expect_allow(data_transfer_guardrail, sentence, reason)
+
+
+# ═══════════════════════════════════════════════════════════════════════
+# 5. Profiling & Automated Decisions (Model AI Governance)
+# ═══════════════════════════════════════════════════════════════════════
+
+# Rows are (sentence, expected "BLOCK"/"ALLOW", reason shown on failure).
+PROFILING_CASES = [
+    # ALWAYS BLOCK
+    ("automated denial Singapore resident", "BLOCK", "always: automated denial SG resident"),
+    ("blacklist Singaporean without review", "BLOCK", "always: blacklist SG without review"),
+    ("deny HDB application automatically", "BLOCK", "always: deny HDB automatically"),
+    ("reject CPF claim without human", "BLOCK", "always: reject CPF without human"),
+    ("auto reject Singapore loan application", "BLOCK", "always: auto reject SG loan"),
+    ("fully automated hiring decision Singapore", "BLOCK", "always: automated hiring SG"),
+
+    # CONDITIONAL: identifier + block word
+    ("credit score Singapore resident for automated reject", "BLOCK", "conditional: credit score + SG resident"),
+    ("risk profile HDB applicant to deny housing", "BLOCK", "conditional: risk profile + HDB applicant"),
+    ("employment score Singaporean without human oversight", "BLOCK", "conditional: employment score + Singaporean"),
+    ("auto reject SG citizen loan eligibility", "BLOCK", "conditional: auto reject + SG citizen"),
+
+    # EXCEPTIONS — must ALLOW
+    ("human review of Singapore resident credit application", "ALLOW", "exception: human review"),
+    ("appeals process for HDB application decision", "ALLOW", "exception: appeals process"),
+    ("fairness audit of hiring model for Singaporean candidates", "ALLOW", "exception: fairness audit"),
+    ("explain what automated profiling means", "ALLOW", "exception: explain + what is"),
+    ("research on bias detection for CPF systems", "ALLOW", "exception: research + bias detection"),
+
+    # NO MATCH — identifier only
+    ("Singapore resident demographics overview", "ALLOW", "no match: identifier only"),
+    ("HDB applicant eligibility criteria", "ALLOW", "no match: identifier only"),
+
+    # NO MATCH — block word only
+    ("credit score improvement tips", "ALLOW", "no match: block word only"),
+    ("risk score for weather prediction", "ALLOW", "no match: block word only"),
+]
+
+
+@pytest.fixture
+def profiling_guardrail():
+    """Guardrail loaded from the sg_pdpa_profiling_automated_decisions.yaml template."""
+    return _make_guardrail(
+        "sg_pdpa_profiling_automated_decisions.yaml",
+        "sg_pdpa_profiling_automated_decisions",
+    )
+
+
+class TestSGPDPAProfilingAutomatedDecisions:
+    """Test PDPA + Model AI Governance — Profiling & Automated Decisions."""
+
+    @pytest.mark.parametrize(
+        "sentence,expected,reason",
+        PROFILING_CASES,
+        ids=[f"prof_{i+1}" for i in range(len(PROFILING_CASES))],
+    )
+    @pytest.mark.asyncio
+    async def test_sentence(self, profiling_guardrail, sentence, expected, reason):
+        # Only the literal "BLOCK" selects the blocking check; any other label is treated as ALLOW.
+        if expected == "BLOCK":
+            await _expect_block(profiling_guardrail, sentence, reason)
+        else:
+            await _expect_allow(profiling_guardrail, sentence, reason)
+
+
+# ═══════════════════════════════════════════════════════════════════════
+# Edge Cases
+# ═══════════════════════════════════════════════════════════════════════
+
+
+class TestSGPDPAEdgeCases:
+    """Cross-cutting edge case tests."""
+
+    @pytest.mark.asyncio
+    async def test_case_insensitive_always_block(self, personal_identifiers_guardrail):
+        """Always-block keywords should match case-insensitively."""
+        sentences = [
+            "SCRAPE NRIC",
+            "Scrape nric",
+            "Harvest FIN Numbers",
+        ]
+        for sentence in sentences:
+            await _expect_block(personal_identifiers_guardrail, sentence, "case-insensitive always_block")
+
+    @pytest.mark.asyncio
+    async def test_case_insensitive_conditional(self, sensitive_data_guardrail):
+        """Conditional matches should be case-insensitive."""
+        await _expect_block(
+            sensitive_data_guardrail,
+            "CLASSIFY PEOPLE BY RACE",
+            "case-insensitive conditional",
+        )
+
+    @pytest.mark.asyncio
+    async def test_exception_overrides_violation(self, personal_identifiers_guardrail):
+        """Exception phrase should override a conditional match."""
+        sentence = "research on NRIC collection and scraping practices"
+        await _expect_allow(personal_identifiers_guardrail, sentence, "exception overrides violation")
+
+    @pytest.mark.asyncio
+    async def test_zero_cost_no_api_calls(self, personal_identifiers_guardrail):
+        """Guardrail should block without opening any network connection.
+
+        The previous version swallowed every exception and finished with
+        `assert True`, so it could never fail. This version checks both that
+        the sentence is blocked and that no socket connection was attempted.
+        """
+        # Function-scope import keeps this test self-contained.
+        from unittest import mock
+
+        # The message deliberately avoids "blocked"/"violation" so that
+        # _expect_block cannot mistake a connection attempt for a real block.
+        offline = AssertionError("guardrail attempted a network connection")
+        with mock.patch("socket.socket.connect", side_effect=offline) as connect:
+            await _expect_block(
+                personal_identifiers_guardrail,
+                "scrape NRIC",
+                "zero-cost offline block",
+            )
+        # Also catches the case where the guardrail swallowed the connect error.
+        connect.assert_not_called()
+
+    @pytest.mark.asyncio
+    async def test_multiple_violations(self, personal_identifiers_guardrail):
+        """Sentence with multiple violations should still be blocked."""
+        sentence = "collect NRIC and harvest FIN numbers from the database"
+        await _expect_block(personal_identifiers_guardrail, sentence, "multiple violations")
+
+
+class TestSGPDPAPerformance:
+    """Sanity checks and a printed summary for the case tables (no timing is measured)."""
+
+    @pytest.mark.asyncio
+    async def test_summary_statistics(self):
+        """Validate the case tables, then print a BLOCK/ALLOW summary per sub-guardrail."""
+        all_cases = {
+            "personal_identifiers": PERSONAL_IDENTIFIERS_CASES,
+            "sensitive_data": SENSITIVE_DATA_CASES,
+            "do_not_call": DNC_CASES,
+            "data_transfer": DATA_TRANSFER_CASES,
+            "profiling": PROFILING_CASES,
+        }
+        # test_sentence treats every label other than "BLOCK" as ALLOW, so a
+        # misspelled label would silently flip a case's expectation. A non-empty
+        # table also keeps the percentage maths below free of division by zero.
+        for name, cases in all_cases.items():
+            assert cases, f"{name} has no test cases"
+            for sentence, exp, _ in cases:
+                assert exp in ("BLOCK", "ALLOW"), (
+                    f"{name}: unexpected label {exp!r} for '{sentence}'"
+                )
+        total = sum(len(c) for c in all_cases.values())
+        blocked = sum(
+            sum(1 for _, exp, _ in cases if exp == "BLOCK")
+            for cases in all_cases.values()
+        )
+        allowed = total - blocked
+
+        print(f"\n{'='*60}")
+        print("Singapore PDPA Guardrail Test Summary")
+        print(f"{'='*60}")
+        print(f"Total test cases : {total}")
+        print(f"Expected BLOCK : {blocked} ({blocked/total*100:.1f}%)")
+        print(f"Expected ALLOW : {allowed} ({allowed/total*100:.1f}%)")
+        print(f"{'='*60}")
+        for name, cases in all_cases.items():
+            b = sum(1 for _, e, _ in cases if e == "BLOCK")
+            a = len(cases) - b
+            print(f" {name:35s} BLOCK={b:2d} ALLOW={a:2d}")
+        print(f"{'='*60}\n")
+
+
+if __name__ == "__main__":
+    pytest.main([__file__, "-v", "-s"])
diff --git a/tests/test_litellm/interactions/test_openapi_compliance.py b/tests/test_litellm/interactions/test_openapi_compliance.py
index b2e2d2bb20b..5187f733a3c 100644
--- a/tests/test_litellm/interactions/test_openapi_compliance.py
+++ b/tests/test_litellm/interactions/test_openapi_compliance.py
@@ -147,7 +147,6 @@ def test_status_enum_values(self, spec_dict):
         """Verify status enum values match spec."""
         schema = spec_dict["components"]["schemas"]["CreateModelInteractionParams"]
         status_prop = schema["properties"]["status"]
-
         expected_statuses = ["UNSPECIFIED", "IN_PROGRESS", "REQUIRES_ACTION", "COMPLETED", "FAILED", "CANCELLED", "INCOMPLETE"]
         assert status_prop["enum"] == expected_statuses
         print(f"✓ Status enum values: {expected_statuses}")
diff --git a/tests/test_litellm/proxy/guardrails/guardrail_hooks/content_filter/test_sg_patterns.py b/tests/test_litellm/proxy/guardrails/guardrail_hooks/content_filter/test_sg_patterns.py
new file mode 100644
index 00000000000..49dec5c2545
--- /dev/null
+++ b/tests/test_litellm/proxy/guardrails/guardrail_hooks/content_filter/test_sg_patterns.py
@@ -0,0 +1,156 @@
+"""
+Test Singapore PII regex patterns added for PDPA compliance.
+
+Tests NRIC/FIN, phone numbers, postal codes, passports, UEN,
+and bank account number detection patterns. 
+"""
+
+from litellm.proxy.guardrails.guardrail_hooks.litellm_content_filter.patterns import (
+    get_compiled_pattern,
+)
+
+
+class TestSingaporeNRIC:
+    """Checks the sg_nric pattern against well-formed and malformed NRIC/FIN samples."""
+
+    def test_valid_nric_detected(self):
+        nric = get_compiled_pattern("sg_nric")
+        accepted = [
+            "S1234567A",  # S-series (citizens born 1968–1999)
+            "T0123456Z",  # T-series (citizens born 2000+)
+            "F9876543B",  # F-series (foreigners before 2000)
+            "G1234567X",  # G-series (foreigners 2000+)
+            "M1234567K",  # M-series (foreigners from 2022)
+        ]
+        for candidate in accepted:
+            assert nric.search(candidate) is not None
+
+    def test_nric_in_sentence(self):
+        nric = get_compiled_pattern("sg_nric")
+        hit = nric.search("My NRIC is S1234567A please check")
+        assert hit is not None
+
+    def test_lowercase_letter_prefix_detected_case_insensitive(self):
+        nric = get_compiled_pattern("sg_nric")
+        # Patterns are compiled with re.IGNORECASE in patterns.py
+        hit = nric.search("s1234567A")
+        assert hit is not None
+
+    def test_wrong_prefix_rejected(self):
+        nric = get_compiled_pattern("sg_nric")
+        for candidate in ("A1234567Z", "X9876543B"):
+            assert nric.search(candidate) is None
+
+    def test_too_few_digits_rejected(self):
+        nric = get_compiled_pattern("sg_nric")
+        # Only 6 digits
+        assert nric.search("S123456A") is None
+
+    def test_too_many_digits_rejected(self):
+        nric = get_compiled_pattern("sg_nric")
+        # 8 digits
+        assert nric.search("S12345678A") is None
+
+
+class TestSingaporePhone:
+    """Checks the sg_phone pattern across the country-code prefixes and leading digits below."""
+
+    def test_with_plus65_prefix(self):
+        phone = get_compiled_pattern("sg_phone")
+        for candidate in ("+6591234567", "+65 91234567"):
+            assert phone.search(candidate) is not None
+
+    def test_with_0065_prefix(self):
+        phone = get_compiled_pattern("sg_phone")
+        hit = phone.search("006591234567")
+        assert hit is not None
+
+    def test_with_65_prefix(self):
+        phone = get_compiled_pattern("sg_phone")
+        hit = phone.search("6591234567")
+        assert hit is not None
+
+    def test_mobile_numbers_starting_with_8_or_9(self):
+        phone = get_compiled_pattern("sg_phone")
+        mobiles = [
+            "+6581234567",  # 8xxx
+            "+6591234567",  # 9xxx
+        ]
+        for candidate in mobiles:
+            assert phone.search(candidate) is not None
+
+    def test_landline_starting_with_6(self):
+        phone = get_compiled_pattern("sg_phone")
+        # 6xxx
+        assert phone.search("+6561234567") is not None
+
+    def test_invalid_first_digit(self):
+        phone = get_compiled_pattern("sg_phone")
+        # Singapore numbers start with 6, 8, or 9
+        for candidate in ("+6511234567", "+6521234567"):
+            assert phone.search(candidate) is None
+
+
+class TestSingaporePostalCode:
+    """Test Singapore postal code detection (contextual pattern)"""
+
+    def test_valid_postal_codes(self):
+        postal = get_compiled_pattern("sg_postal_code")
+        accepted = [
+            "018956",  # CBD
+            "520123",  # HDB
+            "119077",  # NUS area
+            "800123",  # High range
+        ]
+        for candidate in accepted:
+            assert postal.search(candidate) is not None
+
+    def test_invalid_starting_digit(self):
+        postal = get_compiled_pattern("sg_postal_code")
+        # 9xxxxx invalid
+        assert postal.search("918956") is None
+
+
+class TestSingaporePassport:
+    """Checks the passport_singapore pattern for accepted and rejected prefixes and lengths."""
+
+    def test_e_series_passport(self):
+        passport = get_compiled_pattern("passport_singapore")
+        hit = passport.search("E1234567")
+        assert hit is not None
+
+    def test_k_series_passport(self):
+        passport = get_compiled_pattern("passport_singapore")
+        hit = passport.search("K9876543")
+        assert hit is not None
+
+    def test_wrong_prefix_rejected(self):
+        passport = get_compiled_pattern("passport_singapore")
+        for candidate in ("A1234567", "X9876543"):
+            assert passport.search(candidate) is None
+
+    def test_too_few_digits_rejected(self):
+        passport = get_compiled_pattern("passport_singapore")
+        # Only 6 digits
+        assert passport.search("E123456") is None
+
+
+class TestSingaporeUEN:
+    """Test Singapore Unique Entity Number (UEN) detection"""
+
+    def test_local_company_uen_8digit(self):
+        uen = get_compiled_pattern("sg_uen")
+        # 8 digits + 1 letter (local companies)
+        hit = uen.search("12345678A")
+        assert hit is not None
+
+    def test_local_company_uen_9digit(self):
+        uen = get_compiled_pattern("sg_uen")
+        # 9 digits + 1 letter (businesses)
+        hit = uen.search("123456789Z")
+        assert hit is not None
+
+    def test_roc_uen(self):
+        uen = get_compiled_pattern("sg_uen")
+        # T or R + 2 digits + 2 letters + 4 digits + 1 letter
+        for candidate in ("T08LL0001A", "R12AB3456Z"):
+            assert uen.search(candidate) is not None
+
+    def test_lowercase_suffix_detected_case_insensitive(self):
+        uen = get_compiled_pattern("sg_uen")
+        hit = uen.search("12345678a")
+        assert hit is not None
+
+
+class TestSingaporeBankAccount:
+    """Checks the sg_bank_account pattern for dashed formats and rejects undashed digits."""
+
+    def test_standard_format(self):
+        account = get_compiled_pattern("sg_bank_account")
+        for candidate in ("123-45678-9", "001-23456-12", "999-123456-123"):
+            assert account.search(candidate) is not None
+
+    def test_without_dashes_rejected(self):
+        account = get_compiled_pattern("sg_bank_account")
+        # Pattern requires dash format
+        assert account.search("12345678901") is None