FEAT: Colloquial Wordswap Attack (Azure#406)

Co-authored-by: Eugenia Kim <[email protected]>
rdheekonda · Oct 2, 2024 · 6a2ec71 · 6a2ec71
1 parent 812c1ba
commit 6a2ec71
Show file tree

Hide file tree

Showing 4 changed files with 221 additions and 94 deletions.
diff --git a/pyrit/prompt_converter/colloquial_wordswap_converter.py b/pyrit/prompt_converter/colloquial_wordswap_converter.py
@@ -0,0 +1,90 @@
+# Copyright (c) Microsoft Corporation.
+# Licensed under the MIT license.
+
+import random
+import re
+from typing import Dict, List, Optional
+
+from pyrit.models import PromptDataType
+from pyrit.prompt_converter import PromptConverter, ConverterResult
+
+
+class ColloquialWordswapConverter(PromptConverter):
+    """Swap family and people words in a prompt for Singaporean colloquial equivalents."""
+
+    def __init__(
+        self, deterministic: bool = False, custom_substitutions: Optional[Dict[str, List[str]]] = None
+    ) -> None:
+        """
+        Initialize the converter with optional deterministic mode and custom substitutions.
+
+        Args:
+            deterministic (bool): If True, use the first substitution for each wordswap.
+                If False, randomly choose a substitution for each wordswap. Defaults to False.
+            custom_substitutions (Optional[Dict[str, List[str]]], optional): Substitutions that replace the
+                defaults entirely. Keys are matched case-insensitively. None or an empty dict selects the defaults.
+        """
+        default_substitutions = {
+            "father": ["papa", "lao bei", "lim pei", "bapa", "appa"],
+            "mother": ["mama", "amma", "ibu"],
+            "grandfather": ["ah gong", "thatha", "dato"],
+            "grandmother": ["ah ma", "patti", "nenek"],
+            "girl": ["ah ger", "ponnu"],
+            "boy": ["ah boy", "boi", "payyan"],
+            "son": ["ah boy", "boi", "payyan"],
+            "daughter": ["ah ger", "ponnu"],
+            "aunt": ["makcik", "maami"],
+            "aunty": ["makcik", "maami"],
+            "man": ["ah beng", "shuai ge"],
+            "woman": ["ah lian", "xiao mei"],
+            "uncle": ["encik", "unker"],
+            "sister": ["xjj", "jie jie", "zhezhe", "kaka", "akka", "thangatchi"],
+            "brother": ["bro", "boiboi", "di di", "xdd", "anneh", "thambi"],
+        }
+
+        # Custom substitutions replace (not extend) the defaults; an empty or missing dict means "use defaults".
+        substitutions = custom_substitutions if custom_substitutions else default_substitutions
+        # Prompt words are lowercased before lookup, so keys must be lowercase too or they could never match.
+        self._colloquial_substitutions = {key.lower(): list(subs) for key, subs in substitutions.items()}
+        self._deterministic = deterministic
+
+    async def convert_async(self, *, prompt: str, input_type: PromptDataType = "text") -> ConverterResult:
+        """
+        Convert the given prompt to colloquial Singaporean context.
+
+        Args:
+            prompt (str): The text to convert.
+            input_type (PromptDataType): The type of input data.
+
+        Returns:
+            ConverterResult: The swapped prompt as text, with whitespace runs collapsed to single spaces.
+
+        Raises:
+            ValueError: If input_type is not supported.
+        """
+        if not self.input_supported(input_type):
+            raise ValueError("Input type not supported")
+
+        def swap(match: "re.Match[str]") -> str:
+            word = match.group(0)
+            options = self._colloquial_substitutions.get(word.lower())
+            if not options:
+                return word  # unknown word (or an empty option list): keep it as is
+            # Deterministic mode always takes the first option; otherwise pick one at random
+            return options[0] if self._deterministic else random.choice(options)
+
+        # Swap whole words in place so punctuation glued to a word ('(father)', '"father"', 'father:') neither
+        # hides it from the lookup nor gets spaces inserted around it.
+        swapped = re.sub(r"\w+", swap, prompt)
+
+        # Collapse whitespace runs and trim the ends
+        final_prompt = " ".join(swapped.split())
+        # Drop a space before sentence punctuation. Quotes are deliberately excluded: a space before an
+        # opening quote is meaningful, and removing it glued the quote to the previous word.
+        final_prompt = re.sub(r"\s([?.!,])", r"\1", final_prompt)
+
+        return ConverterResult(output_text=final_prompt, output_type="text")
+
+    def input_supported(self, input_type: PromptDataType) -> bool:
+        """Return True only for text input, the only type this converter handles."""
+        return input_type == "text"
diff --git a/tests/converter/test_colloquial_wordswap_converter.py b/tests/converter/test_colloquial_wordswap_converter.py
@@ -0,0 +1,131 @@
+# Copyright (c) Microsoft Corporation.
+# Licensed under the MIT license.
+
+import re
+import pytest
+from pyrit.prompt_converter.colloquial_wordswap_converter import ColloquialWordswapConverter
+
+
+# Test for deterministic mode
+@pytest.mark.asyncio
+@pytest.mark.parametrize(
+    "input_text,expected_output",
+    [
+        ("grandfather", "ah gong"),  # One word, one swap
+        ("mother and brother", "mama and bro"),  # Two swaps; the connecting word is untouched
+        ("Hello, my Father!", "Hello, my papa!"),  # Case-insensitive match with punctuation kept in place
+    ],
+)
+async def test_colloquial_deterministic(input_text, expected_output):
+    """Deterministic mode yields the first listed substitution for each known word."""
+    wordswap = ColloquialWordswapConverter(deterministic=True)
+    assert (await wordswap.convert_async(prompt=input_text)).output_text == expected_output
+
+
+# Test for non-deterministic mode
+@pytest.mark.asyncio
+@pytest.mark.parametrize(
+    "input_text",
+    [
+        "code",  # No substitutable word: must come back unchanged
+        "mother",
+        "uncle and brother",
+    ],
+)
+async def test_colloquial_non_deterministic(input_text):
+    """Random mode must return exactly one of the valid substitution combinations."""
+    converter = ColloquialWordswapConverter(deterministic=False)
+    result = await converter.convert_async(prompt=input_text)
+
+    # Valid substitution mappings in the input texts
+    valid_substitutions = {
+        "mother": ["mama", "amma", "ibu"],
+        "uncle": ["encik", "unker"],
+        "brother": ["bro", "boiboi", "di di", "xdd", "anneh", "thambi"],
+    }
+
+    # Enumerate every full output the converter may produce for these plain, space-separated inputs.
+    # Comparing whole strings (instead of zipping tokens) stays aligned for multi-word substitutions
+    # such as "di di" and rejects partial or truncated matches.
+    expected_outputs = [""]
+    for input_word in re.findall(r"\w+|\S+", input_text):
+        options = valid_substitutions.get(input_word.lower(), [input_word])
+        expected_outputs = [
+            f"{prefix} {option}".strip() for prefix in expected_outputs for option in options
+        ]
+
+    assert result.output_text in expected_outputs
+
+
+# Test for custom substitutions
+@pytest.mark.asyncio
+@pytest.mark.parametrize(
+    "input_text,custom_substitutions,expected_output",
+    [
+        ("father", {"father": ["appa", "darth vader"]}, "appa"),  # First custom option wins
+    ],
+)
+async def test_colloquial_custom_substitutions(input_text, custom_substitutions, expected_output):
+    """A caller-supplied mapping is used in place of the built-in one."""
+    custom_swapper = ColloquialWordswapConverter(deterministic=True, custom_substitutions=custom_substitutions)
+    assert (await custom_swapper.convert_async(prompt=input_text)).output_text == expected_output
+
+
+# Test for empty custom substitutions
+@pytest.mark.asyncio
+@pytest.mark.parametrize(
+    "input_text,expected_output",
+    [
+        ("mother and father", "mama and papa"),  # Empty custom mapping falls back to the defaults
+    ],
+)
+async def test_colloquial_empty_custom_substitutions(input_text, expected_output):
+    """An empty custom mapping behaves the same as passing none at all."""
+    outcome = await ColloquialWordswapConverter(deterministic=True, custom_substitutions={}).convert_async(
+        prompt=input_text
+    )
+    assert outcome.output_text == expected_output
+
+
+# Test multiple word prompts
+@pytest.mark.asyncio
+@pytest.mark.parametrize(
+    "input_text,expected_output",
+    [
+        ("father and mother", "papa and mama"),
+        ("brother and sister", "bro and xjj"),
+        ("aunt and uncle", "makcik and encik"),
+    ],
+)
+async def test_multiple_words(input_text, expected_output):
+    """Every known word in a prompt is swapped, not just the first one."""
+    outcome = await ColloquialWordswapConverter(deterministic=True).convert_async(prompt=input_text)
+    assert outcome.output_text == expected_output
+
+
+# Test for awkward spacing
+@pytest.mark.asyncio
+@pytest.mark.parametrize(
+    "input_text,expected_output",
+    [
+        ("  father  and    mother ", "papa and mama"),
+        ("sister   and   brother", "xjj and bro"),
+    ],
+)
+async def test_awkward_spacing(input_text, expected_output):
+    """Leading, trailing and repeated spaces are normalised to single spaces."""
+    spaced = await ColloquialWordswapConverter(deterministic=True).convert_async(prompt=input_text)
+    assert spaced.output_text == expected_output
+
+
+# Test for punctuation handling
+@pytest.mark.asyncio
+@pytest.mark.parametrize(
+    "input_text,expected_output",
+    [
+        ("father, mother!", "papa, mama!"),
+        ("aunt? uncle!", "makcik? encik!"),
+    ],
+)
+async def test_punctuation_handling(input_text, expected_output):
+    """Punctuation stays attached to the swapped word with no extra space."""
+    swapper = ColloquialWordswapConverter(deterministic=True)
+    assert (await swapper.convert_async(prompt=input_text)).output_text == expected_output
diff --git a/tests/test_leetspeak_converter.py b/tests/converter/test_leetspeak_converter.py
diff --git a/tests/test_leetspeakConverter.py b/tests/test_leetspeakConverter.py