From f5666e63dc2bb1cbbb6c66e0476fdb0688c7ac8b Mon Sep 17 00:00:00 2001 From: Kayla Seager Date: Thu, 17 Apr 2025 09:50:56 -0700 Subject: [PATCH 1/2] updates from implementation --- .../ai/Azure.AI.Projects/red-teams/models.tsp | 49 +++++++++- .../azure-ai-projects-1dp.json | 97 ++++++++++++++++++- 2 files changed, 140 insertions(+), 6 deletions(-) diff --git a/specification/ai/Azure.AI.Projects/red-teams/models.tsp b/specification/ai/Azure.AI.Projects/red-teams/models.tsp index 06a467b78809..b2d4f6299b73 100644 --- a/specification/ai/Azure.AI.Projects/red-teams/models.tsp +++ b/specification/ai/Azure.AI.Projects/red-teams/models.tsp @@ -24,6 +24,12 @@ union AttackStrategy { @doc("Represents a default set of easy complexity attacks. Easy complexity attack strategies are defined as attacks that do not require any Large Language Model to convert or orchestrate.") Easy: "easy", + @doc("Represents a default set of moderate complexity attacks. Moderate complexity attack strategies are defined as attacks that require a Large Language Model to convert or orchestrate.") + Moderate: "moderate", + + @doc("Represents a default set of difficult complexity attacks. 
Difficult complexity attack strategies are defined as attacks that require a Large Language Model to convert or orchestrate.") + Difficult: "difficult", + @doc("Represents ASCII art, a graphic design technique that uses printable characters.") AsciiArt: "ascii_art", @@ -47,6 +53,45 @@ union AttackStrategy { @doc("Represents character swapping, a technique for rearranging characters in text.") Jailbreak: "jailbreak", + + @doc("Represents ASCII attack, a technique for encoding or hiding data.") + AnsiiAttack: "ansii_attack", + + @doc("Represents character swapping, a technique for rearranging characters in text.") + CharacterSwap: "character_swap", + + @doc("Represents suffix appending, a technique for adding characters to the end of text.") + SuffixAppend: "suffix_append", + + @doc("Represents string joining, a technique for concatenating strings.") + StringJoin: "string_join", + + @doc("Represents Unicode confusable characters, which are characters that look similar but are different.") + UnicodeConfusable: "unicode_confusable", + + @doc("Represents Unicode substitution, a technique for replacing characters with similar-looking Unicode characters.") + UnicodeSubstitution: "unicode_substitution", + + @doc("Represents the use of emojis in text, which can be used to convey emotions or ideas.") + Diacritic: "diacritic", + + @doc("Represents flipping characters, a technique for reversing the order of characters.") + Flip: "flip", + + @doc("Represents leetspeak, a form of internet slang that replaces letters with similar-looking numbers or symbols.") + Leetspeak: "leetspeak", + + @doc("Represents the ROT13 cipher, a substitution cipher that replaces a letter with the 13th letter after it in the alphabet.") + ROT13: "rot13", + + @doc("Represents the morse code, a method for encoding text characters as sequences of dots and dashes.") + Morse: "morse", + + @doc("Represents the url encoding, a method for encoding special characters in URLs.") + Url: "url", + + 
@doc("Represents the baseline attack strategy, which is a standard or reference point for comparison.") + Baseline: "baseline" } @doc("Risk category for the attack objective.") @@ -88,13 +133,13 @@ model RedTeam { id: string; @doc("Name of the red-team scan.") - scanName: string; + scanName?: string; @doc("Number of simulation rounds.") numTurns: int32; @doc("List of attack strategies or nested lists of attack strategies.") - attackStrategy: AttackStrategy[]; + attackStrategies: AttackStrategy[]; @doc("Simulation-only or Simulation + Evaluation. Default false, if true the scan outputs conversation not evaluation result.") simulationOnly: boolean; diff --git a/specification/ai/data-plane/Azure.AI.Projects/preview/2025-05-15-preview/azure-ai-projects-1dp.json b/specification/ai/data-plane/Azure.AI.Projects/preview/2025-05-15-preview/azure-ai-projects-1dp.json index 62f2c639353c..f25d2e4633bd 100644 --- a/specification/ai/data-plane/Azure.AI.Projects/preview/2025-05-15-preview/azure-ai-projects-1dp.json +++ b/specification/ai/data-plane/Azure.AI.Projects/preview/2025-05-15-preview/azure-ai-projects-1dp.json @@ -1604,6 +1604,8 @@ "description": "Strategies for attacks.", "enum": [ "easy", + "moderate", + "difficult", "ascii_art", "ascii_smuggler", "atbash", @@ -1611,7 +1613,20 @@ "binary", "caesar", "character_space", - "jailbreak" + "jailbreak", + "ansii_attack", + "character_swap", + "suffix_append", + "string_join", + "unicode_confusable", + "unicode_substitution", + "diacritic", + "flip", + "leetspeak", + "rot13", + "morse", + "url", + "baseline" ], "x-ms-enum": { "name": "AttackStrategy", @@ -1622,6 +1637,16 @@ "value": "easy", "description": "Represents a default set of easy complexity attacks. Easy complexity attack strategies are defined as attacks that do not require any Large Language Model to convert or orchestrate." }, + { + "name": "Moderate", + "value": "moderate", + "description": "Represents a default set of moderate complexity attacks. 
Moderate complexity attack strategies are defined as attacks that require a Large Language Model to convert or orchestrate." + }, + { + "name": "Difficult", + "value": "difficult", + "description": "Represents a default set of difficult complexity attacks. Difficult complexity attack strategies are defined as attacks that require a Large Language Model to convert or orchestrate." + }, { "name": "AsciiArt", "value": "ascii_art", @@ -1661,6 +1686,71 @@ "name": "Jailbreak", "value": "jailbreak", "description": "Represents character swapping, a technique for rearranging characters in text." + }, + { + "name": "AnsiiAttack", + "value": "ansii_attack", + "description": "Represents ASCII attack, a technique for encoding or hiding data." + }, + { + "name": "CharacterSwap", + "value": "character_swap", + "description": "Represents character swapping, a technique for rearranging characters in text." + }, + { + "name": "SuffixAppend", + "value": "suffix_append", + "description": "Represents suffix appending, a technique for adding characters to the end of text." + }, + { + "name": "StringJoin", + "value": "string_join", + "description": "Represents string joining, a technique for concatenating strings." + }, + { + "name": "UnicodeConfusable", + "value": "unicode_confusable", + "description": "Represents Unicode confusable characters, which are characters that look similar but are different." + }, + { + "name": "UnicodeSubstitution", + "value": "unicode_substitution", + "description": "Represents Unicode substitution, a technique for replacing characters with similar-looking Unicode characters." + }, + { + "name": "Diacritic", + "value": "diacritic", + "description": "Represents the use of emojis in text, which can be used to convey emotions or ideas." + }, + { + "name": "Flip", + "value": "flip", + "description": "Represents flipping characters, a technique for reversing the order of characters." 
+ }, + { + "name": "Leetspeak", + "value": "leetspeak", + "description": "Represents leetspeak, a form of internet slang that replaces letters with similar-looking numbers or symbols." + }, + { + "name": "ROT13", + "value": "rot13", + "description": "Represents the ROT13 cipher, a substitution cipher that replaces a letter with the 13th letter after it in the alphabet." + }, + { + "name": "Morse", + "value": "morse", + "description": "Represents the morse code, a method for encoding text characters as sequences of dots and dashes." + }, + { + "name": "Url", + "value": "url", + "description": "Represents the url encoding, a method for encoding special characters in URLs." + }, + { + "name": "Baseline", + "value": "baseline", + "description": "Represents the baseline attack strategy, which is a standard or reference point for comparison." } ] } @@ -2775,7 +2865,7 @@ "format": "int32", "description": "Number of simulation rounds." }, - "attackStrategy": { + "attackStrategies": { "type": "array", "description": "List of attack strategies or nested lists of attack strategies.", "items": { @@ -2819,9 +2909,8 @@ }, "required": [ "id", - "scanName", "numTurns", - "attackStrategy", + "attackStrategies", "simulationOnly", "riskCategories" ] From 0fbf66d6e4998da22561b95d736ed02da99dfde9 Mon Sep 17 00:00:00 2001 From: Kayla Seager Date: Thu, 17 Apr 2025 16:07:17 -0700 Subject: [PATCH 2/2] update to match docs: https://learn.microsoft.com/en-us/azure/ai-foundry/concepts/ai-red-teaming-agent#supported-attack-strategies --- .../ai/Azure.AI.Projects/red-teams/models.tsp | 48 +++++++++---------- .../azure-ai-projects-1dp.json | 48 +++++++++---------- 2 files changed, 48 insertions(+), 48 deletions(-) diff --git a/specification/ai/Azure.AI.Projects/red-teams/models.tsp b/specification/ai/Azure.AI.Projects/red-teams/models.tsp index b2d4f6299b73..a9c2175cdd68 100644 --- a/specification/ai/Azure.AI.Projects/red-teams/models.tsp +++ 
b/specification/ai/Azure.AI.Projects/red-teams/models.tsp @@ -21,76 +21,76 @@ namespace Azure.AI.Projects; union AttackStrategy { string, - @doc("Represents a default set of easy complexity attacks. Easy complexity attack strategies are defined as attacks that do not require any Large Language Model to convert or orchestrate.") + @doc("Represents a default set of easy complexity attacks. Easy complexity attacks require less effort, such as translation of a prompt into some encoding, and do not require any Large Language Model to convert or orchestrate.") Easy: "easy", - @doc("Represents a default set of moderate complexity attacks. Moderate complexity attack strategies are defined as attacks that require a Large Language Model to convert or orchestrate.") + @doc("Represents a default set of moderate complexity attacks. Moderate complexity attacks require having access to resources such as another generative AI model.") Moderate: "moderate", - @doc("Represents a default set of difficult complexity attacks. 
Difficult complexity attack strategies are defined as attacks that require a Large Language Model to convert or orchestrate.") + @doc("Represents a default set of difficult complexity attacks. Difficult complexity attacks include attacks that require access to significant resources and effort to execute an attack such as knowledge of search-based algorithms in addition to a generative AI model.") Difficult: "difficult", - @doc("Represents ASCII art, a graphic design technique that uses printable characters.") + @doc("Generates visual art using ASCII characters, often used for creative or obfuscation purposes.") AsciiArt: "ascii_art", - @doc("Represents ASCII smuggling, a technique for encoding or hiding data.") + @doc("Conceals data within ASCII characters, making it harder to detect.") AsciiSmuggler: "ascii_smuggler", - @doc("Represents the Atbash cipher, a substitution cipher that reverses the alphabet.") + @doc("Implements the Atbash cipher, a simple substitution cipher where each letter is mapped to its reverse.") Atbash: "atbash", - @doc("Represents Base64 encoding, a method for encoding binary data as text.") + @doc("Encodes binary data into a text format using Base64, commonly used for data transmission.") Base64: "base64", - @doc("Represents binary encoding, a representation of data in binary format.") + @doc("Converts text into binary code, representing data in a series of 0s and 1s.") Binary: "binary", - @doc("Represents the Caesar cipher, a substitution cipher that shifts characters.") + @doc("Applies the Caesar cipher, a substitution cipher that shifts characters by a fixed number of positions.") Caesar: "caesar", - @doc("Represents character space manipulation, a technique involving spacing between characters.") + @doc("Alters text by adding spaces between characters, often used for obfuscation.") CharacterSpace: "character_space", - @doc("Represents character swapping, a technique for rearranging characters in text.") + @doc("Injects specially crafted prompts to bypass AI safeguards, known as User Injected Prompt Attacks (UPIA).") Jailbreak: "jailbreak", - @doc("Represents ASCII attack, a technique for encoding or hiding data.") + 
@doc("Utilizes ANSI escape sequences to manipulate text appearance and behavior.") AnsiiAttack: "ansii_attack", - @doc("Represents character swapping, a technique for rearranging characters in text.") + @doc("Swaps characters within text to create variations or obfuscate the original content.") CharacterSwap: "character_swap", - @doc("Represents suffix appending, a technique for adding characters to the end of text.") + @doc("Appends an adversarial suffix to the prompt.") SuffixAppend: "suffix_append", - @doc("Represents string joining, a technique for concatenating strings.") + @doc("Joins multiple strings together, often used for concatenation or obfuscation.") StringJoin: "string_join", - @doc("Represents Unicode confusable characters, which are characters that look similar but are different.") + @doc("Uses Unicode characters that look similar to standard characters, creating visual confusion.") UnicodeConfusable: "unicode_confusable", - @doc("Represents Unicode substitution, a technique for replacing characters with similar-looking Unicode characters.") + @doc("Substitutes standard characters with Unicode equivalents, often for obfuscation.") UnicodeSubstitution: "unicode_substitution", - @doc("Represents the use of emojis in text, which can be used to convey emotions or ideas.") + @doc("Adds diacritical marks to characters, changing their appearance and sometimes their meaning.") Diacritic: "diacritic", - @doc("Represents flipping characters, a technique for reversing the order of characters.") + @doc("Flips characters from front to back, creating a mirrored effect.") Flip: "flip", - @doc("Represents leetspeak, a form of internet slang that replaces letters with similar-looking numbers or symbols.") + @doc("Transforms text into Leetspeak, a form of encoding that replaces letters with similar-looking numbers or symbols.") Leetspeak: "leetspeak", - @doc("Represents the ROT13 cipher, a substitution cipher that replaces a letter with the 13th letter after it in 
the alphabet.") + @doc("Applies the ROT13 cipher, a simple substitution cipher that shifts characters by 13 positions.") ROT13: "rot13", - @doc("Represents the morse code, a method for encoding text characters as sequences of dots and dashes.") + @doc("Encodes text into Morse code, using dots and dashes to represent characters.") Morse: "morse", - @doc("Represents the url encoding, a method for encoding special characters in URLs.") + @doc("Encodes text into URL format.") Url: "url", - @doc("Represents the baseline attack strategy, which is a standard or reference point for comparison.") + @doc("Represents the baseline direct adversarial probing, which is used by attack strategies as the attack objective.") Baseline: "baseline" } diff --git a/specification/ai/data-plane/Azure.AI.Projects/preview/2025-05-15-preview/azure-ai-projects-1dp.json b/specification/ai/data-plane/Azure.AI.Projects/preview/2025-05-15-preview/azure-ai-projects-1dp.json index f25d2e4633bd..aa19fff720f5 100644 --- a/specification/ai/data-plane/Azure.AI.Projects/preview/2025-05-15-preview/azure-ai-projects-1dp.json +++ b/specification/ai/data-plane/Azure.AI.Projects/preview/2025-05-15-preview/azure-ai-projects-1dp.json @@ -1635,122 +1635,122 @@ { "name": "Easy", "value": "easy", - "description": "Represents a default set of easy complexity attacks. Easy complexity attack strategies are defined as attacks that do not require any Large Language Model to convert or orchestrate." + "description": "Represents a default set of easy complexity attacks. Easy complexity attacks require less effort, such as translation of a prompt into some encoding, and do not require any Large Language Model to convert or orchestrate." }, { "name": "Moderate", "value": "moderate", - "description": "Represents a default set of moderate complexity attacks. Moderate complexity attack strategies are defined as attacks that require a Large Language Model to convert or orchestrate." 
+ "description": "Represents a default set of moderate complexity attacks. Moderate complexity attacks require having access to resources such as another generative AI model." }, { "name": "Difficult", "value": "difficult", - "description": "Represents a default set of difficult complexity attacks. Difficult complexity attack strategies are defined as attacks that require a Large Language Model to convert or orchestrate." + "description": "Represents a default set of difficult complexity attacks. Difficult complexity attacks include attacks that require access to significant resources and effort to execute an attack such as knowledge of search-based algorithms in addition to a generative AI model." }, { "name": "AsciiArt", "value": "ascii_art", - "description": "Represents ASCII art, a graphic design technique that uses printable characters." + "description": "Generates visual art using ASCII characters, often used for creative or obfuscation purposes." }, { "name": "AsciiSmuggler", "value": "ascii_smuggler", - "description": "Represents ASCII smuggling, a technique for encoding or hiding data." + "description": "Conceals data within ASCII characters, making it harder to detect." }, { "name": "Atbash", "value": "atbash", - "description": "Represents the Atbash cipher, a substitution cipher that reverses the alphabet." + "description": "Implements the Atbash cipher, a simple substitution cipher where each letter is mapped to its reverse." }, { "name": "Base64", "value": "base64", - "description": "Represents Base64 encoding, a method for encoding binary data as text." + "description": "Encodes binary data into a text format using Base64, commonly used for data transmission." }, { "name": "Binary", "value": "binary", - "description": "Represents binary encoding, a representation of data in binary format." + "description": "Converts text into binary code, representing data in a series of 0s and 1s." 
}, { "name": "Caesar", "value": "caesar", - "description": "Represents the Caesar cipher, a substitution cipher that shifts characters." + "description": "Applies the Caesar cipher, a substitution cipher that shifts characters by a fixed number of positions." }, { "name": "CharacterSpace", "value": "character_space", - "description": "Represents character space manipulation, a technique involving spacing between characters." + "description": "Alters text by adding spaces between characters, often used for obfuscation." }, { "name": "Jailbreak", "value": "jailbreak", - "description": "Represents character swapping, a technique for rearranging characters in text." + "description": "Injects specially crafted prompts to bypass AI safeguards, known as User Injected Prompt Attacks (UPIA)." }, { "name": "AnsiiAttack", "value": "ansii_attack", - "description": "Represents ASCII attack, a technique for encoding or hiding data." + "description": "Utilizes ANSI escape sequences to manipulate text appearance and behavior." }, { "name": "CharacterSwap", "value": "character_swap", - "description": "Represents character swapping, a technique for rearranging characters in text." + "description": "Swaps characters within text to create variations or obfuscate the original content." }, { "name": "SuffixAppend", "value": "suffix_append", - "description": "Represents suffix appending, a technique for adding characters to the end of text." + "description": "Appends an adversarial suffix to the prompt." }, { "name": "StringJoin", "value": "string_join", - "description": "Represents string joining, a technique for concatenating strings." + "description": "Joins multiple strings together, often used for concatenation or obfuscation." }, { "name": "UnicodeConfusable", "value": "unicode_confusable", - "description": "Represents Unicode confusable characters, which are characters that look similar but are different." 
+ "description": "Uses Unicode characters that look similar to standard characters, creating visual confusion." }, { "name": "UnicodeSubstitution", "value": "unicode_substitution", - "description": "Represents Unicode substitution, a technique for replacing characters with similar-looking Unicode characters." + "description": "Substitutes standard characters with Unicode equivalents, often for obfuscation." }, { "name": "Diacritic", "value": "diacritic", - "description": "Represents the use of emojis in text, which can be used to convey emotions or ideas." + "description": "Adds diacritical marks to characters, changing their appearance and sometimes their meaning." }, { "name": "Flip", "value": "flip", - "description": "Represents flipping characters, a technique for reversing the order of characters." + "description": "Flips characters from front to back, creating a mirrored effect." }, { "name": "Leetspeak", "value": "leetspeak", - "description": "Represents leetspeak, a form of internet slang that replaces letters with similar-looking numbers or symbols." + "description": "Transforms text into Leetspeak, a form of encoding that replaces letters with similar-looking numbers or symbols." }, { "name": "ROT13", "value": "rot13", - "description": "Represents the ROT13 cipher, a substitution cipher that replaces a letter with the 13th letter after it in the alphabet." + "description": "Applies the ROT13 cipher, a simple substitution cipher that shifts characters by 13 positions." }, { "name": "Morse", "value": "morse", - "description": "Represents the morse code, a method for encoding text characters as sequences of dots and dashes." + "description": "Encodes text into Morse code, using dots and dashes to represent characters." }, { "name": "Url", "value": "url", - "description": "Represents the url encoding, a method for encoding special characters in URLs." + "description": "Encodes text into URL format." 
}, { "name": "Baseline", "value": "baseline", - "description": "Represents the baseline attack strategy, which is a standard or reference point for comparison." + "description": "Represents the baseline direct adversarial probing, which is used by attack strategies as the attack objective." } ] }