-
Notifications
You must be signed in to change notification settings - Fork 231
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
* Add low resource languages buff, include deepl dependency in requirements.txt * add doc stubs, copy req into pyproject, black * add uri to LRL buff * fix silly mutability/persistence bug & clarify in buffs.base; suppress error msg after first print; log orig prompt & dest lang --------- Co-authored-by: Leon Derczynski <[email protected]>
- Loading branch information
1 parent
df0c8c4
commit adc6fbc
Showing
6 changed files
with
74 additions
and
3 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,8 @@ | ||
garak.buffs.low_resource_languages | ||
================================== | ||
|
||
.. automodule:: garak.buffs.low_resource_languages | ||
:members: | ||
:undoc-members: | ||
:show-inheritance: | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,60 @@ | ||
#!/usr/bin/env python3 | ||
|
||
# SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved. | ||
# SPDX-License-Identifier: Apache-2.0 | ||
|
||
"""Buff that translates prompts into low-resource languages using the DeepL API.""" | ||
|
||
import logging | ||
from collections.abc import Iterable | ||
from deepl import Translator | ||
from os import getenv | ||
|
||
import garak.attempt | ||
from garak.buffs.base import Buff | ||
|
||
# Low resource languages supported by DeepL, as DeepL target-language codes.
# ET = Estonian
# ID = Indonesian
# LT = Lithuanian
# LV = Latvian
# SK = Slovak
# SL = Slovenian
# Every code in the legend above must appear in the list below; "LT" was
# documented but previously missing, so Lithuanian was never attempted.
LOW_RESOURCE_LANGUAGES = ["ET", "ID", "LT", "LV", "SK", "SL"]
|
||
|
||
class LRLBuff(Buff):
    """Low Resource Language buff.

    Uses the DeepL API to translate prompts into low-resource languages.
    The API key is read from the ``DEEPL_API_KEY`` environment variable each
    time :meth:`transform` runs.
    """

    uri = "https://arxiv.org/abs/2310.02446"

    # Flipped to True after the missing-key warning has been emitted, so the
    # message is logged/printed only once rather than for every attempt.
    api_key_error_sent = False

    def transform(
        self, attempt: garak.attempt.Attempt
    ) -> Iterable[garak.attempt.Attempt]:
        """Yield translated variants of ``attempt``, one per target language.

        When no usable DeepL API key is configured, a warning is logged and
        printed (first time only) and ``attempt`` is yielded unchanged.

        Otherwise the prompt is translated into each code in
        ``LOW_RESOURCE_LANGUAGES``. The original prompt is stored in
        ``attempt.notes["original_prompt"]`` and the target language code in
        ``attempt.notes["LRL_buff_dest_lang"]`` before each derived attempt
        is produced via ``self._derive_new_attempt``.

        Args:
            attempt: The attempt whose prompt should be translated.

        Yields:
            Either the untouched ``attempt`` (no API key) or one derived
            attempt per target language.
        """
        # An empty or whitespace-only value is as unusable as an unset one, so
        # treat both as "missing" instead of handing a blank key to the DeepL
        # client.
        api_key = (getenv("DEEPL_API_KEY") or "").strip()
        if not api_key:
            if not self.api_key_error_sent:
                msg = "DEEPL_API_KEY not set in env, cannot use LRLBuff."
                user_msg = (
                    msg
                    + " If you do not have a DeepL API key, sign up at https://www.deepl.com/pro#developer"
                )
                logging.error(msg)
                print("⚠️ ", user_msg)
                self.api_key_error_sent = True
            # Pass the attempt through untranslated so the run can continue.
            yield attempt
            return

        translator = Translator(api_key)
        prompt = attempt.prompt
        attempt.notes["original_prompt"] = prompt
        for language in LOW_RESOURCE_LANGUAGES:
            # NOTE(review): the incoming attempt's notes and prompt are
            # overwritten in place on every iteration; this presumably relies
            # on _derive_new_attempt copying them -- confirm in buffs.base.
            attempt.notes["LRL_buff_dest_lang"] = language
            # Always translate from the saved original, not from the
            # previously translated text now sitting in attempt.prompt.
            response = translator.translate_text(prompt, target_lang=language)
            translated_prompt = response.text
            attempt.prompt = translated_prompt
            yield self._derive_new_attempt(attempt)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -55,6 +55,7 @@ dependencies = [ | |
"markdown", | ||
"zalgolib>=0.2.2", | ||
"ecoji>=0.1.0", | ||
"deepl==1.17.0", | ||
] | ||
|
||
[project.urls] | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -24,4 +24,5 @@ torch>=2.1.0 | |
sentencepiece>=0.1.99 | ||
markdown | ||
zalgolib>=0.2.2 | ||
ecoji>=0.1.0 | ||
ecoji>=0.1.0 | ||
deepl==1.17.0 |