Skip to content

Commit

Permalink
Low Resource Languages Buff (#478)
Browse files Browse the repository at this point in the history
* Add low resource languages buff, include deepl dependency in requirements.txt

* add doc stubs, copy req into pyproject, black

* add uri to LRL buff

* fix silly mutability/persistence bug & clarify in buffs.base; suppress error msg after first print; log orig prompt & dest lang

---------

Co-authored-by: Leon Derczynski <[email protected]>
  • Loading branch information
erickgalinkin and leondz authored Feb 15, 2024
1 parent df0c8c4 commit adc6fbc
Show file tree
Hide file tree
Showing 6 changed files with 74 additions and 3 deletions.
1 change: 1 addition & 0 deletions docs/source/buffs.rst
Original file line number Diff line number Diff line change
Expand Up @@ -7,5 +7,6 @@ garak.buffs
garak.buffs
garak.buffs.base
garak.buffs.encoding
garak.buffs.low_resource_languages
garak.buffs.lowercase
garak.buffs.paraphrase
8 changes: 8 additions & 0 deletions docs/source/garak.buffs.low_resource_languages.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
garak.buffs.low_resource_languages
==================================

.. automodule:: garak.buffs.low_resource_languages
   :members:
   :undoc-members:
   :show-inheritance:

4 changes: 2 additions & 2 deletions garak/buffs/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,7 @@ def __init__(self) -> None:
print(
f"🦾 loading {Style.BRIGHT}{Fore.LIGHTGREEN_EX}buff: {Style.RESET_ALL}{self.fullname}"
)
logging.info(f"buff init: {self}")
logging.info("buff init: %s", self)

def _derive_new_attempt(
self, source_attempt: garak.attempt.Attempt, seq=-1
Expand Down Expand Up @@ -70,7 +70,7 @@ def transform(
self, attempt: garak.attempt.Attempt
) -> Iterable[garak.attempt.Attempt]:
"""attempt copying is handled elsewhere. isn't that nice"""
yield attempt
yield self._derive_new_attempt(attempt)

def buff(
self, source_attempts: List[garak.attempt.Attempt], probename=""
Expand Down
60 changes: 60 additions & 0 deletions garak/buffs/low_resource_languages.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
#!/usr/bin/env python3

# SPDX-FileCopyrightText: Copyright (c) 2023 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0

"""Buff that translates prompts into low-resource languages using DeepL."""

import logging
from collections.abc import Iterable
from deepl import Translator
from os import getenv

import garak.attempt
from garak.buffs.base import Buff

# Low resource languages supported by DeepL, as DeepL target-language codes.
# Every code documented here must also appear in the list below, since the
# list is what the buff actually iterates over.
# ET = Estonian
# ID = Indonesian
# LT = Lithuanian
# LV = Latvian
# SK = Slovak
# SL = Slovenian
LOW_RESOURCE_LANGUAGES = ["ET", "ID", "LT", "LV", "SK", "SL"]


class LRLBuff(Buff):
    """Low Resource Language buff

    Uses the DeepL API to translate prompts into low-resource languages.

    The DeepL key is read from the ``DEEPL_API_KEY`` environment variable on
    every call to ``transform``. When no usable key is present the buff does
    not translate anything: it reports the problem once and passes the
    attempt through.
    """

    # Reference for the technique this buff implements
    uri = "https://arxiv.org/abs/2310.02446"

    # Set to True on the instance once the missing-key message has been shown,
    # so the message is not repeated for every attempt
    api_key_error_sent = False

    def transform(
        self, attempt: garak.attempt.Attempt
    ) -> Iterable[garak.attempt.Attempt]:
        """Yield one derived attempt per entry in ``LOW_RESOURCE_LANGUAGES``.

        Each yielded attempt carries the translated prompt, with the
        untranslated text stored in ``notes["original_prompt"]`` and the
        DeepL target code in ``notes["LRL_buff_dest_lang"]``.

        If ``DEEPL_API_KEY`` is unset or empty, a single untranslated derived
        attempt is yielded instead.

        :param attempt: the attempt whose prompt should be translated
        :return: a generator of attempts derived from ``attempt``
        """
        api_key = getenv("DEEPL_API_KEY", None)

        # An empty value cannot authenticate, so treat it like an unset one
        # instead of handing it to the DeepL client.
        if not api_key:
            if not self.api_key_error_sent:
                msg = "DEEPL_API_KEY not set in env, cannot use LRLBuff."
                user_msg = (
                    msg
                    + " If you do not have a DeepL API key, sign up at https://www.deepl.com/pro#developer"
                )
                logging.error(msg)
                print("⚠️ ", user_msg)
                self.api_key_error_sent = True
            # Pass-through goes via _derive_new_attempt, matching the
            # pass-through behaviour of Buff.transform.
            yield self._derive_new_attempt(attempt)
            return

        translator = Translator(api_key)
        prompt = attempt.prompt
        attempt.notes["original_prompt"] = prompt
        try:
            for language in LOW_RESOURCE_LANGUAGES:
                attempt.notes["LRL_buff_dest_lang"] = language
                response = translator.translate_text(prompt, target_lang=language)
                # The translated text is placed on the source attempt only so
                # that the derived attempt picks it up.
                attempt.prompt = response.text
                yield self._derive_new_attempt(attempt)
        finally:
            # Leave the caller's attempt holding its own prompt rather than
            # the last translation, including when translation raises or the
            # generator is closed early.
            attempt.prompt = prompt
1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,7 @@ dependencies = [
"markdown",
"zalgolib>=0.2.2",
"ecoji>=0.1.0",
"deepl==1.17.0",
]

[project.urls]
Expand Down
3 changes: 2 additions & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -24,4 +24,5 @@ torch>=2.1.0
sentencepiece>=0.1.99
markdown
zalgolib>=0.2.2
ecoji>=0.1.0
ecoji>=0.1.0
deepl==1.17.0

0 comments on commit adc6fbc

Please sign in to comment.