diff --git a/src/lighteval/tasks/multilingual/tasks.py b/src/lighteval/tasks/multilingual/tasks.py index e3e1cab2d..cf1b4cce6 100644 --- a/src/lighteval/tasks/multilingual/tasks.py +++ b/src/lighteval/tasks/multilingual/tasks.py @@ -463,7 +463,8 @@ Language.TURKISH, Language.VIETNAMESE, Language.CHINESE, - # Optionally: Haitian, Quechu + Language.HAITIAN, + Language.QUECHUA, ] for formulation in [MCFFormulation(), CFFormulation(), HybridFormulation()] ] diff --git a/src/lighteval/tasks/templates/nli.py b/src/lighteval/tasks/templates/nli.py index 5c7abec06..842460306 100644 --- a/src/lighteval/tasks/templates/nli.py +++ b/src/lighteval/tasks/templates/nli.py @@ -244,6 +244,8 @@ def prompt_fn(line: dict, task_name: str): choices_str = f"{translation_literals.comma}{translation_literals.word_space}".join(rearanged_labales[:-1]) hypothesis = f"{hypothesis.rstrip(PUNCT)}{translation_literals.sentence_space}{choices_str}{translation_literals.word_space}{translation_literals.or_word}{translation_literals.word_space}{rearanged_labales[-1]}{translation_literals.question_mark}" + # (hynky1999): Ideally we would not compute logprobs of the Yes/No/Also in CF formulation. However, as of right now lighteval doesn't allow us to + # use multi-context. 
row = { "instruction": input_data.get("instruction", ""), "premise": premise, diff --git a/src/lighteval/tasks/templates/utils/translation_literals.py b/src/lighteval/tasks/templates/utils/translation_literals.py index 72a7cdc7a..0b306e1d0 100644 --- a/src/lighteval/tasks/templates/utils/translation_literals.py +++ b/src/lighteval/tasks/templates/utils/translation_literals.py @@ -74,6 +74,162 @@ def __getattribute__(self, name: str) -> str: TRANSLATION_LITERALS: dict[Language, TranslationLiterals] = { + Language.AFRIKAANS: TranslationLiterals(language=Language.AFRIKAANS), + Language.ALBANIAN: TranslationLiterals(language=Language.ALBANIAN), + Language.AMHARIC: TranslationLiterals(language=Language.AMHARIC), + Language.ARABIC: TranslationLiterals( + language=Language.ARABIC, + question_word="سؤال", + answer="إجابة", + confirmation_word="صحيح", + yes="نعم", + no="لا", + also="كذلك", + cause_word="لأن", + effect_word="لذلك", + true="صحيح", + false="خاطئ", + neither="لا هذا ولا ذاك", + or_word="أو", + full_stop=".", + comma="،", + question_mark="؟", + exclamation_mark="!", + word_space=" ", + sentence_space=" ", + colon=":", + indices=["أ", "ب", "ج", "د", "هـ", "و", "ز", "ح"], + ), + Language.ARMENIAN: TranslationLiterals(language=Language.ARMENIAN), + Language.ASSAMESE: TranslationLiterals(language=Language.ASSAMESE), + Language.AZERBAIJANI: TranslationLiterals(language=Language.AZERBAIJANI), + Language.BASHKIR: TranslationLiterals(language=Language.BASHKIR), + Language.BASQUE: TranslationLiterals( + language=Language.BASQUE, + question_word="galdera", + answer="erantzuna", + confirmation_word="ezta", + yes="bai", + no="ez", + also="halaber", + cause_word="izan ere", + effect_word="beraz", + or_word="ala", + true="egia", + false="faltsua", + neither="bat ere ez", + full_stop=".", + comma=",", + question_mark="?", + exclamation_mark="!", + word_space=" ", + sentence_space=" ", + colon=":", + semicolon=";", + ), + Language.BELARUSIAN: 
TranslationLiterals(language=Language.BELARUSIAN), + Language.BENGALI: TranslationLiterals(language=Language.BENGALI, question_word="প্রশ্ন"), + Language.BIHARI: TranslationLiterals(language=Language.BIHARI), # Deprecated + Language.BOSNIAN: TranslationLiterals(language=Language.BOSNIAN), + Language.BRETON: TranslationLiterals(language=Language.BRETON), + Language.BULGARIAN: TranslationLiterals(language=Language.BULGARIAN), + Language.BURMESE: TranslationLiterals(language=Language.BURMESE), + Language.CATALAN: TranslationLiterals(language=Language.CATALAN), + Language.CEBUANO: TranslationLiterals(language=Language.CEBUANO), + Language.CHINESE: TranslationLiterals( + language=Language.CHINESE, + question_word="问题", + answer="答案", + confirmation_word="对吗", + yes="是的", + no="不是", + also="而且", + cause_word="因为", + effect_word="所以", + true="真", + false="假", + neither="都不是", + or_word="或", + full_stop="。", + comma=",", + question_mark="?", + exclamation_mark="!", + word_space="", + sentence_space="", + colon=":", + indices=["①", "②", "③", "④", "⑤", "⑥", "⑦", "⑧", "⑨", "⑩"], + ), + Language.CROATIAN: TranslationLiterals( + language=Language.CROATIAN, + question_word="pitanje", + answer="odgovor", + confirmation_word="zar ne", + yes="da", + no="ne", + also="također", + cause_word="jer", + effect_word="dakle", + or_word="ili", + true="točno", + false="netočno", + neither="ništa od navedenog", + full_stop=".", + comma=",", + question_mark="?", + exclamation_mark="!", + word_space=" ", + sentence_space=" ", + colon=":", + semicolon=";", + ), + Language.CZECH: TranslationLiterals( + language=Language.CZECH, + question_word="otázka", + answer="odpověď", + confirmation_word="že ano", + yes="ano", + no="ne", + also="navíc", + cause_word="protože", + effect_word="a tedy", + or_word="nebo", + true="pravda", + false="nepravda", + neither="ani jedno", + full_stop=".", + comma=",", + question_mark="?", + exclamation_mark="!", + word_space=" ", + sentence_space=" ", + colon=":", + 
semicolon=";", + ), + Language.DANISH: TranslationLiterals(language=Language.DANISH), + Language.DIVEHI: TranslationLiterals(language=Language.DIVEHI), + Language.DUTCH: TranslationLiterals( + language=Language.DUTCH, + question_word="vraag", + answer="antwoord", + confirmation_word="toch", + yes="ja", + no="nee", + also="ook", + cause_word="want", + effect_word="dus", + or_word="of", + true="waar", + false="onwaar", + neither="geen van beide", + full_stop=".", + comma=",", + question_mark="?", + exclamation_mark="!", + word_space=" ", + sentence_space=" ", + colon=":", + semicolon=";", + ), Language.ENGLISH: TranslationLiterals( language=Language.ENGLISH, question_word="question", @@ -96,34 +252,27 @@ def __getattribute__(self, name: str) -> str: colon=":", or_word="or", ), - Language.ARABIC: TranslationLiterals( - language=Language.ARABIC, - question_word="سؤال", - answer="إجابة", - confirmation_word="صحيح", - yes="نعم", - no="لا", - also="كذلك", - cause_word="لأن", - effect_word="لذلك", - full_stop=".", - comma="،", - question_mark="؟", - exclamation_mark="!", - word_space=" ", - sentence_space=" ", - colon=":", + Language.ESPERANTO: TranslationLiterals(language=Language.ESPERANTO), + Language.ESTONIAN: TranslationLiterals( + # From https://github.com/EleutherAI/lm-evaluation-harness/blob/0845b588303f1f59af98dd1c5bdbd78a9e75a1e2/lm_eval/tasks/xcopa/utils.py + language=Language.ESTONIAN, + cause_word="sest", + effect_word="seetõttu", ), - Language.SWAHILI: TranslationLiterals( - language=Language.SWAHILI, - question_word="swali", - answer="jibu", - confirmation_word="sahihi", - yes="ndiyo", - no="hapana", - also="pia", - cause_word="kwa sababu", - effect_word="kwa hiyo", + Language.FINNISH: TranslationLiterals( + language=Language.FINNISH, + question_word="kysymys", + answer="vastaus", + confirmation_word="eikö niin", + yes="kyllä", + no="ei", + also="myös", + cause_word="koska", + effect_word="siksi", + or_word="tai", + true="totta", + false="tarua", + 
neither="ei kumpikaan", full_stop=".", comma=",", question_mark="?", @@ -131,6 +280,7 @@ def __getattribute__(self, name: str) -> str: word_space=" ", sentence_space=" ", colon=":", + semicolon=";", ), Language.FRENCH: TranslationLiterals( language=Language.FRENCH, @@ -142,6 +292,10 @@ def __getattribute__(self, name: str) -> str: also="de plus", cause_word="parce que", effect_word="donc", + or_word="ou", + true="vrai", + false="faux", + neither="aucun des deux", full_stop=".", comma=",", question_mark="?", @@ -150,16 +304,22 @@ def __getattribute__(self, name: str) -> str: sentence_space=" ", colon=":", ), - Language.TELUGU: TranslationLiterals( - language=Language.TELUGU, - question_word="ప్రశ్న", - answer="జవాబు", - confirmation_word="కదా", - yes="అవును", - no="కాదు", - also="అలాగే", - cause_word="ఎందుకంటే", - effect_word="అందువలన", + Language.GALICIAN: TranslationLiterals(language=Language.GALICIAN), + Language.GEORGIAN: TranslationLiterals(language=Language.GEORGIAN), + Language.GERMAN: TranslationLiterals( + language=Language.GERMAN, + question_word="frage", + answer="antwort", + confirmation_word="richtig", + yes="ja", + no="nein", + also="auch", + cause_word="weil", + effect_word="deshalb", + or_word="oder", + true="wahr", + false="falsch", + neither="weder noch", full_stop=".", comma=",", question_mark="?", @@ -167,7 +327,39 @@ def __getattribute__(self, name: str) -> str: word_space=" ", sentence_space=" ", colon=":", + semicolon=";", + ), + Language.GREEK: TranslationLiterals( + language=Language.GREEK, + question_word="ερώτηση", + answer="απάντηση", + confirmation_word="σωστά", + yes="ναι", + no="όχι", + also="επίσης", + cause_word="επειδή", + effect_word="άρα", + or_word="ή", + true="σωστό", + false="λάθος", + neither="καμία απάντηση", + full_stop=".", + comma=",", + question_mark=";", + exclamation_mark="!", + word_space=" ", + sentence_space=" ", + colon=":", + semicolon="·", + ), + Language.GUJARATI: TranslationLiterals(language=Language.GUJARATI), 
+ Language.HAITIAN: TranslationLiterals( + # From https://github.com/EleutherAI/lm-evaluation-harness/blob/0845b588303f1f59af98dd1c5bdbd78a9e75a1e2/lm_eval/tasks/xcopa/utils.py + language=Language.HAITIAN, + cause_word="poukisa", + effect_word="donk sa", ), + Language.HEBREW: TranslationLiterals(language=Language.HEBREW), Language.HINDI: TranslationLiterals( language=Language.HINDI, question_word="सवाल", @@ -178,6 +370,10 @@ def __getattribute__(self, name: str) -> str: also="साथ ही", cause_word="क्योंकि", effect_word="इसलिए", + true="सत्य", + false="असत्य", + neither="न तो यह, न वह", + or_word="या", full_stop="।", comma=",", question_mark="?", @@ -185,25 +381,208 @@ def __getattribute__(self, name: str) -> str: word_space=" ", sentence_space=" ", colon=":", + indices=["क", "ख", "ग", "घ", "ङ", "च"], ), - Language.CHINESE: TranslationLiterals( - language=Language.CHINESE, - question_word="问题", - answer="答案", - confirmation_word="是不是", - yes="是的", - no="不是", - also="而且", - cause_word="因为", - effect_word="所以", + Language.HUNGARIAN: TranslationLiterals( + language=Language.HUNGARIAN, + question_word="kérdés", + answer="válasz", + confirmation_word="ugye", + yes="igen", + no="nem", + also="is", + cause_word="mert", + effect_word="ezért", + or_word="vagy", + true="igaz", + false="hamis", + neither="egyik sem", + full_stop=".", + comma=",", + question_mark="?", + exclamation_mark="!", + word_space=" ", + sentence_space=" ", + colon=":", + semicolon=";", + ), + Language.ICELANDIC: TranslationLiterals(language=Language.ICELANDIC), + Language.INDONESIAN: TranslationLiterals( + language=Language.INDONESIAN, + question_word="pertanyaan", + answer="jawaban", + confirmation_word="kan", + yes="ya", + no="tidak", + also="juga", + cause_word="karena", + effect_word="maka", + or_word="atau", + true="benar", + false="salah", + neither="tidak satu pun", + full_stop=".", + comma=",", + question_mark="?", + exclamation_mark="!", + word_space=" ", + sentence_space=" ", + colon=":", + 
semicolon=";", + ), + Language.IRISH: TranslationLiterals(language=Language.IRISH), + Language.ITALIAN: TranslationLiterals( + language=Language.ITALIAN, + question_word="domanda", + answer="risposta", + confirmation_word="vero", + yes="sì", + no="no", + also="inoltre", + cause_word="perchè", + effect_word="quindi", + or_word="o", + true="vero", + false="falso", + neither="nessuno dei due", + full_stop=".", + comma=",", + question_mark="?", + exclamation_mark="!", + word_space=" ", + sentence_space=" ", + colon=":", + semicolon=";", + ), + Language.JAPANESE: TranslationLiterals( + language=Language.JAPANESE, + question_word="質問", + answer="回答", + confirmation_word="でしょうか", + yes="はい", + no="いいえ", + also="また", + cause_word="なので", + effect_word="なぜなら", + or_word="または", + true="正解", + false="不正解", + neither="どちらでもない", full_stop="。", - comma=",", + comma="、", question_mark="?", exclamation_mark="!", word_space="", sentence_space="", colon=":", + semicolon=";", ), + Language.JAVANESE: TranslationLiterals(language=Language.JAVANESE), + Language.KANNADA: TranslationLiterals(language=Language.KANNADA), + Language.KAZAKH: TranslationLiterals(language=Language.KAZAKH), + Language.KHMER: TranslationLiterals(language=Language.KHMER), + Language.KIRGHIZ: TranslationLiterals(language=Language.KIRGHIZ), + Language.KOREAN: TranslationLiterals( + language=Language.KOREAN, + confirmation_word="맞죠", + yes="예", + no="아니오", + ), + Language.KURDISH: TranslationLiterals(language=Language.KURDISH), + Language.LAO: TranslationLiterals(language=Language.LAO), + Language.LATIN: TranslationLiterals(language=Language.LATIN), + Language.LATVIAN: TranslationLiterals(language=Language.LATVIAN), + Language.LITHUANIAN: TranslationLiterals(language=Language.LITHUANIAN), + Language.LUXEMBOURGISH: TranslationLiterals(language=Language.LUXEMBOURGISH), + Language.MACEDONIAN: TranslationLiterals(language=Language.MACEDONIAN), + Language.MALAGASY: TranslationLiterals(language=Language.MALAGASY), + 
Language.MALAY: TranslationLiterals(language=Language.MALAY), + Language.MALAYALAM: TranslationLiterals(language=Language.MALAYALAM), + Language.MALTESE: TranslationLiterals(language=Language.MALTESE), + Language.MARATHI: TranslationLiterals(language=Language.MARATHI), + Language.NEPALI: TranslationLiterals(language=Language.NEPALI), + Language.NORWEGIAN: TranslationLiterals( + language=Language.NORWEGIAN, + question_word="spørsmål", + answer="svar", + confirmation_word="ikke sant", + yes="ja", + no="nei", + also="i tillegg", + cause_word="fordi", + effect_word="derfor", + or_word="eller", + true="sant", + false="usant", + neither="ingen av delene", + full_stop=".", + comma=",", + question_mark="?", + exclamation_mark="!", + word_space=" ", + sentence_space=" ", + colon=":", + semicolon=";", + ), + Language.NORWEGIAN_NYNORSK: TranslationLiterals(language=Language.NORWEGIAN_NYNORSK), + Language.OCCITAN: TranslationLiterals(language=Language.OCCITAN), + Language.ORIYA: TranslationLiterals(language=Language.ORIYA), + Language.PASHTO: TranslationLiterals(language=Language.PASHTO), + Language.PERSIAN: TranslationLiterals(language=Language.PERSIAN), + Language.POLISH: TranslationLiterals( + language=Language.POLISH, + question_word="pytanie", + answer="odpowiedź", + confirmation_word="prawda", + yes="tak", + no="nie", + also="ponadto", + cause_word="ponieważ", + effect_word="więc", + or_word="lub", + true="prawda", + false="fałsz", + neither="ani jedno ani drugie", + full_stop=".", + comma=",", + question_mark="?", + exclamation_mark="!", + word_space=" ", + sentence_space=" ", + colon=":", + semicolon=";", + ), + Language.PORTUGUESE: TranslationLiterals( + language=Language.PORTUGUESE, + question_word="pergunta", + answer="resposta", + confirmation_word="certo", + yes="sim", + no="não", + also="adicionalmente", + cause_word="porque", + effect_word="logo", + or_word="ou", + true="verdadeiro", + false="falso", + neither="nenhum", + full_stop=".", + comma=",", + 
question_mark="?", + exclamation_mark="!", + word_space=" ", + sentence_space=" ", + colon=":", + semicolon=";", + ), + Language.PUNJABI: TranslationLiterals(language=Language.PUNJABI), + Language.QUECHUA: TranslationLiterals( + # From https://github.com/EleutherAI/lm-evaluation-harness/blob/0845b588303f1f59af98dd1c5bdbd78a9e75a1e2/lm_eval/tasks/xcopa/utils.py + language=Language.QUECHUA, + cause_word="imataq", + effect_word="chaymi", + ), + Language.ROMANIAN: TranslationLiterals(language=Language.ROMANIAN), Language.RUSSIAN: TranslationLiterals( language=Language.RUSSIAN, question_word="вопрос", @@ -214,6 +593,101 @@ def __getattribute__(self, name: str) -> str: also="к тому же", cause_word="потому что", effect_word="поэтому", + true="истина", + false="ложь", + neither="ни то ни другое", + or_word="или", + full_stop=".", + comma=",", + question_mark="?", + exclamation_mark="!", + word_space=" ", + sentence_space=" ", + colon=":", + indices=["А", "Б", "В", "Г", "Д", "Е"], + ), + Language.SANSKRIT: TranslationLiterals(language=Language.SANSKRIT), + # Latin Serbian script, kept for the future when scripts are separated + # Language.SERBIAN_LATIN: TranslationLiterals(language=Language.SERBIAN_LATIN, + # question_word="pitanje", + # answer="odgovor", + # confirmation_word="zar ne", + # yes="da", + # no="ne", + # also="takođe", + # cause_word="jer", + # effect_word="dakle", + # or_word="ili", + # true="tačno", + # false="netačno", + # neither="ništa od navedenog", + # ), + Language.SERBIAN: TranslationLiterals( + language=Language.SERBIAN, + question_word="питање", + answer="одговор", + confirmation_word="зар не", + yes="да", + no="не", + also="такође", + cause_word="јер", + effect_word="дакле", + or_word="или", + true="тачно", + false="нетачно", + neither="ништа од наведеног", + full_stop=".", + comma=",", + question_mark="?", + exclamation_mark="!", + word_space=" ", + sentence_space=" ", + colon=":", + semicolon=";", + ), + Language.SERBOCROATIAN: 
TranslationLiterals(language=Language.SERBOCROATIAN), # Deprecated + Language.SINDHI: TranslationLiterals(language=Language.SINDHI), + Language.SINHALA: TranslationLiterals(language=Language.SINHALA), + Language.SLOVAK: TranslationLiterals( + language=Language.SLOVAK, + question_word="otázka", + answer="odpoveď", + confirmation_word="že áno", + yes="áno", + no="nie", + also="taktiež", + cause_word="pretože", + effect_word="takže", + or_word="alebo", + true="pravda", + false="nepravda", + neither="ani jeden", + full_stop=".", + comma=",", + question_mark="?", + exclamation_mark="!", + word_space=" ", + sentence_space=" ", + colon=":", + semicolon=";", + ), + Language.SOMALI: TranslationLiterals(language=Language.SOMALI), + Language.SORANI: TranslationLiterals(language=Language.SORANI), + Language.SOUTH_AZERBAIJANI: TranslationLiterals(language=Language.SOUTH_AZERBAIJANI), + Language.SPANISH: TranslationLiterals( + language=Language.SPANISH, + question_word="pregunta", + answer="respuesta", + confirmation_word="cierto", + yes="sí", + no="no", + also="también", + cause_word="porque", + effect_word="por lo tanto", + or_word="o", + true="verdadero", + false="falso", + neither="ninguno", full_stop=".", comma=",", question_mark="?", @@ -221,6 +695,84 @@ def __getattribute__(self, name: str) -> str: word_space=" ", sentence_space=" ", colon=":", + semicolon=";", + ), + Language.SWAHILI: TranslationLiterals( + language=Language.SWAHILI, + question_word="swali", + answer="jibu", + confirmation_word="sahihi", + yes="ndiyo", + no="hapana", + also="pia", + cause_word="kwa sababu", + effect_word="kwa hiyo", + true="kweli", + false="uongo", + neither="hakuna kati ya hizo", + or_word="au", + full_stop=".", + comma=",", + question_mark="?", + exclamation_mark="!", + word_space=" ", + sentence_space=" ", + colon=":", + ), + Language.SWEDISH: TranslationLiterals( + language=Language.SWEDISH, + question_word="fråga", + answer="svar", + confirmation_word="eller hur", + yes="ja", + 
no="nej", + also="också", + cause_word="eftersom", + effect_word="därför att", + or_word="eller", + true="sant", + false="falskt", + neither="ingendera", + full_stop=".", + comma=",", + question_mark="?", + exclamation_mark="!", + word_space=" ", + sentence_space=" ", + colon=":", + semicolon=";", + ), + Language.TAGALOG: TranslationLiterals(language=Language.TAGALOG), + Language.TAJIK: TranslationLiterals(language=Language.TAJIK), + Language.TAMIL: TranslationLiterals( + # From https://github.com/EleutherAI/lm-evaluation-harness/blob/0845b588303f1f59af98dd1c5bdbd78a9e75a1e2/lm_eval/tasks/xcopa/utils.py + language=Language.TAMIL, + cause_word="காரணமாக", + effect_word="எனவே", + ), + Language.TATAR: TranslationLiterals(language=Language.TATAR), + Language.TELUGU: TranslationLiterals( + language=Language.TELUGU, + question_word="ప్రశ్న", + answer="జవాబు", + confirmation_word="కదా", + yes="అవును", + no="కాదు", + also="అలాగే", + cause_word="ఎందుకంటే", + effect_word="అందువలన", + or_word="లేదా", + true="నిజం", + false="తప్పు", + neither="ఏదీ కాదు", + full_stop=".", + comma=",", + question_mark="?", + exclamation_mark="!", + word_space=" ", + sentence_space=" ", + colon=":", + indices=["ఎ", "బి", "సి", "డి", "ఇ"], ), Language.THAI: TranslationLiterals( language=Language.THAI, @@ -232,6 +784,10 @@ def __getattribute__(self, name: str) -> str: also="และ", cause_word="เพราะ", effect_word="ดังนั้น", + true="จริง", + false="เท็จ", + neither="ไม่ใช่ทั้งสองอย่าง", + or_word="หรือ", full_stop=".", comma=",", question_mark="?", @@ -239,6 +795,7 @@ def __getattribute__(self, name: str) -> str: word_space="", sentence_space=" ", colon=":", + indices=["๑", "๒", "๓", "๔", "๕", "๖", "๗", "๘", "๙", "๐"], ), Language.TURKISH: TranslationLiterals( language=Language.TURKISH, @@ -250,6 +807,10 @@ def __getattribute__(self, name: str) -> str: also="ayrıca", cause_word="çünkü", effect_word="bu yüzden", + true="doğru", + false="yanlış", + neither="hiçbiri", + or_word="veya", full_stop=".", 
comma=",", question_mark="?", @@ -258,96 +819,80 @@ def __getattribute__(self, name: str) -> str: sentence_space=" ", colon=":", ), - Language.SPANISH: TranslationLiterals(language=Language.SPANISH), - Language.PORTUGUESE: TranslationLiterals(language=Language.PORTUGUESE), - Language.ITALIAN: TranslationLiterals(language=Language.ITALIAN), - Language.ROMANIAN: TranslationLiterals(language=Language.ROMANIAN), - Language.GERMAN: TranslationLiterals(language=Language.GERMAN), - Language.LATIN: TranslationLiterals(language=Language.LATIN), - Language.CZECH: TranslationLiterals(language=Language.CZECH), - Language.DANISH: TranslationLiterals(language=Language.DANISH), - Language.FINNISH: TranslationLiterals(language=Language.FINNISH), - Language.GREEK: TranslationLiterals(language=Language.GREEK), - Language.NORWEGIAN: TranslationLiterals(language=Language.NORWEGIAN), - Language.POLISH: TranslationLiterals(language=Language.POLISH), - Language.SLOVENIAN: TranslationLiterals(language=Language.SLOVENIAN), - Language.DUTCH: TranslationLiterals(language=Language.DUTCH), - Language.JAPANESE: TranslationLiterals(language=Language.JAPANESE), - Language.VIETNAMESE: TranslationLiterals(language=Language.VIETNAMESE), - Language.INDONESIAN: TranslationLiterals(language=Language.INDONESIAN), - Language.PERSIAN: TranslationLiterals(language=Language.PERSIAN), - Language.KOREAN: TranslationLiterals(language=Language.KOREAN), - Language.BENGALI: TranslationLiterals(language=Language.BENGALI), - Language.TAMIL: TranslationLiterals(language=Language.TAMIL), - Language.HUNGARIAN: TranslationLiterals(language=Language.HUNGARIAN), - Language.UKRAINIAN: TranslationLiterals(language=Language.UKRAINIAN), - Language.SLOVAK: TranslationLiterals(language=Language.SLOVAK), - Language.BULGARIAN: TranslationLiterals(language=Language.BULGARIAN), - Language.CATALAN: TranslationLiterals(language=Language.CATALAN), - Language.CROATIAN: TranslationLiterals(language=Language.CROATIAN), - 
Language.SERBIAN: TranslationLiterals(language=Language.SERBIAN), - Language.LITHUANIAN: TranslationLiterals(language=Language.LITHUANIAN), - Language.ESTONIAN: TranslationLiterals(language=Language.ESTONIAN), - Language.HEBREW: TranslationLiterals(language=Language.HEBREW), - Language.LATVIAN: TranslationLiterals(language=Language.LATVIAN), - Language.SERBOCROATIAN: TranslationLiterals(language=Language.SERBOCROATIAN), # Deprecated - Language.ALBANIAN: TranslationLiterals(language=Language.ALBANIAN), - Language.AZERBAIJANI: TranslationLiterals(language=Language.AZERBAIJANI), - Language.ICELANDIC: TranslationLiterals(language=Language.ICELANDIC), - Language.MACEDONIAN: TranslationLiterals(language=Language.MACEDONIAN), - Language.GEORGIAN: TranslationLiterals(language=Language.GEORGIAN), - Language.GALICIAN: TranslationLiterals(language=Language.GALICIAN), - Language.ARMENIAN: TranslationLiterals(language=Language.ARMENIAN), - Language.BASQUE: TranslationLiterals(language=Language.BASQUE), - Language.MALAY: TranslationLiterals(language=Language.MALAY), - Language.TAGALOG: TranslationLiterals(language=Language.TAGALOG), - Language.JAVANESE: TranslationLiterals(language=Language.JAVANESE), - Language.PUNJABI: TranslationLiterals(language=Language.PUNJABI), - Language.BIHARI: TranslationLiterals(language=Language.BIHARI), # Deprecated - Language.GUJARATI: TranslationLiterals(language=Language.GUJARATI), - Language.YORUBA: TranslationLiterals(language=Language.YORUBA), - Language.MARATHI: TranslationLiterals(language=Language.MARATHI), - Language.URDU: TranslationLiterals(language=Language.URDU), - Language.AMHARIC: TranslationLiterals(language=Language.AMHARIC), - Language.MALAYALAM: TranslationLiterals(language=Language.MALAYALAM), - Language.KANNADA: TranslationLiterals(language=Language.KANNADA), - Language.NEPALI: TranslationLiterals(language=Language.NEPALI), - Language.KAZAKH: TranslationLiterals(language=Language.KAZAKH), - Language.BELARUSIAN: 
TranslationLiterals(language=Language.BELARUSIAN), - Language.BURMESE: TranslationLiterals(language=Language.BURMESE), - Language.ESPERANTO: TranslationLiterals(language=Language.ESPERANTO), + Language.TURKMEN: TranslationLiterals(language=Language.TURKMEN), + Language.UKRAINIAN: TranslationLiterals( + language=Language.UKRAINIAN, + question_word="питання", + answer="відповідь", + confirmation_word="правда", + yes="так", + no="ні", + also="також", + cause_word="тому що", + effect_word="отже", + or_word="або", + true="правда", + false="неправда", + neither="ні те, ні інше", + full_stop=".", + comma=",", + question_mark="?", + exclamation_mark="!", + word_space=" ", + sentence_space=" ", + colon=":", + semicolon=";", + ), + Language.URDU: TranslationLiterals( + language=Language.URDU, + question_word="سوال", + answer="جواب", + confirmation_word="نا", + yes="ہاں", + no="نہیں", + also="اور", + cause_word="کیونکہ", + effect_word="اس لئے", + or_word="یا", + true="درست", + false="غلط", + neither="کوئی نہیں", + full_stop="۔", + comma="،", + question_mark="؟", + exclamation_mark="!", + word_space=" ", + sentence_space=" ", + colon=":", + semicolon="؛", + ), Language.UZBEK: TranslationLiterals(language=Language.UZBEK), - Language.KHMER: TranslationLiterals(language=Language.KHMER), - Language.TAJIK: TranslationLiterals(language=Language.TAJIK), + Language.VIETNAMESE: TranslationLiterals( + language=Language.VIETNAMESE, + question_word="câu hỏi", + answer="trả lời", + confirmation_word="đúng", + yes="có", + no="không", + also="cũng", + cause_word="vì", + effect_word="do đó", + or_word="hoặc", + true="đúng", + false="sai", + neither="không đúng cũng không sai", + full_stop=".", + comma=",", + question_mark="?", + exclamation_mark="!", + word_space=" ", + sentence_space=" ", + colon=":", + semicolon=";", + ), + Language.WAR: TranslationLiterals(language=Language.WAR), Language.WELSH: TranslationLiterals(language=Language.WELSH), - Language.NORWEGIAN_NYNORSK: 
TranslationLiterals(language=Language.NORWEGIAN_NYNORSK), - Language.BOSNIAN: TranslationLiterals(language=Language.BOSNIAN), - Language.SINHALA: TranslationLiterals(language=Language.SINHALA), - Language.TATAR: TranslationLiterals(language=Language.TATAR), - Language.AFRIKAANS: TranslationLiterals(language=Language.AFRIKAANS), - Language.ORIYA: TranslationLiterals(language=Language.ORIYA), - Language.KIRGHIZ: TranslationLiterals(language=Language.KIRGHIZ), - Language.IRISH: TranslationLiterals(language=Language.IRISH), - Language.OCCITAN: TranslationLiterals(language=Language.OCCITAN), - Language.KURDISH: TranslationLiterals(language=Language.KURDISH), - Language.LAO: TranslationLiterals(language=Language.LAO), - Language.LUXEMBOURGISH: TranslationLiterals(language=Language.LUXEMBOURGISH), - Language.BASHKIR: TranslationLiterals(language=Language.BASHKIR), Language.WESTERN_FRISIAN: TranslationLiterals(language=Language.WESTERN_FRISIAN), - Language.PASHTO: TranslationLiterals(language=Language.PASHTO), - Language.MALTESE: TranslationLiterals(language=Language.MALTESE), - Language.BRETON: TranslationLiterals(language=Language.BRETON), - Language.ASSAMESE: TranslationLiterals(language=Language.ASSAMESE), - Language.MALAGASY: TranslationLiterals(language=Language.MALAGASY), - Language.DIVEHI: TranslationLiterals(language=Language.DIVEHI), Language.YIDDISH: TranslationLiterals(language=Language.YIDDISH), - Language.SOMALI: TranslationLiterals(language=Language.SOMALI), - Language.SANSKRIT: TranslationLiterals(language=Language.SANSKRIT), - Language.SINDHI: TranslationLiterals(language=Language.SINDHI), - Language.TURKMEN: TranslationLiterals(language=Language.TURKMEN), - Language.SOUTH_AZERBAIJANI: TranslationLiterals(language=Language.SOUTH_AZERBAIJANI), - Language.SORANI: TranslationLiterals(language=Language.SORANI), - Language.CEBUANO: TranslationLiterals(language=Language.CEBUANO), - Language.WAR: TranslationLiterals(language=Language.WAR), - Language.SWEDISH: 
TranslationLiterals(language=Language.SWEDISH), + Language.YORUBA: TranslationLiterals(language=Language.YORUBA), } diff --git a/src/lighteval/utils/language.py b/src/lighteval/utils/language.py index 9474f419e..6fb444933 100644 --- a/src/lighteval/utils/language.py +++ b/src/lighteval/utils/language.py @@ -78,6 +78,7 @@ class Language(Enum): URDU = "urd" AMHARIC = "amh" TELUGU = "tel" + HAITIAN = "hti" MALAYALAM = "mal" KANNADA = "kan" NEPALI = "nep" @@ -113,6 +114,7 @@ class Language(Enum): SOMALI = "som" SANSKRIT = "san" SINDHI = "snd" + QUECHUA = "que" TURKMEN = "tuk" SOUTH_AZERBAIJANI = "azb" SORANI = "ckb" @@ -216,7 +218,7 @@ class Language(Enum): # 'zul': Language.ZULU, # 'bod': Language.TIBETAN, "eng": Language.ENGLISH, - # 'hat': Language.HAITIAN, + "hat": Language.HAITIAN, # 'ilo': Language.ILOCANO, "kaz": Language.KAZAKH, "lit": Language.LITHUANIAN, @@ -241,6 +243,7 @@ class Language(Enum): "ary": Language.ARABIC, "cat": Language.CATALAN, "eus": Language.BASQUE, + "que": Language.QUECHUA, "heb": Language.HEBREW, "isl": Language.ICELANDIC, # 'khk': Language.MONGOLIAN,