Merged
39 commits
5c69eb0
add multilingual dynamic generative metrics
hynky1999 Sep 5, 2024
39c4220
Merge branch 'main' into geneartive_dynamic_metrics
hynky1999 Sep 5, 2024
2a5cdca
Merge branch 'geneartive_dynamic_metrics' into config_templates
hynky1999 Sep 5, 2024
2df9a08
draft
hynky1999 Sep 6, 2024
95729ee
finish multichoice config
hynky1999 Sep 9, 2024
3aa0579
Merge branch 'main' into geneartive_dynamic_metrics
hynky1999 Sep 9, 2024
b8f90a9
update tokenizers + install nltk reqs
hynky1999 Sep 9, 2024
f5a8717
use punkt tab
hynky1999 Sep 9, 2024
227f572
Update src/lighteval/utils/imports.py
hynky1999 Sep 13, 2024
d80b3ba
Update src/lighteval/metrics/normalizations.py
hynky1999 Sep 13, 2024
532bdad
fix imports
Sep 13, 2024
75f7ac5
remove unused import
Sep 13, 2024
f99e330
Merge branch 'main' into geneartive_dynamic_metrics
NathanHB Sep 13, 2024
92daf90
Merge branch 'main' into geneartive_dynamic_metrics
clefourrier Sep 14, 2024
f2a801d
Merge branch 'main' into geneartive_dynamic_metrics
NathanHB Sep 17, 2024
91d9d4f
finish implementation of templates + move stuff around
Sep 23, 2024
9356cc6
resolve nits
Sep 23, 2024
0fbc731
when in rome do as romans do (handle error messages the same way)
Sep 23, 2024
fa1fa83
fix utils
hynky1999 Sep 23, 2024
db36e16
Merge branch 'geneartive_dynamic_metrics' into config_templates
hynky1999 Sep 23, 2024
44aeecf
nicer tests + fix them
hynky1999 Sep 23, 2024
2bff963
nicer todo
hynky1999 Sep 23, 2024
3c9eb21
add nice docstrings 📃
hynky1999 Sep 23, 2024
4216ae2
add even more docstring
hynky1999 Sep 23, 2024
d8f56b8
nit
hynky1999 Sep 23, 2024
f26e88c
fix test
hynky1999 Sep 23, 2024
111d615
add multilingual to dev group
hynky1999 Sep 24, 2024
7ca4239
merge nli, add languages to literals
hynky1999 Sep 25, 2024
22eeddb
translation literals
hynky1999 Sep 25, 2024
2d09256
Merge branch 'geneartive_dynamic_metrics' into config_templates
hynky1999 Sep 26, 2024
ec6aa41
Update src/lighteval/tasks/templates/nli.py
hynky1999 Sep 27, 2024
2652a99
Update src/lighteval/tasks/templates/continuation.py
hynky1999 Sep 27, 2024
f8d2e9c
Update src/lighteval/tasks/templates/multichoice.py
hynky1999 Sep 27, 2024
319615b
nice docstrings + remove unnecessary comments
hynky1999 Sep 27, 2024
a2a9101
Merge branch 'config_templates' of github.com:huggingface/lighteval i…
hynky1999 Sep 27, 2024
47f908c
beautiful docstrings
hynky1999 Sep 27, 2024
c40ab45
Merge remote-tracking branch 'origin/main' into config_templates
hynky1999 Sep 27, 2024
1bc19b8
Merge remote-tracking branch 'origin/main' into config_templates
hynky1999 Sep 27, 2024
d516695
Merge branch 'config_templates' of github.com:huggingface/lighteval i…
hynky1999 Sep 27, 2024
2 changes: 1 addition & 1 deletion pyproject.toml
@@ -91,7 +91,7 @@ tensorboardX = ["tensorboardX"]
vllm = ["vllm", "ray", "more_itertools"]
quality = ["ruff==v0.2.2","pre-commit"]
tests = ["pytest==7.4.0"]
dev = ["lighteval[accelerate,quality,tests]"]
dev = ["lighteval[accelerate,quality,tests,multilingual]"]
extended_tasks = [
"langdetect", # ifeval
"openai", # llm as a judge using openai models
1 change: 1 addition & 0 deletions src/lighteval/tasks/default_prompts.py
@@ -36,6 +36,7 @@

# fmt: off
LETTER_INDICES = ["A", "B", "C", "D", "E", "F", "G", "H", "I", "J", "K", "L", "M", "N", "O", "P", "Q", "R", "S", "T", "U", "V", "W", "X", "Y", "Z"]
+INTEGER_INDICES = list(map(str, list(range(1, 27))))
# fmt: on


188 changes: 188 additions & 0 deletions src/lighteval/tasks/templates/continuation.py
@@ -0,0 +1,188 @@
# MIT License

# Copyright (c) 2024 The HuggingFace Team

# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:

# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.

# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

from typing import Callable

from typing_extensions import NotRequired, TypedDict

from lighteval.tasks.requests import Doc
from lighteval.tasks.templates.utils.adapter_utils import create_adapter_from_dict
from lighteval.tasks.templates.utils.formatting_utils import (
capitalize,
fix_capitalization,
fix_ending_punct,
punctuation_ends_sentence,
)
from lighteval.tasks.templates.utils.formulation import (
CFFormulation,
Formulation,
MCFFormulation,
build_answers,
build_choices,
)
from lighteval.tasks.templates.utils.translation_literals import TRANSLATION_LITERALS
from lighteval.utils.language import Language
from lighteval.utils.utils import as_list


CONTINUATION_QUERY_CF = "{instruction}{context}"

CONTINUATION_QUERY_MCF = "{instruction}{context}\n{options}{answer_word}{colon}"


# Defined for type hinting only
class ContinuationInput(TypedDict):
"""
Input for the continuation task.
Args:
context: The contextualization of choices (e.g. If I ask you a question, you should answer it)
continuations: Possible continuations of the context (e.g. [you should answer it, you should leave])
gold_idx: The index of the correct continuation
instruction (optional): The instruction of the task (e.g. Following is the snippet of a dialogue, choose the most appropriate continuation)
"""

context: str
continuations: list[str]
gold_idx: list[int] | int
instruction: NotRequired[str]


class ContinuationDictAdapter(TypedDict):
"""
Adapter for mapping from the dataset row into the ContinuationInput format.
Args:
context: Column name in the row that contains the contextualization of choices (e.g. If I ask you a question, you should answer it)
continuations: Column name in the row that contains the possible continuations of the context (e.g. [you should answer it, you should leave])
gold_idx: Column name in the row that contains the index of the correct continuation
instruction (optional): Column name in the row that contains the instruction of the task (e.g. Following is the snippet of a dialogue, choose the most appropriate continuation)
"""

context: str
continuations: str
gold_idx: str
instruction: NotRequired[str]


def get_continuation_prompt_function(
language: Language,
adapter: Callable[[dict], ContinuationInput] | ContinuationDictAdapter,
formulation: Formulation = MCFFormulation(),
):
"""
Create a templated prompt function for a Continuation task.
Example tasks:
- Hellaswag
- XStoryCloze

Format:
*CF*
Context | Continuation 1/Continuation 2/Continuation 3

*Hybrid*
Context
A. Continuation 1
B. Continuation 2
C. Continuation 3
Answer: Continuation 1/Continuation 2/Continuation 3

*MCF*
Context
A. Continuation 1
B. Continuation 2
C. Continuation 3
Answer: A/B/C

This template is very similar to the `Multiple Choice` template, except that it only takes context/continuations as input and doesn't use the anchor labels (Question/Answer).

Args:
language (Language): The language of the Continuation task.
adapter (Callable[[dict], ContinuationInput] | ContinuationDictAdapter): Either a function that takes a dataset row and returns a ContinuationInput, or a dictionary with keys corresponding to the field names in the dataset row.
Note: Both ContinuationDictAdapter and ContinuationInput are TypedDicts, which means the caller provides a dictionary and doesn't initialize any class!
formulation (Formulation, optional): The formulation (MCF/Hybrid/CF) to use for the task. Defaults to MCFFormulation().
Returns:
Callable: A function that generates Continuation prompts based on the given parameters.
"""
adapter_fn: Callable[[dict], ContinuationInput] = create_adapter_from_dict(adapter) # type: ignore
translation_literals = TRANSLATION_LITERALS[language]

def prepare_prompt(line: dict):
cont_input = adapter_fn(line)

instruction_val = cont_input.get("instruction")
instruction = f"{instruction_val}\n" if instruction_val else ""

context = f"{capitalize(fix_ending_punct(cont_input['context'], translation_literals))}"

continuations = [
fix_capitalization(context, fix_ending_punct(continuation, translation_literals), translation_literals)
for continuation in cont_input["continuations"]
]

return cont_input, instruction, context, continuations

def prompt_fn_cf(line, task_name: str):
cont_input, instruction, context, continuations = prepare_prompt(line)

context_follows_sentence_space = punctuation_ends_sentence(context, translation_literals)
answers = build_answers(continuations, formulation, translation_literals, context_follows_sentence_space)

query = CONTINUATION_QUERY_CF.format(
instruction=instruction,
context=context,
)

return Doc(
task_name=task_name,
query=query,
gold_index=as_list(cont_input["gold_idx"]),
choices=answers,
instruction=instruction,
unconditioned_query="",
)

def prompt_fn_mcf(line, task_name: str):
cont_input, instruction, context, continuations = prepare_prompt(line)

options = build_choices(continuations, formulation, translation_literals)
options = f"{options}\n" if options else ""
answers = build_answers(continuations, formulation, translation_literals)

answer_word = capitalize(translation_literals.answer)

query = CONTINUATION_QUERY_MCF.format(
instruction=instruction,
context=context,
options=options,
answer_word=answer_word,
colon=translation_literals.colon,
)

return Doc(
task_name=task_name,
query=query,
gold_index=as_list(cont_input["gold_idx"]),
choices=answers,
instruction=instruction,
unconditioned_query=f"{answer_word}{translation_literals.colon}",
)

return prompt_fn_cf if isinstance(formulation, CFFormulation) else prompt_fn_mcf
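
Usage sketch (not part of the diff): the new get_continuation_prompt_function can be driven by a plain dict adapter that maps dataset columns onto the ContinuationInput fields. The snippet below is a minimal, hypothetical example: the column names "ctx", "endings" and "label", the row contents and the task name are invented for illustration, and it assumes Language.ENGLISH is defined in lighteval.utils.language.

from lighteval.tasks.templates.continuation import get_continuation_prompt_function
from lighteval.tasks.templates.utils.formulation import CFFormulation
from lighteval.utils.language import Language

# Dict adapter: maps the hypothetical dataset columns onto ContinuationInput fields.
prompt_fn = get_continuation_prompt_function(
    language=Language.ENGLISH,
    adapter={"context": "ctx", "continuations": "endings", "gold_idx": "label"},
    formulation=CFFormulation(),
)

# Fake Hellaswag-style row; prompt_fn returns a Doc carrying the rendered
# query, the continuation choices and the gold index.
row = {
    "ctx": "She opened her umbrella because",
    "endings": ["it had started to rain.", "the sun was shining brightly."],
    "label": 0,
}
doc = prompt_fn(row, task_name="custom|continuation_demo|0")
print(doc.query, doc.choices, doc.gold_index)

With MCFFormulation() instead of CFFormulation(), the same call renders lettered options and the localized answer anchor, as described in the docstring above.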
146 changes: 146 additions & 0 deletions src/lighteval/tasks/templates/copa.py
@@ -0,0 +1,146 @@
# MIT License

# Copyright (c) 2024 The HuggingFace Team

# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:

# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.

# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

from typing import Callable, Literal

from typing_extensions import NotRequired, TypedDict

from lighteval.tasks.templates.continuation import get_continuation_prompt_function
from lighteval.tasks.templates.multichoice import create_adapter_from_dict
from lighteval.tasks.templates.utils.formatting_utils import PUNCT, capitalize
from lighteval.tasks.templates.utils.formulation import Formulation, MCFFormulation
from lighteval.tasks.templates.utils.translation_literals import TRANSLATION_LITERALS
from lighteval.utils.language import Language


# NLI Cause/Effect (Copa)
COPA_QUERY = "{context}{word_space}{cause_or_effect}"


# Defined for type hinting only
class COPAInput(TypedDict):
"""
Input for the COPA task.
Args:
cause_effect: The type of the COPA task (X therefore Y / X because Y)
context: The contextualization of choices (e.g. You are young)
continuations: The possible continuations of the context (e.g. You are old, You are a child)
gold_idx: The index of the correct continuation
instruction (optional): The instruction of the COPA task (e.g. Choose the most appropriate continuation)
"""

cause_effect: Literal["cause", "effect"]
context: str
continuations: list[str]
gold_idx: int | list[int]
instruction: NotRequired[str]


class COPAAdapter(TypedDict):
"""
Adapter for mapping from the dataset row into the COPAInput format.
Args:
cause_effect: Column name in the row that contains the type of the COPA task (X therefore Y / X because Y)
context: Column name in the row that contains the contextualization of choices (e.g. You are young)
continuations: Column name in the row that contains the possible continuations of the context (e.g. You are old, You are a child)
gold_idx: Column name in the row that contains the index of the correct continuation
instruction (optional): Column name in the row that contains the instruction of the task (e.g. Choose the most appropriate continuation)
"""

cause_effect: str
context: str
continuations: str
gold_idx: str
instruction: NotRequired[str]


def get_copa_prompt_function(
language: Language, adapter: Callable[[dict], COPAInput] | COPAAdapter, formulation: Formulation = MCFFormulation()
):
"""
Create a templated prompt function for a COPA task.
Example tasks:
- COPA
- PARUS

Format:
*CF*
Context Premise therefore/because | (Continuation 1, Continuation 2, Continuation 3)

*Hybrid*
Context Premise therefore/because
A. Continuation 1
B. Continuation 2
C. Continuation 3
Answer: | Continuation 1/Continuation 2/Continuation 3

*MCF*
Context Premise therefore/because
A. Continuation 1
B. Continuation 2
C. Continuation 3
Answer: | A/B/C

Args:
language (Language): The language of the COPA task.
adapter (Callable[[dict], COPAInput] | COPAAdapter): Either a function that takes a dataset row and returns a COPAInput, or a dictionary with keys corresponding to the field names in the dataset row.
Note: Both COPAAdapter and COPAInput are TypedDicts, which means the caller provides a dictionary and doesn't initialize any class!
Note: The gold_idx must be an index or list of indices in the continuations list, indicating the correct continuation(s).
formulation (Formulation, optional): The formulation to use for the task. Defaults to MCFFormulation().
Returns:
Callable: A function that generates COPA prompts based on the given parameters.
"""
adapter_fn: Callable[[dict], COPAInput] = create_adapter_from_dict(adapter) # type: ignore
continuation_prompt_fn = get_continuation_prompt_function(
language, {"context": "context", "continuations": "continuations", "gold_idx": "gold_idx"}, formulation
)
translation_literals = TRANSLATION_LITERALS[language]

def copa_prompt(
line: dict,
task_name: str,
):
input_data = adapter_fn(line)
context = capitalize(input_data["context"].rstrip(PUNCT))
cause_or_effect_trans = (
translation_literals.cause_word
if input_data["cause_effect"] == "cause"
else translation_literals.effect_word
)

context = COPA_QUERY.format(
context=context,
word_space=translation_literals.word_space,
cause_or_effect=cause_or_effect_trans,
)

return continuation_prompt_fn(
{
"instruction": input_data.get("instruction", ""),
"context": context,
"continuations": input_data["continuations"],
"gold_idx": input_data["gold_idx"],
},
task_name,
)

return copa_prompt
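
Usage sketch (not part of the diff): get_copa_prompt_function also accepts a callable adapter, which is convenient when the continuations must be assembled from several columns, as in a COPA/XCOPA-style layout. The column names below ("premise", "choice1", "choice2", "question", "label"), the row contents and the task name are assumptions for illustration; Language.ENGLISH is assumed as above.

from lighteval.tasks.templates.copa import COPAInput, get_copa_prompt_function
from lighteval.tasks.templates.utils.formulation import MCFFormulation
from lighteval.utils.language import Language


def copa_adapter(line: dict) -> COPAInput:
    # Callable adapter: assemble the COPAInput fields from the raw row.
    return {
        "cause_effect": line["question"],  # assumed to be "cause" or "effect"
        "context": line["premise"],
        "continuations": [line["choice1"], line["choice2"]],
        "gold_idx": line["label"],
    }


prompt_fn = get_copa_prompt_function(Language.ENGLISH, copa_adapter, MCFFormulation())

row = {
    "premise": "The man turned on the faucet.",
    "choice1": "The toilet filled with water.",
    "choice2": "Water flowed from the spout.",
    "question": "effect",
    "label": 1,
}
doc = prompt_fn(row, task_name="custom|copa_demo|0")
# In the MCF formulation, doc.query lists the lettered options and ends with
# the localized answer anchor; doc.gold_index is [1].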