Merged
39 commits
5c69eb0
add multilingual dynamic generative metrics
hynky1999 Sep 5, 2024
39c4220
Merge branch 'main' into geneartive_dynamic_metrics
hynky1999 Sep 5, 2024
2a5cdca
Merge branch 'geneartive_dynamic_metrics' into config_templates
hynky1999 Sep 5, 2024
2df9a08
draft
hynky1999 Sep 6, 2024
95729ee
finish multichoice config
hynky1999 Sep 9, 2024
3aa0579
Merge branch 'main' into geneartive_dynamic_metrics
hynky1999 Sep 9, 2024
b8f90a9
update tokenizers + install nltk reqs
hynky1999 Sep 9, 2024
f5a8717
use punkt tab
hynky1999 Sep 9, 2024
227f572
Update src/lighteval/utils/imports.py
hynky1999 Sep 13, 2024
d80b3ba
Update src/lighteval/metrics/normalizations.py
hynky1999 Sep 13, 2024
532bdad
fix imports
Sep 13, 2024
75f7ac5
remove unused import
Sep 13, 2024
f99e330
Merge branch 'main' into geneartive_dynamic_metrics
NathanHB Sep 13, 2024
92daf90
Merge branch 'main' into geneartive_dynamic_metrics
clefourrier Sep 14, 2024
f2a801d
Merge branch 'main' into geneartive_dynamic_metrics
NathanHB Sep 17, 2024
91d9d4f
finish implementation of templates + move stuff around
Sep 23, 2024
9356cc6
resolve nits
Sep 23, 2024
0fbc731
when in rome do as romans do (handle error messages the same way)
Sep 23, 2024
fa1fa83
fix utils
hynky1999 Sep 23, 2024
db36e16
Merge branch 'geneartive_dynamic_metrics' into config_templates
hynky1999 Sep 23, 2024
44aeecf
nicer tests + fix them
hynky1999 Sep 23, 2024
2bff963
nicer todo
hynky1999 Sep 23, 2024
3c9eb21
add nice docstrings 📃
hynky1999 Sep 23, 2024
4216ae2
add even more docstring
hynky1999 Sep 23, 2024
d8f56b8
nit
hynky1999 Sep 23, 2024
f26e88c
fix test
hynky1999 Sep 23, 2024
111d615
add multilingual to dev group
hynky1999 Sep 24, 2024
7ca4239
merge nli, add languages to literals
hynky1999 Sep 25, 2024
22eeddb
translation literals
hynky1999 Sep 25, 2024
2d09256
Merge branch 'geneartive_dynamic_metrics' into config_templates
hynky1999 Sep 26, 2024
ec6aa41
Update src/lighteval/tasks/templates/nli.py
hynky1999 Sep 27, 2024
2652a99
Update src/lighteval/tasks/templates/continuation.py
hynky1999 Sep 27, 2024
f8d2e9c
Update src/lighteval/tasks/templates/multichoice.py
hynky1999 Sep 27, 2024
319615b
nice docstrings + remove unnecessary comments
hynky1999 Sep 27, 2024
a2a9101
Merge branch 'config_templates' of github.com:huggingface/lighteval i…
hynky1999 Sep 27, 2024
47f908c
beautiful docstrings
hynky1999 Sep 27, 2024
c40ab45
Merge remote-tracking branch 'origin/main' into config_templates
hynky1999 Sep 27, 2024
1bc19b8
Merge remote-tracking branch 'origin/main' into config_templates
hynky1999 Sep 27, 2024
d516695
Merge branch 'config_templates' of github.com:huggingface/lighteval i…
hynky1999 Sep 27, 2024
2 changes: 1 addition & 1 deletion pyproject.toml
@@ -91,7 +91,7 @@ tensorboardX = ["tensorboardX"]
vllm = ["vllm", "ray", "more_itertools"]
quality = ["ruff==v0.2.2","pre-commit"]
tests = ["pytest==7.4.0"]
dev = ["lighteval[accelerate,quality,tests]"]
dev = ["lighteval[accelerate,quality,tests,multilingual]"]
extended_tasks = [
"langdetect", # ifeval
"openai", # llm as a judge using openai models
1 change: 1 addition & 0 deletions src/lighteval/tasks/default_prompts.py
@@ -36,6 +36,7 @@

# fmt: off
LETTER_INDICES = ["A", "B", "C", "D", "E", "F", "G", "H", "I", "J", "K", "L", "M", "N", "O", "P", "Q", "R", "S", "T", "U", "V", "W", "X", "Y", "Z"]
+INTEGER_INDICES = list(map(str, list(range(1, 27))))
# fmt: on


188 changes: 188 additions & 0 deletions src/lighteval/tasks/templates/continuation.py
@@ -0,0 +1,188 @@
# MIT License

# Copyright (c) 2024 The HuggingFace Team

# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:

# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.

# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

from typing import Callable

from typing_extensions import NotRequired, TypedDict

from lighteval.tasks.requests import Doc
from lighteval.tasks.templates.utils.adapter_utils import create_adapter_from_dict
from lighteval.tasks.templates.utils.formatting_utils import (
capitalize,
fix_capitalization,
fix_ending_punct,
punctuation_ends_sentence,
)
from lighteval.tasks.templates.utils.formulation import (
CFFormulation,
Formulation,
MCFFormulation,
build_answers,
build_choices,
)
from lighteval.tasks.templates.utils.translation_literals import TRANSLATION_LITERALS
from lighteval.utils.language import Language
from lighteval.utils.utils import as_list


CONTINUATION_QUERY_CF = "{instruction}{context}"

CONTINUATION_QUERY_MCF = "{instruction}{context}\n{options}{answer_word}{colon}"


# Defined for type hinting only
class ContinuationInput(TypedDict):
"""
Input for the continuation task.
Args:
context: The contextualization of choices (e.g. If I ask you a question, you should answer it)
continuations: Possible continuations of the context (e.g. [you should answer it, you should leave])
gold_idx: The index of the correct continuation
instruction (optional): The instruction of the task (e.g. Following is the snippet of a dialogue, choose the most appropriate continuation)
"""

context: str
continuations: list[str]
gold_idx: list[int] | int
instruction: NotRequired[str]


class ContinuationDictAdapter(TypedDict):
"""
Adapter for mapping from the dataset row into the ContinuationInput format.
Args:
context: Column name in the row that contains the contextualization of choices (e.g. If I ask you a question, you should answer it)
continuations: Column name in the row that contains the possible continuations of the context (e.g. [you should answer it, you should leave])
gold_idx: Column name in the row that contains the index of the correct continuation
instruction (optional): Column name in the row that contains the instruction of the task (e.g. Following is the snippet of a dialogue, choose the most appropriate continuation)
"""

context: str
continuations: str
gold_idx: str
instruction: NotRequired[str]


def get_continuation_prompt_function(
language: Language,
adapter: Callable[[dict], ContinuationInput] | ContinuationDictAdapter,
formulation: Formulation = MCFFormulation(),
):
"""
Create a templated prompt function for a Continuation task.
Example tasks:
- Hellaswag
- XStoryCloze

Format:
*CF*
Context | Continuation 1/Continuation 2/Continuation 3

*Hybrid*
Context
A. Continuation 1
B. Continuation 2
C. Continuation 3
Answer: Continuation 1/Continuation 2/Continuation 3

*MCF*
Context
A. Continuation 1
B. Continuation 2
C. Continuation 3
Answer: A/B/C

This template is very similar to the `Multiple Choice` template, except that it only takes context/continuations as input and doesn't use the anchor labels (Question/Answer).

Args:
language (Language): The language of the Continuation task.
adapter (Callable[[dict], ContinuationInput] | ContinuationDictAdapter): Either a function that takes a dataset row and returns a ContinuationInput, or a dictionary with keys corresponding to the field names in the dataset row.
Note: Both ContinuationDictAdapter and ContinuationInput are TypedDicts, which means the caller provides a dictionary and doesn't initialize any class!
formulation (Formulation, optional): The formulation (MCF/Hybrid/CF) to use for the task. Defaults to MCFFormulation().
Returns:
Callable: A function that generates Continuation prompts based on the given parameters.
"""
adapter_fn: Callable[[dict], ContinuationInput] = create_adapter_from_dict(adapter) # type: ignore
translation_literals = TRANSLATION_LITERALS[language]

def prepare_prompt(line: dict):
cont_input = adapter_fn(line)

instruction_val = cont_input.get("instruction")
instruction = f"{instruction_val}\n" if instruction_val else ""

context = f"{capitalize(fix_ending_punct(cont_input['context'], translation_literals))}"

continuations = [
fix_capitalization(context, fix_ending_punct(continuation, translation_literals), translation_literals)
for continuation in cont_input["continuations"]
]

return cont_input, instruction, context, continuations

def prompt_fn_cf(line, task_name: str):
cont_input, instruction, context, continuations = prepare_prompt(line)

context_follows_sentence_space = punctuation_ends_sentence(context, translation_literals)
answers = build_answers(continuations, formulation, translation_literals, context_follows_sentence_space)

query = CONTINUATION_QUERY_CF.format(
instruction=instruction,
context=context,
)

return Doc(
task_name=task_name,
query=query,
gold_index=as_list(cont_input["gold_idx"]),
choices=answers,
instruction=instruction,
unconditioned_query="",
)

def prompt_fn_mcf(line, task_name: str):
cont_input, instruction, context, continuations = prepare_prompt(line)

options = build_choices(continuations, formulation, translation_literals)
options = f"{options}\n" if options else ""
answers = build_answers(continuations, formulation, translation_literals)

answer_word = capitalize(translation_literals.answer)

query = CONTINUATION_QUERY_MCF.format(
instruction=instruction,
context=context,
options=options,
answer_word=answer_word,
colon=translation_literals.colon,
)

return Doc(
task_name=task_name,
query=query,
gold_index=as_list(cont_input["gold_idx"]),
choices=answers,
instruction=instruction,
unconditioned_query=f"{answer_word}{translation_literals.colon}",
)

return prompt_fn_cf if isinstance(formulation, CFFormulation) else prompt_fn_mcf
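
Usage sketch (not part of the diff): the new get_continuation_prompt_function can be driven by a plain dict adapter that maps dataset columns onto the ContinuationInput fields. The snippet below is a minimal, hypothetical example: the column names "ctx", "endings" and "label", the row contents and the task name are invented for illustration, and it assumes Language.ENGLISH is defined in lighteval.utils.language.

from lighteval.tasks.templates.continuation import get_continuation_prompt_function
from lighteval.tasks.templates.utils.formulation import CFFormulation
from lighteval.utils.language import Language

# Dict adapter: maps the hypothetical dataset columns onto ContinuationInput fields.
prompt_fn = get_continuation_prompt_function(
    language=Language.ENGLISH,
    adapter={"context": "ctx", "continuations": "endings", "gold_idx": "label"},
    formulation=CFFormulation(),
)

# Fake Hellaswag-style row; prompt_fn returns a Doc carrying the rendered
# query, the continuation choices and the gold index.
row = {
    "ctx": "She opened her umbrella because",
    "endings": ["it had started to rain.", "the sun was shining brightly."],
    "label": 0,
}
doc = prompt_fn(row, task_name="custom|continuation_demo|0")
print(doc.query, doc.choices, doc.gold_index)

With MCFFormulation() instead of CFFormulation(), the same call renders lettered options and the localized answer anchor, as described in the docstring above.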
146 changes: 146 additions & 0 deletions src/lighteval/tasks/templates/copa.py
@@ -0,0 +1,146 @@
# MIT License

# Copyright (c) 2024 The HuggingFace Team

# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:

# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.

# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

from typing import Callable, Literal

from typing_extensions import NotRequired, TypedDict

from lighteval.tasks.templates.continuation import get_continuation_prompt_function
from lighteval.tasks.templates.multichoice import create_adapter_from_dict
from lighteval.tasks.templates.utils.formatting_utils import PUNCT, capitalize
from lighteval.tasks.templates.utils.formulation import Formulation, MCFFormulation
from lighteval.tasks.templates.utils.translation_literals import TRANSLATION_LITERALS
from lighteval.utils.language import Language


# NLI Cause/Effect (Copa)
COPA_QUERY = "{context}{word_space}{cause_or_effect}"


# Defined for type hinting only
class COPAInput(TypedDict):
"""
Input for the COPA task.
Args:
cause_effect: The type of the COPA task (X therefore Y / X because Y)
context: The contextualization of choices (e.g. You are young)
continuations: The possible continuations of the context (e.g. You are old, You are a child)
gold_idx: The index of the correct continuation
instruction (optional): The instruction of the COPA task (e.g. Choose the most appropriate continuation)
"""

cause_effect: Literal["cause", "effect"]
context: str
continuations: list[str]
gold_idx: int | list[int]
instruction: NotRequired[str]


class COPAAdapter(TypedDict):
"""
Adapter for mapping from the dataset row into the COPAInput format.
Args:
cause_effect: Column name in the row that contains the type of the COPA task (X therefore Y / X because Y)
context: Column name in the row that contains the contextualization of choices (e.g. You are young)
continuations: Column name in the row that contains the possible continuations of the context (e.g. You are old, You are a child)
gold_idx: Column name in the row that contains the index of the correct continuation
instruction (optional): Column name in the row that contains the instruction of the task (e.g. Choose the most appropriate continuation)
"""

cause_effect: str
context: str
continuations: str
gold_idx: str
instruction: NotRequired[str]


def get_copa_prompt_function(
language: Language, adapter: Callable[[dict], COPAInput] | COPAAdapter, formulation: Formulation = MCFFormulation()
):
"""
Create a templated prompt function for a COPA task.
Example tasks:
- COPA
- PARUS

Format:
*CF*
Context Premise therefore/because | (Continuation 1, Continuation 2, Continuation 3)

*Hybrid*
Context Premise therefore/because
A. Continuation 1
B. Continuation 2
C. Continuation 3
Answer: | Continuation 1/Continuation 2/Continuation 3

*MCF*
Context Premise therefore/because
A. Continuation 1
B. Continuation 2
C. Continuation 3
Answer: | A/B/C

Args:
language (Language): The language of the COPA task.
adapter (Callable[[dict], COPAInput] | COPAAdapter): Either a function that takes a dataset row and returns a COPAInput, or a dictionary with keys corresponding to the field names in the dataset row.
Note: Both COPAAdapter and COPAInput are TypedDicts, which means the caller provides a dictionary and doesn't initialize any class!
Note: The gold_idx must be an index or list of indices in the continuations list, indicating the correct continuation(s).
formulation (Formulation, optional): The formulation to use for the task. Defaults to MCFFormulation().
Returns:
Callable: A function that generates COPA prompts based on the given parameters.
"""
adapter_fn: Callable[[dict], COPAInput] = create_adapter_from_dict(adapter) # type: ignore
continuation_prompt_fn = get_continuation_prompt_function(
language, {"context": "context", "continuations": "continuations", "gold_idx": "gold_idx"}, formulation
)
translation_literals = TRANSLATION_LITERALS[language]

def copa_prompt(
line: dict,
task_name: str,
):
input_data = adapter_fn(line)
context = capitalize(input_data["context"].rstrip(PUNCT))
cause_or_effect_trans = (
translation_literals.cause_word
if input_data["cause_effect"] == "cause"
else translation_literals.effect_word
)

context = COPA_QUERY.format(
context=context,
word_space=translation_literals.word_space,
cause_or_effect=cause_or_effect_trans,
)

return continuation_prompt_fn(
{
"instruction": input_data.get("instruction", ""),
"context": context,
"continuations": input_data["continuations"],
"gold_idx": input_data["gold_idx"],
},
task_name,
)

return copa_prompt
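
Usage sketch (not part of the diff): get_copa_prompt_function also accepts a callable adapter, which is convenient when the continuations must be assembled from several columns, as in a COPA/XCOPA-style layout. The column names below ("premise", "choice1", "choice2", "question", "label"), the row contents and the task name are assumptions for illustration; Language.ENGLISH is assumed as above.

from lighteval.tasks.templates.copa import COPAInput, get_copa_prompt_function
from lighteval.tasks.templates.utils.formulation import MCFFormulation
from lighteval.utils.language import Language


def copa_adapter(line: dict) -> COPAInput:
    # Callable adapter: assemble the COPAInput fields from the raw row.
    return {
        "cause_effect": line["question"],  # assumed to be "cause" or "effect"
        "context": line["premise"],
        "continuations": [line["choice1"], line["choice2"]],
        "gold_idx": line["label"],
    }


prompt_fn = get_copa_prompt_function(Language.ENGLISH, copa_adapter, MCFFormulation())

row = {
    "premise": "The man turned on the faucet.",
    "choice1": "The toilet filled with water.",
    "choice2": "Water flowed from the spout.",
    "question": "effect",
    "label": 1,
}
doc = prompt_fn(row, task_name="custom|copa_demo|0")
# In the MCF formulation, doc.query lists the lettered options and ends with
# the localized answer anchor; doc.gold_index is [1].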