
Commit 48de9ba

Hynek Kydlicek (hynky1999) authored
Adds tasks templating (#335)
---------
Co-authored-by: Nathan Habib <[email protected]>
Co-authored-by: Hynek Kydlicek <[email protected]>
Co-authored-by: Clémentine Fourrier <[email protected]>
1 parent 3c574d3 commit 48de9ba

14 files changed: 1906 additions, 1 deletion


pyproject.toml

Lines changed: 1 addition & 1 deletion
@@ -91,7 +91,7 @@ tensorboardX = ["tensorboardX"]
 vllm = ["vllm", "ray", "more_itertools"]
 quality = ["ruff==v0.2.2","pre-commit"]
 tests = ["pytest==7.4.0"]
-dev = ["lighteval[accelerate,quality,tests]"]
+dev = ["lighteval[accelerate,quality,tests,multilingual]"]
 extended_tasks = [
     "langdetect", # ifeval
     "openai", # llm as a judge using openai models

src/lighteval/tasks/default_prompts.py

Lines changed: 1 addition & 0 deletions
@@ -36,6 +36,7 @@

 # fmt: off
 LETTER_INDICES = ["A", "B", "C", "D", "E", "F", "G", "H", "I", "J", "K", "L", "M", "N", "O", "P", "Q", "R", "S", "T", "U", "V", "W", "X", "Y", "Z"]
+INTEGER_INDICES = list(map(str, list(range(1, 27))))
 # fmt: on

src/lighteval/tasks/templates/continuation.py

Lines changed: 188 additions & 0 deletions

@@ -0,0 +1,188 @@
# MIT License

# Copyright (c) 2024 The HuggingFace Team

# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:

# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.

# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

from typing import Callable

from typing_extensions import NotRequired, TypedDict

from lighteval.tasks.requests import Doc
from lighteval.tasks.templates.utils.adapter_utils import create_adapter_from_dict
from lighteval.tasks.templates.utils.formatting_utils import (
    capitalize,
    fix_capitalization,
    fix_ending_punct,
    punctuation_ends_sentence,
)
from lighteval.tasks.templates.utils.formulation import (
    CFFormulation,
    Formulation,
    MCFFormulation,
    build_answers,
    build_choices,
)
from lighteval.tasks.templates.utils.translation_literals import TRANSLATION_LITERALS
from lighteval.utils.language import Language
from lighteval.utils.utils import as_list


CONTINUATION_QUERY_CF = "{instruction}{context}"

CONTINUATION_QUERY_MCF = "{instruction}{context}\n{options}{answer_word}{colon}"


# Defined for type hinting only
class ContinuationInput(TypedDict):
    """
    Input for the continuation task.
    Args:
        context: The contextualization of the choices (e.g. If I ask you a question, you should answer it)
        continuations: Possible continuations of the context (e.g. [you should answer it, you should leave])
        gold_idx: The index of the correct continuation
        instruction (optional): The instruction of the task (e.g. Following is a snippet of a dialogue, choose the most appropriate continuation)
    """

    context: str
    continuations: list[str]
    gold_idx: list[int] | int
    instruction: NotRequired[str]


class ContinuationDictAdapter(TypedDict):
    """
    Adapter for mapping from the dataset row into the ContinuationInput format.
    Args:
        context: Column name in the row that contains the contextualization of the choices (e.g. If I ask you a question, you should answer it)
        continuations: Column name in the row that contains the possible continuations of the context (e.g. [you should answer it, you should leave])
        gold_idx: Column name in the row that contains the index of the correct continuation
        instruction (optional): Column name in the row that contains the instruction of the task (e.g. Following is a snippet of a dialogue, choose the most appropriate continuation)
    """

    context: str
    continuations: str
    gold_idx: str
    instruction: NotRequired[str]


def get_continuation_prompt_function(
    language: Language,
    adapter: Callable[[dict], ContinuationInput] | ContinuationDictAdapter,
    formulation: Formulation = MCFFormulation(),
):
    """
    Create a templated prompt function for a Continuation task.
    Example tasks:
    - Hellaswag
    - XStoryCloze

    Format:
    *CF*
    Context | Continuation 1/Continuation 2/Continuation 3

    *Hybrid*
    Context
    A. Continuation 1
    B. Continuation 2
    C. Continuation 3
    Answer: Continuation 1/Continuation 2/Continuation 3

    *MCF*
    Context
    A. Continuation 1
    B. Continuation 2
    C. Continuation 3
    Answer: A/B/C

    This template is very similar to the `Multiple Choice` template, except that it only takes context/continuations as input and doesn't use the anchor labels (Question/Answer).

    Args:
        language (Language): The language of the Continuation task.
        adapter (Callable[[dict], ContinuationInput] | ContinuationDictAdapter): Either a function that takes a dataset row and returns a ContinuationInput, or a dictionary with keys corresponding to the field names in the dataset row.
            Note: Both ContinuationDictAdapter and ContinuationInput are TypedDicts; the caller provides a dictionary and doesn't initialize any class!
        formulation (Formulation, optional): The formulation (MCF/Hybrid/CF) to use for the task. Defaults to MCFFormulation().
    Returns:
        Callable: A function that generates Continuation prompts based on the given parameters.
    """
    adapter_fn: Callable[[dict], ContinuationInput] = create_adapter_from_dict(adapter)  # type: ignore
    translation_literals = TRANSLATION_LITERALS[language]

    def prepare_prompt(line: dict):
        cont_input = adapter_fn(line)

        instruction_val = cont_input.get("instruction")
        instruction = f"{instruction_val}\n" if instruction_val else ""

        context = f"{capitalize(fix_ending_punct(cont_input['context'], translation_literals))}"

        continuations = [
            fix_capitalization(context, fix_ending_punct(continuation, translation_literals), translation_literals)
            for continuation in cont_input["continuations"]
        ]

        return cont_input, instruction, context, continuations

    def prompt_fn_cf(line, task_name: str):
        cont_input, instruction, context, continuations = prepare_prompt(line)

        context_follows_sentence_space = punctuation_ends_sentence(context, translation_literals)
        answers = build_answers(continuations, formulation, translation_literals, context_follows_sentence_space)

        query = CONTINUATION_QUERY_CF.format(
            instruction=instruction,
            context=context,
        )

        return Doc(
            task_name=task_name,
            query=query,
            gold_index=as_list(cont_input["gold_idx"]),
            choices=answers,
            instruction=instruction,
            unconditioned_query="",
        )

    def prompt_fn_mcf(line, task_name: str):
        cont_input, instruction, context, continuations = prepare_prompt(line)

        options = build_choices(continuations, formulation, translation_literals)
        options = f"{options}\n" if options else ""
        answers = build_answers(continuations, formulation, translation_literals)

        answer_word = capitalize(translation_literals.answer)

        query = CONTINUATION_QUERY_MCF.format(
            instruction=instruction,
            context=context,
            options=options,
            answer_word=answer_word,
            colon=translation_literals.colon,
        )

        return Doc(
            task_name=task_name,
            query=query,
            gold_index=as_list(cont_input["gold_idx"]),
            choices=answers,
            instruction=instruction,
            unconditioned_query=f"{answer_word}{translation_literals.colon}",
        )

    return prompt_fn_cf if isinstance(formulation, CFFormulation) else prompt_fn_mcf
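A minimal usage sketch of the new template follows; the dataset column names ("ctx", "endings", "label") and the task name are illustrative assumptions, not part of this commit, and Language.ENGLISH is assumed to be a valid member of the Language enum.

from lighteval.tasks.templates.continuation import get_continuation_prompt_function
from lighteval.tasks.templates.utils.formulation import CFFormulation
from lighteval.utils.language import Language

# Dict adapter: maps the ContinuationInput fields to the (assumed) dataset column names.
hellaswag_style_prompt = get_continuation_prompt_function(
    Language.ENGLISH,
    {"context": "ctx", "continuations": "endings", "gold_idx": "label"},
    CFFormulation(),
)

# The returned callable is a regular lighteval prompt function:
# doc = hellaswag_style_prompt(dataset_row, "hellaswag_eng_cf")

In the CF formulation the resulting Doc's query is just the cleaned-up context and its choices are the formatted continuations; with MCFFormulation() the query additionally lists the lettered options and an answer line, as described in the docstring above.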
src/lighteval/tasks/templates/copa.py

Lines changed: 146 additions & 0 deletions

@@ -0,0 +1,146 @@
# MIT License

# Copyright (c) 2024 The HuggingFace Team

# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:

# The above copyright notice and this permission notice shall be included in all
# copies or substantial portions of the Software.

# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

from typing import Callable, Literal

from typing_extensions import NotRequired, TypedDict

from lighteval.tasks.templates.continuation import get_continuation_prompt_function
from lighteval.tasks.templates.multichoice import create_adapter_from_dict
from lighteval.tasks.templates.utils.formatting_utils import PUNCT, capitalize
from lighteval.tasks.templates.utils.formulation import Formulation, MCFFormulation
from lighteval.tasks.templates.utils.translation_literals import TRANSLATION_LITERALS
from lighteval.utils.language import Language


# NLI Cause/Effect (Copa)
COPA_QUERY = "{context}{word_space}{cause_or_effect}"


# Defined for type hinting only
class COPAInput(TypedDict):
    """
    Input for the COPA task.
    Args:
        cause_effect: The type of the COPA task (X therefore Y / X because Y)
        context: The contextualization of the choices (e.g. You are young)
        continuations: The possible continuations of the context (e.g. You are old, You are a child)
        gold_idx: The index of the correct continuation
        instruction (optional): The instruction of the COPA task (e.g. Choose the most appropriate continuation)
    """

    cause_effect: Literal["cause", "effect"]
    context: str
    continuations: list[str]
    gold_idx: int | list[int]
    instruction: NotRequired[str]


class COPAAdapter(TypedDict):
    """
    Adapter for mapping from the dataset row into the COPAInput format.
    Args:
        cause_effect: Column name in the row that contains the type of the COPA task (X therefore Y / X because Y)
        context: Column name in the row that contains the contextualization of the choices (e.g. You are young)
        continuations: Column name in the row that contains the possible continuations of the context (e.g. You are old, You are a child)
        gold_idx: Column name in the row that contains the index of the correct continuation
        instruction (optional): Column name in the row that contains the instruction of the task (e.g. Choose the most appropriate continuation)
    """

    cause_effect: str
    context: str
    continuations: str
    gold_idx: str
    instruction: NotRequired[str]


def get_copa_prompt_function(
    language: Language, adapter: Callable[[dict], COPAInput] | COPAAdapter, formulation: Formulation = MCFFormulation()
):
    """
    Create a templated prompt function for a COPA task.
    Example tasks:
    - COPA
    - PARUS

    Format:
    *CF*
    Context Premise therefore/cause | (Continuation 1, Continuation 2, Continuation 3)

    *Hybrid*
    Context Premise therefore/cause
    A. Continuation 1
    B. Continuation 2
    C. Continuation 3
    Answer: | Continuation 1/Continuation 2/Continuation 3

    *MCF*
    Context Premise therefore/cause
    A. Continuation 1
    B. Continuation 2
    C. Continuation 3
    Answer: | A/B/C

    Args:
        language (Language): The language of the COPA task.
        adapter (Callable[[dict], COPAInput] | COPAAdapter): Either a function that takes a dataset row and returns a COPAInput, or a dictionary with keys corresponding to the field names in the dataset row.
            Note: Both COPAAdapter and COPAInput are TypedDicts; the caller provides a dictionary and doesn't initialize any class!
            Note: The gold_idx must be an index or list of indices in the continuations list, indicating the correct continuation(s).
        formulation (Formulation, optional): The formulation to use for the task. Defaults to MCFFormulation().
    Returns:
        Callable: A function that generates COPA prompts based on the given parameters.
    """
    adapter_fn: Callable[[dict], COPAInput] = create_adapter_from_dict(adapter)  # type: ignore
    continuation_prompt_fn = get_continuation_prompt_function(
        language, {"context": "context", "continuations": "continuations", "gold_idx": "gold_idx"}, formulation
    )
    translation_literals = TRANSLATION_LITERALS[language]

    def copa_prompt(
        line: dict,
        task_name: str,
    ):
        input_data = adapter_fn(line)
        context = capitalize(input_data["context"].rstrip(PUNCT))
        cause_or_effect_trans = (
            translation_literals.cause_word
            if input_data["cause_effect"] == "cause"
            else translation_literals.effect_word
        )

        context = COPA_QUERY.format(
            context=context,
            word_space=translation_literals.word_space,
            cause_or_effect=cause_or_effect_trans,
        )

        return continuation_prompt_fn(
            {
                "instruction": input_data.get("instruction", ""),
                "context": context,
                "continuations": input_data["continuations"],
                "gold_idx": input_data["gold_idx"],
            },
            task_name,
        )

    return copa_prompt
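Similarly, a minimal usage sketch for the COPA template; the column names ("premise", "question", "choice1", "choice2", "label") follow the usual COPA/XCOPA dataset layout and are assumptions, not part of this commit.

from lighteval.tasks.templates.copa import get_copa_prompt_function
from lighteval.tasks.templates.utils.formulation import MCFFormulation
from lighteval.utils.language import Language


def copa_adapter(line: dict) -> dict:
    # Callable adapter: build the COPAInput fields from the (assumed) raw columns.
    return {
        "cause_effect": line["question"],  # expected to be "cause" or "effect"
        "context": line["premise"],
        "continuations": [line["choice1"], line["choice2"]],
        "gold_idx": int(line["label"]),
    }


copa_prompt = get_copa_prompt_function(Language.ENGLISH, copa_adapter, MCFFormulation())
# doc = copa_prompt(dataset_row, "copa_eng_mcf")

Internally the function appends the language-specific cause/effect connector (translation_literals.cause_word or effect_word) to the premise and then delegates to the continuation template above, so the resulting Doc has the same shape as any other continuation task.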
