Skip to content

Commit e27aa0e

Browse files
Stopwolf, clefourrier, NathanHB
authored
Adding OZ Eval task (#225)
--------- Co-authored-by: Clémentine Fourrier <[email protected]> Co-authored-by: Nathan Habib <[email protected]>
1 parent 9134ca8 commit e27aa0e

File tree

1 file changed

+94
-0
lines changed

1 file changed

+94
-0
lines changed

community_tasks/oz_evals.py

Lines changed: 94 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,94 @@
1+
# MIT License
2+
3+
# Copyright (c) 2024 The HuggingFace Team
4+
5+
# Permission is hereby granted, free of charge, to any person obtaining a copy
6+
# of this software and associated documentation files (the "Software"), to deal
7+
# in the Software without restriction, including without limitation the rights
8+
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9+
# copies of the Software, and to permit persons to whom the Software is
10+
# furnished to do so, subject to the following conditions:
11+
12+
# The above copyright notice and this permission notice shall be included in all
13+
# copies or substantial portions of the Software.
14+
15+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16+
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17+
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18+
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19+
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20+
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21+
# SOFTWARE.
22+
23+
# ruff: noqa: F405, F403, F401
24+
"""
25+
Custom evaluation tasks for lighteval.
26+
27+
This file generally creates just a TASKS_TABLE and TASKS_GROUPS which are then imported by LightEval.
28+
29+
The OZ Eval (sr. Opšte Znanje Evaluacija) dataset was created to evaluate the general knowledge of LLMs in the Serbian language.
30+
The data consists of 1k+ high-quality questions and answers that were used in entrance exams at the Faculty of Philosophy and the Faculty of Organizational Sciences, University of Belgrade.
31+
The exams test the General Knowledge of students and were used in the enrollment periods from 2003 to 2024.
32+
For more details and results see: https://huggingface.co/datasets/DjMel/oz-eval
33+
34+
To obtain results comparable to ours, remember to run with --use_chat_template.
35+
"""
36+
37+
from lighteval.metrics.metrics import Metrics
38+
from lighteval.tasks.lighteval_task import LightevalTaskConfig
39+
from lighteval.tasks.requests import Doc
40+
41+
42+
def prompt_fn_oz_eval_task(line, task_name: str = None):
    """Turn one OZ Eval dataset row into a five-way multiple-choice ``Doc``.

    Args:
        line: A single dataset row. This function reads ``line["options"]``
            (positions 0 through 4 are used as answers A through E),
            ``line["questions"]`` (the question text) and ``line["answer"]``
            (looked up among the letters "A"-"E").
        task_name: Task name, forwarded unchanged to the returned ``Doc``.

    Returns:
        A ``Doc`` whose ``query`` is the formatted Serbian prompt, whose
        ``choices`` are the letters "A"-"E", and whose ``gold_index`` is the
        position of ``line["answer"]`` in that letter list.

    Raises:
        ValueError: If ``line["answer"]`` is not exactly one of "A"-"E"
            (raised by ``list.index``).
    """
    # The prompt text is emitted verbatim at runtime, whitespace included.
    # NOTE(review): the continuation lines are flush-left as in the reviewed
    # text; if the upstream file indents them, that indentation is part of
    # the prompt — confirm before merging.
    prompt_layout = """Pitanje: {question}\n
Ponuđeni odgovori:
A. {choice_a}
B. {choice_b}
C. {choice_c}
D. {choice_d}
E. {choice_e}

Krajnji odgovor:"""

    # A fresh list per call; it is handed to the Doc below.
    answer_letters = ["A", "B", "C", "D", "E"]

    options = line["options"]
    question_text = line["questions"]

    # Index each of the five positions explicitly (choice_a <- options[0], ...),
    # so exactly five options are read regardless of how many the row carries.
    option_fields = {
        f"choice_{letter.lower()}": options[position]
        for position, letter in enumerate(answer_letters)
    }
    query = prompt_layout.format(question=question_text, **option_fields)

    gold_index = answer_letters.index(line["answer"])
    return Doc(
        task_name=task_name,
        query=query,
        choices=answer_letters,
        gold_index=gold_index,
    )
71+
72+
73+
# Task definition for the OZ Eval benchmark. Only the keyword order and the
# comments are editorial; every value is passed through as-is.
oz_eval_task = LightevalTaskConfig(
    # Identity: task name, suite, and the function that formats each row.
    name="serbian_evals:oz_task",
    suite=["community"],
    prompt_function=prompt_fn_oz_eval_task,
    version=0,
    # Data source: the dataset repo/subset, with "test" as the only listed
    # split and the only split that is evaluated.
    hf_repo="DjMel/oz-eval",
    hf_subset="default",
    hf_avail_splits=["test"],
    evaluation_splits=["test"],
    # No few-shot split or few-shot selection strategy is configured.
    few_shots_split=None,
    few_shots_select=None,
    # Scoring metric(s) applied to the task.
    metric=[Metrics.loglikelihood_acc],
)
86+
87+
88+
# Tasks exported by this module. Per the module docstring, LightEval imports
# TASKS_TABLE from community task files such as this one.
TASKS_TABLE = [oz_eval_task]


if __name__ == "__main__":
    # Quick sanity check when the file is run directly: list the task names
    # and how many tasks are registered.
    # The names are materialized in a list because handing print() a bare
    # generator expression only shows the generator object's repr.
    # NOTE(review): assumes LightevalTaskConfig exposes `name` as an attribute
    # (it is constructed with `name=...` above) — confirm against lighteval.
    print([t.name for t in TASKS_TABLE])
    print(len(TASKS_TABLE))

0 commit comments

Comments
 (0)