|
| 1 | +# MIT License |
| 2 | + |
| 3 | +# Copyright (c) 2024 The HuggingFace Team |
| 4 | + |
| 5 | +# Permission is hereby granted, free of charge, to any person obtaining a copy |
| 6 | +# of this software and associated documentation files (the "Software"), to deal |
| 7 | +# in the Software without restriction, including without limitation the rights |
| 8 | +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell |
| 9 | +# copies of the Software, and to permit persons to whom the Software is |
| 10 | +# furnished to do so, subject to the following conditions: |
| 11 | + |
| 12 | +# The above copyright notice and this permission notice shall be included in all |
| 13 | +# copies or substantial portions of the Software. |
| 14 | + |
| 15 | +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
| 16 | +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
| 17 | +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE |
| 18 | +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
| 19 | +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, |
| 20 | +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE |
| 21 | +# SOFTWARE. |
| 22 | + |
| 23 | +# ruff: noqa: F405, F403, F401 |
| 24 | +""" |
| 25 | +Custom evaluation tasks for lighteval. |
| 26 | +
|
| 27 | +This file creates a TASKS_TABLE, which is then imported by LightEval. |
| 28 | +
|
| 29 | +The OZ Eval (sr. Opšte Znanje Evaluacija) dataset was created to evaluate the general knowledge of LLMs in the Serbian language. |
| 30 | +The data consists of 1k+ high-quality questions and answers that were used in entrance exams at the Faculty of Philosophy and the Faculty of Organizational Sciences, University of Belgrade. |
| 31 | +The exams test the General Knowledge of students and were used in the enrollment periods from 2003 to 2024. |
| 32 | +For more details and results see: https://huggingface.co/datasets/DjMel/oz-eval |
| 33 | +
|
| 34 | +In order to have comparable results to ours, please do not forget to run with --use_chat_template |
| 35 | +""" |
| 36 | + |
| 37 | +from lighteval.metrics.metrics import Metrics |
| 38 | +from lighteval.tasks.lighteval_task import LightevalTaskConfig |
| 39 | +from lighteval.tasks.requests import Doc |
| 40 | + |
| 41 | + |
def prompt_fn_oz_eval_task(line, task_name: str = None):
    """Turn one OZ Eval dataset row into a multiple-choice ``Doc``.

    Args:
        line: Mapping for a single dataset row. The keys read here are
            ``"options"`` (indexed 0..4 for the five answer texts),
            ``"questions"`` (the question text) and ``"answer"`` (the letter
            of the correct option).
        task_name: Name forwarded unchanged to the resulting ``Doc``.

    Returns:
        A ``Doc`` whose query is the Serbian prompt listing the five options,
        whose choices are the letters ``A``-``E`` and whose ``gold_index`` is
        the position of ``line["answer"]`` among those letters.

    Raises:
        ValueError: If ``line["answer"]`` is not one of ``A``-``E``
            (propagated from ``list.index``).
    """
    letters = ["A", "B", "C", "D", "E"]
    answers = line["options"]

    # The template text (including its embedded newlines and indentation) is
    # part of the prompt sent to the model, so it is kept exactly as is.
    prompt = """Pitanje: {question}\n
    Ponuđeni odgovori:
    A. {choice_a}
    B. {choice_b}
    C. {choice_c}
    D. {choice_d}
    E. {choice_e}

    Krajnji odgovor:""".format(
        question=line["questions"],
        # Explicit indexing (rather than zip) so a row with fewer than five
        # options still fails on the lookup itself.
        **{
            f"choice_{letter.lower()}": answers[position]
            for position, letter in enumerate(letters)
        },
    )

    gold = letters.index(line["answer"])
    return Doc(task_name=task_name, query=prompt, choices=letters, gold_index=gold)
| 71 | + |
| 72 | + |
# Task definition for OZ Eval: a zero-shot, five-way multiple-choice task
# scored with log-likelihood accuracy on the dataset's "test" split.
oz_eval_task = LightevalTaskConfig(
    # Identity
    name="serbian_evals:oz_task",
    suite=["community"],
    version=0,
    # Data source
    hf_repo="DjMel/oz-eval",
    hf_subset="default",
    hf_avail_splits=["test"],
    evaluation_splits=["test"],
    # No few-shot examples are configured for this task.
    few_shots_split=None,
    few_shots_select=None,
    # Prompting and scoring
    prompt_function=prompt_fn_oz_eval_task,
    metric=[Metrics.loglikelihood_acc],
)


# Tasks exported by this module.
TASKS_TABLE = [oz_eval_task]
| 90 | + |
| 91 | + |
if __name__ == "__main__":
    # Debug aid when the module is executed directly: show the names of the
    # registered tasks, then how many there are.
    #
    # A bare generator expression handed to print() is never consumed and
    # prints as "<generator object ...>", so the names are collected into a
    # list first.
    # NOTE(review): this reads `name` as an attribute, on the assumption that
    # LightevalTaskConfig exposes its `name=` keyword that way and does not
    # support t["name"] subscripting — confirm against the lighteval version
    # in use.
    print([task.name for task in TASKS_TABLE])
    print(len(TASKS_TABLE))
0 commit comments