|
43 | 43 | # fmt: on
|
44 | 44 |
|
45 | 45 |
|
def aime_prompt_fn(line, task_name: str = None):
    """Turn one dataset row into a `Doc` carrying a step-by-step math prompt.

    Args:
        line: Mapping for a single row; the "problem" and "answer" keys are read.
        task_name: Passed through unchanged to the resulting `Doc`.

    Returns:
        A `Doc` whose query is the prompt template filled with the problem
        text and whose only choice (gold index 0) is the row's answer.
    """
    # Prompt template adapted from
    # - simple-evals: https://github.com/openai/simple-evals/blob/6e84f4e2aed6b60f6a0c7b8f06bbbf4bfde72e58/math_eval.py#L17
    # - Llama 3: https://huggingface.co/datasets/meta-llama/Llama-3.2-1B-Instruct-evals/viewer/Llama-3.2-1B-Instruct-evals__math__details?views%5B%5D=llama_32_1b_instruct_evals__math__details
    # Note that it is important to have the final answer in a box for math-verify to work correctly
    # The doubled braces survive `.format` as literal braces around ANSWER.
    prompt_template = """
Solve the following math problem efficiently and clearly. The last line of your response should be of the following format: 'Therefore, the final answer is: $\\boxed{{ANSWER}}$. I hope it is correct' (without quotes) where ANSWER is just the final number or expression that solves the problem. Think step by step before answering.

{Question}
""".strip()

    prompt = prompt_template.format(Question=line["problem"])
    reference_answers = [line["answer"]]
    return Doc(
        task_name=task_name,
        query=prompt,
        choices=reference_answers,
        gold_index=0,
    )
| 62 | + |
| 63 | + |
46 | 64 | def anli(line, task_name: str = None):
|
47 | 65 | return Doc(
|
48 | 66 | task_name=task_name,
|
@@ -710,22 +728,31 @@ def ethics_virtue(line, task_name: str = None):
|
710 | 728 |
|
711 | 729 |
|
def gpqa(line, task_name: str = None):
    """Turn one dataset row into a four-option multiple-choice `Doc`.

    The correct answer is placed at a randomly drawn position among the three
    incorrect answers, and the options are rendered as A) to D) in the prompt.

    Args:
        line: Mapping for a single row; the "Question", "Correct Answer" and
            "Incorrect Answer 1" to "Incorrect Answer 3" keys are read.
        task_name: Passed through unchanged to the resulting `Doc`.

    Returns:
        A `Doc` whose choices are the leading entries of `LETTER_INDICES`
        (one per option) and whose gold index is the drawn position.
    """
    # Prompt template from simple-evals: https://github.com/openai/simple-evals/blob/83ed7640a7d9cd26849bcb3340125002ef14abbe/common.py#L14
    prompt_template = """
Answer the following multiple choice question. The last line of your response should be of the following format: 'Answer: $LETTER' (without quotes) where LETTER is one of ABCD. Think step by step before answering.

{Question}

A) {A}
B) {B}
C) {C}
D) {D}
""".strip()

    # A single draw decides which of the four slots holds the correct answer.
    correct_slot = random.randint(0, 3)
    distractors = [line[f"Incorrect Answer {number}"] for number in (1, 2, 3)]
    options = distractors[:correct_slot] + [line["Correct Answer"]] + distractors[correct_slot:]

    option_a, option_b, option_c, option_d = options
    prompt = prompt_template.format(
        A=option_a,
        B=option_b,
        C=option_c,
        D=option_d,
        Question=line["Question"],
    )

    # NOTE(review): the full prompt is also passed as `instruction`; confirm
    # this is what the consumers of `Doc.instruction` expect.
    return Doc(
        task_name=task_name,
        query=prompt,
        choices=LETTER_INDICES[: len(options)],
        gold_index=correct_slot,
        instruction=prompt,
    )
|
730 | 757 |
|
731 | 758 |
|
@@ -1257,6 +1284,25 @@ def lsat_qa(line, task_name: str = None):
|
1257 | 1284 | )
|
1258 | 1285 |
|
1259 | 1286 |
|
def math_500(line, task_name: str = None):
    """Turn one dataset row into a `Doc` carrying a step-by-step math prompt.

    Args:
        line: Mapping for a single row; the "problem" and "solution" keys are read.
        task_name: Passed through unchanged to the resulting `Doc`.

    Returns:
        A `Doc` whose query is the prompt template filled with the problem
        text and whose only choice (gold index 0) is the row's solution.
    """
    # Prompt template adapted from
    # - simple-evals: https://github.com/openai/simple-evals/blob/6e84f4e2aed6b60f6a0c7b8f06bbbf4bfde72e58/math_eval.py#L17
    # - Llama 3: https://huggingface.co/datasets/meta-llama/Llama-3.2-1B-Instruct-evals/viewer/Llama-3.2-1B-Instruct-evals__math__details?views%5B%5D=llama_32_1b_instruct_evals__math__details
    # Note that it is important to have the final answer in a box for math-verify to work correctly
    # The doubled braces survive `.format` as literal braces around ANSWER.
    prompt_template = """
Solve the following math problem efficiently and clearly. The last line of your response should be of the following format: 'Therefore, the final answer is: $\\boxed{{ANSWER}}$. I hope it is correct' (without quotes) where ANSWER is just the final number or expression that solves the problem. Think step by step before answering.

{Question}
""".strip()

    prompt = prompt_template.format(Question=line["problem"])
    reference_solutions = [line["solution"]]
    return Doc(
        task_name=task_name,
        query=prompt,
        gold_index=0,
        choices=reference_solutions,
    )
| 1304 | + |
| 1305 | + |
1260 | 1306 | def math(line, task_name: str = None):
|
1261 | 1307 | return Doc(
|
1262 | 1308 | task_name=task_name,
|
|
0 commit comments