-
Notifications
You must be signed in to change notification settings - Fork 362
Remove target_for_fewshot_sorting and duplicate target calls (#241)
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
61e11ef
f7ed5ff
6146844
dc81343
aa36d2d
656a03f
c5c9936
c545863
3d96734
8b51d61
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -176,7 +176,6 @@ def bbh_harness(line, task_name: str = None): | |
query=query, | ||
choices=choices, | ||
gold_index=correct_index, | ||
target_for_fewshot_sorting=choices, | ||
instruction=line.get("task_prefix", None), | ||
) | ||
|
||
|
@@ -196,18 +195,17 @@ def bbh_lighteval(line, task_name: str = None): | |
query=query, | ||
choices=LETTER_INDICES[: len(line["choices"])], | ||
gold_index=line["target_idx"], | ||
target_for_fewshot_sorting=LETTER_INDICES[: len(line["choices"])], | ||
instruction=line.get("task_prefix", None), | ||
) | ||
|
||
|
||
def bbh(line, instruction, choices, task_name: str = None): | ||
is_few_shots = line.get("__few_shots", False) | ||
return Doc( | ||
task_name=task_name, | ||
query=f"{instruction}Q: {line['input']}\nA:", | ||
choices=choices, | ||
choices=[(" " if is_few_shots else "") + c for c in choices], | ||
gold_index=choices.index(line["target"]), | ||
target_for_fewshot_sorting=[f" {c}" for c in choices], | ||
instruction=instruction, | ||
) | ||
|
||
|
@@ -799,7 +797,6 @@ def hellaswag_generative(line, task_name: str = None): | |
choices=[" " + i for i in LETTER_INDICES[: len(line["endings"])]], | ||
gold_index=gold_ix, # -1 for test, | ||
instruction="The following are multiple choice questions (with answers) about common sense.\n\n", | ||
target_for_fewshot_sorting=line["endings"][gold_ix] if gold_ix > -1 else "", | ||
) | ||
|
||
|
||
|
@@ -1352,7 +1349,6 @@ def mmlu(line, topic, task_name: str = None): | |
choices=[" A", " B", " C", " D"] if is_few_shots else ["A", "B", "C", "D"], | ||
gold_index=gold_ix, | ||
instruction=f"The following are multiple choice questions (with answers) about {topic.replace('_', ' ')}.\n\n", | ||
target_for_fewshot_sorting=[" A", " B", " C", " D"][gold_ix], | ||
) | ||
|
||
|
||
|
@@ -1373,7 +1369,6 @@ def custom_mmlu_thom(line, task_name: str = None): | |
choices=[" A", " B", " C", " D"] if is_few_shots else ["A", "B", "C", "D"], | ||
gold_index=gold_ix, | ||
instruction=f"The following are multiple choice questions (with answers) about {topic.replace('_', ' ')}.\n\n", | ||
target_for_fewshot_sorting=[" A", " B", " C", " D"][gold_ix], | ||
) | ||
|
||
|
||
|
@@ -1613,15 +1608,13 @@ def mmlu_harness(line, task_name: str = None): | |
query += "Answer:" | ||
|
||
gold_ix = LETTER_INDICES.index(line["answer"]) if isinstance(line["answer"], str) else line["answer"] | ||
"__few_shots" in line and line["__few_shots"] is True # We are adding few shots | ||
|
||
return Doc( | ||
task_name=task_name, | ||
query=query, | ||
choices=[" A", " B", " C", " D"], | ||
gold_index=gold_ix, | ||
instruction=f"The following are multiple choice questions (with answers) about {topic.replace('_', ' ')}.\n\n", | ||
target_for_fewshot_sorting=[" A", " B", " C", " D"][gold_ix], | ||
) | ||
|
||
|
||
|
@@ -1632,14 +1625,14 @@ def mmlu_helm(line, task_name: str = None): | |
query += "\nAnswer:" | ||
|
||
gold_ix = LETTER_INDICES.index(line["answer"]) if isinstance(line["answer"], str) else line["answer"] | ||
is_few_shots = line.get("__few_shots", False) # We are adding few shots | ||
|
||
return Doc( | ||
task_name=task_name, | ||
query=query, | ||
choices=[" A", " B", " C", " D"], | ||
choices=[" A", " B", " C", " D"] if not is_few_shots else line["choices"], | ||
gold_index=gold_ix, | ||
instruction=f"The following are multiple choice questions (with answers) about {subject.replace('_', ' ')}.\n\n", | ||
target_for_fewshot_sorting=line["choices"][gold_ix], # specific to HELM evals | ||
Review comment: The conversion is incorrect here. Reply: Is it correct now? Choice contents for few-shot and labels (A, B, C, D) for eval. Reply: Now I set … [comment truncated in source] |
||
) | ||
|
||
|
||
|
@@ -1816,7 +1809,6 @@ def openbookqa_helm(line, task_name: str = None): | |
choices=["A", "B", "C", "D", "E"], | ||
gold_index=gold_ix, | ||
instruction="The following are multiple choice questions (with answers) about common sense.\n", | ||
target_for_fewshot_sorting=line["choices"]["text"][gold_ix], # specific to HELM evals | ||
) | ||
|
||
|
||
|
@@ -1837,14 +1829,13 @@ def piqa_helm(line, task_name: str = None): | |
query += "Answer: " | ||
|
||
gold_ix = int(line["label"]) | ||
|
||
is_few_shots = line.get("__few_shots", False) | ||
return Doc( | ||
task_name=task_name, | ||
query=query, | ||
choices=["A", "B"], | ||
choices=["A", "B"] if not is_few_shots else [line["sol1"], line["sol2"]], | ||
gold_index=gold_ix, | ||
instruction="The following are multiple choice questions (with answers) about common sense.\n", | ||
target_for_fewshot_sorting=[line["sol1"], line["sol2"]][gold_ix], | ||
Review comment: It's the same here; you changed the logic. Reply: I changed this accordingly. |
||
) | ||
|
||
|
||
|
@@ -1877,13 +1868,11 @@ def pubmed_qa_helm(line, task_name: str = None): | |
) | ||
query += f"\n\nQuestion: {line['question']}\nAnswer: " | ||
gold_ix = ["yes", "no", "maybe"].index(line["final_decision"]) | ||
|
||
return Doc( | ||
task_name=task_name, | ||
query=query, | ||
choices=["A", "B", "C"], | ||
gold_index=gold_ix, | ||
target_for_fewshot_sorting=["yes", "no", "maybe"][gold_ix], | ||
) | ||
|
||
|
||
|
@@ -2263,13 +2252,11 @@ def truthful_qa_helm(line, task_name: str = None): | |
query = f"Question: {line['question']}\n" | ||
query += "".join([f"{key}. {choice}\n" for key, choice in zip(LETTER_INDICES, line["choices"])]) | ||
query += "Answer:" | ||
|
||
return Doc( | ||
task_name=task_name, | ||
query=query, | ||
choices=LETTER_INDICES[: len(line["choices"])], | ||
gold_index=line["gold_index"], | ||
target_for_fewshot_sorting=line["choices"][line["gold_index"]], | ||
) | ||
|
||
|
||
|
Uh oh!
There was an error while loading. Please reload this page.