{"name": "arc_easy_std", "split": "test", "limit": 1000, "random_subsample_seed": 1234, "num_shots": 5}
{"name": "arc_easy_mc_std", "split": "test", "limit": 1000, "random_subsample_seed": 1234, "num_shots": 5}
{"name": "arc_challenge_std", "split": "test", "num_shots": 5}
{"name": "arc_challenge_mc_std", "split": "test", "num_shots": 5}
{"name": "boolq_std", "split": "validation", "limit": 1000, "random_subsample_seed": 1234, "num_shots": 5}
{"name": "boolq_mc_std", "split": "validation", "limit": 1000, "random_subsample_seed": 1234, "num_shots": 5}
{"name": "csqa_std", "split": "validation", "num_shots": 5}
{"name": "csqa_mc_std", "split": "validation", "num_shots": 5}
{"name": "hellaswag_std", "split": "validation", "limit": 1000, "random_subsample_seed": 1234, "num_shots": 5}
{"name": "hellaswag_mc_std", "split": "validation", "limit": 1000, "random_subsample_seed": 1234, "num_shots": 5}
{"name": "openbookqa_std", "split": "test", "num_shots": 5}
{"name": "openbookqa_mc_std", "split": "test", "num_shots": 5}
{"name": "piqa_std", "split": "validation", "limit": 1000, "random_subsample_seed": 1234, "num_shots": 5}
{"name": "piqa_mc_std", "split": "validation", "limit": 1000, "random_subsample_seed": 1234, "num_shots": 5}
{"name": "socialiqa_std", "split": "validation", "limit": 1000, "random_subsample_seed": 1234, "num_shots": 5}
{"name": "socialiqa_mc_std", "split": "validation", "limit": 1000, "random_subsample_seed": 1234, "num_shots": 5}
{"name": "winogrande_std", "split": "validation", "num_shots": 5}
{"name": "winogrande_mc_std", "split": "validation", "num_shots": 5}
{"name": "mmlu_std_abstract_algebra", "split": "test", "num_shots": 5}
{"name": "mmlu_mc_std_abstract_algebra", "split": "test", "num_shots": 5}
{"name": "mmlu_std_anatomy", "split": "test", "num_shots": 5}
{"name": "mmlu_mc_std_anatomy", "split": "test", "num_shots": 5}
{"name": "mmlu_std_astronomy", "split": "test", "num_shots": 5}
{"name": "mmlu_mc_std_astronomy", "split": "test", "num_shots": 5}
{"name": "mmlu_std_business_ethics", "split": "test", "num_shots": 5}
{"name": "mmlu_mc_std_business_ethics", "split": "test", "num_shots": 5}
{"name": "mmlu_std_clinical_knowledge", "split": "test", "num_shots": 5}
{"name": "mmlu_mc_std_clinical_knowledge", "split": "test", "num_shots": 5}
{"name": "mmlu_std_college_biology", "split": "test", "num_shots": 5}
{"name": "mmlu_mc_std_college_biology", "split": "test", "num_shots": 5}
{"name": "mmlu_std_college_chemistry", "split": "test", "num_shots": 5}
{"name": "mmlu_mc_std_college_chemistry", "split": "test", "num_shots": 5}
{"name": "mmlu_std_college_computer_science", "split": "test", "num_shots": 5}
{"name": "mmlu_mc_std_college_computer_science", "split": "test", "num_shots": 5}
{"name": "mmlu_std_college_mathematics", "split": "test", "num_shots": 5}
{"name": "mmlu_mc_std_college_mathematics", "split": "test", "num_shots": 5}
{"name": "mmlu_std_college_medicine", "split": "test", "num_shots": 5}
{"name": "mmlu_mc_std_college_medicine", "split": "test", "num_shots": 5}
{"name": "mmlu_std_college_physics", "split": "test", "num_shots": 5}
{"name": "mmlu_mc_std_college_physics", "split": "test", "num_shots": 5}
{"name": "mmlu_std_computer_security", "split": "test", "num_shots": 5}
{"name": "mmlu_mc_std_computer_security", "split": "test", "num_shots": 5}
{"name": "mmlu_std_conceptual_physics", "split": "test", "num_shots": 5}
{"name": "mmlu_mc_std_conceptual_physics", "split": "test", "num_shots": 5}
{"name": "mmlu_std_econometrics", "split": "test", "num_shots": 5}
{"name": "mmlu_mc_std_econometrics", "split": "test", "num_shots": 5}
{"name": "mmlu_std_electrical_engineering", "split": "test", "num_shots": 5}
{"name": "mmlu_mc_std_electrical_engineering", "split": "test", "num_shots": 5}
{"name": "mmlu_std_elementary_mathematics", "split": "test", "num_shots": 5}
{"name": "mmlu_mc_std_elementary_mathematics", "split": "test", "num_shots": 5}
{"name": "mmlu_std_formal_logic", "split": "test", "num_shots": 5}
{"name": "mmlu_mc_std_formal_logic", "split": "test", "num_shots": 5}
{"name": "mmlu_std_global_facts", "split": "test", "num_shots": 5}
{"name": "mmlu_mc_std_global_facts", "split": "test", "num_shots": 5}
{"name": "mmlu_std_high_school_biology", "split": "test", "num_shots": 5}
{"name": "mmlu_mc_std_high_school_biology", "split": "test", "num_shots": 5}
{"name": "mmlu_std_high_school_chemistry", "split": "test", "num_shots": 5}
{"name": "mmlu_mc_std_high_school_chemistry", "split": "test", "num_shots": 5}
{"name": "mmlu_std_high_school_computer_science", "split": "test", "num_shots": 5}
{"name": "mmlu_mc_std_high_school_computer_science", "split": "test", "num_shots": 5}
{"name": "mmlu_std_high_school_european_history", "split": "test", "num_shots": 5}
{"name": "mmlu_mc_std_high_school_european_history", "split": "test", "num_shots": 5}
{"name": "mmlu_std_high_school_geography", "split": "test", "num_shots": 5}
{"name": "mmlu_mc_std_high_school_geography", "split": "test", "num_shots": 5}
{"name": "mmlu_std_high_school_government_and_politics", "split": "test", "num_shots": 5}
{"name": "mmlu_mc_std_high_school_government_and_politics", "split": "test", "num_shots": 5}
{"name": "mmlu_std_high_school_macroeconomics", "split": "test", "num_shots": 5}
{"name": "mmlu_mc_std_high_school_macroeconomics", "split": "test", "num_shots": 5}
{"name": "mmlu_std_high_school_mathematics", "split": "test", "num_shots": 5}
{"name": "mmlu_mc_std_high_school_mathematics", "split": "test", "num_shots": 5}
{"name": "mmlu_std_high_school_microeconomics", "split": "test", "num_shots": 5}
{"name": "mmlu_mc_std_high_school_microeconomics", "split": "test", "num_shots": 5}
{"name": "mmlu_std_high_school_physics", "split": "test", "num_shots": 5}
{"name": "mmlu_mc_std_high_school_physics", "split": "test", "num_shots": 5}
{"name": "mmlu_std_high_school_psychology", "split": "test", "num_shots": 5}
{"name": "mmlu_mc_std_high_school_psychology", "split": "test", "num_shots": 5}
{"name": "mmlu_std_high_school_statistics", "split": "test", "num_shots": 5}
{"name": "mmlu_mc_std_high_school_statistics", "split": "test", "num_shots": 5}
{"name": "mmlu_std_high_school_us_history", "split": "test", "num_shots": 5}
{"name": "mmlu_mc_std_high_school_us_history", "split": "test", "num_shots": 5}
{"name": "mmlu_std_high_school_world_history", "split": "test", "num_shots": 5}
{"name": "mmlu_mc_std_high_school_world_history", "split": "test", "num_shots": 5}
{"name": "mmlu_std_human_aging", "split": "test", "num_shots": 5}
{"name": "mmlu_mc_std_human_aging", "split": "test", "num_shots": 5}
{"name": "mmlu_std_human_sexuality", "split": "test", "num_shots": 5}
{"name": "mmlu_mc_std_human_sexuality", "split": "test", "num_shots": 5}
{"name": "mmlu_std_international_law", "split": "test", "num_shots": 5}
{"name": "mmlu_mc_std_international_law", "split": "test", "num_shots": 5}
{"name": "mmlu_std_jurisprudence", "split": "test", "num_shots": 5}
{"name": "mmlu_mc_std_jurisprudence", "split": "test", "num_shots": 5}
{"name": "mmlu_std_logical_fallacies", "split": "test", "num_shots": 5}
{"name": "mmlu_mc_std_logical_fallacies", "split": "test", "num_shots": 5}
{"name": "mmlu_std_machine_learning", "split": "test", "num_shots": 5}
{"name": "mmlu_mc_std_machine_learning", "split": "test", "num_shots": 5}
{"name": "mmlu_std_management", "split": "test", "num_shots": 5}
{"name": "mmlu_mc_std_management", "split": "test", "num_shots": 5}
{"name": "mmlu_std_marketing", "split": "test", "num_shots": 5}
{"name": "mmlu_mc_std_marketing", "split": "test", "num_shots": 5}
{"name": "mmlu_std_medical_genetics", "split": "test", "num_shots": 5}
{"name": "mmlu_mc_std_medical_genetics", "split": "test", "num_shots": 5}
{"name": "mmlu_std_miscellaneous", "split": "test", "num_shots": 5}
{"name": "mmlu_mc_std_miscellaneous", "split": "test", "num_shots": 5}
{"name": "mmlu_std_moral_disputes", "split": "test", "num_shots": 5}
{"name": "mmlu_mc_std_moral_disputes", "split": "test", "num_shots": 5}
{"name": "mmlu_std_moral_scenarios", "split": "test", "num_shots": 5}
{"name": "mmlu_mc_std_moral_scenarios", "split": "test", "num_shots": 5}
{"name": "mmlu_std_nutrition", "split": "test", "num_shots": 5}
{"name": "mmlu_mc_std_nutrition", "split": "test", "num_shots": 5}
{"name": "mmlu_std_philosophy", "split": "test", "num_shots": 5}
{"name": "mmlu_mc_std_philosophy", "split": "test", "num_shots": 5}
{"name": "mmlu_std_prehistory", "split": "test", "num_shots": 5}
{"name": "mmlu_mc_std_prehistory", "split": "test", "num_shots": 5}
{"name": "mmlu_std_professional_accounting", "split": "test", "num_shots": 5}
{"name": "mmlu_mc_std_professional_accounting", "split": "test", "num_shots": 5}
{"name": "mmlu_std_professional_law", "split": "test", "num_shots": 5}
{"name": "mmlu_mc_std_professional_law", "split": "test", "num_shots": 5}
{"name": "mmlu_std_professional_medicine", "split": "test", "num_shots": 5}
{"name": "mmlu_mc_std_professional_medicine", "split": "test", "num_shots": 5}
{"name": "mmlu_std_professional_psychology", "split": "test", "num_shots": 5}
{"name": "mmlu_mc_std_professional_psychology", "split": "test", "num_shots": 5}
{"name": "mmlu_std_public_relations", "split": "test", "num_shots": 5}
{"name": "mmlu_mc_std_public_relations", "split": "test", "num_shots": 5}
{"name": "mmlu_std_security_studies", "split": "test", "num_shots": 5}
{"name": "mmlu_mc_std_security_studies", "split": "test", "num_shots": 5}
{"name": "mmlu_std_sociology", "split": "test", "num_shots": 5}
{"name": "mmlu_mc_std_sociology", "split": "test", "num_shots": 5}
{"name": "mmlu_std_us_foreign_policy", "split": "test", "num_shots": 5}
{"name": "mmlu_mc_std_us_foreign_policy", "split": "test", "num_shots": 5}
{"name": "mmlu_std_virology", "split": "test", "num_shots": 5}
{"name": "mmlu_mc_std_virology", "split": "test", "num_shots": 5}
{"name": "mmlu_std_world_religions", "split": "test", "num_shots": 5}
{"name": "mmlu_mc_std_world_religions", "split": "test", "num_shots": 5}