Skip to content

Commit a495eaa

Browse files
committed
merge aime
Signed-off-by: Yuki Huang <[email protected]>
1 parent ff6e305 commit a495eaa

File tree

5 files changed

+24
-61
lines changed

5 files changed

+24
-61
lines changed

docs/guides/eval.md

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -89,8 +89,7 @@ score=0.1000 (3.0/30)
8989

9090
## List of currently supported benchmarks
9191

92-
- [AIME-2024](../../nemo_rl/data/datasets/eval_datasets/aime2024.py): the corresponding `data.dataset_name` is `"aime2024"`.
93-
- [AIME-2025](../../nemo_rl/data/datasets/eval_datasets/aime2025.py): the corresponding `data.dataset_name` is `"aime2025"`.
92+
- [AIME-2024 and AIME-2025](../../nemo_rl/data/datasets/eval_datasets/aime.py): the corresponding `data.dataset_name` values are `"aime2024"` and `"aime2025"`.
9493
- [GPQA and GPQA-diamond](../../nemo_rl/data/datasets/eval_datasets/gpqa.py): the corresponding `data.dataset_name` values are `"gpqa"` and `"gpqa-diamond"`.
9594
- [MATH and MATH-500](../../nemo_rl/data/datasets/eval_datasets/math.py): the corresponding `data.dataset_name` values are `"math"` and `"math500"`.
9695
- [MMLU](../../nemo_rl/data/datasets/eval_datasets/mmlu.py): this also includes MMMLU (Multilingual MMLU), a total of 14 languages. When `data.dataset_name` is set to `mmlu`, the English version is used. To run evaluation on another language, set `data.dataset_name` to `mmlu_{language}`, where `language` is one of the following 14 values: `["AR-XY", "BN-BD", "DE-DE", "ES-LA", "FR-FR", "HI-IN", "ID-ID", "IT-IT", "JA-JP", "KO-KR", "PT-BR", "ZH-CN", "SW-KE", "YO-NG"]`.

docs/guides/sft.md

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -71,7 +71,6 @@ NeMo RL SFT uses Hugging Face chat templates to format the individual examples.
7171
custom_template: "{% for message in messages %}{%- if message['role'] == 'system' %}{{'Context: ' + message['content'].strip()}}{%- elif message['role'] == 'user' %}{{' Question: ' + message['content'].strip() + ' Answer: '}}{%- elif message['role'] == 'assistant' %}{{message['content'].strip()}}{%- endif %}{% endfor %}"
7272
```
7373
74-
7574
By default, NeMo RL has support for [OpenAssistant](https://github.com/NVIDIA-NeMo/RL/blob/main/nemo_rl/data/datasets/response_datasets/oasst.py), [Squad](https://github.com/NVIDIA-NeMo/RL/blob/main/nemo_rl/data/datasets/response_datasets/squad.py) and [OpenMathInstruct-2](https://github.com/NVIDIA-NeMo/RL/blob/main/nemo_rl/data/datasets/response_datasets/openmathinstruct2.py) datasets. All of these datasets are downloaded from Hugging Face and preprocessed on-the-fly, so there's no need to provide a path to any datasets on disk.
7675
7776
We provide a [ResponseDataset](../../nemo_rl/data/datasets/response_datasets/response_dataset.py) class that is compatible with jsonl-formatted response datasets. You can use `input_key`, `output_key` to specify which fields in your data correspond to the question and answer respectively. Here's an example configuration:

nemo_rl/data/datasets/eval_datasets/__init__.py

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -12,8 +12,7 @@
1212
# See the License for the specific language governing permissions and
1313
# limitations under the License.
1414

15-
from nemo_rl.data.datasets.eval_datasets.aime2024 import AIME2024Dataset
16-
from nemo_rl.data.datasets.eval_datasets.aime2025 import AIME2025Dataset
15+
from nemo_rl.data.datasets.eval_datasets.aime import AIMEDataset
1716
from nemo_rl.data.datasets.eval_datasets.gpqa import GPQADataset
1817
from nemo_rl.data.datasets.eval_datasets.local_math_dataset import LocalMathDataset
1918
from nemo_rl.data.datasets.eval_datasets.math import MathDataset
@@ -46,12 +45,14 @@ def load_eval_dataset(data_config):
4645
)
4746
# aime
4847
elif dataset_name == "aime2024":
49-
base_dataset = AIME2024Dataset(
48+
base_dataset = AIMEDataset(
49+
variant="2024",
5050
prompt_file=data_config["prompt_file"],
5151
system_prompt_file=data_config["system_prompt_file"],
5252
)
5353
elif dataset_name == "aime2025":
54-
base_dataset = AIME2025Dataset(
54+
base_dataset = AIMEDataset(
55+
variant="2025",
5556
prompt_file=data_config["prompt_file"],
5657
system_prompt_file=data_config["system_prompt_file"],
5758
)
@@ -98,8 +99,7 @@ def load_eval_dataset(data_config):
9899

99100

100101
__all__ = [
101-
"AIME2024Dataset",
102-
"AIME2025Dataset",
102+
"AIMEDataset",
103103
"GPQADataset",
104104
"LocalMathDataset",
105105
"MathDataset",

nemo_rl/data/datasets/eval_datasets/aime2025.py renamed to nemo_rl/data/datasets/eval_datasets/aime.py

Lines changed: 17 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -12,35 +12,44 @@
1212
# See the License for the specific language governing permissions and
1313
# limitations under the License.
1414

15-
"""AIME 2025 dataset."""
15+
"""AIME dataset."""
1616

17-
from typing import Any, Optional
17+
from typing import Any, Literal, Optional
1818

1919
from datasets import concatenate_datasets, load_dataset
2020

2121
from nemo_rl.data import processors
2222
from nemo_rl.data.interfaces import TaskDataSpec
2323

2424

25-
class AIME2025Dataset:
25+
class AIMEDataset:
2626
def __init__(
2727
self,
28+
variant: Literal["2024", "2025"] = "2025",
2829
prompt_file: Optional[str] = None,
2930
system_prompt_file: Optional[str] = None,
3031
):
31-
ds0 = load_dataset("opencompass/AIME2025", "AIME2025-I", split="test")
32-
ds1 = load_dataset("opencompass/AIME2025", "AIME2025-II", split="test")
33-
ds = concatenate_datasets([ds0, ds1])
32+
if variant == "2024":
33+
ds = load_dataset("HuggingFaceH4/aime_2024", split="train")
34+
self.input_key = "problem"
35+
elif variant == "2025":
36+
ds0 = load_dataset("opencompass/AIME2025", "AIME2025-I", split="test")
37+
ds1 = load_dataset("opencompass/AIME2025", "AIME2025-II", split="test")
38+
ds = concatenate_datasets([ds0, ds1])
39+
self.input_key = "question"
40+
else:
41+
raise ValueError(f"Invalid variant for aime dataset: aime{variant}")
42+
3443
self.rekeyed_ds = ds.map(self._rekey, remove_columns=ds.column_names)
3544
self.task_spec = TaskDataSpec(
36-
task_name="aime2025",
45+
task_name=f"aime{variant}",
3746
prompt_file=prompt_file,
3847
system_prompt_file=system_prompt_file,
3948
)
4049
self.processor = processors.math_data_processor
4150

4251
def _rekey(self, data: dict[str, Any]):
4352
return {
44-
"problem": data["question"],
53+
"problem": data[self.input_key],
4554
"expected_answer": data["answer"],
4655
}

nemo_rl/data/datasets/eval_datasets/aime2024.py

Lines changed: 0 additions & 44 deletions
This file was deleted.

0 commit comments

Comments
 (0)