From c2830b874119ef1413cb0129377abefb946f0725 Mon Sep 17 00:00:00 2001 From: Brian Yu Date: Sun, 14 Dec 2025 16:20:39 -0800 Subject: [PATCH] update config paths Signed-off-by: Brian Yu --- README.md | 25 +++++++++++-------- .../calendar/configs/calendar.yaml | 14 +++++------ .../code_gen/configs/code_gen.yaml | 5 ++-- .../configs/math_stack_overflow.yaml | 12 ++++----- .../configs/math_with_judge.yaml | 8 ------ 5 files changed, 27 insertions(+), 37 deletions(-) diff --git a/README.md b/README.md index 8dedd912b..eb29dc65d 100644 --- a/README.md +++ b/README.md @@ -150,17 +150,20 @@ Purpose: Training-ready environments with curated datasets. > Each resource server includes example data, configuration files, and tests. See each server's README for details. -| Resource Server | Domain | Dataset | Description | Value | Config | Train | Validation | License | -| -------------------------- | --------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------- | ---------------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------ | --------------------------------------------------------------------------------------------------------- | ----- | ---------- | ---------------------------------------------- | -| Google Search | agent | Nemotron-RL-knowledge-web_search-mcqa | Multi-choice question answering problems with search tools integrated | Improve knowledge-related benchmarks with search tools | config | ✓ | - | Apache 2.0 | -| Math Advanced Calculations | agent | Nemotron-RL-math-advanced_calculations | An instruction following math environment with counter-intuitive calculators | Improve instruction following capabilities in specific math environments | config | ✓ | - | Apache 2.0 | -| Workplace Assistant | agent | Nemotron-RL-agent-workplace_assistant | Workplace assistant multi-step tool-using environment | Improve multi-step tool use capability | config | ✓ | ✓ | Apache 2.0 | -| Mini Swe Agent | coding | SWE-Gym | A software development with mini-swe-agent orchestration | Improve software development capabilities, like SWE-bench | config | ✓ | ✓ | MIT | -| Instruction Following | instruction_following | Nemotron-RL-instruction_following | Instruction following datasets targeting IFEval and IFBench style instruction following capabilities | Improve IFEval and IFBench | config | ✓ | - | Apache 2.0 | -| Structured Outputs | instruction_following | Nemotron-RL-instruction_following-structured_outputs | Check if responses are following structured output requirements in prompts | Improve instruction following capabilities | config | ✓ | ✓ | Apache 2.0 | -| Equivalence Llm Judge | knowledge | Nemotron-RL-knowledge-openQA | Short answer questions with LLM-as-a-judge | Improve knowledge-related benchmarks like GPQA / HLE | config | ✓ | - | Apache 2.0 | -| Mcqa | knowledge | Nemotron-RL-knowledge-mcqa | Multi-choice question answering problems | Improve benchmarks like MMLU / GPQA / HLE | config | ✓ | - | Apache 2.0 | -| Math With Judge | math | Nemotron-RL-math-OpenMathReasoning | Math dataset with math-verify and LLM-as-a-judge | Improve math capabilities including AIME 24 / 25 | config | ✓ | ✓ | Creative Commons Attribution 4.0 International | +| Resource Server | Domain | Dataset | Description | Value | Config | Train | Validation | License | +| -------------------------- | --------------------- | -------------------------------------------------------------------------------------------------------------------------------------------------------------- | ---------------------------------------------------------------------------------------------------- | ------------------------------------------------------------------------ | --------------------------------------------------------------------------------------------------------- | ----- | ---------- | --------------------------------------------------------- | +| Calendar | agent | Nemotron-RL-agent-calendar_scheduling | - | - | config | ✓ | ✓ | Apache 2.0 | +| Google Search | agent | Nemotron-RL-knowledge-web_search-mcqa | Multi-choice question answering problems with search tools integrated | Improve knowledge-related benchmarks with search tools | config | ✓ | - | Apache 2.0 | +| Math Advanced Calculations | agent | Nemotron-RL-math-advanced_calculations | An instruction following math environment with counter-intuitive calculators | Improve instruction following capabilities in specific math environments | config | ✓ | - | Apache 2.0 | +| Workplace Assistant | agent | Nemotron-RL-agent-workplace_assistant | Workplace assistant multi-step tool-using environment | Improve multi-step tool use capability | config | ✓ | ✓ | Apache 2.0 | +| Code Gen | coding | nemotron-RL-coding-competitive_coding | - | - | config | ✓ | ✓ | Apache 2.0 | +| Mini Swe Agent | coding | SWE-Gym | A software development with mini-swe-agent orchestration | Improve software development capabilities, like SWE-bench | config | ✓ | ✓ | MIT | +| Instruction Following | instruction_following | Nemotron-RL-instruction_following | Instruction following datasets targeting IFEval and IFBench style instruction following capabilities | Improve IFEval and IFBench | config | ✓ | - | Apache 2.0 | +| Structured Outputs | instruction_following | Nemotron-RL-instruction_following-structured_outputs | Check if responses are following structured output requirements in prompts | Improve instruction following capabilities | config | ✓ | ✓ | Apache 2.0 | +| Equivalence Llm Judge | knowledge | Nemotron-RL-knowledge-openQA | Short answer questions with LLM-as-a-judge | Improve knowledge-related benchmarks like GPQA / HLE | config | ✓ | - | Apache 2.0 | +| Mcqa | knowledge | Nemotron-RL-knowledge-mcqa | Multi-choice question answering problems | Improve benchmarks like MMLU / GPQA / HLE | config | ✓ | - | Apache 2.0 | +| Math With Judge | math | Nemotron-RL-math-OpenMathReasoning | Math dataset with math-verify and LLM-as-a-judge | Improve math capabilities including AIME 24 / 25 | config | ✓ | ✓ | Creative Commons Attribution 4.0 International | +| Math With Judge | math | Nemotron-RL-math-stack_overflow | - | - | config | ✓ | ✓ | Creative Commons Attribution-ShareAlike 4.0 International | ## 📖 Documentation diff --git a/resources_servers/calendar/configs/calendar.yaml b/resources_servers/calendar/configs/calendar.yaml index f03df4440..21c9ff204 100644 --- a/resources_servers/calendar/configs/calendar.yaml +++ b/resources_servers/calendar/configs/calendar.yaml @@ -18,18 +18,16 @@ calendar_simple_agent: - name: train type: train jsonl_fpath: resources_servers/calendar/data/train.jsonl - gitlab_identifier: - dataset_name: calendar - version: 0.0.1 - artifact_fpath: calendar/train.jsonl + huggingface_identifier: + repo_id: nvidia/Nemotron-RL-agent-calendar_scheduling + artifact_fpath: train.jsonl license: Apache 2.0 - name: validation type: validation jsonl_fpath: resources_servers/calendar/data/validation.jsonl - gitlab_identifier: - dataset_name: calendar - version: 0.0.1 - artifact_fpath: calendar/validation.jsonl + huggingface_identifier: + repo_id: nvidia/Nemotron-RL-agent-calendar_scheduling + artifact_fpath: validation.jsonl license: Apache 2.0 - name: example type: example diff --git a/resources_servers/code_gen/configs/code_gen.yaml b/resources_servers/code_gen/configs/code_gen.yaml index 8bd919eb6..48698fedf 100644 --- a/resources_servers/code_gen/configs/code_gen.yaml +++ b/resources_servers/code_gen/configs/code_gen.yaml @@ -22,9 +22,8 @@ code_gen_simple_agent: - name: opencodereasoning_filtered_train type: train jsonl_fpath: resources_servers/code_gen/data/opencodereasoning_filtered_25k_train.jsonl - gitlab_identifier: - dataset_name: opencodereasoning_filtered - version: 0.0.1 + huggingface_identifier: + repo_id: nvidia/nemotron-RL-coding-competitive_coding artifact_fpath: opencodereasoning_filtered_25k_train.jsonl license: Apache 2.0 num_repeats: 1 diff --git a/resources_servers/math_with_judge/configs/math_stack_overflow.yaml b/resources_servers/math_with_judge/configs/math_stack_overflow.yaml index eae1af308..3a9bf1ecf 100644 --- a/resources_servers/math_with_judge/configs/math_stack_overflow.yaml +++ b/resources_servers/math_with_judge/configs/math_stack_overflow.yaml @@ -25,16 +25,14 @@ math_with_judge_simple_agent: - name: train type: train jsonl_fpath: resources_servers/math_with_judge/data/math_stack_overflow_train.jsonl - gitlab_identifier: - dataset_name: math_stack_overflow - version: 0.0.1 + huggingface_identifier: + repo_id: nvidia/Nemotron-RL-math-stack_overflow artifact_fpath: math_stack_overflow_problems.jsonl license: Creative Commons Attribution-ShareAlike 4.0 International - name: validation type: validation jsonl_fpath: resources_servers/math_with_judge/data/aime24_validation.jsonl - gitlab_identifier: - dataset_name: aime24 - version: 0.0.1 - artifact_fpath: aime24.jsonl + huggingface_identifier: + repo_id: nvidia/Nemotron-RL-math-OpenMathReasoning + artifact_fpath: aime24_validation.jsonl license: Apache 2.0 diff --git a/resources_servers/math_with_judge/configs/math_with_judge.yaml b/resources_servers/math_with_judge/configs/math_with_judge.yaml index 9998bc15c..333449b5c 100644 --- a/resources_servers/math_with_judge/configs/math_with_judge.yaml +++ b/resources_servers/math_with_judge/configs/math_with_judge.yaml @@ -27,10 +27,6 @@ math_with_judge_simple_agent: - name: train type: train jsonl_fpath: resources_servers/math_with_judge/data/train.jsonl - gitlab_identifier: - dataset_name: math_open_math_reasoning - version: 0.0.1 - artifact_fpath: open_math_reasoning_problems.jsonl huggingface_identifier: repo_id: nvidia/Nemotron-RL-math-OpenMathReasoning artifact_fpath: open_math_reasoning_problems.jsonl @@ -38,10 +34,6 @@ math_with_judge_simple_agent: - name: validation type: validation jsonl_fpath: resources_servers/math_with_judge/data/aime24_validation.jsonl - gitlab_identifier: - dataset_name: aime24 - version: 0.0.1 - artifact_fpath: aime24.jsonl huggingface_identifier: repo_id: nvidia/Nemotron-RL-math-OpenMathReasoning artifact_fpath: aime24_validation.jsonl