Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions .gitmodules
Original file line number Diff line number Diff line change
Expand Up @@ -13,3 +13,8 @@
url = https://github.com/NVIDIA-NeMo/Automodel.git
branch = nemo-rl-submodule
shallow = true
[submodule "3rdparty/Gym-workspace/Gym"]
path = 3rdparty/Gym-workspace/Gym
url = https://github.com/NVIDIA-NeMo/Gym.git
branch = main
shallow = true
1 change: 1 addition & 0 deletions 3rdparty/Gym-workspace/Gym
Submodule Gym added at 035c91
Original file line number Diff line number Diff line change
Expand Up @@ -12,10 +12,10 @@
# See the License for the specific language governing permissions and
# limitations under the License.
try:
from penguin import config_types # noqa: F401
from nemo_gym import config_types # noqa: F401

INSTALLED = True
except Exception:
INSTALLED = False

print(f"PENGUIN {INSTALLED=}")
print(f"NEMO_GYM {INSTALLED=}")
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,8 @@ requires = ["setuptools>=61.0", "wheel"]
build-backend = "setuptools.build_meta"

[project]
name = "penguin"
name = "nemo_gym"
dynamic = ["dependencies", "version"]
authors = [{ name = "NVIDIA", email = "nemo-toolkit@nvidia.com" }]
description = "Standalone packaging for the Penguin sub-module."
description = "Standalone packaging for the Gym sub-module."
requires-python = ">=3.10"
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,8 @@
final_packages = []
final_package_dir = {}

# If the submodule is present, expose `penguin` package from the checkout
src_dir = Path("Penguin")
# If the submodule is present, expose `nemo_gym` package from the checkout
src_dir = Path("Gym")


CACHED_DEPENDENCIES = [
Expand All @@ -41,6 +41,7 @@
"aiohttp",
"yappi",
"ray[default]",
"psutil",
]

if src_dir.exists():
Expand All @@ -49,7 +50,7 @@
pyproject_toml = tomllib.load(f)
if not pyproject_toml_path.exists():
raise FileNotFoundError(
f"[Penguin][setup] {pyproject_toml_path} not found; skipping dependency consistency check."
f"[Gym][setup] {pyproject_toml_path} not found; skipping dependency consistency check."
)

packages = pyproject_toml["tool"]["setuptools"]["packages"]["find"]["include"]
Expand All @@ -69,19 +70,19 @@

if missing_in_cached or extra_in_cached:
print(
"[Penguin][setup] Dependency mismatch between Penguin-workspace/Penguin/pyproject.toml vs Penguin-workspace/setup.py::CACHED_DEPENDENCIES.",
"[Gym][setup] Dependency mismatch between Gym-workspace/Gym/pyproject.toml vs Gym-workspace/setup.py::CACHED_DEPENDENCIES.",
file=sys.stderr,
)
if missing_in_cached:
print(
" - Present in Penguin-workspace/Penguin/pyproject.toml but missing from CACHED_DEPENDENCIES:",
" - Present in Gym-workspace/Gym/pyproject.toml but missing from CACHED_DEPENDENCIES:",
file=sys.stderr,
)
for dep in sorted(missing_in_cached):
print(f" * {dep}", file=sys.stderr)
if extra_in_cached:
print(
" - Present in CACHED_DEPENDENCIES but not in Penguin-workspace/Penguin/pyproject.toml:",
" - Present in CACHED_DEPENDENCIES but not in Gym-workspace/Gym/pyproject.toml:",
file=sys.stderr,
)
for dep in sorted(extra_in_cached):
Expand All @@ -93,19 +94,19 @@
sys.exit(1)
else:
print(
"[Penguin][setup] Dependency sets are consistent with the submodule pyproject.",
"[Gym][setup] Dependency sets are consistent with the submodule pyproject.",
file=sys.stderr,
)


setuptools.setup(
name="penguin",
name="nemo_gym",
version="0.0.0",
description="Standalone packaging for the Penguin sub-module.",
description="Standalone packaging for the Gym sub-module.",
author="NVIDIA",
author_email="nemo-toolkit@nvidia.com",
packages=final_packages,
package_dir=final_package_dir,
py_modules=["is_penguin_installed"],
py_modules=["is_nemo_gym_installed"],
install_requires=CACHED_DEPENDENCIES,
)
Original file line number Diff line number Diff line change
Expand Up @@ -232,15 +232,15 @@ policy:
num_nodes: null # Decides number of nodes to be dedicated to generation

data:
train_jsonl_fpath: 3rdparty/Penguin-workspace/Penguin/data/bytedtsinghua_dapo17k/train.jsonl
validation_jsonl_fpath: 3rdparty/Penguin-workspace/Penguin/data/bytedtsinghua_dapo17k/validation.jsonl
train_jsonl_fpath: 3rdparty/Gym-workspace/Gym/data/bytedtsinghua_dapo17k/train.jsonl
validation_jsonl_fpath: 3rdparty/Gym-workspace/Gym/data/bytedtsinghua_dapo17k/validation.jsonl
shuffle: true
num_workers: 0

env:
should_use_penguin: true
should_log_penguin_responses: true # If you have low logging storage, set this to false
penguin: # This is passed into Penguin as the initial_global_config_dict
should_use_nemo_gym: true
should_log_nemo_gym_responses: true # If you have low logging storage, set this to false
nemo_gym: # This is passed into NeMo-Gym as the initial_global_config_dict
config_paths:
- responses_api_models/vllm_model/configs/vllm_model_for_training.yaml # Required! And it must be *for_training
- resources_servers/library_judge_math/configs/library_judge_math.yaml
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@
MasterConfig,
StatefulDataLoader,
TokenizerType,
_should_use_penguin,
_should_use_nemo_gym,
grpo_train,
refit_policy_generation,
setup,
Expand All @@ -48,13 +48,13 @@
get_actor_python_env,
)
from nemo_rl.distributed.virtual_cluster import init_ray
from nemo_rl.environments.penguin import (
Penguin,
PenguinConfig,
penguin_example_to_nemo_rl_datum_spec,
setup_penguin_config,
from nemo_rl.environments.nemo_gym import (
NemoGym,
NemoGymConfig,
nemo_gym_example_to_nemo_rl_datum_spec,
setup_nemo_gym_config,
)
from nemo_rl.experience.rollouts import run_async_penguin_rollout
from nemo_rl.experience.rollouts import run_async_nemo_gym_rollout
from nemo_rl.models.generation import configure_generation_config
from nemo_rl.utils.config import load_config, parse_hydra_overrides
from nemo_rl.utils.logger import get_next_experiment_dir
Expand All @@ -75,29 +75,29 @@ def parse_args() -> tuple[argparse.Namespace, list[str]]:
return args, overrides


def setup_single_penguin_dataset(
def setup_single_nemo_gym_dataset(
jsonl_fpath: str, tokenizer, num_repeats: Optional[int] = None
):
with open(jsonl_fpath) as f:
penguin_examples = list(map(json.loads, f))
nemo_gym_examples = list(map(json.loads, f))

print(f"Loaded data at {jsonl_fpath}. Found {len(penguin_examples)} examples")
print(f"Loaded data at {jsonl_fpath}. Found {len(nemo_gym_examples)} examples")

if num_repeats:
previous_length = len(penguin_examples)
penguin_examples = list(
previous_length = len(nemo_gym_examples)
nemo_gym_examples = list(
chain.from_iterable(
repeat(penguin_example, num_repeats)
for penguin_example in penguin_examples
repeat(nemo_gym_example, num_repeats)
for nemo_gym_example in nemo_gym_examples
)
)
print(
f"Repeating examples (in a pattern of abc to aabbcc) for {jsonl_fpath} from {previous_length} to {len(penguin_examples)}!"
f"Repeating examples (in a pattern of abc to aabbcc) for {jsonl_fpath} from {previous_length} to {len(nemo_gym_examples)}!"
)

nemo_rl_compatible_examples: list[DatumSpec] = [
penguin_example_to_nemo_rl_datum_spec(penguin_example, idx)
for idx, penguin_example in enumerate(penguin_examples)
nemo_gym_example_to_nemo_rl_datum_spec(nemo_gym_example, idx)
for idx, nemo_gym_example in enumerate(nemo_gym_examples)
]

passthrough_task_processor = lambda datum_dict, *args, **kwargs: datum_dict
Expand Down Expand Up @@ -129,7 +129,7 @@ def collect_trajectories(
print("\n🔍 Running trajectory collection...", flush=True)
generation_config = master_config["policy"]["generation"]
for val_batch in val_dataloader:
penguin_rollout_result = run_async_penguin_rollout(
nemo_gym_rollout_result = run_async_nemo_gym_rollout(
policy_generation=policy_generation,
input_batch=val_batch,
tokenizer=tokenizer,
Expand All @@ -141,7 +141,7 @@ def collect_trajectories(
)

rows_to_log: list[str] = []
for key, value in penguin_rollout_result.rollout_metrics.items():
for key, value in nemo_gym_rollout_result.rollout_metrics.items():
if "full_result" not in key:
continue

Expand Down Expand Up @@ -195,18 +195,18 @@ def main() -> None:
config["policy"]["generation"], tokenizer
)

# Penguin specific config setup.
setup_penguin_config(config, tokenizer)
# NeMo-Gym specific config setup.
setup_nemo_gym_config(config, tokenizer)

# We assert here since this is right after the final config has been materialized.
assert _should_use_penguin(config)
assert _should_use_nemo_gym(config)

print("\n▶ Setting up data...")
train_dataset = setup_single_penguin_dataset(
train_dataset = setup_single_nemo_gym_dataset(
jsonl_fpath=config["data"]["train_jsonl_fpath"],
tokenizer=tokenizer,
)
val_dataset = setup_single_penguin_dataset(
val_dataset = setup_single_nemo_gym_dataset(
jsonl_fpath=config["data"]["validation_jsonl_fpath"],
tokenizer=tokenizer,
)
Expand Down Expand Up @@ -247,23 +247,23 @@ def main() -> None:
) = setup(config, tokenizer, train_dataset, val_dataset)

is_trajectory_collection = (
config["env"]["penguin"].pop("is_trajectory_collection", False) or False
config["env"]["nemo_gym"].pop("is_trajectory_collection", False) or False
)
penguin_config = PenguinConfig(
nemo_gym_config = NemoGymConfig(
model_name=policy_generation.cfg["model_name"],
base_urls=policy_generation.dp_openai_server_base_urls,
initial_global_config_dict=config["env"]["penguin"],
initial_global_config_dict=config["env"]["nemo_gym"],
)
penguin = Penguin.options(
nemo_gym = NemoGym.options(
runtime_env={
"py_executable": get_actor_python_env(
"nemo_rl.environments.penguin.Penguin"
"nemo_rl.environments.nemo_gym.NemoGym"
),
}
).remote(penguin_config)
# Blocking wait for penguin to spin up
ray.get(penguin.health_check.remote())
task_to_env = {"penguin": penguin}
).remote(nemo_gym_config)
# Blocking wait for NeMo-Gym to spin up
ray.get(nemo_gym.health_check.remote())
task_to_env = {"nemo_gym": nemo_gym}
val_task_to_env = task_to_env

if is_trajectory_collection:
Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,21 @@
# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Fail on errors
set -e

uv sync --group={build,docs,dev,test} --extra penguin
uv sync --all-groups --extra nemo_gym

# Stop pesky previous Ray servers that may not have been able to spin down from previous users.
uv run ray stop --force
Expand All @@ -27,7 +41,7 @@ uv run python -c "import ray; ray.shutdown()"
./tests/run_unit.sh unit/environments/test_math_environment.py::test_math_env_step_basic

# NeMo Gym integrates directly into NeMo RL as an Environment since that is the cleanest way. This tests the NeMo Gym integration logic and correctness.
./tests/run_unit.sh unit/environments/test_penguin.py::test_penguin_sanity
./tests/run_unit.sh unit/environments/test_nemo_gym.py::test_nemo_gym_sanity

# NeMo Gym uses a separate rollout loop inside grpo_train in NeMo RL. This tests the e2e rollout functionality and correctness.
./tests/run_unit.sh unit/experience/test_rollouts.py::test_run_async_penguin_rollout
./tests/run_unit.sh unit/experience/test_rollouts.py::test_run_async_nemo_gym_rollout
Loading
Loading