NVIDIA-NeMo · terrykong · Dec 5, 2025 · Dec 2, 2025 · Dec 2, 2025 · Dec 2, 2025
@@ -13,3 +13,8 @@
 	url = https://github.com/NVIDIA-NeMo/Automodel.git
 	branch = nemo-rl-submodule
 	shallow = true
+[submodule "3rdparty/Gym-workspace/Gym"]
+	path = 3rdparty/Gym-workspace/Gym
+	url = https://github.com/NVIDIA-NeMo/Gym.git
+	branch = main
+	shallow = true
@@ -12,10 +12,10 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 try:
-    from penguin import config_types  # noqa: F401
+    from nemo_gym import config_types  # noqa: F401
 
     INSTALLED = True
 except Exception:
     INSTALLED = False
 
-print(f"PENGUIN {INSTALLED=}")
+print(f"NEMO_GYM {INSTALLED=}")
@@ -3,8 +3,8 @@ requires = ["setuptools>=61.0", "wheel"]
 build-backend = "setuptools.build_meta"
 
 [project]
-name = "penguin"
+name = "nemo_gym"
 dynamic = ["dependencies", "version"]
 authors = [{ name = "NVIDIA", email = "nemo-toolkit@nvidia.com" }]
-description = "Standalone packaging for the Penguin sub-module."
+description = "Standalone packaging for the Gym sub-module."
 requires-python = ">=3.10"
@@ -20,8 +20,8 @@
 final_packages = []
 final_package_dir = {}
 
-# If the submodule is present, expose `penguin` package from the checkout
-src_dir = Path("Penguin")
+# If the submodule is present, expose `nemo_gym` package from the checkout
+src_dir = Path("Gym")
 
 
 CACHED_DEPENDENCIES = [
@@ -41,6 +41,7 @@
     "aiohttp",
     "yappi",
     "ray[default]",
+    "psutil",
 ]
 
 if src_dir.exists():
@@ -49,7 +50,7 @@
         pyproject_toml = tomllib.load(f)
     if not pyproject_toml_path.exists():
         raise FileNotFoundError(
-            f"[Penguin][setup] {pyproject_toml_path} not found; skipping dependency consistency check."
+            f"[Gym][setup] {pyproject_toml_path} not found; skipping dependency consistency check."
         )
 
     packages = pyproject_toml["tool"]["setuptools"]["packages"]["find"]["include"]
@@ -69,19 +70,19 @@
 
     if missing_in_cached or extra_in_cached:
         print(
-            "[Penguin][setup] Dependency mismatch between Penguin-workspace/Penguin/pyproject.toml vs Penguin-workspace/setup.py::CACHED_DEPENDENCIES.",
+            "[Gym][setup] Dependency mismatch between Gym-workspace/Gym/pyproject.toml vs Gym-workspace/setup.py::CACHED_DEPENDENCIES.",
             file=sys.stderr,
         )
         if missing_in_cached:
             print(
-                "  - Present in Penguin-workspace/Penguin/pyproject.toml but missing from CACHED_DEPENDENCIES:",
+                "  - Present in Gym-workspace/Gym/pyproject.toml but missing from CACHED_DEPENDENCIES:",
                 file=sys.stderr,
             )
             for dep in sorted(missing_in_cached):
                 print(f"    * {dep}", file=sys.stderr)
         if extra_in_cached:
             print(
-                "  - Present in CACHED_DEPENDENCIES but not in Penguin-workspace/Penguin/pyproject.toml:",
+                "  - Present in CACHED_DEPENDENCIES but not in Gym-workspace/Gym/pyproject.toml:",
                 file=sys.stderr,
             )
             for dep in sorted(extra_in_cached):
@@ -93,19 +94,19 @@
         sys.exit(1)
     else:
         print(
-            "[Penguin][setup] Dependency sets are consistent with the submodule pyproject.",
+            "[Gym][setup] Dependency sets are consistent with the submodule pyproject.",
             file=sys.stderr,
         )
 
 
 setuptools.setup(
-    name="penguin",
+    name="nemo_gym",
     version="0.0.0",
-    description="Standalone packaging for the Penguin sub-module.",
+    description="Standalone packaging for the Gym sub-module.",
     author="NVIDIA",
     author_email="nemo-toolkit@nvidia.com",
     packages=final_packages,
     package_dir=final_package_dir,
-    py_modules=["is_penguin_installed"],
+    py_modules=["is_nemo_gym_installed"],
     install_requires=CACHED_DEPENDENCIES,
 )
@@ -232,15 +232,15 @@ policy:
         num_nodes: null # Decides number of nodes to be dedicated to generation
 
 data:
-  train_jsonl_fpath: 3rdparty/Penguin-workspace/Penguin/data/bytedtsinghua_dapo17k/train.jsonl
-  validation_jsonl_fpath: 3rdparty/Penguin-workspace/Penguin/data/bytedtsinghua_dapo17k/validation.jsonl
+  train_jsonl_fpath: 3rdparty/Gym-workspace/Gym/data/bytedtsinghua_dapo17k/train.jsonl
+  validation_jsonl_fpath: 3rdparty/Gym-workspace/Gym/data/bytedtsinghua_dapo17k/validation.jsonl
   shuffle: true
   num_workers: 0
 
 env:
-  should_use_penguin: true
-  should_log_penguin_responses: true  # If you have low logging storage, set this to false
-  penguin:  # This is passed into Penguin as the initial_global_config_dict
+  should_use_nemo_gym: true
+  should_log_nemo_gym_responses: true  # If you have low logging storage, set this to false
+  nemo_gym:  # This is passed into NeMo-Gym as the initial_global_config_dict
     config_paths:
     - responses_api_models/vllm_model/configs/vllm_model_for_training.yaml  # Required! And it must be *for_training
     - resources_servers/library_judge_math/configs/library_judge_math.yaml

@@ -36,7 +36,7 @@
     MasterConfig,
     StatefulDataLoader,
     TokenizerType,
-    _should_use_penguin,
+    _should_use_nemo_gym,
     grpo_train,
     refit_policy_generation,
     setup,
@@ -48,13 +48,13 @@
     get_actor_python_env,
 )
 from nemo_rl.distributed.virtual_cluster import init_ray
-from nemo_rl.environments.penguin import (
-    Penguin,
-    PenguinConfig,
-    penguin_example_to_nemo_rl_datum_spec,
-    setup_penguin_config,
+from nemo_rl.environments.nemo_gym import (
+    NemoGym,
+    NemoGymConfig,
+    nemo_gym_example_to_nemo_rl_datum_spec,
+    setup_nemo_gym_config,
 )
-from nemo_rl.experience.rollouts import run_async_penguin_rollout
+from nemo_rl.experience.rollouts import run_async_nemo_gym_rollout
 from nemo_rl.models.generation import configure_generation_config
 from nemo_rl.utils.config import load_config, parse_hydra_overrides
 from nemo_rl.utils.logger import get_next_experiment_dir
@@ -75,29 +75,29 @@ def parse_args() -> tuple[argparse.Namespace, list[str]]:
     return args, overrides
 
 
-def setup_single_penguin_dataset(
+def setup_single_nemo_gym_dataset(
     jsonl_fpath: str, tokenizer, num_repeats: Optional[int] = None
 ):
     with open(jsonl_fpath) as f:
-        penguin_examples = list(map(json.loads, f))
+        nemo_gym_examples = list(map(json.loads, f))
 
-    print(f"Loaded data at {jsonl_fpath}. Found {len(penguin_examples)} examples")
+    print(f"Loaded data at {jsonl_fpath}. Found {len(nemo_gym_examples)} examples")
 
     if num_repeats:
-        previous_length = len(penguin_examples)
-        penguin_examples = list(
+        previous_length = len(nemo_gym_examples)
+        nemo_gym_examples = list(
             chain.from_iterable(
-                repeat(penguin_example, num_repeats)
-                for penguin_example in penguin_examples
+                repeat(nemo_gym_example, num_repeats)
+                for nemo_gym_example in nemo_gym_examples
             )
         )
         print(
-            f"Repeating examples (in a pattern of abc to aabbcc) for {jsonl_fpath} from {previous_length} to {len(penguin_examples)}!"
+            f"Repeating examples (in a pattern of abc to aabbcc) for {jsonl_fpath} from {previous_length} to {len(nemo_gym_examples)}!"
         )
 
     nemo_rl_compatible_examples: list[DatumSpec] = [
-        penguin_example_to_nemo_rl_datum_spec(penguin_example, idx)
-        for idx, penguin_example in enumerate(penguin_examples)
+        nemo_gym_example_to_nemo_rl_datum_spec(nemo_gym_example, idx)
+        for idx, nemo_gym_example in enumerate(nemo_gym_examples)
     ]
 
     passthrough_task_processor = lambda datum_dict, *args, **kwargs: datum_dict
@@ -129,7 +129,7 @@ def collect_trajectories(
     print("\n🔍 Running trajectory collection...", flush=True)
     generation_config = master_config["policy"]["generation"]
     for val_batch in val_dataloader:
-        penguin_rollout_result = run_async_penguin_rollout(
+        nemo_gym_rollout_result = run_async_nemo_gym_rollout(
             policy_generation=policy_generation,
             input_batch=val_batch,
             tokenizer=tokenizer,
@@ -141,7 +141,7 @@ def collect_trajectories(
         )
 
         rows_to_log: list[str] = []
-        for key, value in penguin_rollout_result.rollout_metrics.items():
+        for key, value in nemo_gym_rollout_result.rollout_metrics.items():
             if "full_result" not in key:
                 continue
 
@@ -195,18 +195,18 @@ def main() -> None:
         config["policy"]["generation"], tokenizer
     )
 
-    # Penguin specific config setup.
-    setup_penguin_config(config, tokenizer)
+    # NeMo-Gym specific config setup.
+    setup_nemo_gym_config(config, tokenizer)
 
     # We assert here since this is right after the final config has been materialized.
-    assert _should_use_penguin(config)
+    assert _should_use_nemo_gym(config)
 
     print("\n▶ Setting up data...")
-    train_dataset = setup_single_penguin_dataset(
+    train_dataset = setup_single_nemo_gym_dataset(
         jsonl_fpath=config["data"]["train_jsonl_fpath"],
         tokenizer=tokenizer,
     )
-    val_dataset = setup_single_penguin_dataset(
+    val_dataset = setup_single_nemo_gym_dataset(
         jsonl_fpath=config["data"]["validation_jsonl_fpath"],
         tokenizer=tokenizer,
     )
@@ -247,23 +247,23 @@ def main() -> None:
     ) = setup(config, tokenizer, train_dataset, val_dataset)
 
     is_trajectory_collection = (
-        config["env"]["penguin"].pop("is_trajectory_collection", False) or False
+        config["env"]["nemo_gym"].pop("is_trajectory_collection", False) or False
     )
-    penguin_config = PenguinConfig(
+    nemo_gym_config = NemoGymConfig(
         model_name=policy_generation.cfg["model_name"],
         base_urls=policy_generation.dp_openai_server_base_urls,
-        initial_global_config_dict=config["env"]["penguin"],
+        initial_global_config_dict=config["env"]["nemo_gym"],
     )
-    penguin = Penguin.options(
+    nemo_gym = NemoGym.options(
         runtime_env={
             "py_executable": get_actor_python_env(
-                "nemo_rl.environments.penguin.Penguin"
+                "nemo_rl.environments.nemo_gym.NemoGym"
             ),
         }
-    ).remote(penguin_config)
-    # Blocking wait for penguin to spin up
-    ray.get(penguin.health_check.remote())
-    task_to_env = {"penguin": penguin}
+    ).remote(nemo_gym_config)
+    # Blocking wait for NeMo-Gym to spin up
+    ray.get(nemo_gym.health_check.remote())
+    task_to_env = {"nemo_gym": nemo_gym}
     val_task_to_env = task_to_env
 
     if is_trajectory_collection:

@@ -1,7 +1,21 @@
+# Copyright (c) 2025, NVIDIA CORPORATION.  All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
 # Fail on errors
 set -e
 
-uv sync --group={build,docs,dev,test} --extra penguin
+uv sync --all-groups --extra nemo_gym
 
 # Stop pesky previous Ray servers that may not have been able to spin down from previous users.
 uv run ray stop --force
@@ -27,7 +41,7 @@ uv run python -c "import ray; ray.shutdown()"
 ./tests/run_unit.sh unit/environments/test_math_environment.py::test_math_env_step_basic
 
 # NeMo Gym integrates directly into NeMo RL as an Environment since that is the cleanest way. This tests the NeMo Gym integration logic and correctness.
-./tests/run_unit.sh unit/environments/test_penguin.py::test_penguin_sanity
+./tests/run_unit.sh unit/environments/test_nemo_gym.py::test_nemo_gym_sanity
 
 # NeMo Gym uses a separate rollout loop inside grpo_train in NeMo RL. This tests the e2e rollout functionality and correctness.
-./tests/run_unit.sh unit/experience/test_rollouts.py::test_run_async_penguin_rollout
+./tests/run_unit.sh unit/experience/test_rollouts.py::test_run_async_nemo_gym_rollout