diff --git a/.gitmodules b/.gitmodules index 7c2bda3560..fd75d5044f 100644 --- a/.gitmodules +++ b/.gitmodules @@ -13,3 +13,8 @@ url = https://github.com/NVIDIA-NeMo/Automodel.git branch = nemo-rl-submodule shallow = true +[submodule "3rdparty/Gym-workspace/Gym"] + path = 3rdparty/Gym-workspace/Gym + url = https://github.com/NVIDIA-NeMo/Gym.git + branch = main + shallow = true diff --git a/3rdparty/Gym-workspace/Gym b/3rdparty/Gym-workspace/Gym new file mode 160000 index 0000000000..035c91e4b5 --- /dev/null +++ b/3rdparty/Gym-workspace/Gym @@ -0,0 +1 @@ +Subproject commit 035c91e4b5b74598c1a313a28980926e1c2b8439 diff --git a/3rdparty/Penguin-workspace/is_penguin_installed.py b/3rdparty/Gym-workspace/is_nemo_gym_installed.py similarity index 88% rename from 3rdparty/Penguin-workspace/is_penguin_installed.py rename to 3rdparty/Gym-workspace/is_nemo_gym_installed.py index 56563cf447..1a7572b077 100644 --- a/3rdparty/Penguin-workspace/is_penguin_installed.py +++ b/3rdparty/Gym-workspace/is_nemo_gym_installed.py @@ -12,10 +12,10 @@ # See the License for the specific language governing permissions and # limitations under the License. try: - from penguin import config_types # noqa: F401 + from nemo_gym import config_types # noqa: F401 INSTALLED = True except Exception: INSTALLED = False -print(f"PENGUIN {INSTALLED=}") +print(f"NEMO_GYM {INSTALLED=}") diff --git a/3rdparty/Penguin-workspace/pyproject.toml b/3rdparty/Gym-workspace/pyproject.toml similarity index 74% rename from 3rdparty/Penguin-workspace/pyproject.toml rename to 3rdparty/Gym-workspace/pyproject.toml index 62b135e012..dfda26adaf 100644 --- a/3rdparty/Penguin-workspace/pyproject.toml +++ b/3rdparty/Gym-workspace/pyproject.toml @@ -3,8 +3,8 @@ requires = ["setuptools>=61.0", "wheel"] build-backend = "setuptools.build_meta" [project] -name = "penguin" +name = "nemo_gym" dynamic = ["dependencies", "version"] authors = [{ name = "NVIDIA", email = "nemo-toolkit@nvidia.com" }] -description = "Standalone packaging for the Penguin sub-module." +description = "Standalone packaging for the Gym sub-module." requires-python = ">=3.10" diff --git a/3rdparty/Penguin-workspace/setup.py b/3rdparty/Gym-workspace/setup.py similarity index 77% rename from 3rdparty/Penguin-workspace/setup.py rename to 3rdparty/Gym-workspace/setup.py index cd61a73035..ddb5c62284 100644 --- a/3rdparty/Penguin-workspace/setup.py +++ b/3rdparty/Gym-workspace/setup.py @@ -20,8 +20,8 @@ final_packages = [] final_package_dir = {} -# If the submodule is present, expose `penguin` package from the checkout -src_dir = Path("Penguin") +# If the submodule is present, expose `nemo_gym` package from the checkout +src_dir = Path("Gym") CACHED_DEPENDENCIES = [ @@ -41,6 +41,7 @@ "aiohttp", "yappi", "ray[default]", + "psutil", ] if src_dir.exists(): @@ -49,7 +50,7 @@ pyproject_toml = tomllib.load(f) if not pyproject_toml_path.exists(): raise FileNotFoundError( - f"[Penguin][setup] {pyproject_toml_path} not found; skipping dependency consistency check." + f"[Gym][setup] {pyproject_toml_path} not found; skipping dependency consistency check." ) packages = pyproject_toml["tool"]["setuptools"]["packages"]["find"]["include"] @@ -69,19 +70,19 @@ if missing_in_cached or extra_in_cached: print( - "[Penguin][setup] Dependency mismatch between Penguin-workspace/Penguin/pyproject.toml vs Penguin-workspace/setup.py::CACHED_DEPENDENCIES.", + "[Gym][setup] Dependency mismatch between Gym-workspace/Gym/pyproject.toml vs Gym-workspace/setup.py::CACHED_DEPENDENCIES.", file=sys.stderr, ) if missing_in_cached: print( - " - Present in Penguin-workspace/Penguin/pyproject.toml but missing from CACHED_DEPENDENCIES:", + " - Present in Gym-workspace/Gym/pyproject.toml but missing from CACHED_DEPENDENCIES:", file=sys.stderr, ) for dep in sorted(missing_in_cached): print(f" * {dep}", file=sys.stderr) if extra_in_cached: print( - " - Present in CACHED_DEPENDENCIES but not in Penguin-workspace/Penguin/pyproject.toml:", + " - Present in CACHED_DEPENDENCIES but not in Gym-workspace/Gym/pyproject.toml:", file=sys.stderr, ) for dep in sorted(extra_in_cached): @@ -93,19 +94,19 @@ sys.exit(1) else: print( - "[Penguin][setup] Dependency sets are consistent with the submodule pyproject.", + "[Gym][setup] Dependency sets are consistent with the submodule pyproject.", file=sys.stderr, ) setuptools.setup( - name="penguin", + name="nemo_gym", version="0.0.0", - description="Standalone packaging for the Penguin sub-module.", + description="Standalone packaging for the Gym sub-module.", author="NVIDIA", author_email="nemo-toolkit@nvidia.com", packages=final_packages, package_dir=final_package_dir, - py_modules=["is_penguin_installed"], + py_modules=["is_nemo_gym_installed"], install_requires=CACHED_DEPENDENCIES, ) diff --git a/examples/penguin/grpo_dapo17k_bytedtsinghua_qwen3_4binstruct_nf.yaml b/examples/nemo_gym/grpo_dapo17k_bytedtsinghua_qwen3_4binstruct_nf.yaml similarity index 95% rename from examples/penguin/grpo_dapo17k_bytedtsinghua_qwen3_4binstruct_nf.yaml rename to examples/nemo_gym/grpo_dapo17k_bytedtsinghua_qwen3_4binstruct_nf.yaml index 0d138a8d34..1d73d88381 100644 --- a/examples/penguin/grpo_dapo17k_bytedtsinghua_qwen3_4binstruct_nf.yaml +++ b/examples/nemo_gym/grpo_dapo17k_bytedtsinghua_qwen3_4binstruct_nf.yaml @@ -232,15 +232,15 @@ policy: num_nodes: null # Decides number of nodes to be dedicated to generation data: - train_jsonl_fpath: 3rdparty/Penguin-workspace/Penguin/data/bytedtsinghua_dapo17k/train.jsonl - validation_jsonl_fpath: 3rdparty/Penguin-workspace/Penguin/data/bytedtsinghua_dapo17k/validation.jsonl + train_jsonl_fpath: 3rdparty/Gym-workspace/Gym/data/bytedtsinghua_dapo17k/train.jsonl + validation_jsonl_fpath: 3rdparty/Gym-workspace/Gym/data/bytedtsinghua_dapo17k/validation.jsonl shuffle: true num_workers: 0 env: - should_use_penguin: true - should_log_penguin_responses: true # If you have low logging storage, set this to false - penguin: # This is passed into Penguin as the initial_global_config_dict + should_use_nemo_gym: true + should_log_nemo_gym_responses: true # If you have low logging storage, set this to false + nemo_gym: # This is passed into NeMo-Gym as the initial_global_config_dict config_paths: - responses_api_models/vllm_model/configs/vllm_model_for_training.yaml # Required! And it must be *for_training - resources_servers/library_judge_math/configs/library_judge_math.yaml diff --git a/examples/penguin/run_grpo_penguin.py b/examples/nemo_gym/run_grpo_nemo_gym.py similarity index 83% rename from examples/penguin/run_grpo_penguin.py rename to examples/nemo_gym/run_grpo_nemo_gym.py index 96d33e9528..c8d2c911e2 100644 --- a/examples/penguin/run_grpo_penguin.py +++ b/examples/nemo_gym/run_grpo_nemo_gym.py @@ -36,7 +36,7 @@ MasterConfig, StatefulDataLoader, TokenizerType, - _should_use_penguin, + _should_use_nemo_gym, grpo_train, refit_policy_generation, setup, @@ -48,13 +48,13 @@ get_actor_python_env, ) from nemo_rl.distributed.virtual_cluster import init_ray -from nemo_rl.environments.penguin import ( - Penguin, - PenguinConfig, - penguin_example_to_nemo_rl_datum_spec, - setup_penguin_config, +from nemo_rl.environments.nemo_gym import ( + NemoGym, + NemoGymConfig, + nemo_gym_example_to_nemo_rl_datum_spec, + setup_nemo_gym_config, ) -from nemo_rl.experience.rollouts import run_async_penguin_rollout +from nemo_rl.experience.rollouts import run_async_nemo_gym_rollout from nemo_rl.models.generation import configure_generation_config from nemo_rl.utils.config import load_config, parse_hydra_overrides from nemo_rl.utils.logger import get_next_experiment_dir @@ -75,29 +75,29 @@ def parse_args() -> tuple[argparse.Namespace, list[str]]: return args, overrides -def setup_single_penguin_dataset( +def setup_single_nemo_gym_dataset( jsonl_fpath: str, tokenizer, num_repeats: Optional[int] = None ): with open(jsonl_fpath) as f: - penguin_examples = list(map(json.loads, f)) + nemo_gym_examples = list(map(json.loads, f)) - print(f"Loaded data at {jsonl_fpath}. Found {len(penguin_examples)} examples") + print(f"Loaded data at {jsonl_fpath}. Found {len(nemo_gym_examples)} examples") if num_repeats: - previous_length = len(penguin_examples) - penguin_examples = list( + previous_length = len(nemo_gym_examples) + nemo_gym_examples = list( chain.from_iterable( - repeat(penguin_example, num_repeats) - for penguin_example in penguin_examples + repeat(nemo_gym_example, num_repeats) + for nemo_gym_example in nemo_gym_examples ) ) print( - f"Repeating examples (in a pattern of abc to aabbcc) for {jsonl_fpath} from {previous_length} to {len(penguin_examples)}!" + f"Repeating examples (in a pattern of abc to aabbcc) for {jsonl_fpath} from {previous_length} to {len(nemo_gym_examples)}!" ) nemo_rl_compatible_examples: list[DatumSpec] = [ - penguin_example_to_nemo_rl_datum_spec(penguin_example, idx) - for idx, penguin_example in enumerate(penguin_examples) + nemo_gym_example_to_nemo_rl_datum_spec(nemo_gym_example, idx) + for idx, nemo_gym_example in enumerate(nemo_gym_examples) ] passthrough_task_processor = lambda datum_dict, *args, **kwargs: datum_dict @@ -129,7 +129,7 @@ def collect_trajectories( print("\nšŸ” Running trajectory collection...", flush=True) generation_config = master_config["policy"]["generation"] for val_batch in val_dataloader: - penguin_rollout_result = run_async_penguin_rollout( + nemo_gym_rollout_result = run_async_nemo_gym_rollout( policy_generation=policy_generation, input_batch=val_batch, tokenizer=tokenizer, @@ -141,7 +141,7 @@ def collect_trajectories( ) rows_to_log: list[str] = [] - for key, value in penguin_rollout_result.rollout_metrics.items(): + for key, value in nemo_gym_rollout_result.rollout_metrics.items(): if "full_result" not in key: continue @@ -195,18 +195,18 @@ def main() -> None: config["policy"]["generation"], tokenizer ) - # Penguin specific config setup. - setup_penguin_config(config, tokenizer) + # NeMo-Gym specific config setup. + setup_nemo_gym_config(config, tokenizer) # We assert here since this is right after the final config has been materialized. - assert _should_use_penguin(config) + assert _should_use_nemo_gym(config) print("\nā–¶ Setting up data...") - train_dataset = setup_single_penguin_dataset( + train_dataset = setup_single_nemo_gym_dataset( jsonl_fpath=config["data"]["train_jsonl_fpath"], tokenizer=tokenizer, ) - val_dataset = setup_single_penguin_dataset( + val_dataset = setup_single_nemo_gym_dataset( jsonl_fpath=config["data"]["validation_jsonl_fpath"], tokenizer=tokenizer, ) @@ -247,23 +247,23 @@ def main() -> None: ) = setup(config, tokenizer, train_dataset, val_dataset) is_trajectory_collection = ( - config["env"]["penguin"].pop("is_trajectory_collection", False) or False + config["env"]["nemo_gym"].pop("is_trajectory_collection", False) or False ) - penguin_config = PenguinConfig( + nemo_gym_config = NemoGymConfig( model_name=policy_generation.cfg["model_name"], base_urls=policy_generation.dp_openai_server_base_urls, - initial_global_config_dict=config["env"]["penguin"], + initial_global_config_dict=config["env"]["nemo_gym"], ) - penguin = Penguin.options( + nemo_gym = NemoGym.options( runtime_env={ "py_executable": get_actor_python_env( - "nemo_rl.environments.penguin.Penguin" + "nemo_rl.environments.nemo_gym.NemoGym" ), } - ).remote(penguin_config) - # Blocking wait for penguin to spin up - ray.get(penguin.health_check.remote()) - task_to_env = {"penguin": penguin} + ).remote(nemo_gym_config) + # Blocking wait for NeMo-Gym to spin up + ray.get(nemo_gym.health_check.remote()) + task_to_env = {"nemo_gym": nemo_gym} val_task_to_env = task_to_env if is_trajectory_collection: diff --git a/examples/penguin/run_penguin_single_node_sanity_tests.sh b/examples/nemo_gym/run_nemo_gym_single_node_sanity_tests.sh similarity index 74% rename from examples/penguin/run_penguin_single_node_sanity_tests.sh rename to examples/nemo_gym/run_nemo_gym_single_node_sanity_tests.sh index 1337cf3102..b12905694d 100755 --- a/examples/penguin/run_penguin_single_node_sanity_tests.sh +++ b/examples/nemo_gym/run_nemo_gym_single_node_sanity_tests.sh @@ -1,7 +1,21 @@ +# Copyright (c) 2025, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + # Fail on errors set -e -uv sync --group={build,docs,dev,test} --extra penguin +uv sync --all-groups --extra nemo_gym # Stop pesky previous Ray servers that may have not been able to spin down from previous users. uv run ray stop --force @@ -27,7 +41,7 @@ uv run python -c "import ray; ray.shutdown()" ./tests/run_unit.sh unit/environments/test_math_environment.py::test_math_env_step_basic # NeMo Gym integrates directly into NeMo RL as an Environment since that is the cleanest way. This tests the NeMo Gym integration logic and correctness. -./tests/run_unit.sh unit/environments/test_penguin.py::test_penguin_sanity +./tests/run_unit.sh unit/environments/test_nemo_gym.py::test_nemo_gym_sanity # NeMo Gym uses a separate rollout loop inside grpo_train in NeMo RL. This tests the e2e rollout functionality and correctness. -./tests/run_unit.sh unit/experience/test_rollouts.py::test_run_async_penguin_rollout +./tests/run_unit.sh unit/experience/test_rollouts.py::test_run_async_nemo_gym_rollout diff --git a/nemo_rl/algorithms/grpo.py b/nemo_rl/algorithms/grpo.py index e52cf7e41f..76aeb54dc3 100644 --- a/nemo_rl/algorithms/grpo.py +++ b/nemo_rl/algorithms/grpo.py @@ -57,7 +57,7 @@ from nemo_rl.environments.interfaces import EnvironmentInterface from nemo_rl.experience.rollouts import ( run_async_multi_turn_rollout, - run_async_penguin_rollout, + run_async_nemo_gym_rollout, run_multi_turn_rollout, ) from nemo_rl.models.generation.interfaces import GenerationInterface @@ -878,16 +878,16 @@ def _should_use_async_rollouts(master_config: MasterConfig) -> bool: return vllm_cfg.get("async_engine", False) -def _should_use_penguin(master_config: MasterConfig) -> bool: - """Determine if Penguin should be used for rollouts and validation based on the configuration.""" +def _should_use_nemo_gym(master_config: MasterConfig) -> bool: + """Determine if NeMo-Gym should be used for rollouts and validation based on the configuration.""" env_config = master_config.get("env") or dict() - should_use_penguin = bool(env_config.get("should_use_penguin")) - if not should_use_penguin: - return should_use_penguin + should_use_nemo_gym = bool(env_config.get("should_use_nemo_gym")) + if not should_use_nemo_gym: + return should_use_nemo_gym - # Validate the setup for training with Penguin + # Validate the setup for training with NeMo-Gym assert _should_use_async_rollouts(master_config), ( - "āŒ Error: In order to use Penguin, you must use vllm generation backend with `async_engine: true`!" + "āŒ Error: In order to use NeMo-Gym, you must use vllm generation backend with `async_engine: true`!" ) generation_config = master_config["policy"]["generation"] @@ -895,10 +895,10 @@ def _should_use_penguin(master_config: MasterConfig) -> bool: # We piggyback off of `_should_use_async_rollouts` to guarantee the existence of these configs. should_expose_http_server = generation_config["vllm_cfg"].get("expose_http_server") assert should_expose_http_server, ( - "In order to use Penguin, you must expose the vllm server via `expose_http_server: true`!" + "In order to use NeMo-Gym, you must expose the vllm server via `expose_http_server: true`!" ) - return should_use_penguin + return should_use_nemo_gym def refit_policy_generation( @@ -1153,10 +1153,10 @@ def grpo_train( policy_generation, "clear_vllm_logger_metrics" ): policy_generation.clear_vllm_logger_metrics() - # Use penguin rollouts if enabled. We cascade penguin first since penguin requires async rollouts. - if _should_use_penguin(master_config): + # Use NeMo-Gym rollouts if enabled. We cascade NeMo-Gym first since NeMo-Gym requires async rollouts. + if _should_use_nemo_gym(master_config): generation_config = master_config["policy"]["generation"] - penguin_rollout_result = run_async_penguin_rollout( + nemo_gym_rollout_result = run_async_nemo_gym_rollout( policy_generation=policy_generation, input_batch=repeated_batch, tokenizer=tokenizer, @@ -1166,9 +1166,9 @@ def grpo_train( max_rollout_turns=None, greedy=False, ) - input_ids = penguin_rollout_result.input_ids - repeated_batch = penguin_rollout_result.final_batch - rollout_metrics = penguin_rollout_result.rollout_metrics + input_ids = nemo_gym_rollout_result.input_ids + repeated_batch = nemo_gym_rollout_result.final_batch + rollout_metrics = nemo_gym_rollout_result.rollout_metrics # Use async rollouts if vLLM async engine is enabled elif _should_use_async_rollouts(master_config): ( @@ -1699,10 +1699,10 @@ def validate( additional_metrics_to_report = dict() # Generate responses (updates the LLMMessageLogType in batch_with_msg_logs) # Use async rollouts if vLLM async engine is enabled - # We cascade penguin first since penguin also uses async rollouts. - if _should_use_penguin(master_config): + # We cascade NeMo-Gym first since NeMo-Gym also uses async rollouts. + if _should_use_nemo_gym(master_config): generation_config = master_config["policy"]["generation"] - penguin_rollout_result = run_async_penguin_rollout( + nemo_gym_rollout_result = run_async_nemo_gym_rollout( policy_generation=policy_generation, input_batch=val_batch, tokenizer=tokenizer, @@ -1712,8 +1712,8 @@ def validate( max_rollout_turns=None, greedy=False, ) - val_batch = penguin_rollout_result.final_batch - gen_metrics = penguin_rollout_result.rollout_metrics + val_batch = nemo_gym_rollout_result.final_batch + gen_metrics = nemo_gym_rollout_result.rollout_metrics additional_metrics_to_report = gen_metrics elif _should_use_async_rollouts(master_config): val_batch, gen_metrics = run_async_multi_turn_rollout( diff --git a/nemo_rl/distributed/ray_actor_environment_registry.py b/nemo_rl/distributed/ray_actor_environment_registry.py index 6a3529d4a1..5c0ac317d7 100644 --- a/nemo_rl/distributed/ray_actor_environment_registry.py +++ b/nemo_rl/distributed/ray_actor_environment_registry.py @@ -42,7 +42,7 @@ # ReplayBuffer needs vLLM environment to handle trajectory data from VllmGenerationWorker "nemo_rl.algorithms.async_utils.ReplayBuffer": PY_EXECUTABLES.VLLM, "nemo_rl.environments.tools.retriever.RAGEnvironment": PY_EXECUTABLES.SYSTEM, - "nemo_rl.environments.penguin.Penguin": PY_EXECUTABLES.PENGUIN, + "nemo_rl.environments.nemo_gym.NemoGym": PY_EXECUTABLES.NEMO_GYM, } diff --git a/nemo_rl/distributed/virtual_cluster.py b/nemo_rl/distributed/virtual_cluster.py index 879c0f22c4..3021b760e4 100644 --- a/nemo_rl/distributed/virtual_cluster.py +++ b/nemo_rl/distributed/virtual_cluster.py @@ -55,8 +55,8 @@ class PY_EXECUTABLES: # Use NeMo-RL direct dependencies and Megatron. MCORE = f"uv run --locked --extra mcore --directory {git_root}" - # Use Penguin dependencies - PENGUIN = f"uv run --locked --extra penguin --directory {git_root}" + # Use NeMo-Gym dependencies + NEMO_GYM = f"uv run --locked --extra nemo_gym --directory {git_root}" @ray.remote # pragma: no cover diff --git a/nemo_rl/environments/penguin.py b/nemo_rl/environments/nemo_gym.py similarity index 82% rename from nemo_rl/environments/penguin.py rename to nemo_rl/environments/nemo_gym.py index 1f7462a866..83e9858b8e 100644 --- a/nemo_rl/environments/penguin.py +++ b/nemo_rl/environments/nemo_gym.py @@ -24,28 +24,28 @@ from nemo_rl.utils.timer import Timer -class PenguinConfig(TypedDict): +class NemoGymConfig(TypedDict): model_name: str base_urls: List[str] initial_global_config_dict: Dict[str, Any] @ray.remote(max_restarts=-1, max_task_retries=-1) # pragma: no cover -class Penguin(EnvironmentInterface): - """This environment class isn't really used for training. It's really meant as an integration wrapper around Penguin that hooks into the existing NeMo RL resource management via ray. So there is still one source of truth for resource management in NeMo RL.""" +class NemoGym(EnvironmentInterface): + """This environment class isn't really used for training. It's really meant as an integration wrapper around NeMo-Gym that hooks into the existing NeMo RL resource management via ray. So there is still one source of truth for resource management in NeMo RL.""" - def __init__(self, cfg: PenguinConfig): + def __init__(self, cfg: NemoGymConfig): self.cfg = cfg self.node_ip = _get_node_ip_local() self.head_server_port = _get_free_port_local() + from nemo_gym.cli import GlobalConfigDictParserConfig, RunHelper + from nemo_gym.rollout_collection import RolloutCollectionHelper + from nemo_gym.server_utils import HEAD_SERVER_KEY_NAME, BaseServerConfig from omegaconf import DictConfig - from penguin.cli import GlobalConfigDictParserConfig, RunHelper - from penguin.rollout_collection import RolloutCollectionHelper - from penguin.server_utils import HEAD_SERVER_KEY_NAME, BaseServerConfig - RELATIVE_PATH = "nemo_rl/environments/penguin.py" + RELATIVE_PATH = "nemo_rl/environments/nemo_gym.py" assert __file__.endswith(RELATIVE_PATH) initial_global_config_dict = ( @@ -69,7 +69,7 @@ def __init__(self, cfg: PenguinConfig): # Get Ray head node address if Ray is initialized assert ray.is_initialized(), ( - "Ray must be initialized before using Penguin environment" + "Ray must be initialized before using NeMo-Gym environment" ) ray_context = ray.get_runtime_context() assert ray_context.gcs_address, "Ray must have a GCS address" @@ -87,7 +87,7 @@ def __init__(self, cfg: PenguinConfig): self.rh.start( global_config_dict_parser_config=GlobalConfigDictParserConfig( dotenv_path=Path(__file__.removesuffix(RELATIVE_PATH)).absolute() - / "penguin_env.yaml", + / "nemo_gym_env.yaml", initial_global_config_dict=DictConfig(initial_global_config_dict), skip_load_from_cli=True, ) @@ -105,25 +105,25 @@ def health_check(self) -> bool: async def run_rollouts( self, - penguin_examples: list[dict], + nemo_gym_examples: list[dict], tokenizer: PreTrainedTokenizerBase, timer_prefix: str, ) -> list[dict]: timer = Timer() - penguin_result_iterator = self.rch.run_examples( - examples=penguin_examples, head_server_config=self.head_server_config + nemo_gym_result_iterator = self.rch.run_examples( + examples=nemo_gym_examples, head_server_config=self.head_server_config ) timer.start("_run_rollouts_total") nemo_rl_results = [] - for task in penguin_result_iterator: + for task in nemo_gym_result_iterator: with timer.time(label=f"{timer_prefix}/await_results"): - penguin_result = await task + nemo_gym_result = await task with timer.time(label=f"{timer_prefix}/postprocess_results"): - nemo_rl_result = self._postprocess_penguin_to_nemo_rl_result( - penguin_result, tokenizer + nemo_rl_result = self._postprocess_nemo_gym_to_nemo_rl_result( + nemo_gym_result, tokenizer ) nemo_rl_results.append(nemo_rl_result) @@ -137,17 +137,17 @@ async def run_rollouts( return nemo_rl_results, timing_metrics - def _postprocess_penguin_to_nemo_rl_result( - self, penguin_result: dict, tokenizer: PreTrainedTokenizerBase + def _postprocess_nemo_gym_to_nemo_rl_result( + self, nemo_gym_result: dict, tokenizer: PreTrainedTokenizerBase ) -> dict: nemo_rl_message_log = [] seen_token_ids: List[int] = [] - for output_item_dict in penguin_result["response"]["output"]: + for output_item_dict in nemo_gym_result["response"]["output"]: # Nemo RL really only has two types of messages: assistant and not assistant since that is all that it is concerned with (i.e. to train or not to train) # Here we map all the trainable messages to assistant and all the non-trainable messages to user. # Eventually we can maybe be smarter about this, but this is functional for now. - # Note that Penguin will only return token ids on "assistant" messages and not other message types. + # Note that NeMo-Gym will only return token ids on "assistant" messages and not other message types. if "generation_token_ids" not in output_item_dict: continue @@ -194,14 +194,14 @@ def _postprocess_penguin_to_nemo_rl_result( return { "message_log": nemo_rl_message_log, "input_message_log": nemo_rl_message_log[:1], - "full_result": penguin_result, + "full_result": nemo_gym_result, } def shutdown(self) -> None: self.rh.shutdown() def step(self, message_log_batch, metadata): - # This is not used since Penguin will handle the rollouts entirely. + # This is not used since NeMo-Gym will handle the rollouts entirely. raise NotImplementedError def global_post_process_and_metrics(self, batch): @@ -214,7 +214,7 @@ def global_post_process_and_metrics(self, batch): ######################################## -def setup_penguin_config(config, tokenizer) -> None: +def setup_nemo_gym_config(config, tokenizer) -> None: generation_config = config["policy"]["generation"] # Enable the http server. Requires both async engine and the expose_http_server flag @@ -232,16 +232,18 @@ def setup_penguin_config(config, tokenizer) -> None: # We do some light preprocessing here to make our data format compatible with nemo rl format -def penguin_example_to_nemo_rl_datum_spec(penguin_example: dict, idx: int) -> DatumSpec: +def nemo_gym_example_to_nemo_rl_datum_spec( + nemo_gym_example: dict, idx: int +) -> DatumSpec: return DatumSpec( message_log=[ {"role": "user", "content": "", "token_ids": torch.tensor([])} ], # Fake message length=0, - extra_env_info=penguin_example, + extra_env_info=nemo_gym_example, loss_multiplier=1.0, # Fix to 1.0 to backprop on all examples idx=idx, - task_name="penguin", + task_name="nemo_gym", stop_strings=None, # Extra vars token_ids=[], # Just need this empty key to be compatible with the current NeMo RL GRPO impl diff --git a/nemo_rl/experience/rollouts.py b/nemo_rl/experience/rollouts.py index b8b378542c..44ffbaf767 100644 --- a/nemo_rl/experience/rollouts.py +++ b/nemo_rl/experience/rollouts.py @@ -936,7 +936,7 @@ async def run_single_sample_with_error_handling(i, sample_state): @dataclass -class AsyncPenguinRolloutResult: +class AsyncNemoGymRolloutResult: input_ids: torch.Tensor final_batch: BatchedDataDict[DatumSpec] rollout_metrics: dict[str, Any] @@ -955,7 +955,7 @@ def _calculate_single_metric( } -def run_async_penguin_rollout( +def run_async_nemo_gym_rollout( policy_generation: GenerationInterface, input_batch: BatchedDataDict[DatumSpec], tokenizer: TokenizerType, @@ -964,35 +964,35 @@ def run_async_penguin_rollout( max_seq_len: Optional[int] = None, max_rollout_turns: Optional[int] = None, greedy: bool = False, -) -> AsyncPenguinRolloutResult: - """Run multi-turn rollouts with Penguin. Please refer to the `run_async_multi_turn_rollout` docs for more information on the parameters.""" +) -> AsyncNemoGymRolloutResult: + """Run multi-turn rollouts with NeMo-Gym. Please refer to the `run_async_multi_turn_rollout` docs for more information on the parameters.""" # We leverage the same `extra_env_info` key as `run_async_multi_turn_rollout`. - penguin_rows = input_batch["extra_env_info"] + nemo_gym_rows = input_batch["extra_env_info"] # Handle generation parameters up front so we don't hide anything inside here to avoid being unintuitive to the user. - # Penguin policy is "What you see is what you get". - assert not greedy, "`greedy` is not supported in Penguin path!" + # NeMo-Gym policy is "What you see is what you get". + assert not greedy, "`greedy` is not supported in NeMo-Gym path!" assert max_rollout_turns is None, ( - "`max_rollout_turns` is not supported in Penguin path!" + "`max_rollout_turns` is not supported in NeMo-Gym path!" ) - assert max_seq_len is None, "`max_seq_len` is not supported in Penguin path!" + assert max_seq_len is None, "`max_seq_len` is not supported in NeMo-Gym path!" # We don't use these stop criteria assert not generation_config["stop_strings"], ( - "Stop strings is not supported in the generation config in Penguin path!" + "Stop strings is not supported in the generation config in NeMo-Gym path!" ) assert not generation_config["stop_token_ids"], ( - "Stop strings is not supported in the generation config in Penguin path!" + "Stop strings is not supported in the generation config in NeMo-Gym path!" ) - # Top k is not OpenAI compatible, so Penguin does not guarantee support over it. + # Top k is not OpenAI compatible, so NeMo-Gym does not guarantee support over it. assert not generation_config["top_k"], ( - "Top k is not supported in the generation config in Penguin path!" + "Top k is not supported in the generation config in NeMo-Gym path!" ) timer = Timer() timer_prefix = "timing/rollout" timer.start(f"{timer_prefix}/total") - for row in penguin_rows: + for row in nemo_gym_rows: # We may need better handling here. The max tokens set here would be the max new generated tokens, not the total max tokens. # Currently, we just rely on the underlying vLLM engine to do the truncation for us using the max model seq len set in the config. # row["max_tokens"] = max_seq_len @@ -1005,16 +1005,16 @@ def run_async_penguin_rollout( # generation_config["max_new_tokens"] with timer.time(f"{timer_prefix}/run_rollouts"): - penguin_environment = task_to_env["penguin"] + nemo_gym_environment = task_to_env["nemo_gym"] results, rollout_loop_timing_metrics = ray.get( - penguin_environment.run_rollouts.remote( - penguin_rows, tokenizer, timer_prefix + nemo_gym_environment.run_rollouts.remote( + nemo_gym_rows, tokenizer, timer_prefix ) ) # Prepare for the rollout metrics calculation below. Not strictly necessary here, but good to have parity with `run_async_multi_turn_rollout` with timer.time(f"{timer_prefix}/prepare_for_metrics_calculation"): - batch_size = len(penguin_rows) + batch_size = len(nemo_gym_rows) max_total_tokens_per_sample = policy_generation.cfg["vllm_cfg"]["max_model_len"] all_sample_metrics = [ { @@ -1073,8 +1073,8 @@ def run_async_penguin_rollout( # Per-agent misc metrics with timer.time(f"{timer_prefix}/per_agent_misc_metrics"): agent_to_results: dict[str, list[dict]] = defaultdict(list) - for penguin_row, result in zip(penguin_rows, results): - agent_name = penguin_row["agent_ref"]["name"] + for nemo_gym_row, result in zip(nemo_gym_rows, results): + agent_name = nemo_gym_row["agent_ref"]["name"] agent_to_results[agent_name].append(result["full_result"]) per_agent_metrics = {} @@ -1138,7 +1138,7 @@ def run_async_penguin_rollout( } ) - return AsyncPenguinRolloutResult( + return AsyncNemoGymRolloutResult( input_ids=input_ids, final_batch=final_batch, rollout_metrics=rollout_metrics, diff --git a/nemo_rl/models/generation/vllm/vllm_worker_async.py b/nemo_rl/models/generation/vllm/vllm_worker_async.py index 4887984e8a..c8ab9fcf2a 100644 --- a/nemo_rl/models/generation/vllm/vllm_worker_async.py +++ b/nemo_rl/models/generation/vllm/vllm_worker_async.py @@ -50,7 +50,7 @@ def _replace_prefix_tokens( in order to preserve the monotonic tokens property for optimized multi-turn training. - Some environments (namely Penguin) require an OpenAI compatible server + Some environments (namely NeMo-Gym) require an OpenAI compatible server endpoint rather than an inference engine handle. This is fine for the most part, but it may cause issues when the environment is used as a part of training. @@ -303,7 +303,7 @@ def _setup_vllm_openai_api_server(self, app: FastAPI) -> FastAPI: class NeMoRLOpenAIChatRequestMixin: def model_post_init(self, context): - # Penguin specific processing. This is just how Penguin returns the extra token information. + # NeMo-Gym specific processing. This is just how NeMo-Gym returns the extra token information. if self.required_prefix_token_ids is None: for message in reversed(self.messages): if "prompt_token_ids" in message: diff --git a/nemo_rl/utils/logger.py b/nemo_rl/utils/logger.py index 736a6abc7b..da51f67296 100644 --- a/nemo_rl/utils/logger.py +++ b/nemo_rl/utils/logger.py @@ -132,7 +132,7 @@ def log_metrics( step_metric: Optional step metric name (ignored in TensorBoard) """ for name, value in metrics.items(): - # Penguin will add additional metrics like wandb histograms. However, some people will log to Tensorboard instead which may not be compatible + # NeMo-Gym will add additional metrics like wandb histograms. However, some people will log to Tensorboard instead which may not be compatible # This logic catches non-compatible objects being logged. if not isinstance(value, (int, float, bool, str)): continue diff --git a/pyproject.toml b/pyproject.toml index 73eb392ba5..7478fcc1f6 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -98,7 +98,7 @@ mcore = [ # https://github.com/facebookresearch/xformers/blob/8354497deb2c04c67fbb2e2ad911e86530da0e90/xformers/ops/fmha/flash.py#L76 "flash-attn==2.8.1", ] -penguin = ["penguin"] +nemo_gym = ["nemo_gym"] [dependency-groups] @@ -149,7 +149,7 @@ test = [ megatron-core = { workspace = true } nemo-automodel = { workspace = true } megatron-bridge = { workspace = true } -penguin = { workspace = true } +nemo_gym = { workspace = true } nemo_run = { git = "https://github.com/NVIDIA-NeMo/Run", rev = "414f0077c648fde2c71bb1186e97ccbf96d6844c" } # torch/torchvision/triton all come from the torch index in order to pick up aarch64 wheels torch = [ @@ -172,7 +172,7 @@ members = [ "3rdparty/Megatron-LM-workspace", "3rdparty/Automodel-workspace/Automodel", "3rdparty/Megatron-Bridge-workspace", - "3rdparty/Penguin-workspace", + "3rdparty/Gym-workspace", # Research projects are also added here in order for them to share the global root level uv.lock. # If we don't do this, the research projects do not see the global uv.lock, and may mistakenly # install numpy>=2.0 because nemo-rl's core [dependencies] do not pin numpy, but when you inspect diff --git a/tests/unit/environments/penguin_test_data/test_penguin_sanity.json b/tests/unit/environments/nemo_gym_test_data/test_nemo_gym_sanity.json similarity index 100% rename from tests/unit/environments/penguin_test_data/test_penguin_sanity.json rename to tests/unit/environments/nemo_gym_test_data/test_nemo_gym_sanity.json diff --git a/tests/unit/environments/test_penguin.py b/tests/unit/environments/test_nemo_gym.py similarity index 70% rename from tests/unit/environments/test_penguin.py rename to tests/unit/environments/test_nemo_gym.py index 78dd6e5d7c..04412954ce 100644 --- a/tests/unit/environments/test_penguin.py +++ b/tests/unit/environments/test_nemo_gym.py @@ -23,7 +23,7 @@ from nemo_rl.distributed.ray_actor_environment_registry import ( get_actor_python_env, ) -from nemo_rl.environments.penguin import Penguin, PenguinConfig, setup_penguin_config +from nemo_rl.environments.nemo_gym import NemoGym, NemoGymConfig, setup_nemo_gym_config from nemo_rl.models.generation.vllm import VllmGeneration # cluster and tokenizer are fixture imports @@ -32,35 +32,37 @@ cluster, # noqa: F401 ) from tests.unit.models.generation.test_vllm_generation import ( - tokenizer as penguin_tokenizer, # noqa: F401 + tokenizer as nemo_gym_tokenizer, # noqa: F401 ) try: - from penguin import config_types # noqa: F401 + from nemo_gym import config_types # noqa: F401 - PENGUIN_INSTALLED = True + NEMO_GYM_INSTALLED = True except ImportError: - penguin = None - PENGUIN_INSTALLED = False + nemo_gym = None + NEMO_GYM_INSTALLED = False @pytest.mark.skipif( - not PENGUIN_INSTALLED, - reason="Skipping Penguin test since Penguin is not installed!", + not NEMO_GYM_INSTALLED, + reason="Skipping NeMo-Gym test since NeMo-Gym is not installed!", ) -def test_penguin_stub_module(): - print(f"Penguin test successfully run! Penguin config_types module: {config_types}") +def test_nemo_gym_stub_module(): + print( + f"NeMo-Gym test successfully run! NeMo-Gym config_types module: {config_types}" + ) @pytest.fixture(scope="function") -def penguin_vllm_generation(cluster, penguin_tokenizer): # noqa: F811 +def nemo_gym_vllm_generation(cluster, nemo_gym_tokenizer): # noqa: F811 generation_config = deepcopy(basic_vllm_test_config) master_config = { "policy": { "generation": generation_config, }, } - setup_penguin_config(master_config, penguin_tokenizer) + setup_nemo_gym_config(master_config, nemo_gym_tokenizer) generation_config["vllm_cfg"]["max_model_len"] = 16_384 # This is the tool parser for Qwen/Qwen3-0.6B. This needs to be changed for other models. @@ -77,8 +79,8 @@ def penguin_vllm_generation(cluster, penguin_tokenizer): # noqa: F811 @pytest.fixture(scope="function") -def penguin(penguin_vllm_generation): - """Create a Penguin actor for testing.""" +def nemo_gym(nemo_gym_vllm_generation): + """Create a NeMo-Gym actor for testing.""" yaml_str = r"""example_multi_step_resources_server: resources_servers: @@ -106,20 +108,20 @@ def penguin(penguin_vllm_generation): uses_reasoning_parser: true """ - config = PenguinConfig( - model_name=penguin_vllm_generation.cfg["model_name"], - base_urls=penguin_vllm_generation.dp_openai_server_base_urls, + config = NemoGymConfig( + model_name=nemo_gym_vllm_generation.cfg["model_name"], + base_urls=nemo_gym_vllm_generation.dp_openai_server_base_urls, initial_global_config_dict=safe_load(yaml_str), ) - env = Penguin.options( + env = NemoGym.options( runtime_env={ "py_executable": get_actor_python_env( - "nemo_rl.environments.penguin.Penguin" + "nemo_rl.environments.nemo_gym.NemoGym" ), } ).remote(config) - # Blocking wait for penguin to spin up + # Blocking wait for NeMo-Gym to spin up ray.get(env.health_check.remote()) yield env @@ -131,28 +133,28 @@ def penguin(penguin_vllm_generation): @pytest.fixture(scope="function") -def penguin_sanity_test_data(): - fpath = Path(__file__).parent / "penguin_test_data/test_penguin_sanity.json" +def nemo_gym_sanity_test_data(): + fpath = Path(__file__).parent / "nemo_gym_test_data/test_nemo_gym_sanity.json" with open(fpath) as f: data = json.load(f) return data @pytest.mark.skipif( - not PENGUIN_INSTALLED, - reason="Skipping Penguin test since Penguin is not installed!", + not NEMO_GYM_INSTALLED, + reason="Skipping NeMo-Gym test since NeMo-Gym is not installed!", ) -def test_penguin_sanity( - penguin, - penguin_sanity_test_data, - penguin_vllm_generation, - penguin_tokenizer, # noqa: F811 +def test_nemo_gym_sanity( + nemo_gym, + nemo_gym_sanity_test_data, + nemo_gym_vllm_generation, + nemo_gym_tokenizer, # noqa: F811 ): """Test basic functionality of MathEnvironment step with simple messages.""" - # We need to match NeMo RL generation config params before sending to Penguin - generation_config = penguin_vllm_generation.cfg - examples = penguin_sanity_test_data["input"] + # We need to match NeMo RL generation config params before sending to NeMo-Gym + generation_config = nemo_gym_vllm_generation.cfg + examples = nemo_gym_sanity_test_data["input"] for example in examples: example["responses_create_params"]["temperature"] = generation_config[ "temperature" @@ -160,11 +162,11 @@ def test_penguin_sanity( example["responses_create_params"]["top_p"] = generation_config["top_p"] actual_result, _ = ray.get( - penguin.run_rollouts.remote( - penguin_sanity_test_data["input"], penguin_tokenizer, "" + nemo_gym.run_rollouts.remote( + nemo_gym_sanity_test_data["input"], nemo_gym_tokenizer, "" ) ) - expected_result = penguin_sanity_test_data["expected_output"] + expected_result = nemo_gym_sanity_test_data["expected_output"] # These are tensors originally and we swap them back to a list for comparison below for d in actual_result: @@ -193,6 +195,11 @@ def _standardize_single_result(d: dict): return d def _standardize(l: list[dict]): - return list(map(_standardize_single_result, l)) + # Sort by input_message_log token_ids to ensure deterministic ordering + # since NeMo-Gym returns results in completion order, not input order + standardized = list(map(_standardize_single_result, l)) + return sorted( + standardized, key=lambda d: tuple(d["input_message_log"][0]["token_ids"]) + ) assert _standardize(expected_result) == _standardize(actual_result) diff --git a/tests/unit/experience/test_rollouts.py b/tests/unit/experience/test_rollouts.py index fa8ab0b7a2..c79103cdbe 100644 --- a/tests/unit/experience/test_rollouts.py +++ b/tests/unit/experience/test_rollouts.py @@ -32,23 +32,23 @@ SlidingPuzzleGameLogic, SlidingPuzzleMetadata, ) -from nemo_rl.environments.penguin import penguin_example_to_nemo_rl_datum_spec +from nemo_rl.environments.nemo_gym import nemo_gym_example_to_nemo_rl_datum_spec from nemo_rl.experience.rollouts import ( run_async_multi_turn_rollout, - run_async_penguin_rollout, + run_async_nemo_gym_rollout, run_multi_turn_rollout, ) from nemo_rl.models.generation import configure_generation_config from nemo_rl.models.generation.vllm import VllmConfig, VllmGeneration # These are all fixtures -from tests.unit.environments.test_penguin import ( - PENGUIN_INSTALLED, +from tests.unit.environments.test_nemo_gym import ( + NEMO_GYM_INSTALLED, cluster, # noqa: F401 - penguin, # noqa: F401 - penguin_sanity_test_data, # noqa: F401 - penguin_tokenizer, # noqa: F401 - penguin_vllm_generation, # noqa: F401 + nemo_gym, # noqa: F401 + nemo_gym_sanity_test_data, # noqa: F401 + nemo_gym_tokenizer, # noqa: F401 + nemo_gym_vllm_generation, # noqa: F401 ) # Import the test environment definitions @@ -748,27 +748,27 @@ def test_run_sliding_puzzle_vllm(sliding_puzzle_setup_vllm): @pytest.mark.skipif( - not PENGUIN_INSTALLED, - reason="Skipping Penguin test since Penguin is not installed!", + not NEMO_GYM_INSTALLED, + reason="Skipping NeMo-Gym test since NeMo-Gym is not installed!", ) -def test_run_async_penguin_rollout( - penguin, # noqa: F811 - penguin_vllm_generation, # noqa: F811 - penguin_sanity_test_data, # noqa: F811 - penguin_tokenizer, # noqa: F811 +def test_run_async_nemo_gym_rollout( + nemo_gym, # noqa: F811 + nemo_gym_vllm_generation, # noqa: F811 + nemo_gym_sanity_test_data, # noqa: F811 + nemo_gym_tokenizer, # noqa: F811 ): nemo_rl_compatible_examples: list[DatumSpec] = [ - penguin_example_to_nemo_rl_datum_spec(penguin_example, idx) - for idx, penguin_example in enumerate(penguin_sanity_test_data["input"]) + nemo_gym_example_to_nemo_rl_datum_spec(nemo_gym_example, idx) + for idx, nemo_gym_example in enumerate(nemo_gym_sanity_test_data["input"]) ] input_batch: BatchedDataDict[DatumSpec] = rl_collate_fn(nemo_rl_compatible_examples) - actual_result = run_async_penguin_rollout( - policy_generation=penguin_vllm_generation, + actual_result = run_async_nemo_gym_rollout( + policy_generation=nemo_gym_vllm_generation, input_batch=input_batch, - tokenizer=penguin_tokenizer, - task_to_env={"penguin": penguin}, + tokenizer=nemo_gym_tokenizer, + task_to_env={"nemo_gym": nemo_gym}, max_seq_len=None, - generation_config=penguin_vllm_generation.cfg, + generation_config=nemo_gym_vllm_generation.cfg, max_rollout_turns=None, ) actual_result = asdict(actual_result) @@ -872,6 +872,6 @@ def _standardize(d: dict) -> dict: """ If the result here does not match, please check the following: - 1. In nemo_rl/experience/rollouts.py::run_async_penguin_rollout, the sampling params are passed appropriately + 1. In nemo_rl/experience/rollouts.py::run_async_nemo_gym_rollout, the sampling params are passed appropriately 2. In nemo_rl/models/generation/vllm/vllm_worker_async.py::VllmAsyncGenerationWorker::_setup_vllm_server::create_chat_completion, the sampling params (like top_k) are set as appropriate """ diff --git a/uv.lock b/uv.lock index 8b7132562e..7e7052cc14 100644 --- a/uv.lock +++ b/uv.lock @@ -21,8 +21,8 @@ members = [ "megatron-bridge", "megatron-core", "nemo-automodel", + "nemo-gym", "nemo-rl", - "penguin", "template-project", ] overrides = [ @@ -3198,6 +3198,50 @@ test = [ { name = "pytest" }, ] +[[package]] +name = "nemo-gym" +source = { editable = "3rdparty/Gym-workspace" } +dependencies = [ + { name = "aiohttp" }, + { name = "devtools" }, + { name = "fastapi" }, + { name = "gradio" }, + { name = "hydra-core" }, + { name = "mlflow" }, + { name = "omegaconf" }, + { name = "openai" }, + { name = "psutil" }, + { name = "pydantic" }, + { name = "pydantic-core" }, + { name = "ray", extra = ["default"] }, + { name = "tdigest" }, + { name = "tqdm" }, + { name = "uvicorn" }, + { name = "uvloop" }, + { name = "yappi" }, +] + +[package.metadata] +requires-dist = [ + { name = "aiohttp" }, + { name = "devtools" }, + { name = "fastapi" }, + { name = "gradio" }, + { name = "hydra-core" }, + { name = "mlflow" }, + { name = "omegaconf" }, + { name = "openai", specifier = "<=2.6.1" }, + { name = "psutil" }, + { name = "pydantic" }, + { name = "pydantic-core" }, + { name = "ray", extras = ["default"] }, + { name = "tdigest", specifier = ">=0.5.2.2" }, + { name = "tqdm" }, + { name = "uvicorn" }, + { name = "uvloop" }, + { name = "yappi" }, +] + [[package]] name = "nemo-rl" source = { editable = "." } @@ -3254,8 +3298,8 @@ mcore = [ { name = "transformer-engine", extra = ["pytorch"] }, { name = "vllm" }, ] -penguin = [ - { name = "penguin" }, +nemo-gym = [ + { name = "nemo-gym" }, ] vllm = [ { name = "causal-conv1d" }, @@ -3331,6 +3375,7 @@ requires-dist = [ { name = "megatron-core", marker = "extra == 'mcore'", editable = "3rdparty/Megatron-LM-workspace" }, { name = "mlflow", specifier = ">=3.5.0,<3.6.0" }, { name = "nemo-automodel", marker = "extra == 'automodel'", editable = "3rdparty/Automodel-workspace/Automodel" }, + { name = "nemo-gym", marker = "extra == 'nemo-gym'", editable = "3rdparty/Gym-workspace" }, { name = "ninja" }, { name = "num2words", specifier = ">=0.5.14" }, { name = "num2words", marker = "extra == 'vllm'", specifier = ">=0.5.14" }, @@ -3339,7 +3384,6 @@ requires-dist = [ { name = "nvidia-nvshmem-cu12", marker = "(platform_machine == 'aarch64' and sys_platform == 'linux') or (platform_machine == 'x86_64' and sys_platform == 'linux')" }, { name = "nvtx" }, { name = "omegaconf" }, - { name = "penguin", marker = "extra == 'penguin'", editable = "3rdparty/Penguin-workspace" }, { name = "pillow", specifier = ">=11.3.0" }, { name = "plotly" }, { name = "pyzmq" }, @@ -3363,7 +3407,7 @@ requires-dist = [ { name = "vllm", marker = "extra == 'vllm'", specifier = "==0.11.0" }, { name = "wandb" }, ] -provides-extras = ["automodel", "vllm", "mcore", "penguin"] +provides-extras = ["automodel", "vllm", "mcore", "nemo-gym"] [package.metadata.requires-dev] build = [ @@ -4144,48 +4188,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/49/fe/a2da1627aa9cb6310b6034598363bd26ac301c4a99d21f415b1b2855891e/peft-0.17.1-py3-none-any.whl", hash = "sha256:3d129d64def3d74779c32a080d2567e5f7b674e77d546e3585138216d903f99e", size = 504896, upload-time = "2025-08-21T09:25:18.974Z" }, ] -[[package]] -name = "penguin" -source = { editable = "3rdparty/Penguin-workspace" } -dependencies = [ - { name = "aiohttp" }, - { name = "devtools" }, - { name = "fastapi" }, - { name = "gradio" }, - { name = "hydra-core" }, - { name = "mlflow" }, - { name = "omegaconf" }, - { name = "openai" }, - { name = "pydantic" }, - { name = "pydantic-core" }, - { name = "ray", extra = ["default"] }, - { name = "tdigest" }, - { name = "tqdm" }, - { name = "uvicorn" }, - { name = "uvloop" }, - { name = "yappi" }, -] - -[package.metadata] -requires-dist = [ - { name = "aiohttp" }, - { name = "devtools" }, - { name = "fastapi" }, - { name = "gradio" }, - { name = "hydra-core" }, - { name = "mlflow" }, - { name = "omegaconf" }, - { name = "openai", specifier = "<=2.6.1" }, - { name = "pydantic" }, - { name = "pydantic-core" }, - { name = "ray", extras = ["default"] }, - { name = "tdigest", specifier = ">=0.5.2.2" }, - { name = "tqdm" }, - { name = "uvicorn" }, - { name = "uvloop" }, - { name = "yappi" }, -] - [[package]] name = "pillow" version = "11.3.0"